mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
feat[Go]: api datasets/<dataset_id>/documents/<document_id>/metadata/… (#15846)
### What problem does this PR solve?

As title

```
/api/v1/datasets/<dataset_id>/documents/<document_id>/metadata/config PUT
```

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
This commit is contained in:
@@ -146,6 +146,13 @@ func (dao *DocumentDAO) GetByIDs(ids []string) ([]*entity.Document, error) {
|
||||
return documents, nil
|
||||
}
|
||||
|
||||
// GetByDocumentIDAndDatasetID retrieves a document by document ID and dataset/KB ID.
|
||||
func (dao *DocumentDAO) GetByDocumentIDAndDatasetID(documentID, datasetID string) (*entity.Document, error) {
|
||||
var document entity.Document
|
||||
err := DB.Where("id = ? AND kb_id = ?", documentID, datasetID).First(&document).Error
|
||||
return &document, err
|
||||
}
|
||||
|
||||
// CountByTenantID counts documents by tenant ID
|
||||
func (dao *DocumentDAO) CountByTenantID(tenantID string) (int64, error) {
|
||||
var count int64
|
||||
|
||||
@@ -118,4 +118,24 @@ func TestDocumentGetByIDs_NoMatch(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDocumentGetByDocumentIDAndDatasetIDUsesKBID(t *testing.T) {
|
||||
db := setupDocumentTestDB(t)
|
||||
pushDocDB(t, db)
|
||||
|
||||
db.Create(&entity.Document{ID: "doc1", KbID: "kb1", Name: sp("Doc 1"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}})
|
||||
db.Create(&entity.Document{ID: "doc1-other", KbID: "kb2", Name: sp("Doc 2"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}})
|
||||
|
||||
doc, err := NewDocumentDAO().GetByDocumentIDAndDatasetID("doc1", "kb1")
|
||||
if err != nil {
|
||||
t.Fatalf("GetByDocumentIDAndDatasetID failed: %v", err)
|
||||
}
|
||||
if doc.ID != "doc1" || doc.KbID != "kb1" {
|
||||
t.Fatalf("unexpected document: id=%s kb_id=%s", doc.ID, doc.KbID)
|
||||
}
|
||||
|
||||
if _, err := NewDocumentDAO().GetByDocumentIDAndDatasetID("doc1", "kb2"); err == nil {
|
||||
t.Fatal("expected no match when document does not belong to dataset")
|
||||
}
|
||||
}
|
||||
|
||||
// sp returns a pointer to a copy of s; the tests use it to populate *string
// fields such as Document.Name.
func sp(s string) *string {
	value := s
	return &value
}
|
||||
|
||||
@@ -602,6 +602,48 @@ func (h *DatasetsHandler) ListMetadataFlattened(c *gin.Context) {
|
||||
jsonResponse(c, common.CodeSuccess, flattenedMeta, "success")
|
||||
}
|
||||
|
||||
func (h *DatasetsHandler) UpdateDocumentMetadataConfig(c *gin.Context) {
|
||||
user, errorCode, errorMessage := GetUser(c)
|
||||
if errorCode != common.CodeSuccess {
|
||||
jsonError(c, errorCode, errorMessage)
|
||||
return
|
||||
}
|
||||
|
||||
datasetID := strings.TrimSpace(c.Param("dataset_id"))
|
||||
if datasetID == "" {
|
||||
jsonError(c, common.CodeArgumentError, "dataset_id is required")
|
||||
return
|
||||
}
|
||||
documentID := strings.TrimSpace(c.Param("document_id"))
|
||||
if documentID == "" {
|
||||
jsonError(c, common.CodeArgumentError, "document_id is required")
|
||||
return
|
||||
}
|
||||
userID := strings.TrimSpace(user.ID)
|
||||
if userID == "" {
|
||||
jsonError(c, common.CodeArgumentError, "user_id is required")
|
||||
return
|
||||
}
|
||||
|
||||
var req map[string]interface{}
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
jsonError(c, common.CodeDataError, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
data, code, err := h.datasetsService.UpdateDocumentMetadataConfig(userID, datasetID, documentID, req)
|
||||
if err != nil {
|
||||
jsonError(c, code, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": code,
|
||||
"data": data,
|
||||
"message": "success",
|
||||
})
|
||||
}
|
||||
|
||||
// SearchDatasets searches chunks across datasets based on a question
|
||||
// @Summary Search Datasets
|
||||
// @Description Search for relevant chunks across one or more datasets based on a question
|
||||
|
||||
@@ -266,6 +266,7 @@ func (r *Router) Setup(engine *gin.Engine) {
|
||||
datasets.POST("/:dataset_id/documents/parse", r.documentHandler.ParseDocuments)
|
||||
datasets.POST("/:dataset_id/documents/stop", r.documentHandler.StopParseDocuments)
|
||||
datasets.DELETE("/:dataset_id/documents/:document_id/chunks", r.chunkHandler.RemoveChunks)
|
||||
datasets.PUT("/:dataset_id/documents/:document_id/metadata/config", r.datasetsHandler.UpdateDocumentMetadataConfig)
|
||||
}
|
||||
|
||||
// Search routes
|
||||
|
||||
@@ -107,6 +107,48 @@ func NewDatasetService() *DatasetService {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *DatasetService) UpdateDocumentMetadataConfig(userID, datasetID, documentID string, req map[string]interface{}) (*entity.Document, common.ErrorCode, error) {
|
||||
if _, err := s.kbDAO.GetByIDAndTenantID(datasetID, userID); err != nil {
|
||||
if dao.IsNotFoundErr(err) {
|
||||
return nil, common.CodeDataError, errors.New("You don't own the dataset.")
|
||||
}
|
||||
return nil, common.CodeServerError, errors.New("Database operation failed")
|
||||
}
|
||||
|
||||
doc, err := s.documentDAO.GetByDocumentIDAndDatasetID(documentID, datasetID)
|
||||
if err != nil {
|
||||
if dao.IsNotFoundErr(err) {
|
||||
return nil, common.CodeDataError, fmt.Errorf("Document %s not found in dataset %s", documentID, datasetID)
|
||||
}
|
||||
return nil, common.CodeServerError, err
|
||||
}
|
||||
|
||||
metadata, ok := req["metadata"]
|
||||
if !ok {
|
||||
return nil, common.CodeArgumentError, errors.New("metadata is required")
|
||||
}
|
||||
|
||||
parserConfig := doc.ParserConfig
|
||||
if parserConfig == nil {
|
||||
parserConfig = entity.JSONMap{}
|
||||
}
|
||||
parserConfig["metadata"] = metadata
|
||||
|
||||
if err := s.documentDAO.UpdateByID(doc.ID, map[string]interface{}{"parser_config": parserConfig}); err != nil {
|
||||
return nil, common.CodeExceptionError, err
|
||||
}
|
||||
|
||||
updatedDoc, err := s.documentDAO.GetByID(doc.ID)
|
||||
if err != nil {
|
||||
if dao.IsNotFoundErr(err) {
|
||||
return nil, common.CodeDataError, errors.New("Document not found!")
|
||||
}
|
||||
return nil, common.CodeExceptionError, err
|
||||
}
|
||||
|
||||
return updatedDoc, common.CodeSuccess, nil
|
||||
}
|
||||
|
||||
// SearchDatasetsRequest is the request structure for searching chunks across datasets.
|
||||
type SearchDatasetsRequest struct {
|
||||
DatasetIDs []string `json:"dataset_ids" binding:"required"`
|
||||
|
||||
158
internal/service/dataset_document_metadata_config_test.go
Normal file
158
internal/service/dataset_document_metadata_config_test.go
Normal file
@@ -0,0 +1,158 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package service
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"ragflow/internal/common"
|
||||
"ragflow/internal/dao"
|
||||
"ragflow/internal/entity"
|
||||
)
|
||||
|
||||
func testDatasetServiceForDocumentMetadataConfig(t *testing.T) *DatasetService {
|
||||
t.Helper()
|
||||
return &DatasetService{
|
||||
kbDAO: dao.NewKnowledgebaseDAO(),
|
||||
documentDAO: dao.NewDocumentDAO(),
|
||||
}
|
||||
}
|
||||
|
||||
func insertDatasetMetadataConfigKB(t *testing.T, datasetID, tenantID string) {
|
||||
t.Helper()
|
||||
kb := &entity.Knowledgebase{
|
||||
ID: datasetID,
|
||||
TenantID: tenantID,
|
||||
Name: "test-kb",
|
||||
EmbdID: "embedding@OpenAI",
|
||||
CreatedBy: tenantID,
|
||||
Permission: string(entity.TenantPermissionMe),
|
||||
ParserID: "naive",
|
||||
ParserConfig: entity.JSONMap{},
|
||||
Status: sptr("1"),
|
||||
}
|
||||
if err := dao.DB.Create(kb).Error; err != nil {
|
||||
t.Fatalf("insert test kb: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func insertDatasetMetadataConfigDoc(t *testing.T, docID, datasetID string, parserConfig entity.JSONMap) {
|
||||
t.Helper()
|
||||
doc := &entity.Document{
|
||||
ID: docID,
|
||||
KbID: datasetID,
|
||||
ParserID: "naive",
|
||||
ParserConfig: parserConfig,
|
||||
SourceType: "local",
|
||||
Type: "pdf",
|
||||
CreatedBy: "user-1",
|
||||
Suffix: ".pdf",
|
||||
Status: sptr("1"),
|
||||
}
|
||||
if err := dao.DB.Create(doc).Error; err != nil {
|
||||
t.Fatalf("insert test doc: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDatasetServiceUpdateDocumentMetadataConfig(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
insertDatasetMetadataConfigKB(t, "kb-1", "user-1")
|
||||
insertDatasetMetadataConfigDoc(t, "doc-1", "kb-1", entity.JSONMap{"pages": []interface{}{1, 2}})
|
||||
|
||||
metadata := map[string]interface{}{"author": "Alice", "year": float64(2026)}
|
||||
doc, code, err := testDatasetServiceForDocumentMetadataConfig(t).UpdateDocumentMetadataConfig(
|
||||
"user-1",
|
||||
"kb-1",
|
||||
"doc-1",
|
||||
map[string]interface{}{"metadata": metadata},
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("UpdateDocumentMetadataConfig failed: %v", err)
|
||||
}
|
||||
if code != common.CodeSuccess {
|
||||
t.Fatalf("expected success code, got %d", code)
|
||||
}
|
||||
if doc == nil {
|
||||
t.Fatal("expected updated document")
|
||||
}
|
||||
if doc.ParserConfig["pages"] == nil {
|
||||
t.Fatalf("existing parser_config fields should be preserved: %#v", doc.ParserConfig)
|
||||
}
|
||||
|
||||
updatedMetadata, ok := doc.ParserConfig["metadata"].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("expected metadata map, got %#v", doc.ParserConfig["metadata"])
|
||||
}
|
||||
if updatedMetadata["author"] != "Alice" || updatedMetadata["year"] != float64(2026) {
|
||||
t.Fatalf("unexpected metadata: %#v", updatedMetadata)
|
||||
}
|
||||
|
||||
persisted, err := dao.NewDocumentDAO().GetByID("doc-1")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to fetch persisted document: %v", err)
|
||||
}
|
||||
if persisted.ParserConfig["metadata"] == nil {
|
||||
t.Fatalf("metadata was not persisted: %#v", persisted.ParserConfig)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDatasetServiceUpdateDocumentMetadataConfigRequiresMetadata(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
insertDatasetMetadataConfigKB(t, "kb-1", "user-1")
|
||||
insertDatasetMetadataConfigDoc(t, "doc-1", "kb-1", entity.JSONMap{})
|
||||
|
||||
_, code, err := testDatasetServiceForDocumentMetadataConfig(t).UpdateDocumentMetadataConfig(
|
||||
"user-1",
|
||||
"kb-1",
|
||||
"doc-1",
|
||||
map[string]interface{}{},
|
||||
)
|
||||
if err == nil {
|
||||
t.Fatal("expected metadata required error")
|
||||
}
|
||||
if code != common.CodeArgumentError {
|
||||
t.Fatalf("expected argument error code, got %d", code)
|
||||
}
|
||||
if err.Error() != "metadata is required" {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDatasetServiceUpdateDocumentMetadataConfigRejectsNonOwner(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
insertDatasetMetadataConfigKB(t, "kb-1", "owner-1")
|
||||
insertDatasetMetadataConfigDoc(t, "doc-1", "kb-1", entity.JSONMap{})
|
||||
|
||||
_, code, err := testDatasetServiceForDocumentMetadataConfig(t).UpdateDocumentMetadataConfig(
|
||||
"user-1",
|
||||
"kb-1",
|
||||
"doc-1",
|
||||
map[string]interface{}{"metadata": map[string]interface{}{"author": "Alice"}},
|
||||
)
|
||||
if err == nil {
|
||||
t.Fatal("expected ownership error")
|
||||
}
|
||||
if code != common.CodeDataError {
|
||||
t.Fatalf("expected data error code, got %d", code)
|
||||
}
|
||||
if err.Error() != "You don't own the dataset." {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user