diff --git a/internal/dao/document.go b/internal/dao/document.go
index 6d69c0e559..5ac325f112 100644
--- a/internal/dao/document.go
+++ b/internal/dao/document.go
@@ -146,6 +146,20 @@ func (dao *DocumentDAO) GetByIDs(ids []string) ([]*entity.Document, error) {
 	return documents, nil
 }
 
+// GetByDocumentIDAndDatasetID retrieves the document whose id is documentID
+// and whose kb_id is datasetID.
+//
+// When the query fails, including when no row matches both IDs, it returns a
+// nil document together with the query error, so callers never receive a
+// zero-valued document alongside a non-nil error.
+func (dao *DocumentDAO) GetByDocumentIDAndDatasetID(documentID, datasetID string) (*entity.Document, error) {
+	var document entity.Document
+	if err := DB.Where("id = ? AND kb_id = ?", documentID, datasetID).First(&document).Error; err != nil {
+		return nil, err
+	}
+	return &document, nil
+}
+
 // CountByTenantID counts documents by tenant ID
 func (dao *DocumentDAO) CountByTenantID(tenantID string) (int64, error) {
 	var count int64
diff --git a/internal/dao/document_test.go b/internal/dao/document_test.go
index 2bec2b2358..5cef89baac 100644
--- a/internal/dao/document_test.go
+++ b/internal/dao/document_test.go
@@ -118,4 +118,30 @@ func TestDocumentGetByIDs_NoMatch(t *testing.T) {
 	}
 }
 
+// TestDocumentGetByDocumentIDAndDatasetIDUsesKBID checks that the lookup
+// matches on both id and kb_id and yields a nil document on a mismatch.
+func TestDocumentGetByDocumentIDAndDatasetIDUsesKBID(t *testing.T) {
+	db := setupDocumentTestDB(t)
+	pushDocDB(t, db)
+
+	db.Create(&entity.Document{ID: "doc1", KbID: "kb1", Name: sp("Doc 1"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}})
+	db.Create(&entity.Document{ID: "doc1-other", KbID: "kb2", Name: sp("Doc 2"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}})
+
+	doc, err := NewDocumentDAO().GetByDocumentIDAndDatasetID("doc1", "kb1")
+	if err != nil {
+		t.Fatalf("GetByDocumentIDAndDatasetID failed: %v", err)
+	}
+	if doc.ID != "doc1" || doc.KbID != "kb1" {
+		t.Fatalf("unexpected document: id=%s kb_id=%s", doc.ID, doc.KbID)
+	}
+
+	missing, err := NewDocumentDAO().GetByDocumentIDAndDatasetID("doc1", "kb2")
+	if err == nil {
+		t.Fatal("expected no match when document does not belong to dataset")
+	}
+	if missing != nil {
+		t.Fatalf("expected nil document on lookup failure, got %#v", missing)
+	}
+}
+
 func sp(s string) *string { return &s }
diff --git a/internal/handler/datasets.go b/internal/handler/datasets.go
index b1fd260eac..426b7c1318
100644
--- a/internal/handler/datasets.go
+++ b/internal/handler/datasets.go
@@ -602,6 +602,52 @@ func (h *DatasetsHandler) ListMetadataFlattened(c *gin.Context) {
 	jsonResponse(c, common.CodeSuccess, flattenedMeta, "success")
 }
 
+// UpdateDocumentMetadataConfig handles the request that replaces the
+// "metadata" entry of a document's parser configuration.
+//
+// It resolves the current user via GetUser, requires non-blank dataset_id and
+// document_id path parameters and a non-blank user ID, binds the JSON body
+// into a generic map and delegates to the datasets service. Failures are
+// reported with jsonError; on success the service result is returned with
+// jsonResponse, matching the other handlers in this file.
+func (h *DatasetsHandler) UpdateDocumentMetadataConfig(c *gin.Context) {
+	user, errorCode, errorMessage := GetUser(c)
+	if errorCode != common.CodeSuccess {
+		jsonError(c, errorCode, errorMessage)
+		return
+	}
+
+	datasetID := strings.TrimSpace(c.Param("dataset_id"))
+	if datasetID == "" {
+		jsonError(c, common.CodeArgumentError, "dataset_id is required")
+		return
+	}
+	documentID := strings.TrimSpace(c.Param("document_id"))
+	if documentID == "" {
+		jsonError(c, common.CodeArgumentError, "document_id is required")
+		return
+	}
+	userID := strings.TrimSpace(user.ID)
+	if userID == "" {
+		jsonError(c, common.CodeArgumentError, "user_id is required")
+		return
+	}
+
+	var req map[string]interface{}
+	if err := c.ShouldBindJSON(&req); err != nil {
+		jsonError(c, common.CodeDataError, err.Error())
+		return
+	}
+
+	data, code, err := h.datasetsService.UpdateDocumentMetadataConfig(userID, datasetID, documentID, req)
+	if err != nil {
+		jsonError(c, code, err.Error())
+		return
+	}
+
+	jsonResponse(c, code, data, "success")
+}
+
 // SearchDatasets searches chunks across datasets based on a question
 // @Summary Search Datasets
 // @Description Search for relevant chunks across one or more datasets based on a question
diff --git a/internal/router/router.go b/internal/router/router.go
index d8ac7f0bdd..c6b8ac87c8 100644
--- a/internal/router/router.go
+++ b/internal/router/router.go
@@ -266,6 +266,7 @@ func (r *Router) Setup(engine *gin.Engine) {
 			datasets.POST("/:dataset_id/documents/parse", r.documentHandler.ParseDocuments)
 			datasets.POST("/:dataset_id/documents/stop", r.documentHandler.StopParseDocuments)
 			datasets.DELETE("/:dataset_id/documents/:document_id/chunks", r.chunkHandler.RemoveChunks)
+
datasets.PUT("/:dataset_id/documents/:document_id/metadata/config", r.datasetsHandler.UpdateDocumentMetadataConfig)
 		}
 
 		// Search routes
diff --git a/internal/service/dataset.go b/internal/service/dataset.go
index a50f4c9a1c..c0a619e9cb 100644
--- a/internal/service/dataset.go
+++ b/internal/service/dataset.go
@@ -107,6 +107,65 @@ func NewDatasetService() *DatasetService {
 	}
 }
 
+// UpdateDocumentMetadataConfig stores req["metadata"] under the "metadata" key
+// of a document's parser_config and returns the reloaded document.
+//
+// The dataset is looked up with GetByIDAndTenantID(datasetID, userID) and the
+// document must belong to that dataset. Other parser_config keys are kept.
+//
+// Error codes: CodeDataError when the dataset, the document or the reloaded
+// document is reported as not found; CodeArgumentError when req has no
+// "metadata" key; CodeServerError for any other dataset or document lookup
+// failure; CodeExceptionError when the update or a non-not-found reload fails.
+func (s *DatasetService) UpdateDocumentMetadataConfig(userID, datasetID, documentID string, req map[string]interface{}) (*entity.Document, common.ErrorCode, error) {
+	// Ownership check: the dataset must exist for this user/tenant pair.
+	_, ownerErr := s.kbDAO.GetByIDAndTenantID(datasetID, userID)
+	switch {
+	case ownerErr == nil:
+		// Dataset found; continue.
+	case dao.IsNotFoundErr(ownerErr):
+		return nil, common.CodeDataError, errors.New("You don't own the dataset.")
+	default:
+		return nil, common.CodeServerError, errors.New("Database operation failed")
+	}
+
+	target, lookupErr := s.documentDAO.GetByDocumentIDAndDatasetID(documentID, datasetID)
+	if lookupErr != nil {
+		if !dao.IsNotFoundErr(lookupErr) {
+			return nil, common.CodeServerError, lookupErr
+		}
+		return nil, common.CodeDataError, fmt.Errorf("Document %s not found in dataset %s", documentID, datasetID)
+	}
+
+	newMetadata, present := req["metadata"]
+	if !present {
+		return nil, common.CodeArgumentError, errors.New("metadata is required")
+	}
+
+	// Reuse the stored parser_config so its other keys survive the update.
+	merged := target.ParserConfig
+	if merged == nil {
+		merged = entity.JSONMap{}
+	}
+	merged["metadata"] = newMetadata
+
+	changes := map[string]interface{}{"parser_config": merged}
+	if updateErr := s.documentDAO.UpdateByID(target.ID, changes); updateErr != nil {
+		return nil, common.CodeExceptionError, updateErr
+	}
+
+	// Reload so the caller receives the document as it is now stored.
+	refreshed, reloadErr := s.documentDAO.GetByID(target.ID)
+	switch {
+	case reloadErr == nil:
+		return refreshed, common.CodeSuccess, nil
+	case dao.IsNotFoundErr(reloadErr):
+		return nil, common.CodeDataError, errors.New("Document not found!")
+	default:
+		return nil, common.CodeExceptionError, reloadErr
+	}
+}
+
 // SearchDatasetsRequest is the request structure for searching chunks across datasets.
type SearchDatasetsRequest struct {
 	DatasetIDs []string `json:"dataset_ids" binding:"required"`
diff --git a/internal/service/dataset_document_metadata_config_test.go b/internal/service/dataset_document_metadata_config_test.go
new file mode 100644
index 0000000000..d0dd0a84ea
--- /dev/null
+++ b/internal/service/dataset_document_metadata_config_test.go
@@ -0,0 +1,158 @@
+//
+// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package service
+
+import (
+	"testing"
+
+	"ragflow/internal/common"
+	"ragflow/internal/dao"
+	"ragflow/internal/entity"
+)
+
+// testDatasetServiceForDocumentMetadataConfig returns a DatasetService that
+// has only its knowledgebase and document DAOs populated.
+func testDatasetServiceForDocumentMetadataConfig(t *testing.T) *DatasetService {
+	t.Helper()
+	svc := &DatasetService{}
+	svc.kbDAO = dao.NewKnowledgebaseDAO()
+	svc.documentDAO = dao.NewDocumentDAO()
+	return svc
+}
+
+// insertDatasetMetadataConfigKB inserts a knowledgebase row identified by
+// datasetID whose tenant and creator are tenantID; it fails the test on error.
+func insertDatasetMetadataConfigKB(t *testing.T, datasetID, tenantID string) {
+	t.Helper()
+	record := entity.Knowledgebase{
+		ID:           datasetID,
+		TenantID:     tenantID,
+		Name:         "test-kb",
+		EmbdID:       "embedding@OpenAI",
+		CreatedBy:    tenantID,
+		Permission:   string(entity.TenantPermissionMe),
+		ParserID:     "naive",
+		ParserConfig: entity.JSONMap{},
+		Status:       sptr("1"),
+	}
+	if err := dao.DB.Create(&record).Error; err != nil {
+		t.Fatalf("insert test kb: %v", err)
+	}
+}
+
+// insertDatasetMetadataConfigDoc inserts a document row identified by docID
+// into dataset datasetID with the given parser config; it fails the test on
+// error.
+func insertDatasetMetadataConfigDoc(t *testing.T, docID, datasetID string, parserConfig entity.JSONMap) {
+	t.Helper()
+	record := entity.Document{
+		ID:           docID,
+		KbID:         datasetID,
+		ParserID:     "naive",
+		ParserConfig: parserConfig,
+		SourceType:   "local",
+		Type:         "pdf",
+		CreatedBy:    "user-1",
+		Suffix:       ".pdf",
+		Status:       sptr("1"),
+	}
+	if err := dao.DB.Create(&record).Error; err != nil {
+		t.Fatalf("insert test doc: %v", err)
+	}
+}
+
+// TestDatasetServiceUpdateDocumentMetadataConfig checks that the metadata is
+// written to parser_config, that existing parser_config keys are preserved,
+// and that the change is persisted.
+func TestDatasetServiceUpdateDocumentMetadataConfig(t *testing.T) {
+	pushServiceDB(t, setupServiceTestDB(t))
+	insertDatasetMetadataConfigKB(t, "kb-1", "user-1")
+	insertDatasetMetadataConfigDoc(t, "doc-1", "kb-1", entity.JSONMap{"pages": []interface{}{1, 2}})
+
+	svc := testDatasetServiceForDocumentMetadataConfig(t)
+	req := map[string]interface{}{
+		"metadata": map[string]interface{}{"author": "Alice", "year": float64(2026)},
+	}
+	got, code, err := svc.UpdateDocumentMetadataConfig("user-1", "kb-1", "doc-1", req)
+	switch {
+	case err != nil:
+		t.Fatalf("UpdateDocumentMetadataConfig failed: %v", err)
+	case code != common.CodeSuccess:
+		t.Fatalf("expected success code, got %d", code)
+	case got == nil:
+		t.Fatal("expected updated document")
+	case got.ParserConfig["pages"] == nil:
+		t.Fatalf("existing parser_config fields should be preserved: %#v", got.ParserConfig)
+	}
+
+	stored, isMap := got.ParserConfig["metadata"].(map[string]interface{})
+	if !isMap {
+		t.Fatalf("expected metadata map, got %#v", got.ParserConfig["metadata"])
+	}
+	if stored["author"] != "Alice" || stored["year"] != float64(2026) {
+		t.Fatalf("unexpected metadata: %#v", stored)
+	}
+
+	persisted, err := dao.NewDocumentDAO().GetByID("doc-1")
+	if err != nil {
+		t.Fatalf("failed to fetch persisted document: %v", err)
+	}
+	if persisted.ParserConfig["metadata"] == nil {
+		t.Fatalf("metadata was not persisted: %#v", persisted.ParserConfig)
+	}
+}
+
+// TestDatasetServiceUpdateDocumentMetadataConfigRequiresMetadata checks that a
+// request without a "metadata" key is rejected with CodeArgumentError.
+func TestDatasetServiceUpdateDocumentMetadataConfigRequiresMetadata(t *testing.T) {
+	pushServiceDB(t, setupServiceTestDB(t))
+	insertDatasetMetadataConfigKB(t, "kb-1", "user-1")
+	insertDatasetMetadataConfigDoc(t, "doc-1", "kb-1", entity.JSONMap{})
+
+	svc := testDatasetServiceForDocumentMetadataConfig(t)
+	_, code, err := svc.UpdateDocumentMetadataConfig("user-1", "kb-1", "doc-1", map[string]interface{}{})
+	if err == nil {
+		t.Fatal("expected metadata required error")
+	}
+	if code != common.CodeArgumentError {
+		t.Fatalf("expected argument error code, got %d", code)
+	}
+	if msg := err.Error(); msg != "metadata is required" {
+		t.Fatalf("unexpected error: %v", err)
+	}
+}
+
+// TestDatasetServiceUpdateDocumentMetadataConfigRejectsNonOwner checks that a
+// user who is not the dataset's tenant gets CodeDataError.
+func TestDatasetServiceUpdateDocumentMetadataConfigRejectsNonOwner(t *testing.T) {
+	pushServiceDB(t, setupServiceTestDB(t))
+	insertDatasetMetadataConfigKB(t, "kb-1", "owner-1")
+	insertDatasetMetadataConfigDoc(t, "doc-1", "kb-1", entity.JSONMap{})
+
+	svc := testDatasetServiceForDocumentMetadataConfig(t)
+	req := map[string]interface{}{"metadata": map[string]interface{}{"author": "Alice"}}
+	_, code, err := svc.UpdateDocumentMetadataConfig("user-1", "kb-1", "doc-1", req)
+	if err == nil {
+		t.Fatal("expected ownership error")
+	}
+	if code != common.CodeDataError {
+		t.Fatalf("expected data error code, got %d", code)
+	}
+	if msg := err.Error(); msg != "You don't own the dataset." {
+		t.Fatalf("unexpected error: %v", err)
+	}
+}