feat[Go]: api datasets/<dataset_id>/documents/<document_id>/metadata/config (#15846)

### What problem does this PR solve?

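As the title says: this PR adds the Go implementation of the endpoint that updates a document's metadata config.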

```
/api/v1/datasets/<dataset_id>/documents/<document_id>/metadata/config PUT
```

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
This commit is contained in:
Haruko386
2026-06-10 09:57:11 +08:00
committed by GitHub
parent a396b1ace2
commit d56aeb2f5d
6 changed files with 270 additions and 0 deletions

View File

@@ -146,6 +146,13 @@ func (dao *DocumentDAO) GetByIDs(ids []string) ([]*entity.Document, error) {
return documents, nil
}
// GetByDocumentIDAndDatasetID retrieves the document whose id column equals
// documentID and whose kb_id column equals datasetID.
//
// On any query error (including the case where no row matches both columns)
// it returns (nil, err), so callers never receive a pointer to a zero-valued
// document together with a non-nil error.
func (dao *DocumentDAO) GetByDocumentIDAndDatasetID(documentID, datasetID string) (*entity.Document, error) {
	var document entity.Document
	if err := DB.Where("id = ? AND kb_id = ?", documentID, datasetID).First(&document).Error; err != nil {
		return nil, err
	}
	return &document, nil
}
// CountByTenantID counts documents by tenant ID
func (dao *DocumentDAO) CountByTenantID(tenantID string) (int64, error) {
var count int64

View File

@@ -118,4 +118,24 @@ func TestDocumentGetByIDs_NoMatch(t *testing.T) {
}
}
// TestDocumentGetByDocumentIDAndDatasetIDUsesKBID verifies that
// GetByDocumentIDAndDatasetID filters on both the document ID and the kb_id
// column: the document is found under its own dataset and is not found when
// the same document ID is paired with a different dataset.
func TestDocumentGetByDocumentIDAndDatasetIDUsesKBID(t *testing.T) {
	db := setupDocumentTestDB(t)
	pushDocDB(t, db)

	fixtures := []*entity.Document{
		{ID: "doc1", KbID: "kb1", Name: sp("Doc 1"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}},
		{ID: "doc1-other", KbID: "kb2", Name: sp("Doc 2"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}},
	}
	for _, fixture := range fixtures {
		// Fail fast on a broken fixture instead of letting a silent insert
		// failure surface later as a confusing lookup error.
		if err := db.Create(fixture).Error; err != nil {
			t.Fatalf("insert test document %s: %v", fixture.ID, err)
		}
	}

	doc, err := NewDocumentDAO().GetByDocumentIDAndDatasetID("doc1", "kb1")
	if err != nil {
		t.Fatalf("GetByDocumentIDAndDatasetID failed: %v", err)
	}
	if doc.ID != "doc1" || doc.KbID != "kb1" {
		t.Fatalf("unexpected document: id=%s kb_id=%s", doc.ID, doc.KbID)
	}

	// "doc1" exists, but only in kb1; pairing it with kb2 must not match.
	if _, err := NewDocumentDAO().GetByDocumentIDAndDatasetID("doc1", "kb2"); err == nil {
		t.Fatal("expected no match when document does not belong to dataset")
	}
}
// sp returns a pointer to a freshly allocated string holding s. The tests use
// it to fill *string fields from string literals.
func sp(s string) *string {
	p := new(string)
	*p = s
	return p
}

View File

@@ -602,6 +602,48 @@ func (h *DatasetsHandler) ListMetadataFlattened(c *gin.Context) {
jsonResponse(c, common.CodeSuccess, flattenedMeta, "success")
}
// UpdateDocumentMetadataConfig handles the PUT request registered at
// /:dataset_id/documents/:document_id/metadata/config.
//
// Checks run in this order, and the first failure is reported via jsonError:
//  1. GetUser must return common.CodeSuccess.
//  2. The dataset_id path parameter must be non-blank after trimming.
//  3. The document_id path parameter must be non-blank after trimming.
//  4. The user's ID must be non-blank after trimming.
//  5. The request body must bind as a JSON object.
//
// The trimmed values and the decoded body are then handed to the dataset
// service. On success the handler writes HTTP 200 with a JSON object holding
// the service's code, the returned data and the message "success".
func (h *DatasetsHandler) UpdateDocumentMetadataConfig(c *gin.Context) {
	currentUser, authCode, authMessage := GetUser(c)
	if authCode != common.CodeSuccess {
		jsonError(c, authCode, authMessage)
		return
	}

	dsID := strings.TrimSpace(c.Param("dataset_id"))
	if len(dsID) == 0 {
		jsonError(c, common.CodeArgumentError, "dataset_id is required")
		return
	}

	docID := strings.TrimSpace(c.Param("document_id"))
	if len(docID) == 0 {
		jsonError(c, common.CodeArgumentError, "document_id is required")
		return
	}

	callerID := strings.TrimSpace(currentUser.ID)
	if len(callerID) == 0 {
		jsonError(c, common.CodeArgumentError, "user_id is required")
		return
	}

	// The body is kept as a generic map; the service picks out the keys it needs.
	var payload map[string]interface{}
	bindErr := c.ShouldBindJSON(&payload)
	if bindErr != nil {
		jsonError(c, common.CodeDataError, bindErr.Error())
		return
	}

	updated, status, svcErr := h.datasetsService.UpdateDocumentMetadataConfig(callerID, dsID, docID, payload)
	if svcErr != nil {
		jsonError(c, status, svcErr.Error())
		return
	}

	reply := gin.H{
		"code":    status,
		"data":    updated,
		"message": "success",
	}
	c.JSON(http.StatusOK, reply)
}
// SearchDatasets searches chunks across datasets based on a question
// @Summary Search Datasets
// @Description Search for relevant chunks across one or more datasets based on a question

View File

@@ -266,6 +266,7 @@ func (r *Router) Setup(engine *gin.Engine) {
datasets.POST("/:dataset_id/documents/parse", r.documentHandler.ParseDocuments)
datasets.POST("/:dataset_id/documents/stop", r.documentHandler.StopParseDocuments)
datasets.DELETE("/:dataset_id/documents/:document_id/chunks", r.chunkHandler.RemoveChunks)
datasets.PUT("/:dataset_id/documents/:document_id/metadata/config", r.datasetsHandler.UpdateDocumentMetadataConfig)
}
// Search routes

View File

@@ -107,6 +107,48 @@ func NewDatasetService() *DatasetService {
}
}
// UpdateDocumentMetadataConfig stores req["metadata"] under the "metadata"
// key of a document's parser_config and returns the document as re-read from
// the DAO after the update.
//
// Steps, in order:
//  1. The dataset is looked up by (datasetID, userID), with userID passed as
//     the tenant ID. A not-found result yields CodeDataError; any other
//     failure yields CodeServerError with a generic message.
//  2. The document is looked up by (documentID, datasetID). Not found yields
//     CodeDataError; any other failure yields CodeServerError.
//  3. req must contain a "metadata" key, otherwise CodeArgumentError. The
//     value itself is stored as-is, whatever its type.
//  4. The existing parser_config (or an empty one when nil) gets its
//     "metadata" entry replaced; other entries are kept. A failed update
//     yields CodeExceptionError.
//  5. The document is fetched again by ID and returned with CodeSuccess.
func (s *DatasetService) UpdateDocumentMetadataConfig(userID, datasetID, documentID string, req map[string]interface{}) (*entity.Document, common.ErrorCode, error) {
	_, kbErr := s.kbDAO.GetByIDAndTenantID(datasetID, userID)
	switch {
	case kbErr == nil:
		// Dataset found for this user; carry on.
	case dao.IsNotFoundErr(kbErr):
		return nil, common.CodeDataError, errors.New("You don't own the dataset.")
	default:
		return nil, common.CodeServerError, errors.New("Database operation failed")
	}

	target, lookupErr := s.documentDAO.GetByDocumentIDAndDatasetID(documentID, datasetID)
	switch {
	case lookupErr == nil:
		// Document belongs to the dataset; carry on.
	case dao.IsNotFoundErr(lookupErr):
		return nil, common.CodeDataError, fmt.Errorf("Document %s not found in dataset %s", documentID, datasetID)
	default:
		return nil, common.CodeServerError, lookupErr
	}

	newMetadata, present := req["metadata"]
	if !present {
		return nil, common.CodeArgumentError, errors.New("metadata is required")
	}

	// Reuse the stored config so existing keys survive; only "metadata" is replaced.
	mergedConfig := target.ParserConfig
	if mergedConfig == nil {
		mergedConfig = entity.JSONMap{}
	}
	mergedConfig["metadata"] = newMetadata

	changes := map[string]interface{}{"parser_config": mergedConfig}
	if updateErr := s.documentDAO.UpdateByID(target.ID, changes); updateErr != nil {
		return nil, common.CodeExceptionError, updateErr
	}

	refreshed, reloadErr := s.documentDAO.GetByID(target.ID)
	if reloadErr == nil {
		return refreshed, common.CodeSuccess, nil
	}
	if dao.IsNotFoundErr(reloadErr) {
		return nil, common.CodeDataError, errors.New("Document not found!")
	}
	return nil, common.CodeExceptionError, reloadErr
}
// SearchDatasetsRequest is the request structure for searching chunks across datasets.
type SearchDatasetsRequest struct {
DatasetIDs []string `json:"dataset_ids" binding:"required"`

View File

@@ -0,0 +1,158 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package service
import (
"testing"
"ragflow/internal/common"
"ragflow/internal/dao"
"ragflow/internal/entity"
)
// testDatasetServiceForDocumentMetadataConfig builds a DatasetService with
// only its knowledgebase and document DAOs set, which are the two fields
// these tests populate.
func testDatasetServiceForDocumentMetadataConfig(t *testing.T) *DatasetService {
	t.Helper()

	svc := new(DatasetService)
	svc.kbDAO = dao.NewKnowledgebaseDAO()
	svc.documentDAO = dao.NewDocumentDAO()
	return svc
}
// insertDatasetMetadataConfigKB inserts a knowledgebase row with the given ID
// and tenant through dao.DB and fails the test immediately if the insert
// errors. tenantID is also used as the row's CreatedBy value.
func insertDatasetMetadataConfigKB(t *testing.T, datasetID, tenantID string) {
	t.Helper()

	record := entity.Knowledgebase{
		ID:           datasetID,
		TenantID:     tenantID,
		Name:         "test-kb",
		EmbdID:       "embedding@OpenAI",
		CreatedBy:    tenantID,
		Permission:   string(entity.TenantPermissionMe),
		ParserID:     "naive",
		ParserConfig: entity.JSONMap{},
		Status:       sptr("1"),
	}

	insertErr := dao.DB.Create(&record).Error
	if insertErr != nil {
		t.Fatalf("insert test kb: %v", insertErr)
	}
}
// insertDatasetMetadataConfigDoc inserts a document row with the given ID,
// dataset ID and parser config through dao.DB and fails the test immediately
// if the insert errors. The remaining fields are fixed values; note that
// CreatedBy is always "user-1" regardless of who owns the dataset.
func insertDatasetMetadataConfigDoc(t *testing.T, docID, datasetID string, parserConfig entity.JSONMap) {
	t.Helper()

	record := entity.Document{
		ID:           docID,
		KbID:         datasetID,
		ParserID:     "naive",
		ParserConfig: parserConfig,
		SourceType:   "local",
		Type:         "pdf",
		CreatedBy:    "user-1",
		Suffix:       ".pdf",
		Status:       sptr("1"),
	}

	insertErr := dao.DB.Create(&record).Error
	if insertErr != nil {
		t.Fatalf("insert test doc: %v", insertErr)
	}
}
func TestDatasetServiceUpdateDocumentMetadataConfig(t *testing.T) {
db := setupServiceTestDB(t)
pushServiceDB(t, db)
insertDatasetMetadataConfigKB(t, "kb-1", "user-1")
insertDatasetMetadataConfigDoc(t, "doc-1", "kb-1", entity.JSONMap{"pages": []interface{}{1, 2}})
metadata := map[string]interface{}{"author": "Alice", "year": float64(2026)}
doc, code, err := testDatasetServiceForDocumentMetadataConfig(t).UpdateDocumentMetadataConfig(
"user-1",
"kb-1",
"doc-1",
map[string]interface{}{"metadata": metadata},
)
if err != nil {
t.Fatalf("UpdateDocumentMetadataConfig failed: %v", err)
}
if code != common.CodeSuccess {
t.Fatalf("expected success code, got %d", code)
}
if doc == nil {
t.Fatal("expected updated document")
}
if doc.ParserConfig["pages"] == nil {
t.Fatalf("existing parser_config fields should be preserved: %#v", doc.ParserConfig)
}
updatedMetadata, ok := doc.ParserConfig["metadata"].(map[string]interface{})
if !ok {
t.Fatalf("expected metadata map, got %#v", doc.ParserConfig["metadata"])
}
if updatedMetadata["author"] != "Alice" || updatedMetadata["year"] != float64(2026) {
t.Fatalf("unexpected metadata: %#v", updatedMetadata)
}
persisted, err := dao.NewDocumentDAO().GetByID("doc-1")
if err != nil {
t.Fatalf("failed to fetch persisted document: %v", err)
}
if persisted.ParserConfig["metadata"] == nil {
t.Fatalf("metadata was not persisted: %#v", persisted.ParserConfig)
}
}
func TestDatasetServiceUpdateDocumentMetadataConfigRequiresMetadata(t *testing.T) {
db := setupServiceTestDB(t)
pushServiceDB(t, db)
insertDatasetMetadataConfigKB(t, "kb-1", "user-1")
insertDatasetMetadataConfigDoc(t, "doc-1", "kb-1", entity.JSONMap{})
_, code, err := testDatasetServiceForDocumentMetadataConfig(t).UpdateDocumentMetadataConfig(
"user-1",
"kb-1",
"doc-1",
map[string]interface{}{},
)
if err == nil {
t.Fatal("expected metadata required error")
}
if code != common.CodeArgumentError {
t.Fatalf("expected argument error code, got %d", code)
}
if err.Error() != "metadata is required" {
t.Fatalf("unexpected error: %v", err)
}
}
func TestDatasetServiceUpdateDocumentMetadataConfigRejectsNonOwner(t *testing.T) {
db := setupServiceTestDB(t)
pushServiceDB(t, db)
insertDatasetMetadataConfigKB(t, "kb-1", "owner-1")
insertDatasetMetadataConfigDoc(t, "doc-1", "kb-1", entity.JSONMap{})
_, code, err := testDatasetServiceForDocumentMetadataConfig(t).UpdateDocumentMetadataConfig(
"user-1",
"kb-1",
"doc-1",
map[string]interface{}{"metadata": map[string]interface{}{"author": "Alice"}},
)
if err == nil {
t.Fatal("expected ownership error")
}
if code != common.CodeDataError {
t.Fatalf("expected data error code, got %d", code)
}
if err.Error() != "You don't own the dataset." {
t.Fatalf("unexpected error: %v", err)
}
}