From d4fe3bb1483ff1b61765d843aa6a0197bb51109e Mon Sep 17 00:00:00 2001
From: Hz_
Date: Tue, 9 Jun 2026 19:27:47 +0800
Subject: [PATCH] feat(go-api): Add GET dataset metadata summary API (#15843)

## What

Adds the RESTful dataset metadata summary endpoint:

`GET /api/v1/datasets/{dataset_id}/metadata/summary`

The endpoint supports optional document filtering through:

`?doc_ids=doc_id_1,doc_id_2`

Whitespace around each ID and empty entries (for example a trailing comma)
are ignored.
---
Note: the line structure and indentation of this copy were rebuilt from a
whitespace-collapsed paste following gofmt conventions. If context lines do
not match the target tree, apply with `git apply --ignore-whitespace`. The
index hashes below were not regenerated after the doc_ids cleanup.

 internal/handler/document.go      | 56 +++++++++++++++++++++++
 internal/handler/document_test.go | 76 ++++++++++++++++++++++++++++---
 internal/router/router.go         |  1 +
 3 files changed, 126 insertions(+), 7 deletions(-)

diff --git a/internal/handler/document.go b/internal/handler/document.go
index 87f5369cb1..dcbcbf5670 100644
--- a/internal/handler/document.go
+++ b/internal/handler/document.go
@@ -986,3 +986,59 @@ func (h *DocumentHandler) StopParseDocuments(c *gin.Context) {
 		"data":    result,
 	})
 }
+
+// MetadataSummaryByDataset handles
+// GET /api/v1/datasets/:dataset_id/metadata/summary.
+//
+// It resolves the caller with GetUser and verifies access to the dataset named
+// by the dataset_id path parameter before returning the metadata summary under
+// data.summary. The optional doc_ids query parameter is a comma-separated list
+// of document IDs; surrounding whitespace and empty entries are dropped, and a
+// nil slice is passed to the service when no usable ID remains.
+func (h *DocumentHandler) MetadataSummaryByDataset(c *gin.Context) {
+	user, errorCode, errorMessage := GetUser(c)
+	if errorCode != common.CodeSuccess {
+		jsonError(c, errorCode, errorMessage)
+		return
+	}
+
+	datasetID := c.Param("dataset_id")
+	if datasetID == "" {
+		c.JSON(http.StatusOK, gin.H{
+			"code":    common.CodeServerError,
+			"message": "dataset_id is required",
+		})
+		return
+	}
+	if !h.datasetService.Accessible(datasetID, user.ID) {
+		c.JSON(http.StatusOK, gin.H{
+			"code":    common.CodeServerError,
+			"message": "You don't own the dataset " + datasetID,
+		})
+		return
+	}
+
+	// Tolerate input such as "a, b,,c" or a trailing comma so that blank IDs
+	// never reach the service layer.
+	var docIDs []string
+	for _, id := range strings.Split(c.Query("doc_ids"), ",") {
+		if id = strings.TrimSpace(id); id != "" {
+			docIDs = append(docIDs, id)
+		}
+	}
+
+	summary, err := h.documentService.GetMetadataSummary(datasetID, docIDs)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{
+			"code":    common.CodeServerError,
+			"message": "Failed to get metadata summary: " + err.Error(),
+		})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{
+		"code":    common.CodeSuccess,
+		"message": "success",
+		"data":    gin.H{"summary": summary},
+	})
+}
diff --git a/internal/handler/document_test.go b/internal/handler/document_test.go
index ed59faf4cc..5ac370f6f5 100644
--- a/internal/handler/document_test.go
+++ b/internal/handler/document_test.go
@@ -24,8 +24,8 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/glebarez/sqlite"
 	"github.com/gin-gonic/gin"
+	"github.com/glebarez/sqlite"
 	"gorm.io/gorm"
 
 	"ragflow/internal/common"
@@ -36,10 +36,14 @@ import (
 
 // fakeDocumentService implements documentServiceIface for handler tests.
 type fakeDocumentService struct {
-	deleted    int
-	err        error
-	stopResult map[string]interface{}
-	stopErr    error
+	deleted         int
+	err             error
+	stopResult      map[string]interface{}
+	stopErr         error
+	metadataSummary map[string]interface{}
+	metadataErr     error
+	metadataKBID    string
+	metadataDocIDs  []string
 }
 
 func (f *fakeDocumentService) GetDocumentArtifact(filename string) (*service.ArtifactResponse, error) {
@@ -113,7 +117,9 @@ func (f *fakeDocumentService) GetDocumentsByAuthorID(authorID, page, pageSize in
 	return nil, 0, nil
 }
 func (f *fakeDocumentService) GetMetadataSummary(kbID string, docIDs []string) (map[string]interface{}, error) {
-	return nil, nil
+	f.metadataKBID = kbID
+	f.metadataDocIDs = docIDs
+	return f.metadataSummary, f.metadataErr
 }
 func (f *fakeDocumentService) SetDocumentMetadata(docID string, meta map[string]interface{}) error {
 	return nil
@@ -368,7 +374,7 @@ func setupHandlerAccessDB(t *testing.T) *gorm.DB {
 	db.Create(&entity.Knowledgebase{
 		ID: "ds-1", TenantID: "tenant-1", Name: "test-kb", EmbdID: "embd-1",
 		CreatedBy: "user-1", Permission: string(entity.TenantPermissionTeam),
-		Status:    sptr(string(entity.StatusValid)),
+		Status: sptr(string(entity.StatusValid)),
 	})
 
 	return db
@@ -477,6 +483,62 @@ func TestStopParseDocumentsHandler_NotAccessible(t *testing.T) {
 	}
 }
 
+// TestMetadataSummaryByDataset_Success checks that the handler forwards the
+// dataset ID and the cleaned doc_ids list (padding and the trailing comma are
+// dropped) to the service and returns the summary under data.summary.
+func TestMetadataSummaryByDataset_Success(t *testing.T) {
+	db := setupHandlerAccessDB(t)
+	orig := dao.DB
+	dao.DB = db
+	t.Cleanup(func() { dao.DB = orig })
+
+	gin.SetMode(gin.TestMode)
+
+	fake := &fakeDocumentService{
+		metadataSummary: map[string]interface{}{
+			"author": map[string]interface{}{
+				"type":   "string",
+				"values": []interface{}{
+					[]interface{}{"alice", 2},
+				},
+			},
+		},
+	}
+	h := &DocumentHandler{
+		documentService: fake,
+		datasetService:  service.NewDatasetService(),
+	}
+
+	c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/metadata/summary?doc_ids=doc-1,%20doc-2,", "")
+	c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
+
+	h.MetadataSummaryByDataset(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	if fake.metadataKBID != "ds-1" {
+		t.Fatalf("expected kbID ds-1, got %q", fake.metadataKBID)
+	}
+	if len(fake.metadataDocIDs) != 2 || fake.metadataDocIDs[0] != "doc-1" || fake.metadataDocIDs[1] != "doc-2" {
+		t.Fatalf("unexpected docIDs: %#v", fake.metadataDocIDs)
+	}
+
+	var resp map[string]interface{}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("failed to unmarshal response: %v", err)
+	}
+	if resp["code"] != float64(common.CodeSuccess) {
+		t.Fatalf("expected code 0, got %v: %v", resp["code"], resp)
+	}
+	data := resp["data"].(map[string]interface{})
+	summary := data["summary"].(map[string]interface{})
+	author := summary["author"].(map[string]interface{})
+	if author["type"] != "string" {
+		t.Fatalf("expected author type string, got %v", author["type"])
+	}
+}
+
 func TestGetDocumentArtifact_Success(t *testing.T) {
 	gin.SetMode(gin.TestMode)
 	h := &DocumentHandler{
diff --git a/internal/router/router.go b/internal/router/router.go
index dad4d8c3bb..9910b540d2 100644
--- a/internal/router/router.go
+++ b/internal/router/router.go
@@ -244,6 +244,7 @@ func (r *Router) Setup(engine *gin.Engine) {
 		datasets.DELETE("", r.datasetsHandler.DeleteDatasets)
 		datasets.POST("/search", r.datasetsHandler.SearchDatasets)
 		datasets.GET("/metadata/flattened", r.datasetsHandler.ListMetadataFlattened)
+		datasets.GET("/:dataset_id/metadata/summary", r.documentHandler.MetadataSummaryByDataset)
 
 		// Dataset ingestion logs
 		datasets.GET("/:dataset_id/ingestions/summary", r.datasetsHandler.GetIngestionSummary)