feat(go-api): Add GET dataset metadata summary API (#15843)

## What

Adds the RESTful dataset metadata summary endpoint:

`GET /api/v1/datasets/{dataset_id}/metadata/summary`

The endpoint supports optional document filtering through a comma-separated `doc_ids` query parameter:

`?doc_ids=doc_id_1,doc_id_2`
This commit is contained in:
Hz_
2026-06-09 19:27:47 +08:00
committed by GitHub
parent e050f1816e
commit d4fe3bb148
3 changed files with 111 additions and 7 deletions

View File

@@ -986,3 +986,47 @@ func (h *DocumentHandler) StopParseDocuments(c *gin.Context) {
"data": result,
})
}
// MetadataSummaryByDataset handles GET /api/v1/datasets/:dataset_id/metadata/summary.
//
// It resolves the current user with GetUser, verifies through
// datasetService.Accessible that the user may access the dataset named by the
// dataset_id path parameter, and responds with the result of
// documentService.GetMetadataSummary under data.summary.
//
// The optional doc_ids query parameter is a comma-separated list of document
// IDs forwarded to the service. Surrounding whitespace is trimmed and empty
// entries are dropped; when no usable ID remains a nil slice is passed.
func (h *DocumentHandler) MetadataSummaryByDataset(c *gin.Context) {
	user, errorCode, errorMessage := GetUser(c)
	if errorCode != common.CodeSuccess {
		jsonError(c, errorCode, errorMessage)
		return
	}

	datasetID := c.Param("dataset_id")
	if datasetID == "" {
		c.JSON(http.StatusOK, gin.H{
			"code":    common.CodeServerError,
			"message": "dataset_id is required",
		})
		return
	}

	if !h.datasetService.Accessible(datasetID, user.ID) {
		c.JSON(http.StatusOK, gin.H{
			"code":    common.CodeServerError,
			"message": "You don't own the dataset " + datasetID,
		})
		return
	}

	// Tolerate inputs such as "a, b", "a,,b" or a trailing comma so that blank
	// IDs never reach the service. strings.Split on an empty string yields a
	// single empty element, which is skipped and leaves docIDs nil.
	var docIDs []string
	for _, id := range strings.Split(c.Query("doc_ids"), ",") {
		if id = strings.TrimSpace(id); id != "" {
			docIDs = append(docIDs, id)
		}
	}

	summary, err := h.documentService.GetMetadataSummary(datasetID, docIDs)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"code": common.CodeServerError,
			// Separate the fixed prefix from the underlying error text.
			"message": "Failed to get metadata summary: " + err.Error(),
		})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"code":    common.CodeSuccess,
		"message": "success",
		"data":    gin.H{"summary": summary},
	})
}

View File

@@ -24,8 +24,8 @@ import (
"strings"
"testing"
"github.com/glebarez/sqlite"
"github.com/gin-gonic/gin"
"github.com/glebarez/sqlite"
"gorm.io/gorm"
"ragflow/internal/common"
@@ -36,10 +36,14 @@ import (
// fakeDocumentService implements documentServiceIface for handler tests.
type fakeDocumentService struct {
deleted int
err error
stopResult map[string]interface{}
stopErr error
deleted int
err error
stopResult map[string]interface{}
stopErr error
// metadataSummary is the summary value returned by GetMetadataSummary.
metadataSummary map[string]interface{}
// metadataErr is the error returned by GetMetadataSummary.
metadataErr error
// metadataKBID records the kbID argument passed to GetMetadataSummary.
metadataKBID string
// metadataDocIDs records the docIDs argument passed to GetMetadataSummary.
metadataDocIDs []string
}
func (f *fakeDocumentService) GetDocumentArtifact(filename string) (*service.ArtifactResponse, error) {
@@ -113,7 +117,9 @@ func (f *fakeDocumentService) GetDocumentsByAuthorID(authorID, page, pageSize in
return nil, 0, nil
}
// GetMetadataSummary is the fake's metadata summary lookup. The statements
// after the first return store kbID and docIDs on the fake and then return the
// configured metadataSummary and metadataErr.
// NOTE(review): this span looks like a rendered diff hunk in which
// `return nil, nil` is the removed line and the three statements after it are
// its replacement — confirm against the committed file.
func (f *fakeDocumentService) GetMetadataSummary(kbID string, docIDs []string) (map[string]interface{}, error) {
return nil, nil
f.metadataKBID = kbID
f.metadataDocIDs = docIDs
return f.metadataSummary, f.metadataErr
}
func (f *fakeDocumentService) SetDocumentMetadata(docID string, meta map[string]interface{}) error {
return nil
@@ -368,7 +374,7 @@ func setupHandlerAccessDB(t *testing.T) *gorm.DB {
db.Create(&entity.Knowledgebase{
ID: "ds-1", TenantID: "tenant-1", Name: "test-kb", EmbdID: "embd-1",
CreatedBy: "user-1", Permission: string(entity.TenantPermissionTeam),
Status: sptr(string(entity.StatusValid)),
Status: sptr(string(entity.StatusValid)),
})
return db
@@ -477,6 +483,59 @@ func TestStopParseDocumentsHandler_NotAccessible(t *testing.T) {
}
}
// TestMetadataSummaryByDataset_Success drives MetadataSummaryByDataset with a
// fake document service and checks that the handler forwards the dataset ID and
// the comma-separated doc_ids to the service, answers with HTTP 200 and the
// success code, and exposes the fake's summary under data.summary.
func TestMetadataSummaryByDataset_Success(t *testing.T) {
	// Swap the package-level DB for the test database and restore it afterwards.
	db := setupHandlerAccessDB(t)
	orig := dao.DB
	dao.DB = db
	t.Cleanup(func() { dao.DB = orig })
	gin.SetMode(gin.TestMode)

	fake := &fakeDocumentService{
		metadataSummary: map[string]interface{}{
			"author": map[string]interface{}{
				"type": "string",
				"values": []interface{}{
					[]interface{}{"alice", 2},
				},
			},
		},
	}
	h := &DocumentHandler{
		documentService: fake,
		datasetService:  service.NewDatasetService(),
	}

	c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/metadata/summary?doc_ids=doc-1,doc-2", "")
	c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
	h.MetadataSummaryByDataset(c)

	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if fake.metadataKBID != "ds-1" {
		t.Fatalf("expected kbID ds-1, got %q", fake.metadataKBID)
	}
	if len(fake.metadataDocIDs) != 2 || fake.metadataDocIDs[0] != "doc-1" || fake.metadataDocIDs[1] != "doc-2" {
		t.Fatalf("unexpected docIDs: %#v", fake.metadataDocIDs)
	}

	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("failed to unmarshal response: %v", err)
	}
	if resp["code"] != float64(common.CodeSuccess) {
		t.Fatalf("expected code 0, got %v: %v", resp["code"], resp)
	}

	// Use checked type assertions so an unexpected response shape fails the
	// test with a readable message instead of panicking.
	data, ok := resp["data"].(map[string]interface{})
	if !ok {
		t.Fatalf("expected data to be an object, got %#v", resp["data"])
	}
	summary, ok := data["summary"].(map[string]interface{})
	if !ok {
		t.Fatalf("expected data.summary to be an object, got %#v", data["summary"])
	}
	author, ok := summary["author"].(map[string]interface{})
	if !ok {
		t.Fatalf("expected summary.author to be an object, got %#v", summary["author"])
	}
	if author["type"] != "string" {
		t.Fatalf("expected author type string, got %v", author["type"])
	}
}
func TestGetDocumentArtifact_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{

View File

@@ -244,6 +244,7 @@ func (r *Router) Setup(engine *gin.Engine) {
datasets.DELETE("", r.datasetsHandler.DeleteDatasets)
datasets.POST("/search", r.datasetsHandler.SearchDatasets)
datasets.GET("/metadata/flattened", r.datasetsHandler.ListMetadataFlattened)
datasets.GET("/:dataset_id/metadata/summary", r.documentHandler.MetadataSummaryByDataset)
// Dataset ingestion logs
datasets.GET("/:dataset_id/ingestions/summary", r.datasetsHandler.GetIngestionSummary)