mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
feat(go-api): Add GET dataset metadata summary API (#15843)
## What
Adds the RESTful dataset metadata summary endpoint:
`GET /api/v1/datasets/{dataset_id}/metadata/summary`
The endpoint supports optional document filtering through:
`?doc_ids=doc_id_1,doc_id_2`
This commit is contained in:
@@ -986,3 +986,47 @@ func (h *DocumentHandler) StopParseDocuments(c *gin.Context) {
|
||||
"data": result,
|
||||
})
|
||||
}
|
||||
|
||||
func (h *DocumentHandler) MetadataSummaryByDataset(c *gin.Context) {
|
||||
user, errorCode, errorMessage := GetUser(c)
|
||||
if errorCode != common.CodeSuccess {
|
||||
jsonError(c, errorCode, errorMessage)
|
||||
return
|
||||
}
|
||||
|
||||
datasetID := c.Param("dataset_id")
|
||||
if datasetID == "" {
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": common.CodeServerError,
|
||||
"message": "dataset_id is required",
|
||||
})
|
||||
return
|
||||
}
|
||||
if !h.datasetService.Accessible(datasetID, user.ID) {
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": common.CodeServerError,
|
||||
"message": "You don't own the dataset " + datasetID,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
var docIDS []string
|
||||
if docIDsParam := c.Query("doc_ids"); docIDsParam != "" {
|
||||
docIDS = strings.Split(docIDsParam, ",")
|
||||
}
|
||||
|
||||
summary, err := h.documentService.GetMetadataSummary(datasetID, docIDS)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{
|
||||
"code": common.CodeServerError,
|
||||
"message": "Failed to get metadata summary" + err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": 0,
|
||||
"message": "success",
|
||||
"data": gin.H{"summary": summary},
|
||||
})
|
||||
}
|
||||
|
||||
@@ -24,8 +24,8 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/glebarez/sqlite"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/glebarez/sqlite"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"ragflow/internal/common"
|
||||
@@ -36,10 +36,14 @@ import (
|
||||
|
||||
// fakeDocumentService implements documentServiceIface for handler tests.
type fakeDocumentService struct {
	deleted    int
	err        error
	stopResult map[string]interface{}
	stopErr    error

	// Canned values returned by GetMetadataSummary.
	metadataSummary map[string]interface{}
	metadataErr     error

	// Arguments captured from the most recent GetMetadataSummary call.
	metadataKBID   string
	metadataDocIDs []string
}
|
||||
|
||||
func (f *fakeDocumentService) GetDocumentArtifact(filename string) (*service.ArtifactResponse, error) {
|
||||
@@ -113,7 +117,9 @@ func (f *fakeDocumentService) GetDocumentsByAuthorID(authorID, page, pageSize in
|
||||
return nil, 0, nil
|
||||
}
|
||||
func (f *fakeDocumentService) GetMetadataSummary(kbID string, docIDs []string) (map[string]interface{}, error) {
|
||||
return nil, nil
|
||||
f.metadataKBID = kbID
|
||||
f.metadataDocIDs = docIDs
|
||||
return f.metadataSummary, f.metadataErr
|
||||
}
|
||||
func (f *fakeDocumentService) SetDocumentMetadata(docID string, meta map[string]interface{}) error {
|
||||
return nil
|
||||
@@ -368,7 +374,7 @@ func setupHandlerAccessDB(t *testing.T) *gorm.DB {
|
||||
db.Create(&entity.Knowledgebase{
|
||||
ID: "ds-1", TenantID: "tenant-1", Name: "test-kb", EmbdID: "embd-1",
|
||||
CreatedBy: "user-1", Permission: string(entity.TenantPermissionTeam),
|
||||
Status: sptr(string(entity.StatusValid)),
|
||||
Status: sptr(string(entity.StatusValid)),
|
||||
})
|
||||
|
||||
return db
|
||||
@@ -477,6 +483,59 @@ func TestStopParseDocumentsHandler_NotAccessible(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetadataSummaryByDataset_Success(t *testing.T) {
|
||||
db := setupHandlerAccessDB(t)
|
||||
orig := dao.DB
|
||||
dao.DB = db
|
||||
t.Cleanup(func() { dao.DB = orig })
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
fake := &fakeDocumentService{
|
||||
metadataSummary: map[string]interface{}{
|
||||
"author": map[string]interface{}{
|
||||
"type": "string",
|
||||
"values": []interface{}{
|
||||
[]interface{}{"alice", 2},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
h := &DocumentHandler{
|
||||
documentService: fake,
|
||||
datasetService: service.NewDatasetService(),
|
||||
}
|
||||
|
||||
c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/metadata/summary?doc_ids=doc-1,doc-2", "")
|
||||
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
|
||||
|
||||
h.MetadataSummaryByDataset(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if fake.metadataKBID != "ds-1" {
|
||||
t.Fatalf("expected kbID ds-1, got %q", fake.metadataKBID)
|
||||
}
|
||||
if len(fake.metadataDocIDs) != 2 || fake.metadataDocIDs[0] != "doc-1" || fake.metadataDocIDs[1] != "doc-2" {
|
||||
t.Fatalf("unexpected docIDs: %#v", fake.metadataDocIDs)
|
||||
}
|
||||
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal response: %v", err)
|
||||
}
|
||||
if resp["code"] != float64(common.CodeSuccess) {
|
||||
t.Fatalf("expected code 0, got %v: %v", resp["code"], resp)
|
||||
}
|
||||
data := resp["data"].(map[string]interface{})
|
||||
summary := data["summary"].(map[string]interface{})
|
||||
author := summary["author"].(map[string]interface{})
|
||||
if author["type"] != "string" {
|
||||
t.Fatalf("expected author type string, got %v", author["type"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetDocumentArtifact_Success(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
h := &DocumentHandler{
|
||||
|
||||
@@ -244,6 +244,7 @@ func (r *Router) Setup(engine *gin.Engine) {
|
||||
datasets.DELETE("", r.datasetsHandler.DeleteDatasets)
|
||||
datasets.POST("/search", r.datasetsHandler.SearchDatasets)
|
||||
datasets.GET("/metadata/flattened", r.datasetsHandler.ListMetadataFlattened)
|
||||
datasets.GET("/:dataset_id/metadata/summary", r.documentHandler.MetadataSummaryByDataset)
|
||||
|
||||
// Dataset ingestion logs
|
||||
datasets.GET("/:dataset_id/ingestions/summary", r.datasetsHandler.GetIngestionSummary)
|
||||
|
||||
Reference in New Issue
Block a user