mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
feat(go-api): Align document metadata batch APIs and upload_info with Python (#16269)
## Summary
Align the Go implementations of these APIs with the Python behavior:
- `POST /api/v1/datasets/:dataset_id/metadata/update`
- `PATCH /api/v1/datasets/:dataset_id/documents/metadatas`
- `POST /api/v1/documents/upload`
## What changed
- Added the Go routes and handlers for the 3 APIs.
- Aligned batch document metadata updates with Python semantics:
  - support `match` in update items
  - support list append / replace behavior
  - support deleting specific list values
  - remove metadata entirely when it becomes empty
  - create metadata for documents that previously had none when updates apply
  - count `updated` only when a document actually changes
- Aligned `documents/upload` file uploads with Python-style `upload_info` behavior:
  - store upload-info blobs in the per-user downloads bucket
  - return lightweight upload descriptors instead of normal file-management responses
- Improved URL upload behavior:
  - SSRF-guarded fetch with redirect validation
  - redirect limit aligned to Python behavior
  - normalize filename and MIME type
  - add `.pdf` when the fetched content is PDF
  - normalize HTML content into readable text instead of storing raw HTML shells
## Validation
### Unit tests
Passed:
- `go test ./internal/service`
- `go test ./internal/handler`
Also verified targeted cases for:
- batch metadata update semantics
- upload_info URL handling
- upload_info download bucket behavior
### curl checks
Verified the new Go endpoints with `curl` and compared the response
shape and behavior with Python for:
- `POST /api/v1/datasets/{dataset_id}/metadata/update`
- `PATCH /api/v1/datasets/{dataset_id}/documents/metadatas`
- `POST /api/v1/documents/upload`
The Go responses were checked against Python for:
- argument validation
- success response shape
- metadata update results
- upload_info result structure
- file vs URL input handling
This commit is contained in:
@@ -21,6 +21,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"mime"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"ragflow/internal/common"
|
||||
@@ -60,6 +61,9 @@ type documentServiceIface interface {
|
||||
GetDocumentPreview(docID string) (*service.DocumentPreview, error)
|
||||
DownloadDocument(datasetID, docID string) (*service.DownloadDocumentResp, error)
|
||||
UpdateDatasetDocument(userID, datasetID, documentID string, req *service.UpdateDatasetDocumentRequest, present map[string]bool) (*service.UpdateDatasetDocumentResponse, common.ErrorCode, error)
|
||||
BatchUpdateDocumentMetadatas(datasetID string, selector *service.DocumentMetadataSelector, updates []service.DocumentMetadataUpdate, deletes []service.DocumentMetadataDelete) (*service.BatchUpdateDocumentMetadatasResponse, common.ErrorCode, error)
|
||||
UploadDocumentInfos(userID string, files []*multipart.FileHeader) ([]map[string]interface{}, common.ErrorCode, error)
|
||||
UploadDocumentInfoByURL(userID, rawURL string) (map[string]interface{}, common.ErrorCode, error)
|
||||
ListIngestionTasks(userID string, datasetID *string, page, pageSize int) ([]*entity.IngestionTask, error)
|
||||
IngestDocuments(datasetID, userID string, docIDs []string) ([]*service.ParseDocumentResponse, error)
|
||||
StopIngestionTasks(tasks []string, userID string) ([]*entity.IngestionTask, error)
|
||||
@@ -1296,3 +1300,123 @@ func (h *DocumentHandler) UpdateDatasetDocument(c *gin.Context) {
|
||||
"data": data,
|
||||
})
|
||||
}
|
||||
|
||||
func (h *DocumentHandler) UploadInfo(c *gin.Context) {
|
||||
user, errorCode, errorMessage := GetUser(c)
|
||||
if errorCode != common.CodeSuccess {
|
||||
jsonError(c, errorCode, errorMessage)
|
||||
return
|
||||
}
|
||||
|
||||
form, err := c.MultipartForm()
|
||||
if err != nil && !strings.Contains(err.Error(), "request Content-Type isn't multipart/form-data") {
|
||||
jsonError(c, common.CodeArgumentError, "Failed to parse multipart form: "+err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
var fileHeaders []*multipart.FileHeader
|
||||
if form != nil && form.File != nil {
|
||||
fileHeaders = form.File["file"]
|
||||
}
|
||||
rawURL := strings.TrimSpace(c.Query("url"))
|
||||
|
||||
if len(fileHeaders) > 0 && rawURL != "" {
|
||||
jsonError(c, common.CodeArgumentError, "Provide either multipart file(s) or ?url=..., not both.")
|
||||
return
|
||||
}
|
||||
if len(fileHeaders) == 0 && rawURL == "" {
|
||||
jsonError(c, common.CodeArgumentError, "Missing input: provide multipart file(s) or url")
|
||||
return
|
||||
}
|
||||
|
||||
if rawURL != "" {
|
||||
data, code, err := h.documentService.UploadDocumentInfoByURL(user.ID, rawURL)
|
||||
if err != nil {
|
||||
jsonError(c, code, err.Error())
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": common.CodeSuccess,
|
||||
"data": data,
|
||||
"message": "success",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
data, code, err := h.documentService.UploadDocumentInfos(user.ID, fileHeaders)
|
||||
if err != nil {
|
||||
jsonError(c, code, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
var payload interface{}
|
||||
if len(data) == 1 {
|
||||
payload = data[0]
|
||||
} else {
|
||||
payload = data
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": common.CodeSuccess,
|
||||
"data": payload,
|
||||
"message": "success",
|
||||
})
|
||||
}
|
||||
|
||||
type documentMetadataBatchRequest struct {
|
||||
Selector *service.DocumentMetadataSelector `json:"selector"`
|
||||
Updates []service.DocumentMetadataUpdate `json:"updates"`
|
||||
Deletes []service.DocumentMetadataDelete `json:"deletes"`
|
||||
}
|
||||
|
||||
func (h *DocumentHandler) MetadataBatchUpdate(c *gin.Context) {
|
||||
h.handleBatchUpdateDocumentMetadatas(c)
|
||||
}
|
||||
|
||||
func (h *DocumentHandler) UpdateDocumentMetadatas(c *gin.Context) {
|
||||
h.handleBatchUpdateDocumentMetadatas(c)
|
||||
}
|
||||
|
||||
func (h *DocumentHandler) handleBatchUpdateDocumentMetadatas(c *gin.Context) {
|
||||
user, errorCode, errorMessage := GetUser(c)
|
||||
if errorCode != common.CodeSuccess {
|
||||
jsonError(c, errorCode, errorMessage)
|
||||
return
|
||||
}
|
||||
|
||||
datasetID := strings.TrimSpace(c.Param("dataset_id"))
|
||||
if datasetID == "" {
|
||||
jsonError(c, common.CodeArgumentError, "dataset_id is required")
|
||||
return
|
||||
}
|
||||
if !h.datasetService.Accessible(datasetID, user.ID) {
|
||||
jsonError(c, common.CodeDataError, "You don't own the dataset "+datasetID+".")
|
||||
return
|
||||
}
|
||||
|
||||
var req documentMetadataBatchRequest
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
jsonError(c, common.CodeDataError, err.Error())
|
||||
return
|
||||
}
|
||||
if req.Selector == nil {
|
||||
req.Selector = &service.DocumentMetadataSelector{}
|
||||
}
|
||||
if req.Updates == nil {
|
||||
req.Updates = []service.DocumentMetadataUpdate{}
|
||||
}
|
||||
if req.Deletes == nil {
|
||||
req.Deletes = []service.DocumentMetadataDelete{}
|
||||
}
|
||||
|
||||
resp, code, err := h.documentService.BatchUpdateDocumentMetadatas(datasetID, req.Selector, req.Updates, req.Deletes)
|
||||
if err != nil {
|
||||
jsonError(c, code, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": common.CodeSuccess,
|
||||
"data": resp,
|
||||
"message": "success",
|
||||
})
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ package handler
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
@@ -49,6 +50,15 @@ type fakeDocumentService struct {
|
||||
func (f *fakeDocumentService) UpdateDatasetDocument(userID, datasetID, documentID string, req *service.UpdateDatasetDocumentRequest, present map[string]bool) (*service.UpdateDatasetDocumentResponse, common.ErrorCode, error) {
|
||||
return nil, common.CodeSuccess, nil
|
||||
}
|
||||
func (f *fakeDocumentService) BatchUpdateDocumentMetadatas(datasetID string, selector *service.DocumentMetadataSelector, updates []service.DocumentMetadataUpdate, deletes []service.DocumentMetadataDelete) (*service.BatchUpdateDocumentMetadatasResponse, common.ErrorCode, error) {
|
||||
return nil, common.CodeSuccess, nil
|
||||
}
|
||||
func (f *fakeDocumentService) UploadDocumentInfos(userID string, files []*multipart.FileHeader) ([]map[string]interface{}, common.ErrorCode, error) {
|
||||
return nil, common.CodeSuccess, nil
|
||||
}
|
||||
func (f *fakeDocumentService) UploadDocumentInfoByURL(userID, rawURL string) (map[string]interface{}, common.ErrorCode, error) {
|
||||
return nil, common.CodeSuccess, nil
|
||||
}
|
||||
|
||||
func (f *fakeDocumentService) GetDocumentArtifact(filename string) (*service.ArtifactResponse, error) {
|
||||
if filename == "error.txt" {
|
||||
|
||||
Reference in New Issue
Block a user