mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-05 10:58:34 +08:00
fix(go-api): sync document handler interface and enforce preview acce… (#15688)
### Description

This PR syncs the `documentServiceIface` interface and introduces handler methods for document preview, artifact fetching, and downloading in the Go API. It also ensures that strict dataset alignment and access checks are enforced when retrieving or downloading documents. Furthermore, this PR introduces comprehensive unit tests for both the newly added Handler and Service methods to ensure robustness and prevent future regressions.

### Key Changes

* **Router & Handler Integration**:
  * Added and wired new API endpoints in `internal/router/router.go`.
  * Synchronized the `documentServiceIface` with `GetDocumentArtifact`, `GetDocumentPreview`, and `DownloadDocument`.
  * Implemented handlers for these endpoints in `internal/handler/document.go`.
* **Access & Validation Enforcement**:
  * Refactored `internal/service/document.go` to strictly check if a document belongs to the requested dataset before allowing downloads or previews.
  * Added robust artifact file sanitization (`sanitizeArtifactFilename`) and attachment handling (`shouldForceArtifactAttachment`).
* **Comprehensive Unit Testing**:
  * **Handler Layer (`internal/handler/document_test.go`)**: Added mock service implementations and Gin router tests covering success, not-found, and internal error states for all 3 new endpoints.
  * **Service Layer (`internal/service/document_test.go`)**: Added extensive business logic tests including dataset mismatch checks, non-existent document checks, and artifact file validation.
This commit is contained in:
@@ -25,6 +25,7 @@ import (
|
||||
"path/filepath"
|
||||
"ragflow/internal/common"
|
||||
"ragflow/internal/entity"
|
||||
"ragflow/internal/utility"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -55,6 +56,9 @@ type documentServiceIface interface {
|
||||
DeleteDocumentMetadata(docID string, keys []string) error
|
||||
DeleteDocumentAllMetadata(docID string) error
|
||||
GetDocumentMetadataByID(docID string) (map[string]interface{}, error)
|
||||
GetDocumentArtifact(filename string) (*service.ArtifactResponse, error)
|
||||
GetDocumentPreview(docID string) (*service.DocumentPreview, error)
|
||||
DownloadDocument(datasetID, docID string) (*service.DownloadDocumentResp, error)
|
||||
}
|
||||
|
||||
// DocumentHandler document handler
|
||||
@@ -198,6 +202,68 @@ func (h *DocumentHandler) GetDocumentImage(c *gin.Context) {
|
||||
c.Data(http.StatusOK, contentType, data)
|
||||
}
|
||||
|
||||
func (h *DocumentHandler) GetDocumentArtifact(c *gin.Context) {
|
||||
filename := c.Param("filename")
|
||||
artifact, err := h.documentService.GetDocumentArtifact(filename)
|
||||
if err != nil {
|
||||
switch {
|
||||
case errors.Is(err, service.ErrArtifactInvalidFilename),
|
||||
errors.Is(err, service.ErrArtifactInvalidFileType),
|
||||
errors.Is(err, service.ErrArtifactNotFound):
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": common.CodeDataError,
|
||||
"message": err.Error(),
|
||||
})
|
||||
default:
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": common.CodeExceptionError,
|
||||
"data": nil,
|
||||
"message": err.Error(),
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
c.Header("Content-Type", artifact.ContentType)
|
||||
if artifact.ForceAttachment {
|
||||
c.Header("X-Content-Type-Options", "nosniff")
|
||||
c.Header("Content-Disposition", "attachment")
|
||||
} else {
|
||||
c.Header("Content-Disposition", fmt.Sprintf(`inline; filename="%s"`, artifact.SafeFilename))
|
||||
}
|
||||
c.Data(http.StatusOK, artifact.ContentType, artifact.Data)
|
||||
}
|
||||
|
||||
func (h *DocumentHandler) GetDocumentPreview(c *gin.Context) {
|
||||
docID := c.Param("id")
|
||||
|
||||
if docID == "" {
|
||||
jsonError(c, common.CodeParamError, "id is required")
|
||||
return
|
||||
}
|
||||
|
||||
preview, err := h.documentService.GetDocumentPreview(docID)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": common.CodeDataError,
|
||||
"message": "Document not found!",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
ext := utility.GetFileExtension(preview.FileName)
|
||||
if preview.ContentType != "" {
|
||||
c.Header("Content-Type", preview.ContentType)
|
||||
}
|
||||
|
||||
if utility.ShouldForceAttachment(ext, preview.ContentType) {
|
||||
c.Header("X-Content-Type-Options", "nosniff")
|
||||
c.Header("Content-Disposition", "attachment")
|
||||
}
|
||||
|
||||
c.Data(http.StatusOK, preview.ContentType, preview.Data)
|
||||
}
|
||||
|
||||
// UpdateDocument update document
|
||||
// @Summary Update Document
|
||||
// @Description Update document info
|
||||
@@ -382,6 +448,40 @@ func (h *DocumentHandler) ListDocuments(c *gin.Context) {
|
||||
})
|
||||
}
|
||||
|
||||
func (h *DocumentHandler) DownloadDocument(c *gin.Context) {
|
||||
datasetID := c.Param("dataset_id")
|
||||
docID := c.Param("document_id")
|
||||
|
||||
if docID == "" {
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": common.CodeDataError,
|
||||
"message": "Specify document_id please.",
|
||||
})
|
||||
return
|
||||
}
|
||||
if datasetID == "" {
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": common.CodeDataError,
|
||||
"message": fmt.Sprintf("The dataset not own the document %s.", docID),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
res, err := h.documentService.DownloadDocument(datasetID, docID)
|
||||
|
||||
if err != nil {
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"code": common.CodeDataError,
|
||||
"message": err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
c.Header("Content-Type", res.ContentType)
|
||||
c.Header("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, res.FileName))
|
||||
c.Data(http.StatusOK, res.ContentType, res.Data)
|
||||
}
|
||||
|
||||
func mapDocumentListItem(doc *entity.DocumentListItem, metaFields map[string]interface{}) map[string]interface{} {
|
||||
item := map[string]interface{}{
|
||||
"id": doc.ID,
|
||||
|
||||
@@ -42,6 +42,40 @@ type fakeDocumentService struct {
|
||||
stopErr error
|
||||
}
|
||||
|
||||
func (f *fakeDocumentService) GetDocumentArtifact(filename string) (*service.ArtifactResponse, error) {
|
||||
if filename == "error.txt" {
|
||||
return nil, service.ErrArtifactNotFound
|
||||
}
|
||||
if filename == "unexpected.txt" {
|
||||
return nil, fmt.Errorf("unexpected error")
|
||||
}
|
||||
return &service.ArtifactResponse{
|
||||
Data: []byte("artifact content"),
|
||||
ContentType: "text/plain",
|
||||
SafeFilename: "safe.txt",
|
||||
ForceAttachment: false,
|
||||
}, nil
|
||||
}
|
||||
func (f *fakeDocumentService) GetDocumentPreview(docID string) (*service.DocumentPreview, error) {
|
||||
if docID == "not-found" {
|
||||
return nil, fmt.Errorf("not found")
|
||||
}
|
||||
return &service.DocumentPreview{
|
||||
Data: []byte("preview content"),
|
||||
ContentType: "text/plain",
|
||||
FileName: "preview.txt",
|
||||
}, nil
|
||||
}
|
||||
func (f *fakeDocumentService) DownloadDocument(datasetID, docID string) (*service.DownloadDocumentResp, error) {
|
||||
if docID == "not-found" {
|
||||
return nil, fmt.Errorf("not found")
|
||||
}
|
||||
return &service.DownloadDocumentResp{
|
||||
Data: []byte("document data"),
|
||||
ContentType: "application/pdf",
|
||||
FileName: "doc.pdf",
|
||||
}, nil
|
||||
}
|
||||
func (f *fakeDocumentService) CreateDocument(req *service.CreateDocumentRequest) (*entity.Document, error) {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -442,3 +476,146 @@ func TestStopParseDocumentsHandler_NotAccessible(t *testing.T) {
|
||||
t.Fatal("expected error for no authorization")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetDocumentArtifact_Success(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
h := &DocumentHandler{
|
||||
documentService: &fakeDocumentService{},
|
||||
}
|
||||
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/test.txt", "")
|
||||
c.Params = gin.Params{{Key: "filename", Value: "test.txt"}}
|
||||
|
||||
h.GetDocumentArtifact(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
if w.Header().Get("Content-Type") != "text/plain" {
|
||||
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
|
||||
}
|
||||
if w.Body.String() != "artifact content" {
|
||||
t.Fatalf("unexpected body: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetDocumentArtifact_NotFound(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
h := &DocumentHandler{
|
||||
documentService: &fakeDocumentService{},
|
||||
}
|
||||
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/error.txt", "")
|
||||
c.Params = gin.Params{{Key: "filename", Value: "error.txt"}}
|
||||
|
||||
h.GetDocumentArtifact(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["code"] != float64(common.CodeDataError) {
|
||||
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetDocumentArtifact_UnexpectedError(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
h := &DocumentHandler{
|
||||
documentService: &fakeDocumentService{},
|
||||
}
|
||||
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/unexpected.txt", "")
|
||||
c.Params = gin.Params{{Key: "filename", Value: "unexpected.txt"}}
|
||||
|
||||
h.GetDocumentArtifact(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["code"] != float64(common.CodeExceptionError) {
|
||||
t.Fatalf("expected code %d, got %v", common.CodeExceptionError, resp["code"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetDocumentPreview_Success(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
h := &DocumentHandler{
|
||||
documentService: &fakeDocumentService{},
|
||||
}
|
||||
c, w := setupGinContextWithUser("GET", "/api/v1/documents/doc-1/preview", "")
|
||||
c.Params = gin.Params{{Key: "id", Value: "doc-1"}}
|
||||
|
||||
h.GetDocumentPreview(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
if w.Header().Get("Content-Type") != "text/plain" {
|
||||
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
|
||||
}
|
||||
if w.Body.String() != "preview content" {
|
||||
t.Fatalf("unexpected body: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetDocumentPreview_NotFound(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
h := &DocumentHandler{
|
||||
documentService: &fakeDocumentService{},
|
||||
}
|
||||
c, w := setupGinContextWithUser("GET", "/api/v1/documents/not-found/preview", "")
|
||||
c.Params = gin.Params{{Key: "id", Value: "not-found"}}
|
||||
|
||||
h.GetDocumentPreview(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["code"] != float64(common.CodeDataError) {
|
||||
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestDownloadDocument_Success(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
h := &DocumentHandler{
|
||||
documentService: &fakeDocumentService{},
|
||||
}
|
||||
c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/documents/doc-1", "")
|
||||
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}, {Key: "document_id", Value: "doc-1"}}
|
||||
|
||||
h.DownloadDocument(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
if w.Header().Get("Content-Type") != "application/pdf" {
|
||||
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
|
||||
}
|
||||
if w.Body.String() != "document data" {
|
||||
t.Fatalf("unexpected body: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestDownloadDocument_NotFound(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
h := &DocumentHandler{
|
||||
documentService: &fakeDocumentService{},
|
||||
}
|
||||
c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/documents/not-found", "")
|
||||
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}, {Key: "document_id", Value: "not-found"}}
|
||||
|
||||
h.DownloadDocument(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["code"] != float64(common.CodeDataError) {
|
||||
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user