fix(go-api): sync document handler interface and enforce preview acce… (#15688)

### Description

This PR syncs the `documentServiceIface` interface and introduces
handler methods for document preview, artifact fetching, and downloading
in the Go API. It also ensures that strict dataset alignment and access
checks are enforced when retrieving or downloading documents.

Furthermore, this PR introduces comprehensive unit tests for both the
newly added Handler and Service methods to ensure robustness and prevent
future regressions.

### Key Changes
* **Router & Handler Integration**: 
  * Added and wired new API endpoints in `internal/router/router.go`.
* Synchronized the `documentServiceIface` with `GetDocumentArtifact`,
`GetDocumentPreview`, and `DownloadDocument`.
* Implemented handlers for these endpoints in
`internal/handler/document.go`.
* **Access & Validation Enforcement**: 
* Refactored `internal/service/document.go` to strictly check if a
document belongs to the requested dataset before allowing downloads or
previews.
* Added robust artifact file sanitization (`sanitizeArtifactFilename`)
and attachment handling (`shouldForceArtifactAttachment`).
* **Comprehensive Unit Testing**:
* **Handler Layer (`internal/handler/document_test.go`)**: Added mock
service implementations and Gin router tests covering success,
not-found, and internal error states for all 3 new endpoints.
* **Service Layer (`internal/service/document_test.go`)**: Added
extensive business logic tests including dataset mismatch checks,
non-existent document checks, and artifact file validation.
This commit is contained in:
Hz_
2026-06-08 11:37:06 +08:00
committed by GitHub
parent b05d5a5228
commit 074c331cdf
5 changed files with 573 additions and 0 deletions

View File

@@ -25,6 +25,7 @@ import (
"path/filepath"
"ragflow/internal/common"
"ragflow/internal/entity"
"ragflow/internal/utility"
"strconv"
"strings"
"time"
@@ -55,6 +56,9 @@ type documentServiceIface interface {
DeleteDocumentMetadata(docID string, keys []string) error
DeleteDocumentAllMetadata(docID string) error
GetDocumentMetadataByID(docID string) (map[string]interface{}, error)
GetDocumentArtifact(filename string) (*service.ArtifactResponse, error)
GetDocumentPreview(docID string) (*service.DocumentPreview, error)
DownloadDocument(datasetID, docID string) (*service.DownloadDocumentResp, error)
}
// DocumentHandler document handler
@@ -198,6 +202,68 @@ func (h *DocumentHandler) GetDocumentImage(c *gin.Context) {
c.Data(http.StatusOK, contentType, data)
}
func (h *DocumentHandler) GetDocumentArtifact(c *gin.Context) {
filename := c.Param("filename")
artifact, err := h.documentService.GetDocumentArtifact(filename)
if err != nil {
switch {
case errors.Is(err, service.ErrArtifactInvalidFilename),
errors.Is(err, service.ErrArtifactInvalidFileType),
errors.Is(err, service.ErrArtifactNotFound):
c.JSON(http.StatusOK, gin.H{
"code": common.CodeDataError,
"message": err.Error(),
})
default:
c.JSON(http.StatusOK, gin.H{
"code": common.CodeExceptionError,
"data": nil,
"message": err.Error(),
})
}
return
}
c.Header("Content-Type", artifact.ContentType)
if artifact.ForceAttachment {
c.Header("X-Content-Type-Options", "nosniff")
c.Header("Content-Disposition", "attachment")
} else {
c.Header("Content-Disposition", fmt.Sprintf(`inline; filename="%s"`, artifact.SafeFilename))
}
c.Data(http.StatusOK, artifact.ContentType, artifact.Data)
}
func (h *DocumentHandler) GetDocumentPreview(c *gin.Context) {
docID := c.Param("id")
if docID == "" {
jsonError(c, common.CodeParamError, "id is required")
return
}
preview, err := h.documentService.GetDocumentPreview(docID)
if err != nil {
c.JSON(http.StatusOK, gin.H{
"code": common.CodeDataError,
"message": "Document not found!",
})
return
}
ext := utility.GetFileExtension(preview.FileName)
if preview.ContentType != "" {
c.Header("Content-Type", preview.ContentType)
}
if utility.ShouldForceAttachment(ext, preview.ContentType) {
c.Header("X-Content-Type-Options", "nosniff")
c.Header("Content-Disposition", "attachment")
}
c.Data(http.StatusOK, preview.ContentType, preview.Data)
}
// UpdateDocument update document
// @Summary Update Document
// @Description Update document info
@@ -382,6 +448,40 @@ func (h *DocumentHandler) ListDocuments(c *gin.Context) {
})
}
func (h *DocumentHandler) DownloadDocument(c *gin.Context) {
datasetID := c.Param("dataset_id")
docID := c.Param("document_id")
if docID == "" {
c.JSON(http.StatusOK, gin.H{
"code": common.CodeDataError,
"message": "Specify document_id please.",
})
return
}
if datasetID == "" {
c.JSON(http.StatusOK, gin.H{
"code": common.CodeDataError,
"message": fmt.Sprintf("The dataset not own the document %s.", docID),
})
return
}
res, err := h.documentService.DownloadDocument(datasetID, docID)
if err != nil {
c.JSON(http.StatusOK, gin.H{
"code": common.CodeDataError,
"message": err.Error(),
})
return
}
c.Header("Content-Type", res.ContentType)
c.Header("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, res.FileName))
c.Data(http.StatusOK, res.ContentType, res.Data)
}
func mapDocumentListItem(doc *entity.DocumentListItem, metaFields map[string]interface{}) map[string]interface{} {
item := map[string]interface{}{
"id": doc.ID,

View File

@@ -42,6 +42,40 @@ type fakeDocumentService struct {
stopErr error
}
func (f *fakeDocumentService) GetDocumentArtifact(filename string) (*service.ArtifactResponse, error) {
if filename == "error.txt" {
return nil, service.ErrArtifactNotFound
}
if filename == "unexpected.txt" {
return nil, fmt.Errorf("unexpected error")
}
return &service.ArtifactResponse{
Data: []byte("artifact content"),
ContentType: "text/plain",
SafeFilename: "safe.txt",
ForceAttachment: false,
}, nil
}
func (f *fakeDocumentService) GetDocumentPreview(docID string) (*service.DocumentPreview, error) {
if docID == "not-found" {
return nil, fmt.Errorf("not found")
}
return &service.DocumentPreview{
Data: []byte("preview content"),
ContentType: "text/plain",
FileName: "preview.txt",
}, nil
}
func (f *fakeDocumentService) DownloadDocument(datasetID, docID string) (*service.DownloadDocumentResp, error) {
if docID == "not-found" {
return nil, fmt.Errorf("not found")
}
return &service.DownloadDocumentResp{
Data: []byte("document data"),
ContentType: "application/pdf",
FileName: "doc.pdf",
}, nil
}
func (f *fakeDocumentService) CreateDocument(req *service.CreateDocumentRequest) (*entity.Document, error) {
return nil, nil
}
@@ -442,3 +476,146 @@ func TestStopParseDocumentsHandler_NotAccessible(t *testing.T) {
t.Fatal("expected error for no authorization")
}
}
func TestGetDocumentArtifact_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/test.txt", "")
c.Params = gin.Params{{Key: "filename", Value: "test.txt"}}
h.GetDocumentArtifact(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
if w.Header().Get("Content-Type") != "text/plain" {
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
}
if w.Body.String() != "artifact content" {
t.Fatalf("unexpected body: %s", w.Body.String())
}
}
func TestGetDocumentArtifact_NotFound(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/error.txt", "")
c.Params = gin.Params{{Key: "filename", Value: "error.txt"}}
h.GetDocumentArtifact(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeDataError) {
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
}
}
func TestGetDocumentArtifact_UnexpectedError(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/unexpected.txt", "")
c.Params = gin.Params{{Key: "filename", Value: "unexpected.txt"}}
h.GetDocumentArtifact(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeExceptionError) {
t.Fatalf("expected code %d, got %v", common.CodeExceptionError, resp["code"])
}
}
func TestGetDocumentPreview_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/doc-1/preview", "")
c.Params = gin.Params{{Key: "id", Value: "doc-1"}}
h.GetDocumentPreview(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
if w.Header().Get("Content-Type") != "text/plain" {
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
}
if w.Body.String() != "preview content" {
t.Fatalf("unexpected body: %s", w.Body.String())
}
}
func TestGetDocumentPreview_NotFound(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/not-found/preview", "")
c.Params = gin.Params{{Key: "id", Value: "not-found"}}
h.GetDocumentPreview(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeDataError) {
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
}
}
func TestDownloadDocument_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/documents/doc-1", "")
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}, {Key: "document_id", Value: "doc-1"}}
h.DownloadDocument(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
if w.Header().Get("Content-Type") != "application/pdf" {
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
}
if w.Body.String() != "document data" {
t.Fatalf("unexpected body: %s", w.Body.String())
}
}
func TestDownloadDocument_NotFound(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/documents/not-found", "")
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}, {Key: "document_id", Value: "not-found"}}
h.DownloadDocument(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeDataError) {
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
}
}