Files
ragflow/internal/handler/document_test.go

1053 lines
33 KiB
Go
Raw Permalink Normal View History

feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package handler
import (
"bytes"
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
"encoding/json"
"fmt"
feat(go-api): Align document metadata batch APIs and upload_info with Python (#16269) ## Summary Align the Go implementations of these APIs with the Python behavior: - `POST /api/v1/datasets/:dataset_id/metadata/update` - `PATCH /api/v1/datasets/:dataset_id/documents/metadatas` - `POST /api/v1/documents/upload` ## What changed - Added the Go routes and handlers for the 3 APIs. - Aligned batch document metadata updates with Python semantics: - support `match` in update items - support list append / replace behavior - support deleting specific list values - remove metadata entirely when it becomes empty - create metadata for documents that previously had none when updates apply - count `updated` only when a document actually changes - Aligned `documents/upload` file uploads with Python-style `upload_info` behavior: - store upload-info blobs in the per-user downloads bucket - return lightweight upload descriptors instead of normal file-management responses - Improved URL upload behavior: - SSRF-guarded fetch with redirect validation - redirect limit aligned to Python behavior - normalize filename and MIME type - add `.pdf` when the fetched content is PDF - normalize HTML content into readable text instead of storing raw HTML shells ## Validation ### Unit tests Passed: - `go test ./internal/service` - `go test ./internal/handler` Also verified targeted cases for: - batch metadata update semantics - upload_info URL handling - upload_info download bucket behavior ### curl checks Verified the new Go endpoints with `curl` and compared the response shape and behavior with Python for: - `POST /api/v1/datasets/{dataset_id}/metadata/update` - `PATCH /api/v1/datasets/{dataset_id}/documents/metadatas` - `POST /api/v1/documents/upload` The Go responses were checked against Python for: - argument validation - success response shape - metadata update results - upload_info result structure - file vs URL input handling
2026-06-24 14:52:47 +08:00
"mime/multipart"
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/gin-gonic/gin"
"github.com/glebarez/sqlite"
"gorm.io/gorm"
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
"ragflow/internal/common"
"ragflow/internal/dao"
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
"ragflow/internal/entity"
"ragflow/internal/service"
)
// fakeDocumentService implements documentServiceIface for handler tests.
type fakeDocumentService struct {
deleted int
err error
stopResult map[string]interface{}
stopErr error
metadataSummary map[string]interface{}
metadataErr error
metadataKBID string
metadataDocIDs []string
uploadLocalData []map[string]interface{}
uploadLocalErrs []string
uploadLocalKB *entity.Knowledgebase
uploadLocalPath string
uploadOverride map[string]interface{}
ingestCode common.ErrorCode
ingestErr error
ingestUserID string
ingestReq *service.IngestDocumentRequest
}
func (f *fakeDocumentService) Ingest(userID string, req *service.IngestDocumentRequest) (common.ErrorCode, error) {
f.ingestUserID = userID
f.ingestReq = req
if f.ingestCode != 0 || f.ingestErr != nil {
return f.ingestCode, f.ingestErr
}
return common.CodeSuccess, nil
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
}
const uploadTestDatasetID = "123e4567-e89b-12d3-a456-426614174000"
func (f *fakeDocumentService) UpdateDatasetDocument(userID, datasetID, documentID string, req *service.UpdateDatasetDocumentRequest, present map[string]bool) (*service.UpdateDatasetDocumentResponse, common.ErrorCode, error) {
return nil, common.CodeSuccess, nil
}
feat(go-api): Align document metadata batch APIs and upload_info with Python (#16269) ## Summary Align the Go implementations of these APIs with the Python behavior: - `POST /api/v1/datasets/:dataset_id/metadata/update` - `PATCH /api/v1/datasets/:dataset_id/documents/metadatas` - `POST /api/v1/documents/upload` ## What changed - Added the Go routes and handlers for the 3 APIs. - Aligned batch document metadata updates with Python semantics: - support `match` in update items - support list append / replace behavior - support deleting specific list values - remove metadata entirely when it becomes empty - create metadata for documents that previously had none when updates apply - count `updated` only when a document actually changes - Aligned `documents/upload` file uploads with Python-style `upload_info` behavior: - store upload-info blobs in the per-user downloads bucket - return lightweight upload descriptors instead of normal file-management responses - Improved URL upload behavior: - SSRF-guarded fetch with redirect validation - redirect limit aligned to Python behavior - normalize filename and MIME type - add `.pdf` when the fetched content is PDF - normalize HTML content into readable text instead of storing raw HTML shells ## Validation ### Unit tests Passed: - `go test ./internal/service` - `go test ./internal/handler` Also verified targeted cases for: - batch metadata update semantics - upload_info URL handling - upload_info download bucket behavior ### curl checks Verified the new Go endpoints with `curl` and compared the response shape and behavior with Python for: - `POST /api/v1/datasets/{dataset_id}/metadata/update` - `PATCH /api/v1/datasets/{dataset_id}/documents/metadatas` - `POST /api/v1/documents/upload` The Go responses were checked against Python for: - argument validation - success response shape - metadata update results - upload_info result structure - file vs URL input handling
2026-06-24 14:52:47 +08:00
func (f *fakeDocumentService) BatchUpdateDocumentMetadatas(datasetID string, selector *service.DocumentMetadataSelector, updates []service.DocumentMetadataUpdate, deletes []service.DocumentMetadataDelete) (*service.BatchUpdateDocumentMetadatasResponse, common.ErrorCode, error) {
return nil, common.CodeSuccess, nil
}
func (f *fakeDocumentService) UploadDocumentInfos(userID string, files []*multipart.FileHeader) ([]map[string]interface{}, common.ErrorCode, error) {
return nil, common.CodeSuccess, nil
}
func (f *fakeDocumentService) UploadDocumentInfoByURL(userID, rawURL string) (map[string]interface{}, common.ErrorCode, error) {
return nil, common.CodeSuccess, nil
}
fix(go-api): sync document handler interface and enforce preview acce… (#15688) ### Description This PR syncs the `documentServiceIface` interface and introduces handler methods for document preview, artifact fetching, and downloading in the Go API. It also ensures that strict dataset alignment and access checks are enforced when retrieving or downloading documents. Furthermore, this PR introduces comprehensive unit tests for both the newly added Handler and Service methods to ensure robustness and prevent future regressions. ### Key Changes * **Router & Handler Integration**: * Added and wired new API endpoints in `internal/router/router.go`. * Synchronized the `documentServiceIface` with `GetDocumentArtifact`, `GetDocumentPreview`, and `DownloadDocument`. * Implemented handlers for these endpoints in `internal/handler/document.go`. * **Access & Validation Enforcement**: * Refactored `internal/service/document.go` to strictly check if a document belongs to the requested dataset before allowing downloads or previews. * Added robust artifact file sanitization (`sanitizeArtifactFilename`) and attachment handling (`shouldForceArtifactAttachment`). * **Comprehensive Unit Testing**: * **Handler Layer (`internal/handler/document_test.go`)**: Added mock service implementations and Gin router tests covering success, not-found, and internal error states for all 3 new endpoints. * **Service Layer (`internal/service/document_test.go`)**: Added extensive business logic tests including dataset mismatch checks, non-existent document checks, and artifact file validation.
2026-06-08 11:37:06 +08:00
func (f *fakeDocumentService) GetDocumentArtifact(filename string) (*service.ArtifactResponse, error) {
if filename == "error.txt" {
return nil, service.ErrArtifactNotFound
}
if filename == "unexpected.txt" {
return nil, fmt.Errorf("unexpected error")
}
return &service.ArtifactResponse{
Data: []byte("artifact content"),
ContentType: "text/plain",
SafeFilename: "safe.txt",
ForceAttachment: false,
}, nil
}
func (f *fakeDocumentService) GetDocumentPreview(docID string) (*service.DocumentPreview, error) {
if docID == "not-found" {
return nil, fmt.Errorf("not found")
}
return &service.DocumentPreview{
Data: []byte("preview content"),
ContentType: "text/plain",
FileName: "preview.txt",
}, nil
}
func (f *fakeDocumentService) DownloadDocument(datasetID, docID string) (*service.DownloadDocumentResp, error) {
if docID == "not-found" {
return nil, fmt.Errorf("not found")
}
return &service.DownloadDocumentResp{
Data: []byte("document data"),
ContentType: "application/pdf",
FileName: "doc.pdf",
}, nil
}
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
func (f *fakeDocumentService) CreateDocument(req *service.CreateDocumentRequest) (*entity.Document, error) {
return nil, nil
}
func (f *fakeDocumentService) GetDocumentByID(id string) (*service.DocumentResponse, error) {
return nil, nil
}
func (f *fakeDocumentService) UpdateDocument(id string, req *service.UpdateDocumentRequest) error {
return nil
}
func (f *fakeDocumentService) DeleteDocument(id string) error {
return nil
}
func (f *fakeDocumentService) DeleteDocuments(ids []string, deleteAll bool, datasetID, userID string) (int, error) {
return f.deleted, f.err
}
func (f *fakeDocumentService) ParseDocuments(datasetID, userID string, docIDs []string) ([]*service.ParseDocumentResponse, error) {
return nil, nil
}
func (f *fakeDocumentService) StopParseDocuments(datasetID string, docIDs []string) (map[string]interface{}, error) {
return f.stopResult, f.stopErr
}
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
func (f *fakeDocumentService) ListDocuments(page, pageSize int) ([]*service.DocumentResponse, int64, error) {
return nil, 0, nil
}
func (f *fakeDocumentService) ListDocumentsByDatasetID(kbID string, page, pageSize int) ([]*entity.DocumentListItem, int64, error) {
return nil, 0, nil
}
func (f *fakeDocumentService) BatchUpdateDocumentStatus(userID, datasetID, status string, documentIDs []string) (map[string]interface{}, common.ErrorCode, error) {
return map[string]interface{}{}, common.CodeSuccess, nil
}
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
func (f *fakeDocumentService) GetThumbnail(docID string) (*service.ThumbnailResponse, error) {
return nil, nil
}
func (f *fakeDocumentService) GetDocumentImage(imageID string) ([]byte, error) {
return nil, nil
}
func (f *fakeDocumentService) GetDocumentsByAuthorID(authorID, page, pageSize int) ([]*service.DocumentResponse, int64, error) {
return nil, 0, nil
}
func (f *fakeDocumentService) GetMetadataSummary(kbID string, docIDs []string) (map[string]interface{}, error) {
f.metadataKBID = kbID
f.metadataDocIDs = docIDs
return f.metadataSummary, f.metadataErr
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
}
func (f *fakeDocumentService) SetDocumentMetadata(docID string, meta map[string]interface{}) error {
return nil
}
func (f *fakeDocumentService) DeleteDocumentMetadata(docID string, keys []string) error {
return nil
}
func (f *fakeDocumentService) DeleteDocumentAllMetadata(docID string) error {
return nil
}
func (f *fakeDocumentService) GetDocumentMetadataByID(docID string) (map[string]interface{}, error) {
return nil, nil
}
func (f *fakeDocumentService) UploadLocalDocuments(kb *entity.Knowledgebase, tenantID string, files []*multipart.FileHeader, parentPath string, parserConfigOverride map[string]interface{}) ([]map[string]interface{}, []string) {
f.uploadLocalKB = kb
f.uploadLocalPath = parentPath
f.uploadOverride = parserConfigOverride
return f.uploadLocalData, f.uploadLocalErrs
}
func (f *fakeDocumentService) UploadWebDocument(kb *entity.Knowledgebase, tenantID, name, url string) (map[string]interface{}, common.ErrorCode, error) {
return nil, common.CodeServerError, fmt.Errorf("not implemented")
}
func (f *fakeDocumentService) UploadEmptyDocument(kb *entity.Knowledgebase, tenantID, name string) (map[string]interface{}, common.ErrorCode, error) {
return nil, common.CodeServerError, fmt.Errorf("not implemented")
}
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
Go: use NATS as the message queue (#15327) ### What problem does this PR solve? ``` RAGFlow(admin)> mq publish 'msg2'; SUCCESS RAGFlow(admin)> mq publish 'msg3'; SUCCESS RAGFlow(admin)> mq list; +---------+---------------+ | message | subject | +---------+---------------+ | msg1 | tasks.RAGFLOW | | msg2 | tasks.RAGFLOW | | msg3 | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq pull 2; +---------+---------------+ | message | subject | +---------+---------------+ | msg1 | tasks.RAGFLOW | | msg2 | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq pull noack; +---------+---------------+ | message | subject | +---------+---------------+ | abc | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq show +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ | ack_pending_count | consumer_count | memory | message_count | pending_count | redelivered_count | waiting_count | +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ | 2 | 1 | 0 | 2 | 0 | 1 | 0 | +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ RAGFlow(admin)> list ingestors; +--------------+-------------------------------------------+--------+ | host | name | status | +--------------+-------------------------------------------+--------+ | 192.168.1.38 | ingestor-8f0e4bd5650a4ac58b0151969fbf6935 | alive | +--------------+-------------------------------------------+--------+ RAGFlow(admin)> list ingestion tasks; +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ | document_id | id | status | step | user | user_id | +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ | ffe64fae423411f1a2d938a74640adcc | 90d3d0f6528941c1ac8eb0360effccc4 | COMPLETED | 5 | aaa@aaa.com | 2ba4881420fa11f19e9c38a74640adcc | +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ RAGFlow(admin)> remove ingestion tasks '90d3d0f6528941c1ac8eb0360effccc4'; +---------+----------------------------------+ | delete | task_id | +---------+----------------------------------+ | success | 90d3d0f6528941c1ac8eb0360effccc4 | +---------+----------------------------------+ RAGFlow(admin)> stop ingestion tasks 'e89e20d9a25848a1b79bd9345ddbfe1d'; +----------+----------------------------------+ | status | task_id | +----------+----------------------------------+ | STOPPING | e89e20d9a25848a1b79bd9345ddbfe1d | +----------+----------------------------------+ # Publish a message RAGFlow(admin)> mq publish 'cdd'; SUCCESS # List current tasks in the message queue RAGFlow(admin)> mq list +----------------------------------+---------------+ | message | subject | +----------------------------------+---------------+ | 7ce392a3c1624cd2be4b5276e8825059 | tasks.RAGFLOW | +----------------------------------+---------------+ # Consume a task from the message queue RAGFlow(admin)> mq pull +------+-----+----------------+ | ack | id | type | +------+-----+----------------+ | true | cdd | ingestion_test | +------+-----+----------------+ # User mode # List ingestion tasks, followed by dataset id RAGFlow(user)> list ingestion tasks from '0abe79f9423311f1ad8d38a74640adcc'; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-05-30T20:21:06+08:00 | 1780143666289 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | 8d758cd14a8b4ba8ab505003fb52017d | | COMPLETED | 2026-05-30T20:21:26+08:00 | 1780143686431 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ RAGFlow(user)> list ingestion tasks; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-06-02T19:02:31+08:00 | 1780398151417 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | e89e20d9a25848a1b79bd9345ddbfe1d | | COMPLETED | 2026-06-02T19:02:52+08:00 | 1780398172208 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ # Create an ingestion task # First argument is document id, second argument is dataset id RAGFlow(user)> start ingestion 'ffe64fae423411f1a2d938a74640adcc' from '0abe79f9423311f1ad8d38a74640adcc'; +----------------------------------+-------------------------------------------+ | document_id | result | +----------------------------------+-------------------------------------------+ | ffe64fae423411f1a2d938a74640adcc | task_id: 8d758cd14a8b4ba8ab505003fb52017d | +----------------------------------+-------------------------------------------+ # Pause an ingestion task, first argument is ingestion id RAGFlow(user)> stop ingestion '8d758cd14a8b4ba8ab505003fb52017d'; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-05-30T20:21:06+08:00 | 1780143666289 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | 8d758cd14a8b4ba8ab505003fb52017d | | COMPLETED | 2026-05-30T20:21:26+08:00 | 1780143686431 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ # Delete an ingestion task RAGFlow(api/default)> remove ingestion tasks 'f366450a27d54677aec1c7090add30f0'; +---------+----------------------------------+ | remove | task_id | +---------+----------------------------------+ | success | f366450a27d54677aec1c7090add30f0 | +---------+----------------------------------+ ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-06-12 14:56:44 +08:00
func (f *fakeDocumentService) ListIngestionTasks(userID string, datasetID *string, page, pageSize int) ([]*entity.IngestionTask, error) {
return nil, nil
}
func (f *fakeDocumentService) IngestDocuments(datasetID, userID string, docIDs []string) ([]*service.ParseDocumentResponse, error) {
return nil, nil
}
func (f *fakeDocumentService) StopIngestionTasks(tasks []string, userID string) ([]*entity.IngestionTask, error) {
return nil, nil
}
func (f *fakeDocumentService) RemoveIngestionTasks(tasks []string, userID string) ([]map[string]string, error) {
return nil, nil
}
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
func setupGinContextWithUser(method, path, body string) (*gin.Context, *httptest.ResponseRecorder) {
gin.SetMode(gin.TestMode)
w := httptest.NewRecorder()
req := httptest.NewRequest(method, path, strings.NewReader(body))
req.Header.Set("Content-Type", "application/json")
c, _ := gin.CreateTestContext(w)
c.Request = req
c.Set("user", &entity.User{ID: "user-1"})
c.Set("user_id", "user-1")
return c, w
}
func setupUploadHandlerDB(t *testing.T, role string) *gorm.DB {
t.Helper()
db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{
TranslateError: true,
})
if err != nil {
t.Fatalf("failed to open sqlite: %v", err)
}
if err := db.AutoMigrate(
&entity.User{},
&entity.Tenant{},
&entity.UserTenant{},
&entity.Knowledgebase{},
); err != nil {
t.Fatalf("failed to migrate: %v", err)
}
if err := db.Create(&entity.User{ID: "user-1", Nickname: "test", Email: "test@test.com", Password: sptr("x")}).Error; err != nil {
t.Fatalf("insert user: %v", err)
}
if err := db.Create(&entity.Tenant{ID: "tenant-1", LLMID: "llm-1", EmbdID: "embd-1", ASRID: "asr-1", Status: sptr(string(entity.StatusValid))}).Error; err != nil {
t.Fatalf("insert tenant: %v", err)
}
if err := db.Create(&entity.UserTenant{ID: "ut-1", UserID: "user-1", TenantID: "tenant-1", Role: role, Status: sptr(string(entity.StatusValid))}).Error; err != nil {
t.Fatalf("insert user_tenant: %v", err)
}
pipelineID := "pipe-1"
if err := db.Create(&entity.Knowledgebase{
ID: "123e4567e89b12d3a456426614174000",
TenantID: "tenant-1",
Name: "kb-upload",
EmbdID: "embd-1",
CreatedBy: "user-1",
Permission: string(entity.TenantPermissionTeam),
ParserID: "naive",
PipelineID: &pipelineID,
ParserConfig: entity.JSONMap{"base": "cfg"},
Status: sptr(string(entity.StatusValid)),
}).Error; err != nil {
t.Fatalf("insert knowledgebase: %v", err)
}
return db
}
func setupUploadContext(t *testing.T, path string, fields map[string]string, fileName string, fileContent []byte) (*gin.Context, *httptest.ResponseRecorder) {
t.Helper()
gin.SetMode(gin.TestMode)
w := httptest.NewRecorder()
var body bytes.Buffer
writer := multipart.NewWriter(&body)
for k, v := range fields {
if err := writer.WriteField(k, v); err != nil {
t.Fatalf("write field %s: %v", k, err)
}
}
part, err := writer.CreateFormFile("file", fileName)
if err != nil {
t.Fatalf("create form file: %v", err)
}
if _, err := part.Write(fileContent); err != nil {
t.Fatalf("write form file: %v", err)
}
if err := writer.Close(); err != nil {
t.Fatalf("close writer: %v", err)
}
req := httptest.NewRequest(http.MethodPost, path, &body)
req.Header.Set("Content-Type", writer.FormDataContentType())
c, _ := gin.CreateTestContext(w)
c.Request = req
c.Set("user", &entity.User{ID: "user-1"})
c.Set("user_id", "user-1")
c.Params = gin.Params{{Key: "dataset_id", Value: uploadTestDatasetID}}
return c, w
}
func setupDocumentIngestRoute(userID string, svc *fakeDocumentService) *gin.Engine {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: svc,
datasetService: service.NewDatasetService(),
}
r := gin.New()
r.Use(func(c *gin.Context) {
c.Set("user", &entity.User{ID: userID})
c.Set("user_id", userID)
})
r.POST("/api/v1/documents/ingest", h.Ingest)
return r
}
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
func TestDeleteDocumentsHandler_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{deleted: 3}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1", "doc-2", "doc-3"]}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeSuccess) {
t.Fatalf("expected code 0, got %v", resp["code"])
}
data := resp["data"].(map[string]interface{})
if data["deleted"] != float64(3) {
t.Fatalf("expected deleted=3, got %v", data["deleted"])
}
}
func TestUploadDocumentsHandler_LocalUsesFullKBAndIgnoresBadParserConfig(t *testing.T) {
db := setupUploadHandlerDB(t, "normal")
orig := dao.DB
dao.DB = db
t.Cleanup(func() { dao.DB = orig })
fake := &fakeDocumentService{
uploadLocalData: []map[string]interface{}{
{"id": "doc-1", "kb_id": "ds-1", "parser_id": "naive", "chunk_num": int64(0), "token_num": int64(0), "name": "a.txt"},
},
}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupUploadContext(t, "/api/v1/datasets/ds-1/documents?type=local", map[string]string{
"parent_path": "nested/path",
"parser_config": "{bad json",
}, "a.txt", []byte("abc"))
h.UploadDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
if fake.uploadLocalKB == nil {
t.Fatalf("UploadLocalDocuments was not called, response=%s", w.Body.String())
}
if fake.uploadLocalKB.TenantID != "tenant-1" || fake.uploadLocalKB.Name != "kb-upload" || fake.uploadLocalKB.ParserID != "naive" {
t.Fatalf("incomplete kb passed to service: %+v", fake.uploadLocalKB)
}
if fake.uploadLocalPath != "nested/path" {
t.Fatalf("parent path=%q, want nested/path", fake.uploadLocalPath)
}
if fake.uploadOverride != nil {
t.Fatalf("bad parser_config should be ignored, got %v", fake.uploadOverride)
}
}
func TestUploadDocumentsHandler_LocalReturnsPartialSuccess(t *testing.T) {
db := setupUploadHandlerDB(t, "normal")
orig := dao.DB
dao.DB = db
t.Cleanup(func() { dao.DB = orig })
fake := &fakeDocumentService{
uploadLocalData: []map[string]interface{}{
{"id": "doc-1", "kb_id": "ds-1", "parser_id": "naive", "chunk_num": int64(0), "token_num": int64(0), "name": "ok.txt"},
},
uploadLocalErrs: []string{"bad.exe: This type of file has not been supported yet!"},
}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupUploadContext(t, "/api/v1/datasets/ds-1/documents?type=local", nil, "ok.txt", []byte("abc"))
h.UploadDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("unmarshal response: %v", err)
}
if resp["code"] != float64(common.CodeSuccess) {
t.Fatalf("expected success for partial upload, got %v", resp)
}
data := resp["data"].(map[string]interface{})
if len(data["documents"].([]interface{})) != 1 {
t.Fatalf("expected one successful document, got %v", data["documents"])
}
if len(data["errors"].([]interface{})) != 1 {
t.Fatalf("expected one file error, got %v", data["errors"])
}
}
func TestUploadDocumentsHandler_DeniesNonNormalTeamRole(t *testing.T) {
db := setupUploadHandlerDB(t, "admin")
orig := dao.DB
dao.DB = db
t.Cleanup(func() { dao.DB = orig })
fake := &fakeDocumentService{}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupUploadContext(t, "/api/v1/datasets/ds-1/documents?type=local", nil, "a.txt", []byte("abc"))
h.UploadDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("unmarshal response: %v", err)
}
if resp["code"] == float64(common.CodeSuccess) {
t.Fatalf("expected authorization error, got %v", resp)
}
if fake.uploadLocalKB != nil {
t.Fatal("service should not be called on denied upload")
}
}
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
func TestDeleteDocumentsHandler_DeleteAll(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{deleted: 5}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"delete_all": true}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
}
func TestDeleteDocumentsHandler_MutuallyExclusive(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1"], "delete_all": true}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error for mutually exclusive ids+delete_all")
}
}
func TestDeleteDocumentsHandler_NoIDsNoDeleteAll(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error for no ids and no delete_all")
}
}
func TestDeleteDocumentsHandler_ServiceError(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{err: fmt.Errorf("permission denied")}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1"]}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error code")
}
}
func TestDeleteDocumentsHandler_MissingDatasetID(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets//documents", `{"ids": ["doc-1"]}`)
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error for missing dataset_id")
}
}
func TestDocumentHandlerIngestMatchesPythonResponseShape(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("POST", "/api/v1/documents/ingest", `{"doc_ids":["doc-1"],"run":"1"}`)
h.Ingest(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatal(err)
}
if resp["code"] != float64(common.CodeSuccess) {
t.Fatalf("expected top-level code 0, got %v", resp["code"])
}
if resp["data"] != true {
t.Fatalf("expected top-level data=true, got %#v", resp["data"])
}
if _, ok := resp["data"].(map[string]interface{}); ok {
t.Fatalf("response must not nest code/message under data: %#v", resp["data"])
}
if fake.ingestUserID != "user-1" {
t.Fatalf("expected user-1, got %q", fake.ingestUserID)
}
if fake.ingestReq == nil || len(fake.ingestReq.DocIDs) != 1 || fake.ingestReq.DocIDs[0] != "doc-1" {
t.Fatalf("unexpected ingest request: %#v", fake.ingestReq)
}
}
func TestDocumentIngestRoutePassesPythonBodyToService(t *testing.T) {
fake := &fakeDocumentService{}
r := setupDocumentIngestRoute("user-1", fake)
w := httptest.NewRecorder()
req := httptest.NewRequest("POST", "/api/v1/documents/ingest", strings.NewReader(`{"doc_ids":["doc-1","doc-2"],"run":1,"delete":true,"apply_kb":true}`))
req.Header.Set("Content-Type", "application/json")
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatal(err)
}
if resp["code"] != float64(common.CodeSuccess) || resp["data"] != true {
t.Fatalf("unexpected response: %s", w.Body.String())
}
if fake.ingestUserID != "user-1" {
t.Fatalf("userID = %q, want user-1", fake.ingestUserID)
}
if fake.ingestReq == nil {
t.Fatal("service did not receive ingest request")
}
if len(fake.ingestReq.DocIDs) != 2 || fake.ingestReq.DocIDs[0] != "doc-1" || fake.ingestReq.DocIDs[1] != "doc-2" {
t.Fatalf("doc_ids = %#v, want [doc-1 doc-2]", fake.ingestReq.DocIDs)
}
if fmt.Sprint(fake.ingestReq.Run) != "1" {
t.Fatalf("run = %#v, want 1", fake.ingestReq.Run)
}
if !fake.ingestReq.Delete {
t.Fatal("delete = false, want true")
}
if !fake.ingestReq.ApplyKB {
t.Fatal("apply_kb = false, want true")
}
}
func TestDocumentHandlerIngestPropagatesServiceErrorCode(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{
ingestCode: common.CodeAuthenticationError,
ingestErr: fmt.Errorf("No authorization."),
}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("POST", "/api/v1/documents/ingest", `{"doc_ids":["doc-1"],"run":"1"}`)
h.Ingest(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatal(err)
}
if resp["code"] != float64(common.CodeAuthenticationError) {
t.Fatalf("expected auth error code, got %v", resp["code"])
}
if resp["message"] != "No authorization." {
t.Fatalf("unexpected message: %v", resp["message"])
}
if resp["data"] != nil {
t.Fatalf("expected nil data, got %#v", resp["data"])
}
}
func TestStopParseDocumentsHandler_EmptyDocIDs(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("POST", "/api/v1/datasets/ds-1/documents/stop", `{"document_ids": []}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.StopParseDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error for empty document_ids")
}
}
func TestStopParseDocumentsHandler_BadJSON(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("POST", "/api/v1/datasets/ds-1/documents/stop", `not json`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.StopParseDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error for bad JSON body")
}
}
// setupHandlerAccessDB sets up SQLite in-memory DB for handler tests that need
// datasetService.Accessible to work.
func setupHandlerAccessDB(t *testing.T) *gorm.DB {
t.Helper()
db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{
TranslateError: true,
})
if err != nil {
t.Fatalf("failed to open sqlite: %v", err)
}
if err := db.AutoMigrate(
&entity.User{},
&entity.Tenant{},
&entity.UserTenant{},
&entity.Knowledgebase{},
); err != nil {
t.Fatalf("failed to migrate: %v", err)
}
// Insert user
db.Create(&entity.User{ID: "user-1", Nickname: "test", Email: "test@test.com", Password: sptr("x")})
// Insert tenant
db.Create(&entity.Tenant{ID: "tenant-1", LLMID: "llm-1", EmbdID: "embd-1", ASRID: "asr-1"})
// Insert user_tenant mapping
db.Create(&entity.UserTenant{ID: "ut-1", UserID: "user-1", TenantID: "tenant-1", Role: "admin"})
// Insert knowledgebase
db.Create(&entity.Knowledgebase{
ID: "ds-1", TenantID: "tenant-1", Name: "test-kb", EmbdID: "embd-1",
CreatedBy: "user-1", Permission: string(entity.TenantPermissionTeam),
Status: sptr(string(entity.StatusValid)),
})
return db
}
// sptr returns a pointer to the given string (copy of service test helper).
func sptr(s string) *string { return &s }
func TestStopParseDocumentsHandler_Success(t *testing.T) {
db := setupHandlerAccessDB(t)
orig := dao.DB
dao.DB = db
t.Cleanup(func() { dao.DB = orig })
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{
stopResult: map[string]interface{}{"success_count": 1},
}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("POST", "/api/v1/datasets/ds-1/documents/stop", `{"document_ids": ["doc-1"]}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.StopParseDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeSuccess) {
t.Fatalf("expected code 0, got %v: %v", resp["code"], resp)
}
data := resp["data"].(map[string]interface{})
if data["success_count"] != float64(1) {
t.Fatalf("expected success_count=1, got %v", data["success_count"])
}
}
func TestStopParseDocumentsHandler_ServiceError(t *testing.T) {
db := setupHandlerAccessDB(t)
orig := dao.DB
dao.DB = db
t.Cleanup(func() { dao.DB = orig })
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{
stopErr: fmt.Errorf("internal failure"),
}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("POST", "/api/v1/datasets/ds-1/documents/stop", `{"document_ids": ["doc-1"]}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.StopParseDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error code for service error")
}
}
func TestStopParseDocumentsHandler_NotAccessible(t *testing.T) {
db := setupHandlerAccessDB(t)
orig := dao.DB
dao.DB = db
t.Cleanup(func() { dao.DB = orig })
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("POST", "/api/v1/datasets/ds-1/documents/stop", `{"document_ids": ["doc-1"]}`)
// Use a user that doesn't have access to ds-1
c.Set("user_id", "other-user")
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.StopParseDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error for no authorization")
}
}
fix(go-api): sync document handler interface and enforce preview acce… (#15688) ### Description This PR syncs the `documentServiceIface` interface and introduces handler methods for document preview, artifact fetching, and downloading in the Go API. It also ensures that strict dataset alignment and access checks are enforced when retrieving or downloading documents. Furthermore, this PR introduces comprehensive unit tests for both the newly added Handler and Service methods to ensure robustness and prevent future regressions. ### Key Changes * **Router & Handler Integration**: * Added and wired new API endpoints in `internal/router/router.go`. * Synchronized the `documentServiceIface` with `GetDocumentArtifact`, `GetDocumentPreview`, and `DownloadDocument`. * Implemented handlers for these endpoints in `internal/handler/document.go`. * **Access & Validation Enforcement**: * Refactored `internal/service/document.go` to strictly check if a document belongs to the requested dataset before allowing downloads or previews. * Added robust artifact file sanitization (`sanitizeArtifactFilename`) and attachment handling (`shouldForceArtifactAttachment`). * **Comprehensive Unit Testing**: * **Handler Layer (`internal/handler/document_test.go`)**: Added mock service implementations and Gin router tests covering success, not-found, and internal error states for all 3 new endpoints. * **Service Layer (`internal/service/document_test.go`)**: Added extensive business logic tests including dataset mismatch checks, non-existent document checks, and artifact file validation.
2026-06-08 11:37:06 +08:00
func TestMetadataSummaryByDataset_Success(t *testing.T) {
db := setupHandlerAccessDB(t)
orig := dao.DB
dao.DB = db
t.Cleanup(func() { dao.DB = orig })
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{
metadataSummary: map[string]interface{}{
"author": map[string]interface{}{
"type": "string",
"values": []interface{}{
[]interface{}{"alice", 2},
},
},
},
}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/metadata/summary?doc_ids=doc-1,doc-2", "")
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.MetadataSummaryByDataset(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
if fake.metadataKBID != "ds-1" {
t.Fatalf("expected kbID ds-1, got %q", fake.metadataKBID)
}
if len(fake.metadataDocIDs) != 2 || fake.metadataDocIDs[0] != "doc-1" || fake.metadataDocIDs[1] != "doc-2" {
t.Fatalf("unexpected docIDs: %#v", fake.metadataDocIDs)
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("failed to unmarshal response: %v", err)
}
if resp["code"] != float64(common.CodeSuccess) {
t.Fatalf("expected code 0, got %v: %v", resp["code"], resp)
}
data := resp["data"].(map[string]interface{})
summary := data["summary"].(map[string]interface{})
author := summary["author"].(map[string]interface{})
if author["type"] != "string" {
t.Fatalf("expected author type string, got %v", author["type"])
}
}
fix(go-api): sync document handler interface and enforce preview acce… (#15688) ### Description This PR syncs the `documentServiceIface` interface and introduces handler methods for document preview, artifact fetching, and downloading in the Go API. It also ensures that strict dataset alignment and access checks are enforced when retrieving or downloading documents. Furthermore, this PR introduces comprehensive unit tests for both the newly added Handler and Service methods to ensure robustness and prevent future regressions. ### Key Changes * **Router & Handler Integration**: * Added and wired new API endpoints in `internal/router/router.go`. * Synchronized the `documentServiceIface` with `GetDocumentArtifact`, `GetDocumentPreview`, and `DownloadDocument`. * Implemented handlers for these endpoints in `internal/handler/document.go`. * **Access & Validation Enforcement**: * Refactored `internal/service/document.go` to strictly check if a document belongs to the requested dataset before allowing downloads or previews. * Added robust artifact file sanitization (`sanitizeArtifactFilename`) and attachment handling (`shouldForceArtifactAttachment`). * **Comprehensive Unit Testing**: * **Handler Layer (`internal/handler/document_test.go`)**: Added mock service implementations and Gin router tests covering success, not-found, and internal error states for all 3 new endpoints. * **Service Layer (`internal/service/document_test.go`)**: Added extensive business logic tests including dataset mismatch checks, non-existent document checks, and artifact file validation.
2026-06-08 11:37:06 +08:00
func TestGetDocumentArtifact_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/test.txt", "")
c.Params = gin.Params{{Key: "filename", Value: "test.txt"}}
h.GetDocumentArtifact(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
if w.Header().Get("Content-Type") != "text/plain" {
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
}
if w.Body.String() != "artifact content" {
t.Fatalf("unexpected body: %s", w.Body.String())
}
}
func TestGetDocumentArtifact_NotFound(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/error.txt", "")
c.Params = gin.Params{{Key: "filename", Value: "error.txt"}}
h.GetDocumentArtifact(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeDataError) {
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
}
}
func TestGetDocumentArtifact_UnexpectedError(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/unexpected.txt", "")
c.Params = gin.Params{{Key: "filename", Value: "unexpected.txt"}}
h.GetDocumentArtifact(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeExceptionError) {
t.Fatalf("expected code %d, got %v", common.CodeExceptionError, resp["code"])
}
}
func TestGetDocumentPreview_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/doc-1/preview", "")
c.Params = gin.Params{{Key: "id", Value: "doc-1"}}
h.GetDocumentPreview(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
if w.Header().Get("Content-Type") != "text/plain" {
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
}
if w.Body.String() != "preview content" {
t.Fatalf("unexpected body: %s", w.Body.String())
}
}
func TestGetDocumentPreview_NotFound(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/not-found/preview", "")
c.Params = gin.Params{{Key: "id", Value: "not-found"}}
h.GetDocumentPreview(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeDataError) {
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
}
}
func TestDownloadDocument_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/documents/doc-1", "")
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}, {Key: "document_id", Value: "doc-1"}}
h.DownloadDocument(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
if w.Header().Get("Content-Type") != "application/pdf" {
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
}
if w.Body.String() != "document data" {
t.Fatalf("unexpected body: %s", w.Body.String())
}
}
func TestDownloadDocument_NotFound(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/documents/not-found", "")
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}, {Key: "document_id", Value: "not-found"}}
h.DownloadDocument(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeDataError) {
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
}
}