Files
ragflow/internal/handler/document_test.go
Jack 67c3e73d70 feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577)
## Summary

Migrate the batch document deletion endpoint from Python to Go. Two
modes supported: explicit `ids` list and `delete_all`.

## Changes

| File | Change |
|------|--------|
| `internal/dao/file2document.go` | Add `GetByDocumentID`,
`DeleteByDocumentID` |
| `internal/dao/file2document_test.go` | 5 new tests |
| `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) |
| `internal/service/document.go` | Add `deleteDocumentFull` +
`DeleteDocuments`, refactor `DeleteDocument` |
| `internal/service/document_test.go` | 10 new tests |
| `internal/handler/document.go` | Add `documentServiceIface` +
`DeleteDocuments` handler |
| `internal/handler/document_test.go` | 7 new tests |
| `internal/router/router.go` | Register `DELETE /:dataset_id/documents`
|
| `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var |
| `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking |
| `internal/dao/database.go` | Skip Error 1091/1138 during migration |
| `internal/service/llm.go` | Fix vet warning |

## Per-document cleanup

- Delete tasks from DB
- Hard-delete document + decrement KB counters
- Delete chunks from document engine (nil-guarded)
- Delete metadata from document engine (nil-guarded)
- Remove file2document mapping + file record + storage blob

## Test Results

**24 unit tests all passing** (7 DAO + 10 service + 7 handler) using
SQLite :memory: + gin.TestMode.

See [test report](docs/test_report_delete_documents.md) for manual
integration test results.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00

247 lines
7.3 KiB
Go

//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package handler
import (
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/gin-gonic/gin"
"ragflow/internal/common"
"ragflow/internal/entity"
"ragflow/internal/service"
)
// fakeDocumentService implements documentServiceIface for handler tests.
type fakeDocumentService struct {
deleted int
err error
}
func (f *fakeDocumentService) CreateDocument(req *service.CreateDocumentRequest) (*entity.Document, error) {
return nil, nil
}
func (f *fakeDocumentService) GetDocumentByID(id string) (*service.DocumentResponse, error) {
return nil, nil
}
func (f *fakeDocumentService) UpdateDocument(id string, req *service.UpdateDocumentRequest) error {
return nil
}
func (f *fakeDocumentService) DeleteDocument(id string) error {
return nil
}
func (f *fakeDocumentService) DeleteDocuments(ids []string, deleteAll bool, datasetID, userID string) (int, error) {
return f.deleted, f.err
}
func (f *fakeDocumentService) ParseDocuments(datasetID, userID string, docIDs []string) ([]*service.ParseDocumentResponse, error) {
return nil, nil
}
func (f *fakeDocumentService) ListDocuments(page, pageSize int) ([]*service.DocumentResponse, int64, error) {
return nil, 0, nil
}
func (f *fakeDocumentService) ListDocumentsByDatasetID(kbID string, page, pageSize int) ([]*entity.DocumentListItem, int64, error) {
return nil, 0, nil
}
func (f *fakeDocumentService) GetThumbnail(docID string) (*service.ThumbnailResponse, error) {
return nil, nil
}
func (f *fakeDocumentService) GetDocumentImage(imageID string) ([]byte, error) {
return nil, nil
}
func (f *fakeDocumentService) GetDocumentsByAuthorID(authorID, page, pageSize int) ([]*service.DocumentResponse, int64, error) {
return nil, 0, nil
}
func (f *fakeDocumentService) GetMetadataSummary(kbID string, docIDs []string) (map[string]interface{}, error) {
return nil, nil
}
func (f *fakeDocumentService) SetDocumentMetadata(docID string, meta map[string]interface{}) error {
return nil
}
func (f *fakeDocumentService) DeleteDocumentMetadata(docID string, keys []string) error {
return nil
}
func (f *fakeDocumentService) DeleteDocumentAllMetadata(docID string) error {
return nil
}
func (f *fakeDocumentService) GetDocumentMetadataByID(docID string) (map[string]interface{}, error) {
return nil, nil
}
func setupGinContextWithUser(method, path, body string) (*gin.Context, *httptest.ResponseRecorder) {
gin.SetMode(gin.TestMode)
w := httptest.NewRecorder()
req := httptest.NewRequest(method, path, strings.NewReader(body))
req.Header.Set("Content-Type", "application/json")
c, _ := gin.CreateTestContext(w)
c.Request = req
c.Set("user", &entity.User{ID: "user-1"})
c.Set("user_id", "user-1")
return c, w
}
func TestDeleteDocumentsHandler_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{deleted: 3}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1", "doc-2", "doc-3"]}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeSuccess) {
t.Fatalf("expected code 0, got %v", resp["code"])
}
data := resp["data"].(map[string]interface{})
if data["deleted"] != float64(3) {
t.Fatalf("expected deleted=3, got %v", data["deleted"])
}
}
func TestDeleteDocumentsHandler_DeleteAll(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{deleted: 5}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"delete_all": true}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
}
func TestDeleteDocumentsHandler_MutuallyExclusive(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1"], "delete_all": true}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error for mutually exclusive ids+delete_all")
}
}
func TestDeleteDocumentsHandler_NoIDsNoDeleteAll(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error for no ids and no delete_all")
}
}
func TestDeleteDocumentsHandler_ServiceError(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{err: fmt.Errorf("permission denied")}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1"]}`)
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error code")
}
}
func TestDeleteDocumentsHandler_MissingDatasetID(t *testing.T) {
gin.SetMode(gin.TestMode)
fake := &fakeDocumentService{}
h := &DocumentHandler{
documentService: fake,
datasetService: service.NewDatasetService(),
}
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets//documents", `{"ids": ["doc-1"]}`)
h.DeleteDocuments(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
code, _ := resp["code"].(float64)
if code == float64(common.CodeSuccess) {
t.Fatal("expected error for missing dataset_id")
}
}