mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
247 lines
7.3 KiB
Go
247 lines
7.3 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
package handler
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/gin-gonic/gin"
|
|
|
|
"ragflow/internal/common"
|
|
"ragflow/internal/entity"
|
|
"ragflow/internal/service"
|
|
)
|
|
|
|
// fakeDocumentService implements documentServiceIface for handler tests.
|
|
type fakeDocumentService struct {
|
|
deleted int
|
|
err error
|
|
}
|
|
|
|
func (f *fakeDocumentService) CreateDocument(req *service.CreateDocumentRequest) (*entity.Document, error) {
|
|
return nil, nil
|
|
}
|
|
func (f *fakeDocumentService) GetDocumentByID(id string) (*service.DocumentResponse, error) {
|
|
return nil, nil
|
|
}
|
|
func (f *fakeDocumentService) UpdateDocument(id string, req *service.UpdateDocumentRequest) error {
|
|
return nil
|
|
}
|
|
func (f *fakeDocumentService) DeleteDocument(id string) error {
|
|
return nil
|
|
}
|
|
func (f *fakeDocumentService) DeleteDocuments(ids []string, deleteAll bool, datasetID, userID string) (int, error) {
|
|
return f.deleted, f.err
|
|
}
|
|
func (f *fakeDocumentService) ParseDocuments(datasetID, userID string, docIDs []string) ([]*service.ParseDocumentResponse, error) {
|
|
return nil, nil
|
|
}
|
|
func (f *fakeDocumentService) ListDocuments(page, pageSize int) ([]*service.DocumentResponse, int64, error) {
|
|
return nil, 0, nil
|
|
}
|
|
func (f *fakeDocumentService) ListDocumentsByDatasetID(kbID string, page, pageSize int) ([]*entity.DocumentListItem, int64, error) {
|
|
return nil, 0, nil
|
|
}
|
|
func (f *fakeDocumentService) GetThumbnail(docID string) (*service.ThumbnailResponse, error) {
|
|
return nil, nil
|
|
}
|
|
func (f *fakeDocumentService) GetDocumentImage(imageID string) ([]byte, error) {
|
|
return nil, nil
|
|
}
|
|
func (f *fakeDocumentService) GetDocumentsByAuthorID(authorID, page, pageSize int) ([]*service.DocumentResponse, int64, error) {
|
|
return nil, 0, nil
|
|
}
|
|
func (f *fakeDocumentService) GetMetadataSummary(kbID string, docIDs []string) (map[string]interface{}, error) {
|
|
return nil, nil
|
|
}
|
|
func (f *fakeDocumentService) SetDocumentMetadata(docID string, meta map[string]interface{}) error {
|
|
return nil
|
|
}
|
|
func (f *fakeDocumentService) DeleteDocumentMetadata(docID string, keys []string) error {
|
|
return nil
|
|
}
|
|
func (f *fakeDocumentService) DeleteDocumentAllMetadata(docID string) error {
|
|
return nil
|
|
}
|
|
func (f *fakeDocumentService) GetDocumentMetadataByID(docID string) (map[string]interface{}, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func setupGinContextWithUser(method, path, body string) (*gin.Context, *httptest.ResponseRecorder) {
|
|
gin.SetMode(gin.TestMode)
|
|
w := httptest.NewRecorder()
|
|
req := httptest.NewRequest(method, path, strings.NewReader(body))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
c, _ := gin.CreateTestContext(w)
|
|
c.Request = req
|
|
c.Set("user", &entity.User{ID: "user-1"})
|
|
c.Set("user_id", "user-1")
|
|
return c, w
|
|
}
|
|
|
|
func TestDeleteDocumentsHandler_Success(t *testing.T) {
|
|
gin.SetMode(gin.TestMode)
|
|
|
|
fake := &fakeDocumentService{deleted: 3}
|
|
h := &DocumentHandler{
|
|
documentService: fake,
|
|
datasetService: service.NewDatasetService(),
|
|
}
|
|
|
|
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1", "doc-2", "doc-3"]}`)
|
|
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
|
|
|
|
h.DeleteDocuments(c)
|
|
|
|
if w.Code != http.StatusOK {
|
|
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
|
}
|
|
|
|
var resp map[string]interface{}
|
|
json.Unmarshal(w.Body.Bytes(), &resp)
|
|
if resp["code"] != float64(common.CodeSuccess) {
|
|
t.Fatalf("expected code 0, got %v", resp["code"])
|
|
}
|
|
data := resp["data"].(map[string]interface{})
|
|
if data["deleted"] != float64(3) {
|
|
t.Fatalf("expected deleted=3, got %v", data["deleted"])
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocumentsHandler_DeleteAll(t *testing.T) {
|
|
gin.SetMode(gin.TestMode)
|
|
|
|
fake := &fakeDocumentService{deleted: 5}
|
|
h := &DocumentHandler{
|
|
documentService: fake,
|
|
datasetService: service.NewDatasetService(),
|
|
}
|
|
|
|
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"delete_all": true}`)
|
|
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
|
|
|
|
h.DeleteDocuments(c)
|
|
|
|
if w.Code != http.StatusOK {
|
|
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocumentsHandler_MutuallyExclusive(t *testing.T) {
|
|
gin.SetMode(gin.TestMode)
|
|
|
|
fake := &fakeDocumentService{}
|
|
h := &DocumentHandler{
|
|
documentService: fake,
|
|
datasetService: service.NewDatasetService(),
|
|
}
|
|
|
|
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1"], "delete_all": true}`)
|
|
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
|
|
|
|
h.DeleteDocuments(c)
|
|
|
|
if w.Code != http.StatusOK {
|
|
t.Fatalf("expected 200, got %d", w.Code)
|
|
}
|
|
var resp map[string]interface{}
|
|
json.Unmarshal(w.Body.Bytes(), &resp)
|
|
code, _ := resp["code"].(float64)
|
|
if code == float64(common.CodeSuccess) {
|
|
t.Fatal("expected error for mutually exclusive ids+delete_all")
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocumentsHandler_NoIDsNoDeleteAll(t *testing.T) {
|
|
gin.SetMode(gin.TestMode)
|
|
|
|
fake := &fakeDocumentService{}
|
|
h := &DocumentHandler{
|
|
documentService: fake,
|
|
datasetService: service.NewDatasetService(),
|
|
}
|
|
|
|
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{}`)
|
|
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
|
|
|
|
h.DeleteDocuments(c)
|
|
|
|
if w.Code != http.StatusOK {
|
|
t.Fatalf("expected 200, got %d", w.Code)
|
|
}
|
|
var resp map[string]interface{}
|
|
json.Unmarshal(w.Body.Bytes(), &resp)
|
|
code, _ := resp["code"].(float64)
|
|
if code == float64(common.CodeSuccess) {
|
|
t.Fatal("expected error for no ids and no delete_all")
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocumentsHandler_ServiceError(t *testing.T) {
|
|
gin.SetMode(gin.TestMode)
|
|
|
|
fake := &fakeDocumentService{err: fmt.Errorf("permission denied")}
|
|
h := &DocumentHandler{
|
|
documentService: fake,
|
|
datasetService: service.NewDatasetService(),
|
|
}
|
|
|
|
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1"]}`)
|
|
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
|
|
|
|
h.DeleteDocuments(c)
|
|
|
|
if w.Code != http.StatusOK {
|
|
t.Fatalf("expected 200, got %d", w.Code)
|
|
}
|
|
var resp map[string]interface{}
|
|
json.Unmarshal(w.Body.Bytes(), &resp)
|
|
code, _ := resp["code"].(float64)
|
|
if code == float64(common.CodeSuccess) {
|
|
t.Fatal("expected error code")
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocumentsHandler_MissingDatasetID(t *testing.T) {
|
|
gin.SetMode(gin.TestMode)
|
|
|
|
fake := &fakeDocumentService{}
|
|
h := &DocumentHandler{
|
|
documentService: fake,
|
|
datasetService: service.NewDatasetService(),
|
|
}
|
|
|
|
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets//documents", `{"ids": ["doc-1"]}`)
|
|
|
|
h.DeleteDocuments(c)
|
|
|
|
if w.Code != http.StatusOK {
|
|
t.Fatalf("expected 200, got %d", w.Code)
|
|
}
|
|
var resp map[string]interface{}
|
|
json.Unmarshal(w.Body.Bytes(), &resp)
|
|
code, _ := resp["code"].(float64)
|
|
if code == float64(common.CodeSuccess) {
|
|
t.Fatal("expected error for missing dataset_id")
|
|
}
|
|
}
|