mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-05 19:08:38 +08:00
## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
429 lines
11 KiB
Go
429 lines
11 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
package service
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/glebarez/sqlite"
|
|
"gorm.io/gorm"
|
|
|
|
"ragflow/internal/dao"
|
|
"ragflow/internal/entity"
|
|
)
|
|
|
|
// setupServiceTestDB initializes an in-memory SQLite database for service tests.
|
|
func setupServiceTestDB(t *testing.T) *gorm.DB {
|
|
t.Helper()
|
|
|
|
db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{
|
|
TranslateError: true,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("failed to open sqlite: %v", err)
|
|
}
|
|
|
|
// Migrate tables used by deleteDocumentFull + DeleteDocuments
|
|
if err := db.AutoMigrate(
|
|
&entity.Document{},
|
|
&entity.Knowledgebase{},
|
|
&entity.Task{},
|
|
&entity.File2Document{},
|
|
&entity.File{},
|
|
&entity.User{},
|
|
&entity.Tenant{},
|
|
&entity.UserTenant{},
|
|
); err != nil {
|
|
t.Fatalf("failed to migrate: %v", err)
|
|
}
|
|
|
|
return db
|
|
}
|
|
|
|
// pushServiceDB swaps dao.DB for the test and restores after.
|
|
func pushServiceDB(t *testing.T, testDB *gorm.DB) {
|
|
t.Helper()
|
|
orig := dao.DB
|
|
dao.DB = testDB
|
|
t.Cleanup(func() {
|
|
dao.DB = orig
|
|
})
|
|
}
|
|
|
|
func testDocumentService(t *testing.T) *DocumentService {
|
|
t.Helper()
|
|
// Use nil engine since we test DB cleanup only; engine ops are nil-guarded.
|
|
return &DocumentService{
|
|
documentDAO: dao.NewDocumentDAO(),
|
|
kbDAO: dao.NewKnowledgebaseDAO(),
|
|
taskDAO: dao.NewTaskDAO(),
|
|
file2DocumentDAO: dao.NewFile2DocumentDAO(),
|
|
docEngine: nil,
|
|
metadataSvc: nil, // nil engine → metadata ops skipped
|
|
}
|
|
}
|
|
|
|
// sptr copies s into newly allocated storage and returns its address, which
// is handy for populating *string struct fields from literals.
func sptr(s string) *string {
	p := new(string)
	*p = s
	return p
}
|
|
|
|
func insertTestKB(t *testing.T, id, tenantID string, docNum, tokenNum, chunkNum int64) {
|
|
t.Helper()
|
|
kb := &entity.Knowledgebase{
|
|
ID: id,
|
|
TenantID: tenantID,
|
|
Name: "test-kb",
|
|
EmbdID: "embd-1",
|
|
CreatedBy: "user-1",
|
|
Permission: string(entity.TenantPermissionMe),
|
|
DocNum: docNum,
|
|
TokenNum: tokenNum,
|
|
ChunkNum: chunkNum,
|
|
Status: sptr(string(entity.StatusValid)),
|
|
}
|
|
if err := dao.DB.Create(kb).Error; err != nil {
|
|
t.Fatalf("insert test kb: %v", err)
|
|
}
|
|
}
|
|
|
|
func insertTestDoc(t *testing.T, id, kbID string, tokenNum, chunkNum int64) {
|
|
t.Helper()
|
|
doc := &entity.Document{
|
|
ID: id,
|
|
KbID: kbID,
|
|
ParserID: "naive",
|
|
ParserConfig: entity.JSONMap{},
|
|
TokenNum: tokenNum,
|
|
ChunkNum: chunkNum,
|
|
Suffix: ".txt",
|
|
Status: sptr("1"),
|
|
}
|
|
if err := dao.DB.Create(doc).Error; err != nil {
|
|
t.Fatalf("insert test doc: %v", err)
|
|
}
|
|
}
|
|
|
|
func insertTestTask(t *testing.T, id, docID string) {
|
|
t.Helper()
|
|
task := &entity.Task{
|
|
ID: id,
|
|
DocID: docID,
|
|
}
|
|
if err := dao.DB.Create(task).Error; err != nil {
|
|
t.Fatalf("insert test task: %v", err)
|
|
}
|
|
}
|
|
|
|
func insertTestFile2Document(t *testing.T, id, fileID, docID string) {
|
|
t.Helper()
|
|
f2d := &entity.File2Document{
|
|
ID: id,
|
|
FileID: &fileID,
|
|
DocumentID: &docID,
|
|
}
|
|
if err := dao.DB.Create(f2d).Error; err != nil {
|
|
t.Fatalf("insert test f2d: %v", err)
|
|
}
|
|
}
|
|
|
|
func insertTestFile(t *testing.T, id, parentID, name string, location *string) {
|
|
t.Helper()
|
|
srcType := string(entity.FileSourceKnowledgebase)
|
|
f := &entity.File{
|
|
ID: id,
|
|
ParentID: parentID,
|
|
TenantID: "tenant-1",
|
|
CreatedBy: "user-1",
|
|
Name: name,
|
|
Location: location,
|
|
SourceType: srcType,
|
|
Type: "pdf",
|
|
}
|
|
if err := dao.DB.Create(f).Error; err != nil {
|
|
t.Fatalf("insert test file: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocumentFull_Basic(t *testing.T) {
|
|
db := setupServiceTestDB(t)
|
|
pushServiceDB(t, db)
|
|
|
|
insertTestKB(t, "kb-1", "tenant-1", 3, 100, 50)
|
|
insertTestDoc(t, "doc-1", "kb-1", 30, 10)
|
|
insertTestTask(t, "task-1", "doc-1")
|
|
|
|
svc := testDocumentService(t)
|
|
|
|
err := svc.deleteDocumentFull("doc-1")
|
|
if err != nil {
|
|
t.Fatalf("deleteDocumentFull failed: %v", err)
|
|
}
|
|
|
|
// Verify document deleted
|
|
_, err = dao.NewDocumentDAO().GetByID("doc-1")
|
|
if err == nil {
|
|
t.Fatal("document should be deleted but it still exists")
|
|
}
|
|
|
|
// Verify tasks deleted
|
|
tasks, _ := dao.NewTaskDAO().GetAllTasks()
|
|
if len(tasks) != 0 {
|
|
t.Fatalf("expected 0 tasks, got %d", len(tasks))
|
|
}
|
|
|
|
// Verify KB counters decremented
|
|
kb, err := dao.NewKnowledgebaseDAO().GetByID("kb-1")
|
|
if err != nil {
|
|
t.Fatalf("kb not found: %v", err)
|
|
}
|
|
if kb.DocNum != 2 {
|
|
t.Fatalf("doc_num: expected 2, got %d", kb.DocNum)
|
|
}
|
|
if kb.TokenNum != 70 {
|
|
t.Fatalf("token_num: expected 70, got %d", kb.TokenNum)
|
|
}
|
|
if kb.ChunkNum != 40 {
|
|
t.Fatalf("chunk_num: expected 40, got %d", kb.ChunkNum)
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocumentFull_NotFound(t *testing.T) {
|
|
db := setupServiceTestDB(t)
|
|
pushServiceDB(t, db)
|
|
|
|
svc := testDocumentService(t)
|
|
|
|
err := svc.deleteDocumentFull("nonexistent")
|
|
if err == nil {
|
|
t.Fatal("expected error for nonexistent document")
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocumentFull_CleansUpFile2Document(t *testing.T) {
|
|
db := setupServiceTestDB(t)
|
|
pushServiceDB(t, db)
|
|
|
|
insertTestKB(t, "kb-1", "tenant-1", 1, 10, 5)
|
|
insertTestDoc(t, "doc-1", "kb-1", 10, 5)
|
|
loc := "path/to/blob"
|
|
insertTestFile(t, "file-1", "kb-1", "test.pdf", &loc)
|
|
insertTestFile2Document(t, "f2d-1", "file-1", "doc-1")
|
|
|
|
svc := testDocumentService(t)
|
|
|
|
err := svc.deleteDocumentFull("doc-1")
|
|
if err != nil {
|
|
t.Fatalf("deleteDocumentFull failed: %v", err)
|
|
}
|
|
|
|
// Verify f2d mapping deleted
|
|
f2dDAO := dao.NewFile2DocumentDAO()
|
|
mappings, _ := f2dDAO.GetByDocumentID("doc-1")
|
|
if len(mappings) != 0 {
|
|
t.Fatalf("expected 0 f2d mappings, got %d", len(mappings))
|
|
}
|
|
|
|
// Verify file record deleted (hard delete)
|
|
files, _ := dao.NewFileDAO().GetByIDs([]string{"file-1"})
|
|
if len(files) != 0 {
|
|
t.Fatalf("expected 0 files, got %d", len(files))
|
|
}
|
|
}
|
|
|
|
func insertUserTenantForAccessCheck(t *testing.T, userID, tenantID string) {
|
|
t.Helper()
|
|
// Insert user if not exists (email is NOT NULL, password is nullable pointer)
|
|
var existingUser entity.User
|
|
if err := dao.DB.Where("id = ?", userID).First(&existingUser).Error; err != nil {
|
|
u := &entity.User{ID: userID, Nickname: "test-user", Email: userID + "@test.com", Password: sptr("x")}
|
|
if err := dao.DB.Create(u).Error; err != nil {
|
|
t.Fatalf("insert test user: %v", err)
|
|
}
|
|
}
|
|
// Insert tenant if not exists (llm_id, embd_id, asr_id are NOT NULL)
|
|
var existingTenant entity.Tenant
|
|
if err := dao.DB.Where("id = ?", tenantID).First(&existingTenant).Error; err != nil {
|
|
tn := &entity.Tenant{
|
|
ID: tenantID,
|
|
LLMID: "llm-default",
|
|
EmbdID: "embd-default",
|
|
ASRID: "asr-default",
|
|
}
|
|
if err := dao.DB.Create(tn).Error; err != nil {
|
|
t.Fatalf("insert test tenant: %v", err)
|
|
}
|
|
}
|
|
// Insert user_tenant mapping if not exists
|
|
var existingUT entity.UserTenant
|
|
if err := dao.DB.Where("user_id = ? AND tenant_id = ?", userID, tenantID).First(&existingUT).Error; err != nil {
|
|
ut := &entity.UserTenant{
|
|
ID: userID + "_" + tenantID,
|
|
UserID: userID,
|
|
TenantID: tenantID,
|
|
Role: "admin",
|
|
}
|
|
if err := dao.DB.Create(ut).Error; err != nil {
|
|
t.Fatalf("insert test user_tenant: %v", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocuments_DeleteAll(t *testing.T) {
|
|
db := setupServiceTestDB(t)
|
|
pushServiceDB(t, db)
|
|
insertUserTenantForAccessCheck(t, "user-1", "tenant-1")
|
|
|
|
insertTestKB(t, "kb-1", "tenant-1", 3, 100, 50)
|
|
insertTestDoc(t, "doc-1", "kb-1", 30, 10)
|
|
insertTestDoc(t, "doc-2", "kb-1", 40, 20)
|
|
insertTestDoc(t, "doc-3", "kb-1", 30, 20)
|
|
|
|
svc := testDocumentService(t)
|
|
|
|
deleted, err := svc.DeleteDocuments(nil, true, "kb-1", "user-1")
|
|
if err != nil {
|
|
t.Fatalf("DeleteDocuments failed: %v", err)
|
|
}
|
|
if deleted != 3 {
|
|
t.Fatalf("expected 3 deleted, got %d", deleted)
|
|
}
|
|
|
|
// KB counters: doc_num 3→0, token_num 100→0, chunk_num 50→0
|
|
kb, _ := dao.NewKnowledgebaseDAO().GetByID("kb-1")
|
|
if kb.DocNum != 0 {
|
|
t.Fatalf("doc_num: expected 0, got %d", kb.DocNum)
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocuments_ByIDs(t *testing.T) {
|
|
db := setupServiceTestDB(t)
|
|
pushServiceDB(t, db)
|
|
insertUserTenantForAccessCheck(t, "user-1", "tenant-1")
|
|
|
|
insertTestKB(t, "kb-1", "tenant-1", 3, 100, 50)
|
|
insertTestDoc(t, "doc-1", "kb-1", 30, 10)
|
|
insertTestDoc(t, "doc-2", "kb-1", 40, 20)
|
|
insertTestDoc(t, "doc-3", "kb-1", 30, 20) // won't be deleted
|
|
|
|
svc := testDocumentService(t)
|
|
|
|
deleted, err := svc.DeleteDocuments([]string{"doc-1", "doc-2"}, false, "kb-1", "user-1")
|
|
if err != nil {
|
|
t.Fatalf("DeleteDocuments failed: %v", err)
|
|
}
|
|
if deleted != 2 {
|
|
t.Fatalf("expected 2 deleted, got %d", deleted)
|
|
}
|
|
|
|
// doc-3 should still exist
|
|
_, err = dao.NewDocumentDAO().GetByID("doc-3")
|
|
if err != nil {
|
|
t.Fatal("doc-3 should not have been deleted")
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocuments_WrongDataset(t *testing.T) {
|
|
db := setupServiceTestDB(t)
|
|
pushServiceDB(t, db)
|
|
insertUserTenantForAccessCheck(t, "user-1", "tenant-1")
|
|
insertUserTenantForAccessCheck(t, "user-1", "tenant-2")
|
|
|
|
insertTestKB(t, "kb-1", "tenant-1", 1, 10, 5)
|
|
insertTestKB(t, "kb-2", "tenant-2", 1, 10, 5)
|
|
insertTestDoc(t, "doc-1", "kb-2", 10, 5) // belongs to kb-2, not kb-1
|
|
|
|
svc := testDocumentService(t)
|
|
|
|
_, err := svc.DeleteDocuments([]string{"doc-1"}, false, "kb-1", "user-1")
|
|
if err == nil {
|
|
t.Fatal("expected error for doc not belonging to dataset")
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocuments_NotAccessible(t *testing.T) {
|
|
db := setupServiceTestDB(t)
|
|
pushServiceDB(t, db)
|
|
|
|
insertTestKB(t, "kb-1", "tenant-1", 1, 10, 5)
|
|
|
|
svc := testDocumentService(t)
|
|
|
|
// user-1 has no user_tenant entry → accessible returns false
|
|
_, err := svc.DeleteDocuments([]string{"doc-1"}, false, "kb-1", "user-1")
|
|
if err == nil {
|
|
t.Fatal("expected error for inaccessible dataset")
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocuments_EmptyIDs(t *testing.T) {
|
|
db := setupServiceTestDB(t)
|
|
pushServiceDB(t, db)
|
|
insertUserTenantForAccessCheck(t, "user-1", "tenant-1")
|
|
insertTestKB(t, "kb-1", "tenant-1", 0, 0, 0)
|
|
|
|
svc := testDocumentService(t)
|
|
|
|
// Empty ids, no deleteAll → returns 0, no error
|
|
deleted, err := svc.DeleteDocuments([]string{}, false, "kb-1", "user-1")
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
if deleted != 0 {
|
|
t.Fatalf("expected 0 deleted, got %d", deleted)
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocuments_Deduplicate(t *testing.T) {
|
|
db := setupServiceTestDB(t)
|
|
pushServiceDB(t, db)
|
|
insertUserTenantForAccessCheck(t, "user-1", "tenant-1")
|
|
|
|
insertTestKB(t, "kb-1", "tenant-1", 1, 10, 5)
|
|
insertTestDoc(t, "doc-1", "kb-1", 10, 5)
|
|
|
|
svc := testDocumentService(t)
|
|
|
|
deleted, err := svc.DeleteDocuments([]string{"doc-1", "doc-1", "doc-1"}, false, "kb-1", "user-1")
|
|
if err != nil {
|
|
t.Fatalf("DeleteDocuments failed: %v", err)
|
|
}
|
|
// Dedup should result in only 1 delete
|
|
if deleted != 1 {
|
|
t.Fatalf("expected 1 deleted after dedup, got %d", deleted)
|
|
}
|
|
}
|
|
|
|
func TestDeleteDocument_DeligatesToFullCleanup(t *testing.T) {
|
|
db := setupServiceTestDB(t)
|
|
pushServiceDB(t, db)
|
|
|
|
insertTestKB(t, "kb-1", "tenant-1", 1, 5, 2)
|
|
insertTestDoc(t, "doc-1", "kb-1", 5, 2)
|
|
|
|
svc := testDocumentService(t)
|
|
|
|
// Public DeleteDocument should delegate to deleteDocumentFull
|
|
err := svc.DeleteDocument("doc-1")
|
|
if err != nil {
|
|
t.Fatalf("DeleteDocument failed: %v", err)
|
|
}
|
|
|
|
_, err = dao.NewDocumentDAO().GetByID("doc-1")
|
|
if err == nil {
|
|
t.Fatal("document should be deleted")
|
|
}
|
|
}
|