mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577)
## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -138,8 +138,12 @@ func main() {
|
||||
local.InitAdminStatus(1, "admin server not connected")
|
||||
|
||||
// Initialize tokenizer (rag_analyzer)
|
||||
dictPath := os.Getenv("RAGFLOW_DICT_PATH")
|
||||
if dictPath == "" {
|
||||
dictPath = "/usr/share/infinity/resource"
|
||||
}
|
||||
tokenizerCfg := &tokenizer.PoolConfig{
|
||||
DictPath: "/usr/share/infinity/resource",
|
||||
DictPath: dictPath,
|
||||
}
|
||||
if err := tokenizer.Init(tokenizerCfg); err != nil {
|
||||
common.Fatal("Failed to initialize tokenizer", zap.Error(err))
|
||||
|
||||
9
go.mod
9
go.mod
@@ -29,7 +29,7 @@ require (
|
||||
google.golang.org/protobuf v1.36.10
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
gorm.io/driver/mysql v1.5.2
|
||||
gorm.io/gorm v1.25.5
|
||||
gorm.io/gorm v1.25.7
|
||||
)
|
||||
|
||||
require (
|
||||
@@ -59,6 +59,8 @@ require (
|
||||
github.com/fsnotify/fsnotify v1.7.0 // indirect
|
||||
github.com/gabriel-vasile/mimetype v1.4.2 // indirect
|
||||
github.com/gin-contrib/sse v0.1.0 // indirect
|
||||
github.com/glebarez/go-sqlite v1.21.2 // indirect
|
||||
github.com/glebarez/sqlite v1.11.0 // indirect
|
||||
github.com/go-ini/ini v1.67.0 // indirect
|
||||
github.com/go-logr/logr v1.4.3 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
@@ -93,6 +95,7 @@ require (
|
||||
github.com/pelletier/go-toml/v2 v2.1.1 // indirect
|
||||
github.com/philhofer/fwd v1.2.0 // indirect
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/rs/xid v1.6.0 // indirect
|
||||
github.com/sagikazarmark/locafero v0.4.0 // indirect
|
||||
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
|
||||
@@ -122,6 +125,10 @@ require (
|
||||
golang.org/x/text v0.33.0 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect
|
||||
gopkg.in/ini.v1 v1.67.0 // indirect
|
||||
modernc.org/libc v1.22.5 // indirect
|
||||
modernc.org/mathutil v1.5.0 // indirect
|
||||
modernc.org/memory v1.5.0 // indirect
|
||||
modernc.org/sqlite v1.23.1 // indirect
|
||||
)
|
||||
|
||||
replace github.com/infiniflow/infinity-go-sdk => github.com/infiniflow/infinity/go v0.0.0-20260424025959-72028e662929
|
||||
|
||||
17
go.sum
17
go.sum
@@ -87,6 +87,10 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE
|
||||
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
|
||||
github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
|
||||
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
|
||||
github.com/glebarez/go-sqlite v1.21.2 h1:3a6LFC4sKahUunAmynQKLZceZCOzUthkRkEAl9gAXWo=
|
||||
github.com/glebarez/go-sqlite v1.21.2/go.mod h1:sfxdZyhQjTM2Wry3gVYWaW072Ri1WMdWJi0k6+3382k=
|
||||
github.com/glebarez/sqlite v1.11.0 h1:wSG0irqzP6VurnMEpFGer5Li19RpIRi2qvQz++w0GMw=
|
||||
github.com/glebarez/sqlite v1.11.0/go.mod h1:h8/o8j5wiAsqSPoWELDUdJXhjAhsVliSn7bWZjOhrgQ=
|
||||
github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A=
|
||||
github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
@@ -204,6 +208,9 @@ github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:Om
|
||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/redis/go-redis/v9 v9.18.0 h1:pMkxYPkEbMPwRdenAzUNyFNrDgHx9U+DrBabWNfSRQs=
|
||||
github.com/redis/go-redis/v9 v9.18.0/go.mod h1:k3ufPphLU5YXwNTUcCRXGxUoF1fqxnhFQmscfkCoDA0=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU=
|
||||
@@ -375,6 +382,16 @@ gorm.io/driver/mysql v1.5.2/go.mod h1:pQLhh1Ut/WUAySdTHwBpBv6+JKcj+ua4ZFx1QQTBzb
|
||||
gorm.io/gorm v1.25.2-0.20230530020048-26663ab9bf55/go.mod h1:L4uxeKpfBml98NYqVqwAdmV1a2nBtAec/cf3fpucW/k=
|
||||
gorm.io/gorm v1.25.5 h1:zR9lOiiYf09VNh5Q1gphfyia1JpiClIWG9hQaxB/mls=
|
||||
gorm.io/gorm v1.25.5/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
|
||||
gorm.io/gorm v1.25.7 h1:VsD6acwRjz2zFxGO50gPO6AkNs7KKnvfzUjHQhZDz/A=
|
||||
gorm.io/gorm v1.25.7/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
|
||||
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
modernc.org/libc v1.22.5 h1:91BNch/e5B0uPbJFgqbxXuOnxBQjlS//icfQEGmvyjE=
|
||||
modernc.org/libc v1.22.5/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY=
|
||||
modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ=
|
||||
modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E=
|
||||
modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds=
|
||||
modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU=
|
||||
modernc.org/sqlite v1.23.1 h1:nrSBg4aRQQwq59JpvGEQ15tNxoO5pX/kUjcRNwSAGQM=
|
||||
modernc.org/sqlite v1.23.1/go.mod h1:OrDj17Mggn6MhE+iPbBNf7RGKODDE9NFT0f3EwDzJqk=
|
||||
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
|
||||
|
||||
@@ -18,7 +18,7 @@ package rag_analyzer
|
||||
|
||||
/*
|
||||
#cgo CXXFLAGS: -std=c++20 -I${SRCDIR}/..
|
||||
#cgo linux LDFLAGS: ${SRCDIR}/../cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread /usr/lib/x86_64-linux-gnu/libpcre2-8.a
|
||||
#cgo linux LDFLAGS: ${SRCDIR}/../cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread -lpcre2-8
|
||||
// Apple Silicon: Homebrew installs to /opt/homebrew; Intel Macs keep /usr/local.
|
||||
#cgo darwin,arm64 LDFLAGS: ${SRCDIR}/../cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread /opt/homebrew/lib/libpcre2-8.a
|
||||
#cgo darwin,amd64 LDFLAGS: ${SRCDIR}/../cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread /usr/local/lib/libpcre2-8.a
|
||||
|
||||
@@ -206,5 +206,15 @@ func autoMigrateSafely(db *gorm.DB, model interface{}) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
if strings.Contains(errStr, "Error 1091") && strings.Contains(errStr, "Can't DROP") {
|
||||
common.Info("Index/column already dropped, skipping", zap.String("error", errStr))
|
||||
return nil
|
||||
}
|
||||
|
||||
if strings.Contains(errStr, "Error 1138") && strings.Contains(errStr, "Invalid use of NULL") {
|
||||
common.Info("NULL value in existing rows, skipping migration change", zap.String("error", errStr))
|
||||
return nil
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -70,3 +70,15 @@ func (dao *File2DocumentDAO) GetByFileID(fileID string) ([]*entity.File2Document
|
||||
func (dao *File2DocumentDAO) DeleteByFileID(fileID string) error {
|
||||
return DB.Unscoped().Where("file_id = ?", fileID).Delete(&entity.File2Document{}).Error
|
||||
}
|
||||
|
||||
// GetByDocumentID gets file2document mappings by document ID
|
||||
func (dao *File2DocumentDAO) GetByDocumentID(docID string) ([]*entity.File2Document, error) {
|
||||
var mappings []*entity.File2Document
|
||||
err := DB.Where("document_id = ?", docID).Find(&mappings).Error
|
||||
return mappings, err
|
||||
}
|
||||
|
||||
// DeleteByDocumentID deletes file2document mappings by document ID
|
||||
func (dao *File2DocumentDAO) DeleteByDocumentID(docID string) error {
|
||||
return DB.Unscoped().Where("document_id = ?", docID).Delete(&entity.File2Document{}).Error
|
||||
}
|
||||
|
||||
165
internal/dao/file2document_test.go
Normal file
165
internal/dao/file2document_test.go
Normal file
@@ -0,0 +1,165 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package dao
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/glebarez/sqlite"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"ragflow/internal/entity"
|
||||
)
|
||||
|
||||
// setupTestDB initializes an in-memory SQLite database for DAO tests.
|
||||
func setupTestDB(t *testing.T) *gorm.DB {
|
||||
t.Helper()
|
||||
|
||||
db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{
|
||||
TranslateError: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to open sqlite: %v", err)
|
||||
}
|
||||
|
||||
// Migrate only the tables needed for file2document tests
|
||||
if err := db.AutoMigrate(&entity.File2Document{}); err != nil {
|
||||
t.Fatalf("failed to migrate: %v", err)
|
||||
}
|
||||
|
||||
return db
|
||||
}
|
||||
|
||||
// saveDB saves the test DB and restores the original after the test.
|
||||
func pushDB(t *testing.T, testDB *gorm.DB) {
|
||||
t.Helper()
|
||||
orig := DB
|
||||
DB = testDB
|
||||
t.Cleanup(func() {
|
||||
DB = orig
|
||||
})
|
||||
}
|
||||
|
||||
func testFile2Document(t *testing.T, fileID, docID string) *entity.File2Document {
|
||||
t.Helper()
|
||||
f2d := &entity.File2Document{
|
||||
ID: fileID + "_" + docID,
|
||||
FileID: &fileID,
|
||||
DocumentID: &docID,
|
||||
}
|
||||
if err := DB.Create(f2d).Error; err != nil {
|
||||
t.Fatalf("failed to create test record: %v", err)
|
||||
}
|
||||
return f2d
|
||||
}
|
||||
|
||||
func TestFile2DocumentDAO_GetByDocumentID(t *testing.T) {
|
||||
db := setupTestDB(t)
|
||||
pushDB(t, db)
|
||||
dao := NewFile2DocumentDAO()
|
||||
|
||||
f2d := testFile2Document(t, "file-1", "doc-1")
|
||||
|
||||
results, err := dao.GetByDocumentID("doc-1")
|
||||
if err != nil {
|
||||
t.Fatalf("GetByDocumentID failed: %v", err)
|
||||
}
|
||||
if len(results) != 1 {
|
||||
t.Fatalf("expected 1 result, got %d", len(results))
|
||||
}
|
||||
if *results[0].FileID != *f2d.FileID {
|
||||
t.Fatalf("file_id mismatch: expected %q, got %q", *f2d.FileID, *results[0].FileID)
|
||||
}
|
||||
if *results[0].DocumentID != *f2d.DocumentID {
|
||||
t.Fatalf("document_id mismatch: expected %q, got %q", *f2d.DocumentID, *results[0].DocumentID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFile2DocumentDAO_GetByDocumentID_NotFound(t *testing.T) {
|
||||
db := setupTestDB(t)
|
||||
pushDB(t, db)
|
||||
dao := NewFile2DocumentDAO()
|
||||
|
||||
results, err := dao.GetByDocumentID("nonexistent")
|
||||
if err != nil {
|
||||
t.Fatalf("GetByDocumentID failed: %v", err)
|
||||
}
|
||||
if len(results) != 0 {
|
||||
t.Fatalf("expected 0 results, got %d", len(results))
|
||||
}
|
||||
}
|
||||
|
||||
func TestFile2DocumentDAO_GetByDocumentID_MultipleResults(t *testing.T) {
|
||||
db := setupTestDB(t)
|
||||
pushDB(t, db)
|
||||
dao := NewFile2DocumentDAO()
|
||||
|
||||
testFile2Document(t, "file-1", "doc-shared")
|
||||
testFile2Document(t, "file-2", "doc-shared")
|
||||
|
||||
results, err := dao.GetByDocumentID("doc-shared")
|
||||
if err != nil {
|
||||
t.Fatalf("GetByDocumentID failed: %v", err)
|
||||
}
|
||||
if len(results) != 2 {
|
||||
t.Fatalf("expected 2 results, got %d", len(results))
|
||||
}
|
||||
}
|
||||
|
||||
func TestFile2DocumentDAO_DeleteByDocumentID(t *testing.T) {
|
||||
db := setupTestDB(t)
|
||||
pushDB(t, db)
|
||||
dao := NewFile2DocumentDAO()
|
||||
|
||||
testFile2Document(t, "file-1", "doc-del")
|
||||
testFile2Document(t, "file-2", "doc-del")
|
||||
testFile2Document(t, "file-3", "doc-keep")
|
||||
|
||||
err := dao.DeleteByDocumentID("doc-del")
|
||||
if err != nil {
|
||||
t.Fatalf("DeleteByDocumentID failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify deleted records are gone
|
||||
results, err := dao.GetByDocumentID("doc-del")
|
||||
if err != nil {
|
||||
t.Fatalf("GetByDocumentID failed: %v", err)
|
||||
}
|
||||
if len(results) != 0 {
|
||||
t.Fatalf("expected 0 results after delete, got %d", len(results))
|
||||
}
|
||||
|
||||
// Verify other document's records are untouched
|
||||
results, err = dao.GetByDocumentID("doc-keep")
|
||||
if err != nil {
|
||||
t.Fatalf("GetByDocumentID failed: %v", err)
|
||||
}
|
||||
if len(results) != 1 {
|
||||
t.Fatalf("expected 1 untouched result, got %d", len(results))
|
||||
}
|
||||
}
|
||||
|
||||
func TestFile2DocumentDAO_DeleteByDocumentID_Noop(t *testing.T) {
|
||||
db := setupTestDB(t)
|
||||
pushDB(t, db)
|
||||
dao := NewFile2DocumentDAO()
|
||||
|
||||
err := dao.DeleteByDocumentID("nonexistent")
|
||||
if err != nil {
|
||||
t.Fatalf("DeleteByDocumentID should not error on missing: %v", err)
|
||||
}
|
||||
}
|
||||
121
internal/dao/kb_test.go
Normal file
121
internal/dao/kb_test.go
Normal file
@@ -0,0 +1,121 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package dao
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/glebarez/sqlite"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"ragflow/internal/entity"
|
||||
)
|
||||
|
||||
// setupKBTestDB initializes an in-memory SQLite database for KB DAO tests.
|
||||
func setupKBTestDB(t *testing.T) *gorm.DB {
|
||||
t.Helper()
|
||||
|
||||
db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{
|
||||
TranslateError: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to open sqlite: %v", err)
|
||||
}
|
||||
|
||||
// Migrate knowledgebase table
|
||||
if err := db.AutoMigrate(&entity.Knowledgebase{}); err != nil {
|
||||
t.Fatalf("failed to migrate: %v", err)
|
||||
}
|
||||
|
||||
return db
|
||||
}
|
||||
|
||||
func testKnowledgebase(t *testing.T, db *gorm.DB, id string, docNum, tokenNum, chunkNum int64) *entity.Knowledgebase {
|
||||
t.Helper()
|
||||
kb := &entity.Knowledgebase{
|
||||
ID: id,
|
||||
TenantID: "tenant-1",
|
||||
Name: "test-kb-" + id,
|
||||
EmbdID: "embd-1",
|
||||
DocNum: docNum,
|
||||
TokenNum: tokenNum,
|
||||
ChunkNum: chunkNum,
|
||||
Status: stringPtr(string(entity.StatusValid)),
|
||||
}
|
||||
if err := db.Create(kb).Error; err != nil {
|
||||
t.Fatalf("failed to create test kb: %v", err)
|
||||
}
|
||||
return kb
|
||||
}
|
||||
|
||||
func stringPtr(s string) *string {
|
||||
return &s
|
||||
}
|
||||
|
||||
func TestKnowledgebaseDAO_DecreaseDocumentNum(t *testing.T) {
|
||||
db := setupKBTestDB(t)
|
||||
pushDB(t, db)
|
||||
dao := NewKnowledgebaseDAO()
|
||||
|
||||
testKnowledgebase(t, db, "kb-1", 5, 100, 50)
|
||||
|
||||
err := dao.DecreaseDocumentNum("kb-1", 1, 20, 10)
|
||||
if err != nil {
|
||||
t.Fatalf("DecreaseDocumentNum failed: %v", err)
|
||||
}
|
||||
|
||||
kb, err := dao.GetByID("kb-1")
|
||||
if err != nil {
|
||||
t.Fatalf("GetByID failed: %v", err)
|
||||
}
|
||||
if kb.DocNum != 4 {
|
||||
t.Fatalf("doc_num: expected 4, got %d", kb.DocNum)
|
||||
}
|
||||
if kb.TokenNum != 90 {
|
||||
t.Fatalf("token_num: expected 90, got %d", kb.TokenNum)
|
||||
}
|
||||
if kb.ChunkNum != 30 {
|
||||
t.Fatalf("chunk_num: expected 30, got %d", kb.ChunkNum)
|
||||
}
|
||||
}
|
||||
|
||||
func TestKnowledgebaseDAO_DecreaseDocumentNum_ZeroDecrement(t *testing.T) {
|
||||
db := setupKBTestDB(t)
|
||||
pushDB(t, db)
|
||||
dao := NewKnowledgebaseDAO()
|
||||
|
||||
testKnowledgebase(t, db, "kb-2", 3, 60, 15)
|
||||
|
||||
err := dao.DecreaseDocumentNum("kb-2", 0, 0, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("DecreaseDocumentNum failed: %v", err)
|
||||
}
|
||||
|
||||
kb, err := dao.GetByID("kb-2")
|
||||
if err != nil {
|
||||
t.Fatalf("GetByID failed: %v", err)
|
||||
}
|
||||
if kb.DocNum != 3 {
|
||||
t.Fatalf("doc_num should be unchanged: expected 3, got %d", kb.DocNum)
|
||||
}
|
||||
if kb.TokenNum != 60 {
|
||||
t.Fatalf("token_num should be unchanged: expected 60, got %d", kb.TokenNum)
|
||||
}
|
||||
if kb.ChunkNum != 15 {
|
||||
t.Fatalf("chunk_num should be unchanged: expected 15, got %d", kb.ChunkNum)
|
||||
}
|
||||
}
|
||||
@@ -36,9 +36,29 @@ import (
|
||||
|
||||
var IMG_BASE64_PREFIX = "data:image/png;base64,"
|
||||
|
||||
// documentServiceIface defines the DocumentService methods used by DocumentHandler.
|
||||
type documentServiceIface interface {
|
||||
CreateDocument(req *service.CreateDocumentRequest) (*entity.Document, error)
|
||||
GetDocumentByID(id string) (*service.DocumentResponse, error)
|
||||
UpdateDocument(id string, req *service.UpdateDocumentRequest) error
|
||||
DeleteDocument(id string) error
|
||||
DeleteDocuments(ids []string, deleteAll bool, datasetID, userID string) (int, error)
|
||||
ParseDocuments(datasetID, userID string, docIDs []string) ([]*service.ParseDocumentResponse, error)
|
||||
ListDocuments(page, pageSize int) ([]*service.DocumentResponse, int64, error)
|
||||
ListDocumentsByDatasetID(kbID string, page, pageSize int) ([]*entity.DocumentListItem, int64, error)
|
||||
GetDocumentsByAuthorID(authorID, page, pageSize int) ([]*service.DocumentResponse, int64, error)
|
||||
GetThumbnail(docID string) (*service.ThumbnailResponse, error)
|
||||
GetDocumentImage(imageID string) ([]byte, error)
|
||||
GetMetadataSummary(kbID string, docIDs []string) (map[string]interface{}, error)
|
||||
SetDocumentMetadata(docID string, meta map[string]interface{}) error
|
||||
DeleteDocumentMetadata(docID string, keys []string) error
|
||||
DeleteDocumentAllMetadata(docID string) error
|
||||
GetDocumentMetadataByID(docID string) (map[string]interface{}, error)
|
||||
}
|
||||
|
||||
// DocumentHandler document handler
|
||||
type DocumentHandler struct {
|
||||
documentService *service.DocumentService
|
||||
documentService documentServiceIface
|
||||
datasetService *service.DatasetService
|
||||
}
|
||||
|
||||
@@ -258,6 +278,54 @@ func (h *DocumentHandler) DeleteDocument(c *gin.Context) {
|
||||
})
|
||||
}
|
||||
|
||||
// DeleteDocuments handles DELETE /api/v1/datasets/:dataset_id/documents
|
||||
func (h *DocumentHandler) DeleteDocuments(c *gin.Context) {
|
||||
_, errorCode, errorMessage := GetUser(c)
|
||||
if errorCode != common.CodeSuccess {
|
||||
jsonError(c, errorCode, errorMessage)
|
||||
return
|
||||
}
|
||||
|
||||
datasetID := c.Param("dataset_id")
|
||||
if datasetID == "" {
|
||||
jsonError(c, common.CodeArgumentError, "dataset_id is required")
|
||||
return
|
||||
}
|
||||
|
||||
var req struct {
|
||||
IDs *[]string `json:"ids"`
|
||||
DeleteAll bool `json:"delete_all,omitempty"`
|
||||
}
|
||||
if c.Request.ContentLength > 0 {
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
jsonError(c, common.CodeDataError, err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
var ids []string
|
||||
if req.IDs != nil {
|
||||
ids = *req.IDs
|
||||
}
|
||||
if len(ids) > 0 && req.DeleteAll {
|
||||
jsonError(c, common.CodeArgumentError, "should not provide both ids and delete_all")
|
||||
return
|
||||
}
|
||||
if len(ids) == 0 && !req.DeleteAll {
|
||||
jsonError(c, common.CodeArgumentError, "should either provide doc ids or set delete_all(true)")
|
||||
return
|
||||
}
|
||||
|
||||
userID := c.GetString("user_id")
|
||||
deleted, err := h.documentService.DeleteDocuments(ids, req.DeleteAll, datasetID, userID)
|
||||
if err != nil {
|
||||
jsonError(c, common.CodeDataError, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
jsonResponse(c, common.CodeSuccess, map[string]interface{}{"deleted": deleted}, "success")
|
||||
}
|
||||
|
||||
// ListDocuments document list
|
||||
|
||||
func (h *DocumentHandler) ListDocuments(c *gin.Context) {
|
||||
|
||||
246
internal/handler/document_test.go
Normal file
246
internal/handler/document_test.go
Normal file
@@ -0,0 +1,246 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package handler
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"ragflow/internal/common"
|
||||
"ragflow/internal/entity"
|
||||
"ragflow/internal/service"
|
||||
)
|
||||
|
||||
// fakeDocumentService implements documentServiceIface for handler tests.
|
||||
type fakeDocumentService struct {
|
||||
deleted int
|
||||
err error
|
||||
}
|
||||
|
||||
func (f *fakeDocumentService) CreateDocument(req *service.CreateDocumentRequest) (*entity.Document, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (f *fakeDocumentService) GetDocumentByID(id string) (*service.DocumentResponse, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (f *fakeDocumentService) UpdateDocument(id string, req *service.UpdateDocumentRequest) error {
|
||||
return nil
|
||||
}
|
||||
func (f *fakeDocumentService) DeleteDocument(id string) error {
|
||||
return nil
|
||||
}
|
||||
func (f *fakeDocumentService) DeleteDocuments(ids []string, deleteAll bool, datasetID, userID string) (int, error) {
|
||||
return f.deleted, f.err
|
||||
}
|
||||
func (f *fakeDocumentService) ParseDocuments(datasetID, userID string, docIDs []string) ([]*service.ParseDocumentResponse, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (f *fakeDocumentService) ListDocuments(page, pageSize int) ([]*service.DocumentResponse, int64, error) {
|
||||
return nil, 0, nil
|
||||
}
|
||||
func (f *fakeDocumentService) ListDocumentsByDatasetID(kbID string, page, pageSize int) ([]*entity.DocumentListItem, int64, error) {
|
||||
return nil, 0, nil
|
||||
}
|
||||
func (f *fakeDocumentService) GetThumbnail(docID string) (*service.ThumbnailResponse, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (f *fakeDocumentService) GetDocumentImage(imageID string) ([]byte, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (f *fakeDocumentService) GetDocumentsByAuthorID(authorID, page, pageSize int) ([]*service.DocumentResponse, int64, error) {
|
||||
return nil, 0, nil
|
||||
}
|
||||
func (f *fakeDocumentService) GetMetadataSummary(kbID string, docIDs []string) (map[string]interface{}, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (f *fakeDocumentService) SetDocumentMetadata(docID string, meta map[string]interface{}) error {
|
||||
return nil
|
||||
}
|
||||
func (f *fakeDocumentService) DeleteDocumentMetadata(docID string, keys []string) error {
|
||||
return nil
|
||||
}
|
||||
func (f *fakeDocumentService) DeleteDocumentAllMetadata(docID string) error {
|
||||
return nil
|
||||
}
|
||||
func (f *fakeDocumentService) GetDocumentMetadataByID(docID string) (map[string]interface{}, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func setupGinContextWithUser(method, path, body string) (*gin.Context, *httptest.ResponseRecorder) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(method, path, strings.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = req
|
||||
c.Set("user", &entity.User{ID: "user-1"})
|
||||
c.Set("user_id", "user-1")
|
||||
return c, w
|
||||
}
|
||||
|
||||
func TestDeleteDocumentsHandler_Success(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
fake := &fakeDocumentService{deleted: 3}
|
||||
h := &DocumentHandler{
|
||||
documentService: fake,
|
||||
datasetService: service.NewDatasetService(),
|
||||
}
|
||||
|
||||
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1", "doc-2", "doc-3"]}`)
|
||||
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
|
||||
|
||||
h.DeleteDocuments(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["code"] != float64(common.CodeSuccess) {
|
||||
t.Fatalf("expected code 0, got %v", resp["code"])
|
||||
}
|
||||
data := resp["data"].(map[string]interface{})
|
||||
if data["deleted"] != float64(3) {
|
||||
t.Fatalf("expected deleted=3, got %v", data["deleted"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocumentsHandler_DeleteAll(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
fake := &fakeDocumentService{deleted: 5}
|
||||
h := &DocumentHandler{
|
||||
documentService: fake,
|
||||
datasetService: service.NewDatasetService(),
|
||||
}
|
||||
|
||||
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"delete_all": true}`)
|
||||
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
|
||||
|
||||
h.DeleteDocuments(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocumentsHandler_MutuallyExclusive(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
fake := &fakeDocumentService{}
|
||||
h := &DocumentHandler{
|
||||
documentService: fake,
|
||||
datasetService: service.NewDatasetService(),
|
||||
}
|
||||
|
||||
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1"], "delete_all": true}`)
|
||||
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
|
||||
|
||||
h.DeleteDocuments(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
code, _ := resp["code"].(float64)
|
||||
if code == float64(common.CodeSuccess) {
|
||||
t.Fatal("expected error for mutually exclusive ids+delete_all")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocumentsHandler_NoIDsNoDeleteAll(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
fake := &fakeDocumentService{}
|
||||
h := &DocumentHandler{
|
||||
documentService: fake,
|
||||
datasetService: service.NewDatasetService(),
|
||||
}
|
||||
|
||||
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{}`)
|
||||
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
|
||||
|
||||
h.DeleteDocuments(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
code, _ := resp["code"].(float64)
|
||||
if code == float64(common.CodeSuccess) {
|
||||
t.Fatal("expected error for no ids and no delete_all")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocumentsHandler_ServiceError(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
fake := &fakeDocumentService{err: fmt.Errorf("permission denied")}
|
||||
h := &DocumentHandler{
|
||||
documentService: fake,
|
||||
datasetService: service.NewDatasetService(),
|
||||
}
|
||||
|
||||
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets/ds-1/documents", `{"ids": ["doc-1"]}`)
|
||||
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}}
|
||||
|
||||
h.DeleteDocuments(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
code, _ := resp["code"].(float64)
|
||||
if code == float64(common.CodeSuccess) {
|
||||
t.Fatal("expected error code")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocumentsHandler_MissingDatasetID(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
fake := &fakeDocumentService{}
|
||||
h := &DocumentHandler{
|
||||
documentService: fake,
|
||||
datasetService: service.NewDatasetService(),
|
||||
}
|
||||
|
||||
c, w := setupGinContextWithUser("DELETE", "/api/v1/datasets//documents", `{"ids": ["doc-1"]}`)
|
||||
|
||||
h.DeleteDocuments(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
code, _ := resp["code"].(float64)
|
||||
if code == float64(common.CodeSuccess) {
|
||||
t.Fatal("expected error for missing dataset_id")
|
||||
}
|
||||
}
|
||||
@@ -229,6 +229,7 @@ func (r *Router) Setup(engine *gin.Engine) {
|
||||
|
||||
// Dataset documents
|
||||
datasets.GET("/:dataset_id/documents", r.documentHandler.ListDocuments)
|
||||
datasets.DELETE("/:dataset_id/documents", r.documentHandler.DeleteDocuments)
|
||||
|
||||
// Dataset document chunk
|
||||
datasets.GET("/:dataset_id/documents/:document_id/chunks/:chunk_id", r.chunkHandler.Get)
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"ragflow/internal/common"
|
||||
@@ -37,13 +38,15 @@ import (
|
||||
|
||||
// DocumentService document service
|
||||
type DocumentService struct {
|
||||
documentDAO *dao.DocumentDAO
|
||||
kbDAO *dao.KnowledgebaseDAO
|
||||
ingestionTaskDAO *dao.IngestionDAO
|
||||
ingestionLogDAO *dao.IngestionLogDAO
|
||||
docEngine engine.DocEngine
|
||||
engineType server.EngineType
|
||||
metadataSvc *MetadataService
|
||||
documentDAO *dao.DocumentDAO
|
||||
kbDAO *dao.KnowledgebaseDAO
|
||||
ingestionTaskDAO *dao.IngestionDAO
|
||||
ingestionLogDAO *dao.IngestionLogDAO
|
||||
docEngine engine.DocEngine
|
||||
engineType server.EngineType
|
||||
metadataSvc *MetadataService
|
||||
taskDAO *dao.TaskDAO
|
||||
file2DocumentDAO *dao.File2DocumentDAO
|
||||
}
|
||||
|
||||
// NewDocumentService create document service
|
||||
@@ -57,6 +60,8 @@ func NewDocumentService() *DocumentService {
|
||||
docEngine: engine.Get(),
|
||||
engineType: cfg.DocEngine.Type,
|
||||
metadataSvc: NewMetadataService(),
|
||||
taskDAO: dao.NewTaskDAO(),
|
||||
file2DocumentDAO: dao.NewFile2DocumentDAO(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -184,9 +189,157 @@ func (s *DocumentService) UpdateDocument(id string, req *UpdateDocumentRequest)
|
||||
return s.documentDAO.Update(document)
|
||||
}
|
||||
|
||||
// DeleteDocument delete document
|
||||
// DeleteDocument delete document — delegates to full cleanup logic.
|
||||
func (s *DocumentService) DeleteDocument(id string) error {
|
||||
return s.documentDAO.Delete(id)
|
||||
return s.deleteDocumentFull(id)
|
||||
}
|
||||
|
||||
// DeleteDocuments deletes multiple documents under a dataset.
|
||||
//
|
||||
// ids: specific document IDs; deleteAll: delete all docs in the dataset.
|
||||
// Returns the number of successfully deleted documents.
|
||||
func (s *DocumentService) DeleteDocuments(ids []string, deleteAll bool, datasetID, userID string) (int, error) {
|
||||
// 1. Check dataset is accessible by the user
|
||||
if !s.kbDAO.Accessible(datasetID, userID) {
|
||||
return 0, fmt.Errorf("You don't own the dataset %s.", datasetID)
|
||||
}
|
||||
|
||||
// 2. Resolve document IDs
|
||||
if deleteAll {
|
||||
if err := dao.DB.Model(&entity.Document{}).
|
||||
Where("kb_id = ?", datasetID).
|
||||
Pluck("id", &ids).Error; err != nil {
|
||||
return 0, fmt.Errorf("failed to query documents: %w", err)
|
||||
}
|
||||
}
|
||||
if len(ids) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// 3. Deduplicate (before validation so dup count doesn't matter)
|
||||
ids = common.Deduplicate(ids)
|
||||
|
||||
// 4. Validate IDs belong to this dataset (only for explicit ids; deleteAll is already scoped)
|
||||
if !deleteAll {
|
||||
docs, err := s.documentDAO.GetByIDs(ids)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to fetch documents: %w", err)
|
||||
}
|
||||
if len(docs) != len(ids) {
|
||||
return 0, fmt.Errorf("some document IDs not found in dataset %s", datasetID)
|
||||
}
|
||||
var invalid []string
|
||||
for _, d := range docs {
|
||||
if d.KbID != datasetID {
|
||||
invalid = append(invalid, d.ID)
|
||||
}
|
||||
}
|
||||
if len(invalid) > 0 {
|
||||
return 0, fmt.Errorf("These documents do not belong to dataset %s: %v", datasetID, invalid)
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Delete each document (non-critical failures are tolerated per doc)
|
||||
deleted := 0
|
||||
for _, docID := range ids {
|
||||
if err := s.deleteDocumentFull(docID); err != nil {
|
||||
common.Logger.Warn(fmt.Sprintf("DeleteDocuments: failed to delete %s: %v", docID, err))
|
||||
continue
|
||||
}
|
||||
deleted++
|
||||
}
|
||||
|
||||
return deleted, nil
|
||||
}
|
||||
|
||||
// deleteDocumentFull performs full document cleanup:
|
||||
// 1. Delete tasks from DB
|
||||
// 2. Delete chunks from document engine
|
||||
// 3. Delete metadata from document engine
|
||||
// 4. Hard-delete document row + decrement KB counters
|
||||
// 5. Delete file2document mapping + file record + storage blob
|
||||
//
|
||||
// Non-critical failures are tolerated (logged and continue).
|
||||
func (s *DocumentService) deleteDocumentFull(docID string) error {
|
||||
doc, err := s.documentDAO.GetByID(docID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("document not found: %w", err)
|
||||
}
|
||||
kbID := doc.KbID
|
||||
tokenNum := doc.TokenNum
|
||||
chunkIDNum := doc.ChunkNum
|
||||
|
||||
// Resolve tenant ID for engine index name
|
||||
kb, err := s.kbDAO.GetByID(kbID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("knowledgebase not found: %w", err)
|
||||
}
|
||||
tenantID := kb.TenantID
|
||||
|
||||
// 1. Delete tasks from DB
|
||||
if _, delErr := s.taskDAO.DeleteByDocIDs([]string{docID}); delErr != nil {
|
||||
common.Logger.Warn(fmt.Sprintf("deleteDocumentFull: failed to delete tasks for %s: %v", docID, delErr))
|
||||
}
|
||||
|
||||
// 2. Delete chunks from document engine
|
||||
if s.docEngine != nil {
|
||||
ctx := context.Background()
|
||||
indexName := fmt.Sprintf("ragflow_%s", tenantID)
|
||||
if _, delErr := s.docEngine.DeleteChunks(ctx, map[string]interface{}{"doc_id": docID}, indexName, kbID); delErr != nil {
|
||||
common.Logger.Warn(fmt.Sprintf("deleteDocumentFull: failed to delete chunks for %s: %v", docID, delErr))
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Delete metadata from document engine (skip if engine not available)
|
||||
if s.docEngine != nil && s.metadataSvc != nil {
|
||||
_ = s.DeleteDocumentAllMetadata(docID) // logs internally
|
||||
}
|
||||
|
||||
// 4. Hard-delete document + decrement KB counters
|
||||
if delErr := s.documentDAO.Delete(docID); delErr != nil {
|
||||
return fmt.Errorf("failed to delete document %s: %w", docID, delErr)
|
||||
}
|
||||
if decErr := s.kbDAO.DecreaseDocumentNum(kbID, 1, chunkIDNum, tokenNum); decErr != nil {
|
||||
common.Logger.Warn(fmt.Sprintf("deleteDocumentFull: failed to decrement KB counters for %s: %v", kbID, decErr))
|
||||
}
|
||||
|
||||
// 5. Clean up file2document mapping, file record, and storage blob
|
||||
mappings, mapErr := s.file2DocumentDAO.GetByDocumentID(docID)
|
||||
if mapErr != nil {
|
||||
common.Logger.Warn(fmt.Sprintf("deleteDocumentFull: failed to get file2document mappings for %s: %v", docID, mapErr))
|
||||
}
|
||||
for _, m := range mappings {
|
||||
if m.FileID == nil {
|
||||
continue
|
||||
}
|
||||
fileID := *m.FileID
|
||||
// Delete the mapping
|
||||
if delErr := s.file2DocumentDAO.DeleteByDocumentID(docID); delErr != nil {
|
||||
common.Logger.Warn(fmt.Sprintf("deleteDocumentFull: failed to delete f2d mapping for %s: %v", docID, delErr))
|
||||
}
|
||||
// Get file to remove storage blob
|
||||
fileDAO := dao.NewFileDAO()
|
||||
file, fErr := fileDAO.GetByID(fileID)
|
||||
if fErr != nil || file == nil {
|
||||
common.Logger.Warn(fmt.Sprintf("deleteDocumentFull: file not found %s: %v", fileID, fErr))
|
||||
continue
|
||||
}
|
||||
// Delete file record
|
||||
if _, delErr := fileDAO.DeleteByIDs([]string{fileID}); delErr != nil {
|
||||
common.Logger.Warn(fmt.Sprintf("deleteDocumentFull: failed to delete file %s: %v", fileID, delErr))
|
||||
}
|
||||
// Delete storage blob
|
||||
if file.Location != nil && *file.Location != "" {
|
||||
storageImpl := storage.GetStorageFactory().GetStorage()
|
||||
if storageImpl != nil {
|
||||
if rmErr := storageImpl.Remove(file.ParentID, *file.Location); rmErr != nil {
|
||||
common.Logger.Warn(fmt.Sprintf("deleteDocumentFull: failed to remove blob %s/%s: %v", file.ParentID, *file.Location, rmErr))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ListDocuments list documents
|
||||
|
||||
428
internal/service/document_test.go
Normal file
428
internal/service/document_test.go
Normal file
@@ -0,0 +1,428 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package service
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/glebarez/sqlite"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"ragflow/internal/dao"
|
||||
"ragflow/internal/entity"
|
||||
)
|
||||
|
||||
// setupServiceTestDB initializes an in-memory SQLite database for service tests.
|
||||
func setupServiceTestDB(t *testing.T) *gorm.DB {
|
||||
t.Helper()
|
||||
|
||||
db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{
|
||||
TranslateError: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to open sqlite: %v", err)
|
||||
}
|
||||
|
||||
// Migrate tables used by deleteDocumentFull + DeleteDocuments
|
||||
if err := db.AutoMigrate(
|
||||
&entity.Document{},
|
||||
&entity.Knowledgebase{},
|
||||
&entity.Task{},
|
||||
&entity.File2Document{},
|
||||
&entity.File{},
|
||||
&entity.User{},
|
||||
&entity.Tenant{},
|
||||
&entity.UserTenant{},
|
||||
); err != nil {
|
||||
t.Fatalf("failed to migrate: %v", err)
|
||||
}
|
||||
|
||||
return db
|
||||
}
|
||||
|
||||
// pushServiceDB swaps dao.DB for the test and restores after.
|
||||
func pushServiceDB(t *testing.T, testDB *gorm.DB) {
|
||||
t.Helper()
|
||||
orig := dao.DB
|
||||
dao.DB = testDB
|
||||
t.Cleanup(func() {
|
||||
dao.DB = orig
|
||||
})
|
||||
}
|
||||
|
||||
func testDocumentService(t *testing.T) *DocumentService {
|
||||
t.Helper()
|
||||
// Use nil engine since we test DB cleanup only; engine ops are nil-guarded.
|
||||
return &DocumentService{
|
||||
documentDAO: dao.NewDocumentDAO(),
|
||||
kbDAO: dao.NewKnowledgebaseDAO(),
|
||||
taskDAO: dao.NewTaskDAO(),
|
||||
file2DocumentDAO: dao.NewFile2DocumentDAO(),
|
||||
docEngine: nil,
|
||||
metadataSvc: nil, // nil engine → metadata ops skipped
|
||||
}
|
||||
}
|
||||
|
||||
// sptr returns a pointer to the given string.
|
||||
func sptr(s string) *string { return &s }
|
||||
|
||||
func insertTestKB(t *testing.T, id, tenantID string, docNum, tokenNum, chunkNum int64) {
|
||||
t.Helper()
|
||||
kb := &entity.Knowledgebase{
|
||||
ID: id,
|
||||
TenantID: tenantID,
|
||||
Name: "test-kb",
|
||||
EmbdID: "embd-1",
|
||||
CreatedBy: "user-1",
|
||||
Permission: string(entity.TenantPermissionMe),
|
||||
DocNum: docNum,
|
||||
TokenNum: tokenNum,
|
||||
ChunkNum: chunkNum,
|
||||
Status: sptr(string(entity.StatusValid)),
|
||||
}
|
||||
if err := dao.DB.Create(kb).Error; err != nil {
|
||||
t.Fatalf("insert test kb: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func insertTestDoc(t *testing.T, id, kbID string, tokenNum, chunkNum int64) {
|
||||
t.Helper()
|
||||
doc := &entity.Document{
|
||||
ID: id,
|
||||
KbID: kbID,
|
||||
ParserID: "naive",
|
||||
ParserConfig: entity.JSONMap{},
|
||||
TokenNum: tokenNum,
|
||||
ChunkNum: chunkNum,
|
||||
Suffix: ".txt",
|
||||
Status: sptr("1"),
|
||||
}
|
||||
if err := dao.DB.Create(doc).Error; err != nil {
|
||||
t.Fatalf("insert test doc: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func insertTestTask(t *testing.T, id, docID string) {
|
||||
t.Helper()
|
||||
task := &entity.Task{
|
||||
ID: id,
|
||||
DocID: docID,
|
||||
}
|
||||
if err := dao.DB.Create(task).Error; err != nil {
|
||||
t.Fatalf("insert test task: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func insertTestFile2Document(t *testing.T, id, fileID, docID string) {
|
||||
t.Helper()
|
||||
f2d := &entity.File2Document{
|
||||
ID: id,
|
||||
FileID: &fileID,
|
||||
DocumentID: &docID,
|
||||
}
|
||||
if err := dao.DB.Create(f2d).Error; err != nil {
|
||||
t.Fatalf("insert test f2d: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func insertTestFile(t *testing.T, id, parentID, name string, location *string) {
|
||||
t.Helper()
|
||||
srcType := string(entity.FileSourceKnowledgebase)
|
||||
f := &entity.File{
|
||||
ID: id,
|
||||
ParentID: parentID,
|
||||
TenantID: "tenant-1",
|
||||
CreatedBy: "user-1",
|
||||
Name: name,
|
||||
Location: location,
|
||||
SourceType: srcType,
|
||||
Type: "pdf",
|
||||
}
|
||||
if err := dao.DB.Create(f).Error; err != nil {
|
||||
t.Fatalf("insert test file: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocumentFull_Basic(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
|
||||
insertTestKB(t, "kb-1", "tenant-1", 3, 100, 50)
|
||||
insertTestDoc(t, "doc-1", "kb-1", 30, 10)
|
||||
insertTestTask(t, "task-1", "doc-1")
|
||||
|
||||
svc := testDocumentService(t)
|
||||
|
||||
err := svc.deleteDocumentFull("doc-1")
|
||||
if err != nil {
|
||||
t.Fatalf("deleteDocumentFull failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify document deleted
|
||||
_, err = dao.NewDocumentDAO().GetByID("doc-1")
|
||||
if err == nil {
|
||||
t.Fatal("document should be deleted but it still exists")
|
||||
}
|
||||
|
||||
// Verify tasks deleted
|
||||
tasks, _ := dao.NewTaskDAO().GetAllTasks()
|
||||
if len(tasks) != 0 {
|
||||
t.Fatalf("expected 0 tasks, got %d", len(tasks))
|
||||
}
|
||||
|
||||
// Verify KB counters decremented
|
||||
kb, err := dao.NewKnowledgebaseDAO().GetByID("kb-1")
|
||||
if err != nil {
|
||||
t.Fatalf("kb not found: %v", err)
|
||||
}
|
||||
if kb.DocNum != 2 {
|
||||
t.Fatalf("doc_num: expected 2, got %d", kb.DocNum)
|
||||
}
|
||||
if kb.TokenNum != 70 {
|
||||
t.Fatalf("token_num: expected 70, got %d", kb.TokenNum)
|
||||
}
|
||||
if kb.ChunkNum != 40 {
|
||||
t.Fatalf("chunk_num: expected 40, got %d", kb.ChunkNum)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocumentFull_NotFound(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
|
||||
svc := testDocumentService(t)
|
||||
|
||||
err := svc.deleteDocumentFull("nonexistent")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for nonexistent document")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocumentFull_CleansUpFile2Document(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
|
||||
insertTestKB(t, "kb-1", "tenant-1", 1, 10, 5)
|
||||
insertTestDoc(t, "doc-1", "kb-1", 10, 5)
|
||||
loc := "path/to/blob"
|
||||
insertTestFile(t, "file-1", "kb-1", "test.pdf", &loc)
|
||||
insertTestFile2Document(t, "f2d-1", "file-1", "doc-1")
|
||||
|
||||
svc := testDocumentService(t)
|
||||
|
||||
err := svc.deleteDocumentFull("doc-1")
|
||||
if err != nil {
|
||||
t.Fatalf("deleteDocumentFull failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify f2d mapping deleted
|
||||
f2dDAO := dao.NewFile2DocumentDAO()
|
||||
mappings, _ := f2dDAO.GetByDocumentID("doc-1")
|
||||
if len(mappings) != 0 {
|
||||
t.Fatalf("expected 0 f2d mappings, got %d", len(mappings))
|
||||
}
|
||||
|
||||
// Verify file record deleted (hard delete)
|
||||
files, _ := dao.NewFileDAO().GetByIDs([]string{"file-1"})
|
||||
if len(files) != 0 {
|
||||
t.Fatalf("expected 0 files, got %d", len(files))
|
||||
}
|
||||
}
|
||||
|
||||
func insertUserTenantForAccessCheck(t *testing.T, userID, tenantID string) {
|
||||
t.Helper()
|
||||
// Insert user if not exists (email is NOT NULL, password is nullable pointer)
|
||||
var existingUser entity.User
|
||||
if err := dao.DB.Where("id = ?", userID).First(&existingUser).Error; err != nil {
|
||||
u := &entity.User{ID: userID, Nickname: "test-user", Email: userID + "@test.com", Password: sptr("x")}
|
||||
if err := dao.DB.Create(u).Error; err != nil {
|
||||
t.Fatalf("insert test user: %v", err)
|
||||
}
|
||||
}
|
||||
// Insert tenant if not exists (llm_id, embd_id, asr_id are NOT NULL)
|
||||
var existingTenant entity.Tenant
|
||||
if err := dao.DB.Where("id = ?", tenantID).First(&existingTenant).Error; err != nil {
|
||||
tn := &entity.Tenant{
|
||||
ID: tenantID,
|
||||
LLMID: "llm-default",
|
||||
EmbdID: "embd-default",
|
||||
ASRID: "asr-default",
|
||||
}
|
||||
if err := dao.DB.Create(tn).Error; err != nil {
|
||||
t.Fatalf("insert test tenant: %v", err)
|
||||
}
|
||||
}
|
||||
// Insert user_tenant mapping if not exists
|
||||
var existingUT entity.UserTenant
|
||||
if err := dao.DB.Where("user_id = ? AND tenant_id = ?", userID, tenantID).First(&existingUT).Error; err != nil {
|
||||
ut := &entity.UserTenant{
|
||||
ID: userID + "_" + tenantID,
|
||||
UserID: userID,
|
||||
TenantID: tenantID,
|
||||
Role: "admin",
|
||||
}
|
||||
if err := dao.DB.Create(ut).Error; err != nil {
|
||||
t.Fatalf("insert test user_tenant: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocuments_DeleteAll(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
insertUserTenantForAccessCheck(t, "user-1", "tenant-1")
|
||||
|
||||
insertTestKB(t, "kb-1", "tenant-1", 3, 100, 50)
|
||||
insertTestDoc(t, "doc-1", "kb-1", 30, 10)
|
||||
insertTestDoc(t, "doc-2", "kb-1", 40, 20)
|
||||
insertTestDoc(t, "doc-3", "kb-1", 30, 20)
|
||||
|
||||
svc := testDocumentService(t)
|
||||
|
||||
deleted, err := svc.DeleteDocuments(nil, true, "kb-1", "user-1")
|
||||
if err != nil {
|
||||
t.Fatalf("DeleteDocuments failed: %v", err)
|
||||
}
|
||||
if deleted != 3 {
|
||||
t.Fatalf("expected 3 deleted, got %d", deleted)
|
||||
}
|
||||
|
||||
// KB counters: doc_num 3→0, token_num 100→0, chunk_num 50→0
|
||||
kb, _ := dao.NewKnowledgebaseDAO().GetByID("kb-1")
|
||||
if kb.DocNum != 0 {
|
||||
t.Fatalf("doc_num: expected 0, got %d", kb.DocNum)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocuments_ByIDs(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
insertUserTenantForAccessCheck(t, "user-1", "tenant-1")
|
||||
|
||||
insertTestKB(t, "kb-1", "tenant-1", 3, 100, 50)
|
||||
insertTestDoc(t, "doc-1", "kb-1", 30, 10)
|
||||
insertTestDoc(t, "doc-2", "kb-1", 40, 20)
|
||||
insertTestDoc(t, "doc-3", "kb-1", 30, 20) // won't be deleted
|
||||
|
||||
svc := testDocumentService(t)
|
||||
|
||||
deleted, err := svc.DeleteDocuments([]string{"doc-1", "doc-2"}, false, "kb-1", "user-1")
|
||||
if err != nil {
|
||||
t.Fatalf("DeleteDocuments failed: %v", err)
|
||||
}
|
||||
if deleted != 2 {
|
||||
t.Fatalf("expected 2 deleted, got %d", deleted)
|
||||
}
|
||||
|
||||
// doc-3 should still exist
|
||||
_, err = dao.NewDocumentDAO().GetByID("doc-3")
|
||||
if err != nil {
|
||||
t.Fatal("doc-3 should not have been deleted")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocuments_WrongDataset(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
insertUserTenantForAccessCheck(t, "user-1", "tenant-1")
|
||||
insertUserTenantForAccessCheck(t, "user-1", "tenant-2")
|
||||
|
||||
insertTestKB(t, "kb-1", "tenant-1", 1, 10, 5)
|
||||
insertTestKB(t, "kb-2", "tenant-2", 1, 10, 5)
|
||||
insertTestDoc(t, "doc-1", "kb-2", 10, 5) // belongs to kb-2, not kb-1
|
||||
|
||||
svc := testDocumentService(t)
|
||||
|
||||
_, err := svc.DeleteDocuments([]string{"doc-1"}, false, "kb-1", "user-1")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for doc not belonging to dataset")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocuments_NotAccessible(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
|
||||
insertTestKB(t, "kb-1", "tenant-1", 1, 10, 5)
|
||||
|
||||
svc := testDocumentService(t)
|
||||
|
||||
// user-1 has no user_tenant entry → accessible returns false
|
||||
_, err := svc.DeleteDocuments([]string{"doc-1"}, false, "kb-1", "user-1")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for inaccessible dataset")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocuments_EmptyIDs(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
insertUserTenantForAccessCheck(t, "user-1", "tenant-1")
|
||||
insertTestKB(t, "kb-1", "tenant-1", 0, 0, 0)
|
||||
|
||||
svc := testDocumentService(t)
|
||||
|
||||
// Empty ids, no deleteAll → returns 0, no error
|
||||
deleted, err := svc.DeleteDocuments([]string{}, false, "kb-1", "user-1")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if deleted != 0 {
|
||||
t.Fatalf("expected 0 deleted, got %d", deleted)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocuments_Deduplicate(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
insertUserTenantForAccessCheck(t, "user-1", "tenant-1")
|
||||
|
||||
insertTestKB(t, "kb-1", "tenant-1", 1, 10, 5)
|
||||
insertTestDoc(t, "doc-1", "kb-1", 10, 5)
|
||||
|
||||
svc := testDocumentService(t)
|
||||
|
||||
deleted, err := svc.DeleteDocuments([]string{"doc-1", "doc-1", "doc-1"}, false, "kb-1", "user-1")
|
||||
if err != nil {
|
||||
t.Fatalf("DeleteDocuments failed: %v", err)
|
||||
}
|
||||
// Dedup should result in only 1 delete
|
||||
if deleted != 1 {
|
||||
t.Fatalf("expected 1 deleted after dedup, got %d", deleted)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteDocument_DeligatesToFullCleanup(t *testing.T) {
|
||||
db := setupServiceTestDB(t)
|
||||
pushServiceDB(t, db)
|
||||
|
||||
insertTestKB(t, "kb-1", "tenant-1", 1, 5, 2)
|
||||
insertTestDoc(t, "doc-1", "kb-1", 5, 2)
|
||||
|
||||
svc := testDocumentService(t)
|
||||
|
||||
// Public DeleteDocument should delegate to deleteDocumentFull
|
||||
err := svc.DeleteDocument("doc-1")
|
||||
if err != nil {
|
||||
t.Fatalf("DeleteDocument failed: %v", err)
|
||||
}
|
||||
|
||||
_, err = dao.NewDocumentDAO().GetByID("doc-1")
|
||||
if err == nil {
|
||||
t.Fatal("document should be deleted")
|
||||
}
|
||||
}
|
||||
@@ -353,7 +353,7 @@ func (s *LLMService) SetAPIKey(tenantID string, req *SetAPIKeyRequest) (*SetAPIK
|
||||
if req.Verify {
|
||||
return &SetAPIKeyResult{Message: msg, Success: false}, nil
|
||||
}
|
||||
return nil, fmt.Errorf(msg)
|
||||
return nil, fmt.Errorf("%s", msg)
|
||||
}
|
||||
|
||||
llmConfig := map[string]interface{}{
|
||||
|
||||
Reference in New Issue
Block a user