mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-04 18:45:38 +08:00
### Summary

As the title says, this PR fixes dataset index task creation failing with `unsupported data type: entity.JSONMap` when loading the document chunking config.

#### Issues:

```
2026/06/30 15:19:40 /home/infiniflow/Documents/development/ragflow/internal/dao/document.go:162 [error] unsupported data type: ragflow/internal/entity.JSONMap
```

#### Changes:

+ Adds the missing GORM `type:longtext` tag to `ParserConfig` in `DocumentDAO.GetChunkingConfig`.
+ Adds a DAO regression test covering `GetChunkingConfig` joins across document, knowledgebase, and tenant while scanning `parser_config`.
228 lines
6.7 KiB
Go
228 lines
6.7 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
package dao
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/glebarez/sqlite"
|
|
"gorm.io/gorm"
|
|
|
|
"ragflow/internal/entity"
|
|
)
|
|
|
|
func setupDocumentTestDB(t *testing.T) *gorm.DB {
|
|
t.Helper()
|
|
db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{
|
|
TranslateError: true,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("failed to open sqlite: %v", err)
|
|
}
|
|
if err := db.AutoMigrate(
|
|
&entity.Document{},
|
|
&entity.Knowledgebase{},
|
|
&entity.Tenant{},
|
|
); err != nil {
|
|
t.Fatalf("failed to migrate: %v", err)
|
|
}
|
|
return db
|
|
}
|
|
|
|
func pushDocDB(t *testing.T, testDB *gorm.DB) {
|
|
t.Helper()
|
|
orig := DB
|
|
DB = testDB
|
|
t.Cleanup(func() { DB = orig })
|
|
}
|
|
|
|
func TestDocumentGetByIDs_Success(t *testing.T) {
|
|
db := setupDocumentTestDB(t)
|
|
pushDocDB(t, db)
|
|
|
|
db.Create(&entity.Document{ID: "doc1", KbID: "kb1", Name: sp("Doc 1"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}})
|
|
db.Create(&entity.Document{ID: "doc2", KbID: "kb1", Name: sp("Doc 2"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}})
|
|
db.Create(&entity.Document{ID: "doc3", KbID: "kb2", Name: sp("Doc 3"), CreatedBy: "user2", ParserConfig: entity.JSONMap{}})
|
|
|
|
dao := NewDocumentDAO()
|
|
docs, err := dao.GetByIDs([]string{"doc1", "doc3"})
|
|
if err != nil {
|
|
t.Fatalf("GetByIDs failed: %v", err)
|
|
}
|
|
if len(docs) != 2 {
|
|
t.Fatalf("expected 2 docs, got %d", len(docs))
|
|
}
|
|
|
|
ids := make(map[string]bool)
|
|
for _, d := range docs {
|
|
ids[d.ID] = true
|
|
}
|
|
if !ids["doc1"] || !ids["doc3"] {
|
|
t.Errorf("expected doc1 and doc3, got %v", ids)
|
|
}
|
|
}
|
|
|
|
func TestDocumentGetByIDs_EmptyIDs(t *testing.T) {
|
|
db := setupDocumentTestDB(t)
|
|
pushDocDB(t, db)
|
|
|
|
dao := NewDocumentDAO()
|
|
docs, err := dao.GetByIDs([]string{})
|
|
if err != nil {
|
|
t.Fatalf("GetByIDs failed: %v", err)
|
|
}
|
|
if docs != nil {
|
|
t.Errorf("expected nil for empty IDs, got %v", docs)
|
|
}
|
|
}
|
|
|
|
func TestDocumentGetByIDs_NilIDs(t *testing.T) {
|
|
db := setupDocumentTestDB(t)
|
|
pushDocDB(t, db)
|
|
|
|
dao := NewDocumentDAO()
|
|
docs, err := dao.GetByIDs(nil)
|
|
if err != nil {
|
|
t.Fatalf("GetByIDs failed: %v", err)
|
|
}
|
|
if docs != nil {
|
|
t.Errorf("expected nil for nil IDs, got %v", docs)
|
|
}
|
|
}
|
|
|
|
func TestDocumentGetByIDs_NoMatch(t *testing.T) {
|
|
db := setupDocumentTestDB(t)
|
|
pushDocDB(t, db)
|
|
|
|
db.Create(&entity.Document{ID: "doc1", KbID: "kb1", Name: sp("Doc 1"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}})
|
|
|
|
dao := NewDocumentDAO()
|
|
docs, err := dao.GetByIDs([]string{"nonexistent"})
|
|
if err != nil {
|
|
t.Fatalf("GetByIDs failed: %v", err)
|
|
}
|
|
if len(docs) != 0 {
|
|
t.Errorf("expected 0 docs, got %d", len(docs))
|
|
}
|
|
}
|
|
|
|
func TestDocumentGetByKBIDOrdersByCreateTime(t *testing.T) {
|
|
db := setupDocumentTestDB(t)
|
|
pushDocDB(t, db)
|
|
|
|
createTime10 := int64(10)
|
|
createTime20 := int64(20)
|
|
createTime30 := int64(30)
|
|
db.Create(&entity.Document{ID: "doc-later", KbID: "kb1", Name: sp("Doc Later"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}, BaseModel: entity.BaseModel{CreateTime: &createTime30}})
|
|
db.Create(&entity.Document{ID: "doc-other", KbID: "kb2", Name: sp("Doc Other"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}, BaseModel: entity.BaseModel{CreateTime: &createTime10}})
|
|
db.Create(&entity.Document{ID: "doc-earlier", KbID: "kb1", Name: sp("Doc Earlier"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}, BaseModel: entity.BaseModel{CreateTime: &createTime20}})
|
|
|
|
docs, total, err := NewDocumentDAO().GetByKBID("kb1")
|
|
if err != nil {
|
|
t.Fatalf("GetByKBID failed: %v", err)
|
|
}
|
|
if total != 2 {
|
|
t.Fatalf("expected total=2, got %d", total)
|
|
}
|
|
if len(docs) != 2 {
|
|
t.Fatalf("expected 2 docs, got %d", len(docs))
|
|
}
|
|
if docs[0].ID != "doc-earlier" || docs[1].ID != "doc-later" {
|
|
t.Fatalf("unexpected order: %s, %s", docs[0].ID, docs[1].ID)
|
|
}
|
|
}
|
|
|
|
func TestDocumentGetByDocumentIDAndDatasetIDUsesKBID(t *testing.T) {
|
|
db := setupDocumentTestDB(t)
|
|
pushDocDB(t, db)
|
|
|
|
db.Create(&entity.Document{ID: "doc1", KbID: "kb1", Name: sp("Doc 1"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}})
|
|
db.Create(&entity.Document{ID: "doc1-other", KbID: "kb2", Name: sp("Doc 2"), CreatedBy: "user1", ParserConfig: entity.JSONMap{}})
|
|
|
|
doc, err := NewDocumentDAO().GetByDocumentIDAndDatasetID("doc1", "kb1")
|
|
if err != nil {
|
|
t.Fatalf("GetByDocumentIDAndDatasetID failed: %v", err)
|
|
}
|
|
if doc.ID != "doc1" || doc.KbID != "kb1" {
|
|
t.Fatalf("unexpected document: id=%s kb_id=%s", doc.ID, doc.KbID)
|
|
}
|
|
|
|
if _, err := NewDocumentDAO().GetByDocumentIDAndDatasetID("doc1", "kb2"); err == nil {
|
|
t.Fatal("expected no match when document does not belong to dataset")
|
|
}
|
|
}
|
|
|
|
func TestDocumentGetChunkingConfigScansParserConfig(t *testing.T) {
|
|
db := setupDocumentTestDB(t)
|
|
pushDocDB(t, db)
|
|
|
|
if err := db.Create(&entity.Tenant{
|
|
ID: "tenant1",
|
|
LLMID: "llm1",
|
|
EmbdID: "embd1",
|
|
ASRID: "asr1",
|
|
Img2TxtID: "img2txt1",
|
|
RerankID: "rerank1",
|
|
ParserIDs: "naive",
|
|
}).Error; err != nil {
|
|
t.Fatalf("create tenant: %v", err)
|
|
}
|
|
if err := db.Create(&entity.Knowledgebase{
|
|
ID: "kb1",
|
|
TenantID: "tenant1",
|
|
Name: "Dataset 1",
|
|
Language: sp("English"),
|
|
EmbdID: "kb-embd1",
|
|
Permission: "me",
|
|
CreatedBy: "user1",
|
|
ParserID: "naive",
|
|
ParserConfig: entity.JSONMap{},
|
|
}).Error; err != nil {
|
|
t.Fatalf("create knowledgebase: %v", err)
|
|
}
|
|
if err := db.Create(&entity.Document{
|
|
ID: "doc1",
|
|
KbID: "kb1",
|
|
ParserID: "naive",
|
|
ParserConfig: entity.JSONMap{"chunk_token_num": float64(128), "delimiter": "\\n"},
|
|
SourceType: "local",
|
|
Type: "doc",
|
|
CreatedBy: "user1",
|
|
Size: 42,
|
|
Suffix: ".txt",
|
|
}).Error; err != nil {
|
|
t.Fatalf("create document: %v", err)
|
|
}
|
|
|
|
config, err := NewDocumentDAO().GetChunkingConfig("doc1")
|
|
if err != nil {
|
|
t.Fatalf("GetChunkingConfig failed: %v", err)
|
|
}
|
|
parserConfig, ok := config["parser_config"].(entity.JSONMap)
|
|
if !ok {
|
|
t.Fatalf("parser_config type = %T, want entity.JSONMap", config["parser_config"])
|
|
}
|
|
if parserConfig["chunk_token_num"] != float64(128) || parserConfig["delimiter"] != "\\n" {
|
|
t.Fatalf("unexpected parser_config: %#v", parserConfig)
|
|
}
|
|
if config["tenant_id"] != "tenant1" || config["embd_id"] != "kb-embd1" {
|
|
t.Fatalf("unexpected joined config: %#v", config)
|
|
}
|
|
}
|
|
|
|
// sp returns a pointer to a fresh copy of s, for filling optional *string
// fields in test fixtures.
func sp(s string) *string {
	p := new(string)
	*p = s
	return p
}
|