diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d9fa56b662..675a636179 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,6 +9,7 @@ on: push: tags: - "v*.*.*" # normal release + - 'nightly' # mutable tag permissions: contents: write diff --git a/build.sh b/build.sh index 6c0baef895..05b3274bb4 100755 --- a/build.sh +++ b/build.sh @@ -199,6 +199,30 @@ build_cpp() { echo -e "${GREEN}✓ C++ static library built successfully${NC}" } +# Build C++ test executable +build_cpp_test() { + print_section "Building C++ test executable" + + if [ ! -d "$BUILD_DIR" ]; then + echo "Build directory not found, running cmake first..." + mkdir -p "$BUILD_DIR" + cd "$BUILD_DIR" + cmake .. -DCMAKE_BUILD_TYPE=Release + else + cd "$BUILD_DIR" + fi + + echo "Building rag_analyzer_c_test..." + make rag_analyzer_c_test -j$(nproc) + + if [ ! -f "$BUILD_DIR/rag_analyzer_c_test" ]; then + echo -e "${RED}Error: Failed to build rag_analyzer_c_test${NC}" + exit 1 + fi + + echo -e "${GREEN}✓ C++ test executable built successfully: $BUILD_DIR/rag_analyzer_c_test${NC}" +} + # Build Go server build_go() { print_section "Building RAGFlow go" @@ -312,6 +336,7 @@ Build script for RAGFlow Go server with C++ bindings. OPTIONS: --all, -a Build everything (C++ library + Go server) [default] --cpp, -c Build only C++ static library + --cpp-test Build C++ test executable (requires --cpp first) --go, -g Build only Go server (requires C++ library to be built) --clean, -C Clean all build artifacts --run, -r Build and run the server @@ -321,6 +346,7 @@ EXAMPLES: $0 # Build everything $0 --cpp # Build only C++ library $0 --go # Build only Go server + $0 --cpp-test # Build C++ test executable $0 --run # Build and run $0 --clean # Clean build artifacts @@ -343,6 +369,10 @@ main() { check_cpp_deps build_cpp ;; + --cpp-test) + check_cpp_deps + build_cpp_test + ;; --go|-g) check_go_deps build_go diff --git a/docker/docker-compose-base.yml b/docker/docker-compose-base.yml index fbc13b5d05..36cca3db77 100644 --- a/docker/docker-compose-base.yml +++ b/docker/docker-compose-base.yml @@ -253,7 +253,7 @@ services: nats: profiles: - ragflow-go - image: nats:2.14.1 + image: nats:2.14.2 ports: - ${NATS_PORT}:4222 - "8222:8222" diff --git a/internal/cpp/CMakeLists.txt b/internal/cpp/CMakeLists.txt index bcd96a5fe9..2f017a6b90 100644 --- a/internal/cpp/CMakeLists.txt +++ b/internal/cpp/CMakeLists.txt @@ -164,7 +164,7 @@ add_executable(rag_analyzer_c_test rag_analyzer_c_test.cpp ) -target_link_libraries(rag_analyzer_c_test rag_tokenizer_c_api stdc++ libm.a ${PCRE2_LIB}) +target_link_libraries(rag_analyzer_c_test rag_tokenizer_c_api stdc++ m ${PCRE2_LIB}) target_include_directories(rag_analyzer_c_test PUBLIC "${CMAKE_SOURCE_DIR}") set_target_properties(rag_analyzer_c_test PROPERTIES CXX_STANDARD 20 diff --git a/internal/cpp/rag_analyzer.cpp b/internal/cpp/rag_analyzer.cpp index 658afec6f3..23377ff40e 100644 --- a/internal/cpp/rag_analyzer.cpp +++ b/internal/cpp/rag_analyzer.cpp @@ -684,35 +684,38 @@ RAGAnalyzer::~RAGAnalyzer() { int32_t RAGAnalyzer::Load() { fs::path root(dict_path_); - fs::path dict_path(root / DICT_PATH); + fs::path dict_path(root / DICT_PATH); + printf("Validate dict file: %s\n", dict_path.string().c_str()); if (!fs::exists(dict_path)) { - printf("Invalid analyzer file: %s", dict_path.string().c_str()); - // return Status::InvalidAnalyzerFile(dict_path); + printf("Invalid dict file: %s\n", dict_path.string().c_str()); return -1; } fs::path pos_def_path(root / POS_DEF_PATH); + printf("Validate pos file: %s\n", pos_def_path.string().c_str()); if (!fs::exists(pos_def_path)) { - printf("Invalid post file: %s", pos_def_path.string().c_str()); - // return Status::InvalidAnalyzerFile(pos_def_path); - return -1; + printf("Invalid pos file: %s\n", pos_def_path.string().c_str()); + return -2; } own_dict_ = true; trie_ = new DartsTrie(); pos_table_ = new POSTable(pos_def_path.string()); + printf("Load pos file: %s\n", pos_def_path.string().c_str()); if (pos_table_->Load() != 0) { - printf("Fail to load post table: %s", pos_def_path.string().c_str()); - return -1; - // return Status::InvalidAnalyzerFile("Failed to load RAGAnalyzer POS definition"); + printf("Fail to load pos file: %s\n", pos_def_path.string().c_str()); + return -3; } fs::path trie_path(root / TRIE_PATH); + printf("Validate trie file: %s\n", trie_path.string().c_str()); if (fs::exists(trie_path)) { + printf("Load trie file: %s\n", trie_path.string().c_str()); trie_->Load(trie_path.string()); } else { // Build trie try { + printf("Build trie file: %s\n", dict_path.string().c_str()); std::ifstream from(dict_path.string()); std::string line; re2::RE2 re_pattern(R"([\r\n]+)"); @@ -737,36 +740,35 @@ int32_t RAGAnalyzer::Load() { } trie_->Build(); } catch (const std::exception &e) { - return -1; - // return Status::InvalidAnalyzerFile("Failed to load RAGAnalyzer analyzer"); + printf("Fail to build trie: %s\n", e.what()); + return -4; } + printf("Save trie file: %s\n", trie_path.string().c_str()); trie_->Save(trie_path.string()); } fs::path lemma_path(root / WORDNET_PATH); if (!fs::exists(lemma_path)) { - printf("Fail to load wordnet: %s", lemma_path.string().c_str()); - return -1; - // return Status::InvalidAnalyzerFile(lemma_path); + printf("Fail to load wordnet: %s\n", lemma_path.string().c_str()); + return -5; } wordnet_lemma_ = new WordNetLemmatizer(lemma_path.string()); fs::path opencc_path(root / OPENCC_PATH); - + printf("Validate opencc file: %s\n", opencc_path.string().c_str()); if (!fs::exists(opencc_path)) { - printf("Fail to load opencc_path: %s", opencc_path.string().c_str()); - return -1; - // return Status::InvalidAnalyzerFile(opencc_path); + printf("opencc_path not exists: %s\n", opencc_path.string().c_str()); + return -6; } try { + printf("Load opencc file: %s\n", opencc_path.string().c_str()); opencc_ = new ::OpenCC(opencc_path.string()); } catch (const std::exception &e) { - return -1; - // return Status::InvalidAnalyzerFile("Failed to load OpenCC"); + printf("Fail to open opencc: %s\n", opencc_path.string().c_str()); + return -7; } - // return Status::OK(); return 0; } diff --git a/internal/cpp/rag_analyzer_c_test.cpp b/internal/cpp/rag_analyzer_c_test.cpp index f62401a68e..05a8af6be4 100644 --- a/internal/cpp/rag_analyzer_c_test.cpp +++ b/internal/cpp/rag_analyzer_c_test.cpp @@ -10,7 +10,7 @@ void test_single_thread() { std::cout << "Test 1: Single thread, 1000 iterations..." << std::endl; // Create analyzer instance - RAGAnalyzerHandle handle = RAGAnalyzer_Create("."); + RAGAnalyzerHandle handle = RAGAnalyzer_Create("./resource"); assert(handle != nullptr && "Failed to create RAGAnalyzer"); // Load the analyzer @@ -110,8 +110,9 @@ void test_multi_thread() { } int main() { + std::cout << "Please make sure the './resource' directory exists and is accessible." << std::endl; std::cout << "=== RAGAnalyzer C API Test ===" << std::endl; - + test_single_thread(); // test_multi_thread(); diff --git a/internal/entity/tenant.go b/internal/entity/tenant.go index 88129cb6b1..c17d74469e 100644 --- a/internal/entity/tenant.go +++ b/internal/entity/tenant.go @@ -31,10 +31,10 @@ type Tenant struct { TenantImg2TxtID *int64 `gorm:"column:tenant_img2txt_id;index" json:"tenant_img2txt_id,omitempty"` RerankID string `gorm:"column:rerank_id;size:128;not null;index" json:"rerank_id"` TenantRerankID *int64 `gorm:"column:tenant_rerank_id;index" json:"tenant_rerank_id,omitempty"` - TTSID *string `gorm:"column:tts_id;size:256;index" json:"tts_id,omitempty"` + TTSID string `gorm:"column:tts_id;size:256;index" json:"tts_id,omitempty"` TenantTTSID *int64 `gorm:"column:tenant_tts_id;index" json:"tenant_tts_id,omitempty"` ParserIDs string `gorm:"column:parser_ids;size:256;not null;index" json:"parser_ids"` - OCRID *string `gorm:"column:ocr_id;size:256" json:"ocr_id,omitempty"` + OCRID string `gorm:"column:ocr_id;size:256" json:"ocr_id,omitempty"` TenantOCRID *int64 `gorm:"column:tenant_ocr_id" json:"tenant_ocr_id,omitempty"` Credit int64 `gorm:"column:credit;default:512;index" json:"credit"` Status *string `gorm:"column:status;size:1;index" json:"status,omitempty"` diff --git a/internal/server/config.go b/internal/server/config.go index 620d341199..33cd3bf235 100644 --- a/internal/server/config.go +++ b/internal/server/config.go @@ -88,6 +88,8 @@ type DefaultModelsConfig struct { RerankModel ModelConfig `mapstructure:"rerank_model"` ASRModel ModelConfig `mapstructure:"asr_model"` Image2TextModel ModelConfig `mapstructure:"image2text_model"` + OCRModel ModelConfig `mapstructure:"ocr_model"` + TTSModel ModelConfig `mapstructure:"tts_model"` } // ModelConfig model configuration diff --git a/internal/service/tenant.go b/internal/service/tenant.go index a6e3f43476..095eed7548 100644 --- a/internal/service/tenant.go +++ b/internal/service/tenant.go @@ -396,15 +396,9 @@ func (s *TenantService) GetDefaultModelName(tenantID string, modelType entity.Mo case entity.ModelTypeImage2Text: modelID = tenant.Img2TxtID case entity.ModelTypeTTS: - if tenant.TTSID == nil { - return "", fmt.Errorf("tenant TTS model not configured") - } - modelID = *tenant.TTSID + modelID = tenant.TTSID case entity.ModelTypeOCR: - if tenant.OCRID == nil { - return "", fmt.Errorf("tenant OCR model not configured") - } - modelID = *tenant.OCRID + modelID = tenant.OCRID default: return "", fmt.Errorf("invalid model type: %s", modelType) } diff --git a/internal/service/user.go b/internal/service/user.go index 5afcaf8200..bc568c8f71 100644 --- a/internal/service/user.go +++ b/internal/service/user.go @@ -186,6 +186,14 @@ func (s *UserService) Register(req *RegisterRequest) (*entity.User, common.Error if rerankID == "" { rerankID = "" } + ttsID := cfg.UserDefaultLLM.DefaultModels.TTSModel.Name + if ttsID == "" { + ttsID = "" + } + ocrID := cfg.UserDefaultLLM.DefaultModels.OCRModel.Name + if ocrID == "" { + ocrID = "" + } tenant := &entity.Tenant{ ID: userID, @@ -195,6 +203,8 @@ func (s *UserService) Register(req *RegisterRequest) (*entity.User, common.Error ASRID: asrID, Img2TxtID: img2txtID, RerankID: rerankID, + TTSID: ttsID, + OCRID: ocrID, ParserIDs: "naive:General,Q&A:Q&A,manual:Manual,table:Table,paper:Research Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag", Status: &status, }