From 1880e65e997e4fb6bce84b53c2860378269f01c4 Mon Sep 17 00:00:00 2001 From: Jin Hai Date: Fri, 3 Jul 2026 17:00:43 +0800 Subject: [PATCH] Go: refactor (#16602) ### Summary 1. update doc 2. refactor route code --------- Signed-off-by: Jin Hai --- .dockerignore | 6 +- .github/workflows/release.yml | 18 +- .github/workflows/sep-tests.yml | 4 +- .github/workflows/tests.yml | 4 +- .gitignore | 6 +- .rooignore | 6 +- build.sh | 26 +- cmd/{ragflow_main.go => ragflow_server.go} | 7 +- docker/entrypoint.sh | 8 +- internal/{ => binding}/cpp/CMakeLists.txt | 0 internal/{ => binding}/cpp/Makefile | 0 internal/{ => binding}/cpp/analyzer.h | 0 internal/{ => binding}/cpp/dart_trie.h | 0 internal/{ => binding}/cpp/darts/darts.h | 0 internal/{ => binding}/cpp/darts_trie.cpp | 0 internal/{ => binding}/cpp/main.cpp | 0 .../{ => binding}/cpp/opencc/config_reader.c | 0 .../{ => binding}/cpp/opencc/config_reader.h | 0 internal/{ => binding}/cpp/opencc/converter.c | 0 internal/{ => binding}/cpp/opencc/converter.h | 0 .../cpp/opencc/dictionary/abstract.c | 0 .../cpp/opencc/dictionary/abstract.h | 0 .../cpp/opencc/dictionary/datrie.c | 0 .../cpp/opencc/dictionary/datrie.h | 0 .../cpp/opencc/dictionary/text.c | 0 .../cpp/opencc/dictionary/text.h | 0 .../cpp/opencc/dictionary_group.c | 0 .../cpp/opencc/dictionary_group.h | 0 .../{ => binding}/cpp/opencc/dictionary_set.c | 0 .../{ => binding}/cpp/opencc/dictionary_set.h | 0 internal/{ => binding}/cpp/opencc/encoding.c | 0 internal/{ => binding}/cpp/opencc/encoding.h | 0 internal/{ => binding}/cpp/opencc/opencc.c | 0 internal/{ => binding}/cpp/opencc/opencc.h | 0 .../{ => binding}/cpp/opencc/opencc_types.h | 0 .../{ => binding}/cpp/opencc/openccxx.cpp | 0 internal/{ => binding}/cpp/opencc/openccxx.h | 0 internal/{ => binding}/cpp/opencc/utils.c | 0 internal/{ => binding}/cpp/opencc/utils.h | 0 internal/{ => binding}/cpp/pcre2.h | 0 internal/{ => binding}/cpp/pcre2posix.h | 0 internal/{ => binding}/cpp/rag_analyzer.cpp | 0 internal/{ => binding}/cpp/rag_analyzer.h | 0 .../{ => binding}/cpp/rag_analyzer_c_api.cpp | 0 .../{ => binding}/cpp/rag_analyzer_c_api.h | 0 .../cpp/rag_analyzer_c_api_debug.cpp | 0 .../{ => binding}/cpp/rag_analyzer_c_test.cpp | 0 internal/{ => binding}/cpp/re2/bitmap256.cc | 0 internal/{ => binding}/cpp/re2/bitmap256.h | 0 internal/{ => binding}/cpp/re2/bitstate.cc | 0 internal/{ => binding}/cpp/re2/compile.cc | 0 internal/{ => binding}/cpp/re2/dfa.cc | 0 .../{ => binding}/cpp/re2/filtered_re2.cc | 0 internal/{ => binding}/cpp/re2/filtered_re2.h | 0 internal/{ => binding}/cpp/re2/mimics_pcre.cc | 0 internal/{ => binding}/cpp/re2/nfa.cc | 0 internal/{ => binding}/cpp/re2/onepass.cc | 0 internal/{ => binding}/cpp/re2/parse.cc | 0 internal/{ => binding}/cpp/re2/perl_groups.cc | 0 internal/{ => binding}/cpp/re2/pod_array.h | 0 internal/{ => binding}/cpp/re2/prefilter.cc | 0 internal/{ => binding}/cpp/re2/prefilter.h | 0 .../{ => binding}/cpp/re2/prefilter_tree.cc | 0 .../{ => binding}/cpp/re2/prefilter_tree.h | 0 internal/{ => binding}/cpp/re2/prog.cc | 0 internal/{ => binding}/cpp/re2/prog.h | 0 internal/{ => binding}/cpp/re2/re2.cc | 0 internal/{ => binding}/cpp/re2/re2.h | 0 internal/{ => binding}/cpp/re2/regexp.cc | 0 internal/{ => binding}/cpp/re2/regexp.h | 0 internal/{ => binding}/cpp/re2/set.cc | 0 internal/{ => binding}/cpp/re2/set.h | 0 internal/{ => binding}/cpp/re2/simplify.cc | 0 internal/{ => binding}/cpp/re2/sparse_array.h | 0 internal/{ => binding}/cpp/re2/sparse_set.h | 0 internal/{ => binding}/cpp/re2/stringpiece.cc | 0 internal/{ => binding}/cpp/re2/stringpiece.h | 0 internal/{ => binding}/cpp/re2/tostring.cc | 0 .../{ => binding}/cpp/re2/unicode_casefold.cc | 0 .../{ => binding}/cpp/re2/unicode_casefold.h | 0 .../{ => binding}/cpp/re2/unicode_groups.cc | 0 .../{ => binding}/cpp/re2/unicode_groups.h | 0 internal/{ => binding}/cpp/re2/walker-inl.h | 0 internal/{ => binding}/cpp/stemmer/api.cpp | 0 internal/{ => binding}/cpp/stemmer/api.h | 0 internal/{ => binding}/cpp/stemmer/header.h | 0 .../cpp/stemmer/stem_UTF_8_danish.cpp | 0 .../cpp/stemmer/stem_UTF_8_danish.h | 0 .../cpp/stemmer/stem_UTF_8_dutch.cpp | 0 .../cpp/stemmer/stem_UTF_8_dutch.h | 0 .../cpp/stemmer/stem_UTF_8_english.cpp | 0 .../cpp/stemmer/stem_UTF_8_english.h | 0 .../cpp/stemmer/stem_UTF_8_finnish.cpp | 0 .../cpp/stemmer/stem_UTF_8_finnish.h | 0 .../cpp/stemmer/stem_UTF_8_french.cpp | 0 .../cpp/stemmer/stem_UTF_8_french.h | 0 .../cpp/stemmer/stem_UTF_8_german.cpp | 0 .../cpp/stemmer/stem_UTF_8_german.h | 0 .../cpp/stemmer/stem_UTF_8_hungarian.cpp | 0 .../cpp/stemmer/stem_UTF_8_hungarian.h | 0 .../cpp/stemmer/stem_UTF_8_italian.cpp | 0 .../cpp/stemmer/stem_UTF_8_italian.h | 0 .../cpp/stemmer/stem_UTF_8_norwegian.cpp | 0 .../cpp/stemmer/stem_UTF_8_norwegian.h | 0 .../cpp/stemmer/stem_UTF_8_porter.cpp | 0 .../cpp/stemmer/stem_UTF_8_porter.h | 0 .../cpp/stemmer/stem_UTF_8_portuguese.cpp | 0 .../cpp/stemmer/stem_UTF_8_portuguese.h | 0 .../cpp/stemmer/stem_UTF_8_romanian.cpp | 0 .../cpp/stemmer/stem_UTF_8_romanian.h | 0 .../cpp/stemmer/stem_UTF_8_russian.cpp | 0 .../cpp/stemmer/stem_UTF_8_russian.h | 0 .../cpp/stemmer/stem_UTF_8_spanish.cpp | 0 .../cpp/stemmer/stem_UTF_8_spanish.h | 0 .../cpp/stemmer/stem_UTF_8_swedish.cpp | 0 .../cpp/stemmer/stem_UTF_8_swedish.h | 0 .../cpp/stemmer/stem_UTF_8_turkish.cpp | 0 .../cpp/stemmer/stem_UTF_8_turkish.h | 0 .../{ => binding}/cpp/stemmer/stemmer.cpp | 0 internal/{ => binding}/cpp/stemmer/stemmer.h | 0 .../{ => binding}/cpp/stemmer/utilities.cpp | 0 internal/{ => binding}/cpp/string_utils.h | 0 internal/{ => binding}/cpp/term.cpp | 0 internal/{ => binding}/cpp/term.h | 0 internal/{ => binding}/cpp/thinc_ner.cpp | 0 internal/{ => binding}/cpp/thinc_ner.h | 0 internal/{ => binding}/cpp/thinc_parser.cpp | 0 internal/{ => binding}/cpp/thinc_parser.h | 0 internal/{ => binding}/cpp/tokenizer.cpp | 0 internal/{ => binding}/cpp/tokenizer.h | 0 internal/{ => binding}/cpp/util/logging.h | 0 internal/{ => binding}/cpp/util/mix.h | 0 internal/{ => binding}/cpp/util/mutex.h | 0 internal/{ => binding}/cpp/util/rune.cc | 0 internal/{ => binding}/cpp/util/strutil.cc | 0 internal/{ => binding}/cpp/util/strutil.h | 0 internal/{ => binding}/cpp/util/utf.h | 0 internal/{ => binding}/cpp/util/util.h | 0 .../{ => binding}/cpp/wordnet_lemmatizer.cpp | 0 .../{ => binding}/cpp/wordnet_lemmatizer.h | 0 internal/binding/rag_analyzer.go | 8 +- internal/common/format.go | 5 + internal/development.md | 8 +- internal/handler/{datasets.go => dataset.go} | 18 + ...test.go => dataset_aggregate_tags_test.go} | 0 ...tags_test.go => dataset_list_tags_test.go} | 0 ...tag_test.go => dataset_rename_tag_test.go} | 0 ..._search_test.go => dataset_search_test.go} | 0 internal/handler/dify_retrieval_handler.go | 4 +- internal/handler/document.go | 55 --- internal/handler/kb.go | 435 ------------------ internal/handler/tenant.go | 69 +-- .../ingestion/compilation/extractor/ner.go | 8 +- .../compilation/extractor/ner_extractor.go | 2 +- .../compilation/extractor/parser_go.go | 2 +- internal/router/router.go | 251 ++++------ internal/service/chat_pipeline.go | 30 +- internal/service/dataset.go | 298 ++++++------ internal/service/deep_researcher.go | 4 +- internal/service/{kg => graph}/pipeline.go | 2 +- internal/service/{kg => graph}/retrieval.go | 2 +- .../service/{kg => graph}/retrieval_test.go | 2 +- internal/service/{kg => graph}/scoring.go | 2 +- internal/service/{kg => graph}/search.go | 2 +- internal/service/{kg => graph}/search_test.go | 2 +- .../service/{kg => graph}/testutil_test.go | 2 +- internal/service/{kg => graph}/types.go | 2 +- internal/service/kb.go | 377 --------------- web/vite.config.ts | 2 +- 169 files changed, 373 insertions(+), 1304 deletions(-) rename cmd/{ragflow_main.go => ragflow_server.go} (99%) rename internal/{ => binding}/cpp/CMakeLists.txt (100%) rename internal/{ => binding}/cpp/Makefile (100%) rename internal/{ => binding}/cpp/analyzer.h (100%) rename internal/{ => binding}/cpp/dart_trie.h (100%) rename internal/{ => binding}/cpp/darts/darts.h (100%) rename internal/{ => binding}/cpp/darts_trie.cpp (100%) rename internal/{ => binding}/cpp/main.cpp (100%) rename internal/{ => binding}/cpp/opencc/config_reader.c (100%) rename internal/{ => binding}/cpp/opencc/config_reader.h (100%) rename internal/{ => binding}/cpp/opencc/converter.c (100%) rename internal/{ => binding}/cpp/opencc/converter.h (100%) rename internal/{ => binding}/cpp/opencc/dictionary/abstract.c (100%) rename internal/{ => binding}/cpp/opencc/dictionary/abstract.h (100%) rename internal/{ => binding}/cpp/opencc/dictionary/datrie.c (100%) rename internal/{ => binding}/cpp/opencc/dictionary/datrie.h (100%) rename internal/{ => binding}/cpp/opencc/dictionary/text.c (100%) rename internal/{ => binding}/cpp/opencc/dictionary/text.h (100%) rename internal/{ => binding}/cpp/opencc/dictionary_group.c (100%) rename internal/{ => binding}/cpp/opencc/dictionary_group.h (100%) rename internal/{ => binding}/cpp/opencc/dictionary_set.c (100%) rename internal/{ => binding}/cpp/opencc/dictionary_set.h (100%) rename internal/{ => binding}/cpp/opencc/encoding.c (100%) rename internal/{ => binding}/cpp/opencc/encoding.h (100%) rename internal/{ => binding}/cpp/opencc/opencc.c (100%) rename internal/{ => binding}/cpp/opencc/opencc.h (100%) rename internal/{ => binding}/cpp/opencc/opencc_types.h (100%) rename internal/{ => binding}/cpp/opencc/openccxx.cpp (100%) rename internal/{ => binding}/cpp/opencc/openccxx.h (100%) rename internal/{ => binding}/cpp/opencc/utils.c (100%) rename internal/{ => binding}/cpp/opencc/utils.h (100%) rename internal/{ => binding}/cpp/pcre2.h (100%) rename internal/{ => binding}/cpp/pcre2posix.h (100%) rename internal/{ => binding}/cpp/rag_analyzer.cpp (100%) rename internal/{ => binding}/cpp/rag_analyzer.h (100%) rename internal/{ => binding}/cpp/rag_analyzer_c_api.cpp (100%) rename internal/{ => binding}/cpp/rag_analyzer_c_api.h (100%) rename internal/{ => binding}/cpp/rag_analyzer_c_api_debug.cpp (100%) rename internal/{ => binding}/cpp/rag_analyzer_c_test.cpp (100%) rename internal/{ => binding}/cpp/re2/bitmap256.cc (100%) rename internal/{ => binding}/cpp/re2/bitmap256.h (100%) rename internal/{ => binding}/cpp/re2/bitstate.cc (100%) rename internal/{ => binding}/cpp/re2/compile.cc (100%) rename internal/{ => binding}/cpp/re2/dfa.cc (100%) rename internal/{ => binding}/cpp/re2/filtered_re2.cc (100%) rename internal/{ => binding}/cpp/re2/filtered_re2.h (100%) rename internal/{ => binding}/cpp/re2/mimics_pcre.cc (100%) rename internal/{ => binding}/cpp/re2/nfa.cc (100%) rename internal/{ => binding}/cpp/re2/onepass.cc (100%) rename internal/{ => binding}/cpp/re2/parse.cc (100%) rename internal/{ => binding}/cpp/re2/perl_groups.cc (100%) rename internal/{ => binding}/cpp/re2/pod_array.h (100%) rename internal/{ => binding}/cpp/re2/prefilter.cc (100%) rename internal/{ => binding}/cpp/re2/prefilter.h (100%) rename internal/{ => binding}/cpp/re2/prefilter_tree.cc (100%) rename internal/{ => binding}/cpp/re2/prefilter_tree.h (100%) rename internal/{ => binding}/cpp/re2/prog.cc (100%) rename internal/{ => binding}/cpp/re2/prog.h (100%) rename internal/{ => binding}/cpp/re2/re2.cc (100%) rename internal/{ => binding}/cpp/re2/re2.h (100%) rename internal/{ => binding}/cpp/re2/regexp.cc (100%) rename internal/{ => binding}/cpp/re2/regexp.h (100%) rename internal/{ => binding}/cpp/re2/set.cc (100%) rename internal/{ => binding}/cpp/re2/set.h (100%) rename internal/{ => binding}/cpp/re2/simplify.cc (100%) rename internal/{ => binding}/cpp/re2/sparse_array.h (100%) rename internal/{ => binding}/cpp/re2/sparse_set.h (100%) rename internal/{ => binding}/cpp/re2/stringpiece.cc (100%) rename internal/{ => binding}/cpp/re2/stringpiece.h (100%) rename internal/{ => binding}/cpp/re2/tostring.cc (100%) rename internal/{ => binding}/cpp/re2/unicode_casefold.cc (100%) rename internal/{ => binding}/cpp/re2/unicode_casefold.h (100%) rename internal/{ => binding}/cpp/re2/unicode_groups.cc (100%) rename internal/{ => binding}/cpp/re2/unicode_groups.h (100%) rename internal/{ => binding}/cpp/re2/walker-inl.h (100%) rename internal/{ => binding}/cpp/stemmer/api.cpp (100%) rename internal/{ => binding}/cpp/stemmer/api.h (100%) rename internal/{ => binding}/cpp/stemmer/header.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_danish.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_danish.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_dutch.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_dutch.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_english.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_english.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_finnish.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_finnish.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_french.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_french.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_german.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_german.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_hungarian.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_hungarian.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_italian.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_italian.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_norwegian.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_norwegian.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_porter.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_porter.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_portuguese.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_portuguese.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_romanian.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_romanian.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_russian.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_russian.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_spanish.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_spanish.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_swedish.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_swedish.h (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_turkish.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stem_UTF_8_turkish.h (100%) rename internal/{ => binding}/cpp/stemmer/stemmer.cpp (100%) rename internal/{ => binding}/cpp/stemmer/stemmer.h (100%) rename internal/{ => binding}/cpp/stemmer/utilities.cpp (100%) rename internal/{ => binding}/cpp/string_utils.h (100%) rename internal/{ => binding}/cpp/term.cpp (100%) rename internal/{ => binding}/cpp/term.h (100%) rename internal/{ => binding}/cpp/thinc_ner.cpp (100%) rename internal/{ => binding}/cpp/thinc_ner.h (100%) rename internal/{ => binding}/cpp/thinc_parser.cpp (100%) rename internal/{ => binding}/cpp/thinc_parser.h (100%) rename internal/{ => binding}/cpp/tokenizer.cpp (100%) rename internal/{ => binding}/cpp/tokenizer.h (100%) rename internal/{ => binding}/cpp/util/logging.h (100%) rename internal/{ => binding}/cpp/util/mix.h (100%) rename internal/{ => binding}/cpp/util/mutex.h (100%) rename internal/{ => binding}/cpp/util/rune.cc (100%) rename internal/{ => binding}/cpp/util/strutil.cc (100%) rename internal/{ => binding}/cpp/util/strutil.h (100%) rename internal/{ => binding}/cpp/util/utf.h (100%) rename internal/{ => binding}/cpp/util/util.h (100%) rename internal/{ => binding}/cpp/wordnet_lemmatizer.cpp (100%) rename internal/{ => binding}/cpp/wordnet_lemmatizer.h (100%) rename internal/handler/{datasets.go => dataset.go} (98%) rename internal/handler/{datasets_aggregate_tags_test.go => dataset_aggregate_tags_test.go} (100%) rename internal/handler/{datasets_list_tags_test.go => dataset_list_tags_test.go} (100%) rename internal/handler/{datasets_rename_tag_test.go => dataset_rename_tag_test.go} (100%) rename internal/handler/{datasets_search_test.go => dataset_search_test.go} (100%) delete mode 100644 internal/handler/kb.go rename internal/service/{kg => graph}/pipeline.go (99%) rename internal/service/{kg => graph}/retrieval.go (99%) rename internal/service/{kg => graph}/retrieval_test.go (99%) rename internal/service/{kg => graph}/scoring.go (99%) rename internal/service/{kg => graph}/search.go (99%) rename internal/service/{kg => graph}/search_test.go (99%) rename internal/service/{kg => graph}/testutil_test.go (76%) rename internal/service/{kg => graph}/types.go (99%) delete mode 100644 internal/service/kb.go diff --git a/.dockerignore b/.dockerignore index 1eaf4258fa..1ed7cfa457 100644 --- a/.dockerignore +++ b/.dockerignore @@ -27,9 +27,9 @@ docker/oceanbase/ docker/seekdb/ # ── Go and C++ build outputs ──────────────────────────────────────────────── -internal/cpp/build/ -internal/cpp/cmake-build-release/ -internal/cpp/cmake-build-debug/ +internal/binding/cpp/build/ +internal/binding/cpp/cmake-build-release/ +internal/binding/cpp/cmake-build-debug/ target/ # ── ragflow_deps build context (built as a separate image, mounted ── diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d8f36e09cc..8fafaba6fd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -350,8 +350,8 @@ jobs: set -euo pipefail cmake_args=( - -S internal/cpp - -B internal/cpp/cmake-build-release + -S internal/binding/cpp + -B internal/binding/cpp/cmake-build-release -DCMAKE_BUILD_TYPE=Release ) @@ -366,9 +366,9 @@ jobs: fi cmake "${cmake_args[@]}" - cmake --build internal/cpp/cmake-build-release --target rag_tokenizer_c_api --parallel + cmake --build internal/binding/cpp/cmake-build-release --target rag_tokenizer_c_api --parallel - test -f internal/cpp/cmake-build-release/librag_tokenizer_c_api.a + test -f internal/binding/cpp/cmake-build-release/librag_tokenizer_c_api.a - name: Build rag tokenizer native library if: runner.os == 'Windows' @@ -390,16 +390,16 @@ jobs: test -f "${MINGW_PREFIX}/include/simde/x86/sse4.1.h" - cmake -S internal/cpp -B internal/cpp/cmake-build-release -G Ninja \ + cmake -S internal/binding/cpp -B internal/binding/cpp/cmake-build-release -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER="$(cygpath -m "${cc_path}")" \ -DCMAKE_CXX_COMPILER="$(cygpath -m "${cxx_path}")" \ -DCMAKE_C_FLAGS="-I${MINGW_PREFIX}/include" \ -DCMAKE_CXX_FLAGS="-I${MINGW_PREFIX}/include" - cmake --build internal/cpp/cmake-build-release --target rag_tokenizer_c_api --parallel + cmake --build internal/binding/cpp/cmake-build-release --target rag_tokenizer_c_api --parallel - test -f internal/cpp/cmake-build-release/librag_tokenizer_c_api.a + test -f internal/binding/cpp/cmake-build-release/librag_tokenizer_c_api.a - name: Build Go CLI release binaries if: runner.os != 'Windows' @@ -492,7 +492,7 @@ jobs: Write-Error "liboffice_oxide.a does not exist: ${officeOxidePrefix}/lib/liboffice_oxide.a" exit 1 } - if (-not (Test-Path "internal/cpp/cmake-build-release/librag_tokenizer_c_api.a")) { + if (-not (Test-Path "internal/binding/cpp/cmake-build-release/librag_tokenizer_c_api.a")) { Write-Error "librag_tokenizer_c_api.a does not exist" exit 1 } @@ -501,7 +501,7 @@ jobs: exit 1 } - $ragTokenizerLib = (Resolve-Path "internal/cpp/cmake-build-release/librag_tokenizer_c_api.a").Path -replace '\\', '/' + $ragTokenizerLib = (Resolve-Path "internal/binding/cpp/cmake-build-release/librag_tokenizer_c_api.a").Path -replace '\\', '/' $pcre2LibDir = $env:PCRE2_LIBDIR -replace '\\', '/' $pcre2IncludeDir = $env:PCRE2_INCLUDEDIR -replace '\\', '/' diff --git a/.github/workflows/sep-tests.yml b/.github/workflows/sep-tests.yml index 0cca2714a5..76a5e8eb23 100644 --- a/.github/workflows/sep-tests.yml +++ b/.github/workflows/sep-tests.yml @@ -259,7 +259,7 @@ jobs: -e TZ="${TZ}" \ -e UV_INDEX=https://mirrors.aliyun.com/pypi/simple \ -v "${PWD}:/ragflow" \ - -v "${PWD}/internal/cpp/resource:/usr/share/infinity/resource" \ + -v "${PWD}/internal/binding/cpp/resource:/usr/share/infinity/resource" \ infiniflow/infinity_builder:ubuntu22_clang20 sudo docker exec "${BUILDER_CONTAINER}" bash -c 'git config --global safe.directory "*" && cd /ragflow && ./build.sh --cpp' ./build.sh --go @@ -716,7 +716,7 @@ jobs: -e TZ="${TZ}" \ -e UV_INDEX=https://mirrors.aliyun.com/pypi/simple \ -v "${PWD}:/ragflow" \ - -v "${PWD}/internal/cpp/resource:/usr/share/infinity/resource" \ + -v "${PWD}/internal/binding/cpp/resource:/usr/share/infinity/resource" \ infiniflow/infinity_builder:ubuntu22_clang20 sudo docker exec "${BUILDER_CONTAINER}" bash -c 'git config --global safe.directory "*" && cd /ragflow && ./build.sh --cpp' ./build.sh --go diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 211856ff4d..a42c094804 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -178,7 +178,7 @@ jobs: -e TZ="${TZ}" \ -e UV_INDEX=https://mirrors.aliyun.com/pypi/simple \ -v "${PWD}:/ragflow" \ - -v "${PWD}/internal/cpp/resource:/usr/share/infinity/resource" \ + -v "${PWD}/internal/binding/cpp/resource:/usr/share/infinity/resource" \ infiniflow/infinity_builder:ubuntu22_clang20 sudo docker exec "${BUILDER_CONTAINER}" bash -c 'git config --global safe.directory "*" && cd /ragflow && ./build.sh --cpp' ./build.sh --go @@ -622,7 +622,7 @@ jobs: -e TZ="${TZ}" \ -e UV_INDEX=https://mirrors.aliyun.com/pypi/simple \ -v "${PWD}:/ragflow" \ - -v "${PWD}/internal/cpp/resource:/usr/share/infinity/resource" \ + -v "${PWD}/internal/binding/cpp/resource:/usr/share/infinity/resource" \ infiniflow/infinity_builder:ubuntu22_clang20 sudo docker exec "${BUILDER_CONTAINER}" bash -c 'git config --global safe.directory "*" && cd /ragflow && ./build.sh --cpp' ./build.sh --go diff --git a/.gitignore b/.gitignore index 2a0629d120..00f3c81956 100644 --- a/.gitignore +++ b/.gitignore @@ -225,9 +225,9 @@ uv-aarch64-unknown-linux-gnu.tar.gz docker/launch_backend_service_windows.sh # C++ build directories -internal/cpp/build/ -internal/cpp/cmake-build-release/ -internal/cpp/cmake-build-debug/ +internal/binding/cpp/build/ +internal/binding/cpp/cmake-build-release/ +internal/binding/cpp/cmake-build-debug/ # Trae IDE config .trae/ diff --git a/.rooignore b/.rooignore index 0f8f6269da..3efdabf7eb 100644 --- a/.rooignore +++ b/.rooignore @@ -72,9 +72,9 @@ docker/seekdb # Native / compiled build dirs target/ bin/ -internal/cpp/build/ -internal/cpp/cmake-build-release/ -internal/cpp/cmake-build-debug/ +internal/binding/cpp/build/ +internal/binding/cpp/cmake-build-release/ +internal/binding/cpp/cmake-build-debug/ # Optional: skip tests and docs from indexing # test/ diff --git a/build.sh b/build.sh index 42082b8a7d..22ed55771f 100755 --- a/build.sh +++ b/build.sh @@ -12,9 +12,9 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$SCRIPT_DIR" # Build directories -CPP_DIR="$PROJECT_ROOT/internal/cpp" +CPP_DIR="$PROJECT_ROOT/internal/binding/cpp" BUILD_DIR="$CPP_DIR/cmake-build-release" -RAGFLOW_MAIN_BINARY="$PROJECT_ROOT/bin/ragflow_main" +RAGFLOW_SERVER_BINARY="$PROJECT_ROOT/bin/ragflow_server" RAGFLOW_CLI_BINARY="$PROJECT_ROOT/bin/ragflow-cli" # Strip symbols from Go binaries (set via --strip / -s) @@ -301,20 +301,20 @@ build_go() { local strip_flags=() [ -n "$STRIP_SYMBOLS" ] && strip_flags=(-ldflags="-s -w") - echo "Building RAGFlow binary: $RAGFLOW_MAIN_BINARY, and $RAGFLOW_CLI_BINARY" + echo "Building RAGFlow binary: $RAGFLOW_SERVER_BINARY, and $RAGFLOW_CLI_BINARY" GOPROXY=${GOPROXY:-https://goproxy.cn,https://proxy.golang.org,direct} CGO_ENABLED=1 \ CGO_CFLAGS="$CGO_CFLAGS" CGO_LDFLAGS="$CGO_LDFLAGS" \ - go build "${strip_flags[@]}" -o "$RAGFLOW_MAIN_BINARY" cmd/ragflow_main.go + go build "${strip_flags[@]}" -o "$RAGFLOW_SERVER_BINARY" cmd/ragflow_server.go GOPROXY=${GOPROXY:-https://goproxy.cn,https://proxy.golang.org,direct} CGO_ENABLED=1 \ CGO_CFLAGS="$CGO_CFLAGS" CGO_LDFLAGS="$CGO_LDFLAGS" \ go build "${strip_flags[@]}" -o "$RAGFLOW_CLI_BINARY" cmd/ragflow-cli.go - if [ ! -f "$RAGFLOW_MAIN_BINARY" ]; then + if [ ! -f "$RAGFLOW_SERVER_BINARY" ]; then echo -e "${RED}Error: Failed to build RAGFlow main binary${NC}" exit 1 fi - echo -e "${GREEN}✓ Go ragflow_main built successfully: $RAGFLOW_MAIN_BINARY${NC}" + echo -e "${GREEN}✓ Go ragflow_server built successfully: $RAGFLOW_SERVER_BINARY${NC}" echo -e "${GREEN}✓ Go ragflow-cli built successfully: $RAGFLOW_CLI_BINARY${NC}" } @@ -405,7 +405,7 @@ clean() { print_section "Cleaning build artifacts" rm -rf "$BUILD_DIR" - rm -f "$RAGFLOW_MAIN_BINARY" + rm -f "$RAGFLOW_SERVER_BINARY" rm -f "$RAGFLOW_CLI_BINARY" echo -e "${GREEN}✓ Build artifacts cleaned${NC}" @@ -413,8 +413,8 @@ clean() { # Run the server run() { - if [ ! -f "$RAGFLOW_MAIN_BINARY" ]; then - echo -e "${RED}Error: $RAGFLOW_MAIN_BINARY not found. Build first with --all or --go${NC}" + if [ ! -f "$RAGFLOW_SERVER_BINARY" ]; then + echo -e "${RED}Error: $RAGFLOW_SERVER_BINARY not found. Build first with --all or --go${NC}" exit 1 fi @@ -423,7 +423,7 @@ run() { # admin_server must be running before ragflow_server, otherwise ragflow_server's # heartbeats to admin will error out (see internal/development.md). print_section "Starting admin server (background)" - "$RAGFLOW_MAIN_BINARY" --admin & + "$RAGFLOW_SERVER_BINARY" --admin & ADMIN_PID=$! trap 'kill "$ADMIN_PID" 2>/dev/null || true' EXIT INT TERM @@ -432,13 +432,13 @@ run() { sleep 1 print_section "Starting ingestor (background)" - "$RAGFLOW_MAIN_BINARY" --ingestor & + "$RAGFLOW_SERVER_BINARY" --ingestor & INGESTOR_PID=$! trap 'kill "$INGESTOR_PID" 2>/dev/null || true' EXIT INT TERM sleep 1 print_section "Starting RAGFlow server (foreground)" - "$RAGFLOW_MAIN_BINARY" -- api + "$RAGFLOW_SERVER_BINARY" -- api } # Show help @@ -540,7 +540,7 @@ main() { build_cpp build_go echo -e "\n${GREEN}=== Build completed successfully! ===${NC}" - echo "Binary: $RAGFLOW_MAIN_BINARY, $RAGFLOW_CLI_BINARY" + echo "Binary: $RAGFLOW_SERVER_BINARY, $RAGFLOW_CLI_BINARY" ;; *) echo -e "${RED}Unknown option: $1${NC}" diff --git a/cmd/ragflow_main.go b/cmd/ragflow_server.go similarity index 99% rename from cmd/ragflow_main.go rename to cmd/ragflow_server.go index 60e3f8c00a..f933c864a5 100644 --- a/cmd/ragflow_main.go +++ b/cmd/ragflow_server.go @@ -661,7 +661,6 @@ func startServer(config *server.Config) { userService := service.NewUserService() documentService := service.NewDocumentService() datasetsService := service.NewDatasetService() - datasetService := service.NewKnowledgebaseService() metadataService := service.NewMetadataService() chunkService := chunk.NewChunkService() llmService := service.NewLLMService() @@ -689,11 +688,10 @@ func startServer(config *server.Config) { // Initialize handler layer authHandler := handler.NewAuthHandler() userHandler := handler.NewUserHandler(userService) - tenantHandler := handler.NewTenantHandler(tenantService, userService, datasetService) + tenantHandler := handler.NewTenantHandler(tenantService, userService, datasetsService) documentHandler := handler.NewDocumentHandler(documentService, datasetsService) datasetsHandler := handler.NewDatasetsHandler(datasetsService, metadataService) systemHandler := handler.NewSystemHandler(systemService) - datasetHandler := handler.NewKnowledgebaseHandler(datasetService, userService, documentService) chunkHandler := handler.NewChunkHandler(chunkService, userService) llmHandler := handler.NewLLMHandler(llmService, userService) chatHandler := handler.NewChatHandler(chatService, userService) @@ -775,7 +773,7 @@ func startServer(config *server.Config) { docDAO := documentDAO retrievalService := nlp.NewRetrievalService(docEngine, docDAO) difyRetrievalHandler := handler.NewDifyRetrievalHandler( - datasetService, + datasetsService, modelProviderService, metadataService, retrievalService, @@ -807,7 +805,6 @@ func startServer(config *server.Config) { documentHandler, datasetsHandler, systemHandler, - datasetHandler, chunkHandler, llmHandler, chatHandler, diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 915740f422..22339d829f 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -278,7 +278,7 @@ if [[ "${ENABLE_ADMIN_SERVER}" -eq 1 ]]; then if [[ "${API_PROXY_SCHEME}" == "hybrid" ]] || [[ "${API_PROXY_SCHEME}" == "go" ]]; then while true; do echo "Starting Admin go server..." - bin/ragflow_main --admin + bin/ragflow_server --admin echo "Admin go server started." sleep 1; done & @@ -301,7 +301,7 @@ if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then if [[ "${API_PROXY_SCHEME}" == "hybrid" ]] || [[ "${API_PROXY_SCHEME}" == "go" ]]; then while true; do echo "Starting RAGFlow go server..." - bin/ragflow_main --api + bin/ragflow_server --api echo "RAGFlow go server started." sleep 1; done & @@ -335,7 +335,7 @@ if [[ "${ENABLE_TASKEXECUTOR}" -eq 1 ]]; then if [[ "${API_PROXY_SCHEME}" == "hybrid" ]] || [[ "${API_PROXY_SCHEME}" == "go" ]]; then while true; do echo "Starting go ingestor..." - bin/ragflow_main --ingestor + bin/ragflow_server --ingestor sleep 1; done & fi @@ -353,7 +353,7 @@ if [[ "${ENABLE_TASKEXECUTOR}" -eq 1 ]]; then if [[ "${API_PROXY_SCHEME}" == "hybrid" ]] || [[ "${API_PROXY_SCHEME}" == "go" ]]; then while true; do echo "Starting go ingestor..." - bin/ragflow_main --ingestor + bin/ragflow_server --ingestor sleep 1; done & fi diff --git a/internal/cpp/CMakeLists.txt b/internal/binding/cpp/CMakeLists.txt similarity index 100% rename from internal/cpp/CMakeLists.txt rename to internal/binding/cpp/CMakeLists.txt diff --git a/internal/cpp/Makefile b/internal/binding/cpp/Makefile similarity index 100% rename from internal/cpp/Makefile rename to internal/binding/cpp/Makefile diff --git a/internal/cpp/analyzer.h b/internal/binding/cpp/analyzer.h similarity index 100% rename from internal/cpp/analyzer.h rename to internal/binding/cpp/analyzer.h diff --git a/internal/cpp/dart_trie.h b/internal/binding/cpp/dart_trie.h similarity index 100% rename from internal/cpp/dart_trie.h rename to internal/binding/cpp/dart_trie.h diff --git a/internal/cpp/darts/darts.h b/internal/binding/cpp/darts/darts.h similarity index 100% rename from internal/cpp/darts/darts.h rename to internal/binding/cpp/darts/darts.h diff --git a/internal/cpp/darts_trie.cpp b/internal/binding/cpp/darts_trie.cpp similarity index 100% rename from internal/cpp/darts_trie.cpp rename to internal/binding/cpp/darts_trie.cpp diff --git a/internal/cpp/main.cpp b/internal/binding/cpp/main.cpp similarity index 100% rename from internal/cpp/main.cpp rename to internal/binding/cpp/main.cpp diff --git a/internal/cpp/opencc/config_reader.c b/internal/binding/cpp/opencc/config_reader.c similarity index 100% rename from internal/cpp/opencc/config_reader.c rename to internal/binding/cpp/opencc/config_reader.c diff --git a/internal/cpp/opencc/config_reader.h b/internal/binding/cpp/opencc/config_reader.h similarity index 100% rename from internal/cpp/opencc/config_reader.h rename to internal/binding/cpp/opencc/config_reader.h diff --git a/internal/cpp/opencc/converter.c b/internal/binding/cpp/opencc/converter.c similarity index 100% rename from internal/cpp/opencc/converter.c rename to internal/binding/cpp/opencc/converter.c diff --git a/internal/cpp/opencc/converter.h b/internal/binding/cpp/opencc/converter.h similarity index 100% rename from internal/cpp/opencc/converter.h rename to internal/binding/cpp/opencc/converter.h diff --git a/internal/cpp/opencc/dictionary/abstract.c b/internal/binding/cpp/opencc/dictionary/abstract.c similarity index 100% rename from internal/cpp/opencc/dictionary/abstract.c rename to internal/binding/cpp/opencc/dictionary/abstract.c diff --git a/internal/cpp/opencc/dictionary/abstract.h b/internal/binding/cpp/opencc/dictionary/abstract.h similarity index 100% rename from internal/cpp/opencc/dictionary/abstract.h rename to internal/binding/cpp/opencc/dictionary/abstract.h diff --git a/internal/cpp/opencc/dictionary/datrie.c b/internal/binding/cpp/opencc/dictionary/datrie.c similarity index 100% rename from internal/cpp/opencc/dictionary/datrie.c rename to internal/binding/cpp/opencc/dictionary/datrie.c diff --git a/internal/cpp/opencc/dictionary/datrie.h b/internal/binding/cpp/opencc/dictionary/datrie.h similarity index 100% rename from internal/cpp/opencc/dictionary/datrie.h rename to internal/binding/cpp/opencc/dictionary/datrie.h diff --git a/internal/cpp/opencc/dictionary/text.c b/internal/binding/cpp/opencc/dictionary/text.c similarity index 100% rename from internal/cpp/opencc/dictionary/text.c rename to internal/binding/cpp/opencc/dictionary/text.c diff --git a/internal/cpp/opencc/dictionary/text.h b/internal/binding/cpp/opencc/dictionary/text.h similarity index 100% rename from internal/cpp/opencc/dictionary/text.h rename to internal/binding/cpp/opencc/dictionary/text.h diff --git a/internal/cpp/opencc/dictionary_group.c b/internal/binding/cpp/opencc/dictionary_group.c similarity index 100% rename from internal/cpp/opencc/dictionary_group.c rename to internal/binding/cpp/opencc/dictionary_group.c diff --git a/internal/cpp/opencc/dictionary_group.h b/internal/binding/cpp/opencc/dictionary_group.h similarity index 100% rename from internal/cpp/opencc/dictionary_group.h rename to internal/binding/cpp/opencc/dictionary_group.h diff --git a/internal/cpp/opencc/dictionary_set.c b/internal/binding/cpp/opencc/dictionary_set.c similarity index 100% rename from internal/cpp/opencc/dictionary_set.c rename to internal/binding/cpp/opencc/dictionary_set.c diff --git a/internal/cpp/opencc/dictionary_set.h b/internal/binding/cpp/opencc/dictionary_set.h similarity index 100% rename from internal/cpp/opencc/dictionary_set.h rename to internal/binding/cpp/opencc/dictionary_set.h diff --git a/internal/cpp/opencc/encoding.c b/internal/binding/cpp/opencc/encoding.c similarity index 100% rename from internal/cpp/opencc/encoding.c rename to internal/binding/cpp/opencc/encoding.c diff --git a/internal/cpp/opencc/encoding.h b/internal/binding/cpp/opencc/encoding.h similarity index 100% rename from internal/cpp/opencc/encoding.h rename to internal/binding/cpp/opencc/encoding.h diff --git a/internal/cpp/opencc/opencc.c b/internal/binding/cpp/opencc/opencc.c similarity index 100% rename from internal/cpp/opencc/opencc.c rename to internal/binding/cpp/opencc/opencc.c diff --git a/internal/cpp/opencc/opencc.h b/internal/binding/cpp/opencc/opencc.h similarity index 100% rename from internal/cpp/opencc/opencc.h rename to internal/binding/cpp/opencc/opencc.h diff --git a/internal/cpp/opencc/opencc_types.h b/internal/binding/cpp/opencc/opencc_types.h similarity index 100% rename from internal/cpp/opencc/opencc_types.h rename to internal/binding/cpp/opencc/opencc_types.h diff --git a/internal/cpp/opencc/openccxx.cpp b/internal/binding/cpp/opencc/openccxx.cpp similarity index 100% rename from internal/cpp/opencc/openccxx.cpp rename to internal/binding/cpp/opencc/openccxx.cpp diff --git a/internal/cpp/opencc/openccxx.h b/internal/binding/cpp/opencc/openccxx.h similarity index 100% rename from internal/cpp/opencc/openccxx.h rename to internal/binding/cpp/opencc/openccxx.h diff --git a/internal/cpp/opencc/utils.c b/internal/binding/cpp/opencc/utils.c similarity index 100% rename from internal/cpp/opencc/utils.c rename to internal/binding/cpp/opencc/utils.c diff --git a/internal/cpp/opencc/utils.h b/internal/binding/cpp/opencc/utils.h similarity index 100% rename from internal/cpp/opencc/utils.h rename to internal/binding/cpp/opencc/utils.h diff --git a/internal/cpp/pcre2.h b/internal/binding/cpp/pcre2.h similarity index 100% rename from internal/cpp/pcre2.h rename to internal/binding/cpp/pcre2.h diff --git a/internal/cpp/pcre2posix.h b/internal/binding/cpp/pcre2posix.h similarity index 100% rename from internal/cpp/pcre2posix.h rename to internal/binding/cpp/pcre2posix.h diff --git a/internal/cpp/rag_analyzer.cpp b/internal/binding/cpp/rag_analyzer.cpp similarity index 100% rename from internal/cpp/rag_analyzer.cpp rename to internal/binding/cpp/rag_analyzer.cpp diff --git a/internal/cpp/rag_analyzer.h b/internal/binding/cpp/rag_analyzer.h similarity index 100% rename from internal/cpp/rag_analyzer.h rename to internal/binding/cpp/rag_analyzer.h diff --git a/internal/cpp/rag_analyzer_c_api.cpp b/internal/binding/cpp/rag_analyzer_c_api.cpp similarity index 100% rename from internal/cpp/rag_analyzer_c_api.cpp rename to internal/binding/cpp/rag_analyzer_c_api.cpp diff --git a/internal/cpp/rag_analyzer_c_api.h b/internal/binding/cpp/rag_analyzer_c_api.h similarity index 100% rename from internal/cpp/rag_analyzer_c_api.h rename to internal/binding/cpp/rag_analyzer_c_api.h diff --git a/internal/cpp/rag_analyzer_c_api_debug.cpp b/internal/binding/cpp/rag_analyzer_c_api_debug.cpp similarity index 100% rename from internal/cpp/rag_analyzer_c_api_debug.cpp rename to internal/binding/cpp/rag_analyzer_c_api_debug.cpp diff --git a/internal/cpp/rag_analyzer_c_test.cpp b/internal/binding/cpp/rag_analyzer_c_test.cpp similarity index 100% rename from internal/cpp/rag_analyzer_c_test.cpp rename to internal/binding/cpp/rag_analyzer_c_test.cpp diff --git a/internal/cpp/re2/bitmap256.cc b/internal/binding/cpp/re2/bitmap256.cc similarity index 100% rename from internal/cpp/re2/bitmap256.cc rename to internal/binding/cpp/re2/bitmap256.cc diff --git a/internal/cpp/re2/bitmap256.h b/internal/binding/cpp/re2/bitmap256.h similarity index 100% rename from internal/cpp/re2/bitmap256.h rename to internal/binding/cpp/re2/bitmap256.h diff --git a/internal/cpp/re2/bitstate.cc b/internal/binding/cpp/re2/bitstate.cc similarity index 100% rename from internal/cpp/re2/bitstate.cc rename to internal/binding/cpp/re2/bitstate.cc diff --git a/internal/cpp/re2/compile.cc b/internal/binding/cpp/re2/compile.cc similarity index 100% rename from internal/cpp/re2/compile.cc rename to internal/binding/cpp/re2/compile.cc diff --git a/internal/cpp/re2/dfa.cc b/internal/binding/cpp/re2/dfa.cc similarity index 100% rename from internal/cpp/re2/dfa.cc rename to internal/binding/cpp/re2/dfa.cc diff --git a/internal/cpp/re2/filtered_re2.cc b/internal/binding/cpp/re2/filtered_re2.cc similarity index 100% rename from internal/cpp/re2/filtered_re2.cc rename to internal/binding/cpp/re2/filtered_re2.cc diff --git a/internal/cpp/re2/filtered_re2.h b/internal/binding/cpp/re2/filtered_re2.h similarity index 100% rename from internal/cpp/re2/filtered_re2.h rename to internal/binding/cpp/re2/filtered_re2.h diff --git a/internal/cpp/re2/mimics_pcre.cc b/internal/binding/cpp/re2/mimics_pcre.cc similarity index 100% rename from internal/cpp/re2/mimics_pcre.cc rename to internal/binding/cpp/re2/mimics_pcre.cc diff --git a/internal/cpp/re2/nfa.cc b/internal/binding/cpp/re2/nfa.cc similarity index 100% rename from internal/cpp/re2/nfa.cc rename to internal/binding/cpp/re2/nfa.cc diff --git a/internal/cpp/re2/onepass.cc b/internal/binding/cpp/re2/onepass.cc similarity index 100% rename from internal/cpp/re2/onepass.cc rename to internal/binding/cpp/re2/onepass.cc diff --git a/internal/cpp/re2/parse.cc b/internal/binding/cpp/re2/parse.cc similarity index 100% rename from internal/cpp/re2/parse.cc rename to internal/binding/cpp/re2/parse.cc diff --git a/internal/cpp/re2/perl_groups.cc b/internal/binding/cpp/re2/perl_groups.cc similarity index 100% rename from internal/cpp/re2/perl_groups.cc rename to internal/binding/cpp/re2/perl_groups.cc diff --git a/internal/cpp/re2/pod_array.h b/internal/binding/cpp/re2/pod_array.h similarity index 100% rename from internal/cpp/re2/pod_array.h rename to internal/binding/cpp/re2/pod_array.h diff --git a/internal/cpp/re2/prefilter.cc b/internal/binding/cpp/re2/prefilter.cc similarity index 100% rename from internal/cpp/re2/prefilter.cc rename to internal/binding/cpp/re2/prefilter.cc diff --git a/internal/cpp/re2/prefilter.h b/internal/binding/cpp/re2/prefilter.h similarity index 100% rename from internal/cpp/re2/prefilter.h rename to internal/binding/cpp/re2/prefilter.h diff --git a/internal/cpp/re2/prefilter_tree.cc b/internal/binding/cpp/re2/prefilter_tree.cc similarity index 100% rename from internal/cpp/re2/prefilter_tree.cc rename to internal/binding/cpp/re2/prefilter_tree.cc diff --git a/internal/cpp/re2/prefilter_tree.h b/internal/binding/cpp/re2/prefilter_tree.h similarity index 100% rename from internal/cpp/re2/prefilter_tree.h rename to internal/binding/cpp/re2/prefilter_tree.h diff --git a/internal/cpp/re2/prog.cc b/internal/binding/cpp/re2/prog.cc similarity index 100% rename from internal/cpp/re2/prog.cc rename to internal/binding/cpp/re2/prog.cc diff --git a/internal/cpp/re2/prog.h b/internal/binding/cpp/re2/prog.h similarity index 100% rename from internal/cpp/re2/prog.h rename to internal/binding/cpp/re2/prog.h diff --git a/internal/cpp/re2/re2.cc b/internal/binding/cpp/re2/re2.cc similarity index 100% rename from internal/cpp/re2/re2.cc rename to internal/binding/cpp/re2/re2.cc diff --git a/internal/cpp/re2/re2.h b/internal/binding/cpp/re2/re2.h similarity index 100% rename from internal/cpp/re2/re2.h rename to internal/binding/cpp/re2/re2.h diff --git a/internal/cpp/re2/regexp.cc b/internal/binding/cpp/re2/regexp.cc similarity index 100% rename from internal/cpp/re2/regexp.cc rename to internal/binding/cpp/re2/regexp.cc diff --git a/internal/cpp/re2/regexp.h b/internal/binding/cpp/re2/regexp.h similarity index 100% rename from internal/cpp/re2/regexp.h rename to internal/binding/cpp/re2/regexp.h diff --git a/internal/cpp/re2/set.cc b/internal/binding/cpp/re2/set.cc similarity index 100% rename from internal/cpp/re2/set.cc rename to internal/binding/cpp/re2/set.cc diff --git a/internal/cpp/re2/set.h b/internal/binding/cpp/re2/set.h similarity index 100% rename from internal/cpp/re2/set.h rename to internal/binding/cpp/re2/set.h diff --git a/internal/cpp/re2/simplify.cc b/internal/binding/cpp/re2/simplify.cc similarity index 100% rename from internal/cpp/re2/simplify.cc rename to internal/binding/cpp/re2/simplify.cc diff --git a/internal/cpp/re2/sparse_array.h b/internal/binding/cpp/re2/sparse_array.h similarity index 100% rename from internal/cpp/re2/sparse_array.h rename to internal/binding/cpp/re2/sparse_array.h diff --git a/internal/cpp/re2/sparse_set.h b/internal/binding/cpp/re2/sparse_set.h similarity index 100% rename from internal/cpp/re2/sparse_set.h rename to internal/binding/cpp/re2/sparse_set.h diff --git a/internal/cpp/re2/stringpiece.cc b/internal/binding/cpp/re2/stringpiece.cc similarity index 100% rename from internal/cpp/re2/stringpiece.cc rename to internal/binding/cpp/re2/stringpiece.cc diff --git a/internal/cpp/re2/stringpiece.h b/internal/binding/cpp/re2/stringpiece.h similarity index 100% rename from internal/cpp/re2/stringpiece.h rename to internal/binding/cpp/re2/stringpiece.h diff --git a/internal/cpp/re2/tostring.cc b/internal/binding/cpp/re2/tostring.cc similarity index 100% rename from internal/cpp/re2/tostring.cc rename to internal/binding/cpp/re2/tostring.cc diff --git a/internal/cpp/re2/unicode_casefold.cc b/internal/binding/cpp/re2/unicode_casefold.cc similarity index 100% rename from internal/cpp/re2/unicode_casefold.cc rename to internal/binding/cpp/re2/unicode_casefold.cc diff --git a/internal/cpp/re2/unicode_casefold.h b/internal/binding/cpp/re2/unicode_casefold.h similarity index 100% rename from internal/cpp/re2/unicode_casefold.h rename to internal/binding/cpp/re2/unicode_casefold.h diff --git a/internal/cpp/re2/unicode_groups.cc b/internal/binding/cpp/re2/unicode_groups.cc similarity index 100% rename from internal/cpp/re2/unicode_groups.cc rename to internal/binding/cpp/re2/unicode_groups.cc diff --git a/internal/cpp/re2/unicode_groups.h b/internal/binding/cpp/re2/unicode_groups.h similarity index 100% rename from internal/cpp/re2/unicode_groups.h rename to internal/binding/cpp/re2/unicode_groups.h diff --git a/internal/cpp/re2/walker-inl.h b/internal/binding/cpp/re2/walker-inl.h similarity index 100% rename from internal/cpp/re2/walker-inl.h rename to internal/binding/cpp/re2/walker-inl.h diff --git a/internal/cpp/stemmer/api.cpp b/internal/binding/cpp/stemmer/api.cpp similarity index 100% rename from internal/cpp/stemmer/api.cpp rename to internal/binding/cpp/stemmer/api.cpp diff --git a/internal/cpp/stemmer/api.h b/internal/binding/cpp/stemmer/api.h similarity index 100% rename from internal/cpp/stemmer/api.h rename to internal/binding/cpp/stemmer/api.h diff --git a/internal/cpp/stemmer/header.h b/internal/binding/cpp/stemmer/header.h similarity index 100% rename from internal/cpp/stemmer/header.h rename to internal/binding/cpp/stemmer/header.h diff --git a/internal/cpp/stemmer/stem_UTF_8_danish.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_danish.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_danish.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_danish.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_danish.h b/internal/binding/cpp/stemmer/stem_UTF_8_danish.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_danish.h rename to internal/binding/cpp/stemmer/stem_UTF_8_danish.h diff --git a/internal/cpp/stemmer/stem_UTF_8_dutch.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_dutch.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_dutch.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_dutch.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_dutch.h b/internal/binding/cpp/stemmer/stem_UTF_8_dutch.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_dutch.h rename to internal/binding/cpp/stemmer/stem_UTF_8_dutch.h diff --git a/internal/cpp/stemmer/stem_UTF_8_english.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_english.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_english.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_english.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_english.h b/internal/binding/cpp/stemmer/stem_UTF_8_english.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_english.h rename to internal/binding/cpp/stemmer/stem_UTF_8_english.h diff --git a/internal/cpp/stemmer/stem_UTF_8_finnish.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_finnish.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_finnish.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_finnish.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_finnish.h b/internal/binding/cpp/stemmer/stem_UTF_8_finnish.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_finnish.h rename to internal/binding/cpp/stemmer/stem_UTF_8_finnish.h diff --git a/internal/cpp/stemmer/stem_UTF_8_french.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_french.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_french.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_french.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_french.h b/internal/binding/cpp/stemmer/stem_UTF_8_french.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_french.h rename to internal/binding/cpp/stemmer/stem_UTF_8_french.h diff --git a/internal/cpp/stemmer/stem_UTF_8_german.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_german.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_german.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_german.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_german.h b/internal/binding/cpp/stemmer/stem_UTF_8_german.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_german.h rename to internal/binding/cpp/stemmer/stem_UTF_8_german.h diff --git a/internal/cpp/stemmer/stem_UTF_8_hungarian.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_hungarian.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_hungarian.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_hungarian.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_hungarian.h b/internal/binding/cpp/stemmer/stem_UTF_8_hungarian.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_hungarian.h rename to internal/binding/cpp/stemmer/stem_UTF_8_hungarian.h diff --git a/internal/cpp/stemmer/stem_UTF_8_italian.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_italian.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_italian.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_italian.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_italian.h b/internal/binding/cpp/stemmer/stem_UTF_8_italian.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_italian.h rename to internal/binding/cpp/stemmer/stem_UTF_8_italian.h diff --git a/internal/cpp/stemmer/stem_UTF_8_norwegian.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_norwegian.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_norwegian.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_norwegian.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_norwegian.h b/internal/binding/cpp/stemmer/stem_UTF_8_norwegian.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_norwegian.h rename to internal/binding/cpp/stemmer/stem_UTF_8_norwegian.h diff --git a/internal/cpp/stemmer/stem_UTF_8_porter.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_porter.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_porter.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_porter.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_porter.h b/internal/binding/cpp/stemmer/stem_UTF_8_porter.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_porter.h rename to internal/binding/cpp/stemmer/stem_UTF_8_porter.h diff --git a/internal/cpp/stemmer/stem_UTF_8_portuguese.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_portuguese.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_portuguese.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_portuguese.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_portuguese.h b/internal/binding/cpp/stemmer/stem_UTF_8_portuguese.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_portuguese.h rename to internal/binding/cpp/stemmer/stem_UTF_8_portuguese.h diff --git a/internal/cpp/stemmer/stem_UTF_8_romanian.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_romanian.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_romanian.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_romanian.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_romanian.h b/internal/binding/cpp/stemmer/stem_UTF_8_romanian.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_romanian.h rename to internal/binding/cpp/stemmer/stem_UTF_8_romanian.h diff --git a/internal/cpp/stemmer/stem_UTF_8_russian.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_russian.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_russian.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_russian.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_russian.h b/internal/binding/cpp/stemmer/stem_UTF_8_russian.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_russian.h rename to internal/binding/cpp/stemmer/stem_UTF_8_russian.h diff --git a/internal/cpp/stemmer/stem_UTF_8_spanish.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_spanish.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_spanish.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_spanish.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_spanish.h b/internal/binding/cpp/stemmer/stem_UTF_8_spanish.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_spanish.h rename to internal/binding/cpp/stemmer/stem_UTF_8_spanish.h diff --git a/internal/cpp/stemmer/stem_UTF_8_swedish.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_swedish.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_swedish.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_swedish.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_swedish.h b/internal/binding/cpp/stemmer/stem_UTF_8_swedish.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_swedish.h rename to internal/binding/cpp/stemmer/stem_UTF_8_swedish.h diff --git a/internal/cpp/stemmer/stem_UTF_8_turkish.cpp b/internal/binding/cpp/stemmer/stem_UTF_8_turkish.cpp similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_turkish.cpp rename to internal/binding/cpp/stemmer/stem_UTF_8_turkish.cpp diff --git a/internal/cpp/stemmer/stem_UTF_8_turkish.h b/internal/binding/cpp/stemmer/stem_UTF_8_turkish.h similarity index 100% rename from internal/cpp/stemmer/stem_UTF_8_turkish.h rename to internal/binding/cpp/stemmer/stem_UTF_8_turkish.h diff --git a/internal/cpp/stemmer/stemmer.cpp b/internal/binding/cpp/stemmer/stemmer.cpp similarity index 100% rename from internal/cpp/stemmer/stemmer.cpp rename to internal/binding/cpp/stemmer/stemmer.cpp diff --git a/internal/cpp/stemmer/stemmer.h b/internal/binding/cpp/stemmer/stemmer.h similarity index 100% rename from internal/cpp/stemmer/stemmer.h rename to internal/binding/cpp/stemmer/stemmer.h diff --git a/internal/cpp/stemmer/utilities.cpp b/internal/binding/cpp/stemmer/utilities.cpp similarity index 100% rename from internal/cpp/stemmer/utilities.cpp rename to internal/binding/cpp/stemmer/utilities.cpp diff --git a/internal/cpp/string_utils.h b/internal/binding/cpp/string_utils.h similarity index 100% rename from internal/cpp/string_utils.h rename to internal/binding/cpp/string_utils.h diff --git a/internal/cpp/term.cpp b/internal/binding/cpp/term.cpp similarity index 100% rename from internal/cpp/term.cpp rename to internal/binding/cpp/term.cpp diff --git a/internal/cpp/term.h b/internal/binding/cpp/term.h similarity index 100% rename from internal/cpp/term.h rename to internal/binding/cpp/term.h diff --git a/internal/cpp/thinc_ner.cpp b/internal/binding/cpp/thinc_ner.cpp similarity index 100% rename from internal/cpp/thinc_ner.cpp rename to internal/binding/cpp/thinc_ner.cpp diff --git a/internal/cpp/thinc_ner.h b/internal/binding/cpp/thinc_ner.h similarity index 100% rename from internal/cpp/thinc_ner.h rename to internal/binding/cpp/thinc_ner.h diff --git a/internal/cpp/thinc_parser.cpp b/internal/binding/cpp/thinc_parser.cpp similarity index 100% rename from internal/cpp/thinc_parser.cpp rename to internal/binding/cpp/thinc_parser.cpp diff --git a/internal/cpp/thinc_parser.h b/internal/binding/cpp/thinc_parser.h similarity index 100% rename from internal/cpp/thinc_parser.h rename to internal/binding/cpp/thinc_parser.h diff --git a/internal/cpp/tokenizer.cpp b/internal/binding/cpp/tokenizer.cpp similarity index 100% rename from internal/cpp/tokenizer.cpp rename to internal/binding/cpp/tokenizer.cpp diff --git a/internal/cpp/tokenizer.h b/internal/binding/cpp/tokenizer.h similarity index 100% rename from internal/cpp/tokenizer.h rename to internal/binding/cpp/tokenizer.h diff --git a/internal/cpp/util/logging.h b/internal/binding/cpp/util/logging.h similarity index 100% rename from internal/cpp/util/logging.h rename to internal/binding/cpp/util/logging.h diff --git a/internal/cpp/util/mix.h b/internal/binding/cpp/util/mix.h similarity index 100% rename from internal/cpp/util/mix.h rename to internal/binding/cpp/util/mix.h diff --git a/internal/cpp/util/mutex.h b/internal/binding/cpp/util/mutex.h similarity index 100% rename from internal/cpp/util/mutex.h rename to internal/binding/cpp/util/mutex.h diff --git a/internal/cpp/util/rune.cc b/internal/binding/cpp/util/rune.cc similarity index 100% rename from internal/cpp/util/rune.cc rename to internal/binding/cpp/util/rune.cc diff --git a/internal/cpp/util/strutil.cc b/internal/binding/cpp/util/strutil.cc similarity index 100% rename from internal/cpp/util/strutil.cc rename to internal/binding/cpp/util/strutil.cc diff --git a/internal/cpp/util/strutil.h b/internal/binding/cpp/util/strutil.h similarity index 100% rename from internal/cpp/util/strutil.h rename to internal/binding/cpp/util/strutil.h diff --git a/internal/cpp/util/utf.h b/internal/binding/cpp/util/utf.h similarity index 100% rename from internal/cpp/util/utf.h rename to internal/binding/cpp/util/utf.h diff --git a/internal/cpp/util/util.h b/internal/binding/cpp/util/util.h similarity index 100% rename from internal/cpp/util/util.h rename to internal/binding/cpp/util/util.h diff --git a/internal/cpp/wordnet_lemmatizer.cpp b/internal/binding/cpp/wordnet_lemmatizer.cpp similarity index 100% rename from internal/cpp/wordnet_lemmatizer.cpp rename to internal/binding/cpp/wordnet_lemmatizer.cpp diff --git a/internal/cpp/wordnet_lemmatizer.h b/internal/binding/cpp/wordnet_lemmatizer.h similarity index 100% rename from internal/cpp/wordnet_lemmatizer.h rename to internal/binding/cpp/wordnet_lemmatizer.h diff --git a/internal/binding/rag_analyzer.go b/internal/binding/rag_analyzer.go index 833b038eb8..90639fe4cd 100644 --- a/internal/binding/rag_analyzer.go +++ b/internal/binding/rag_analyzer.go @@ -18,13 +18,13 @@ package rag_analyzer /* #cgo CXXFLAGS: -std=c++20 -I${SRCDIR}/.. -#cgo linux LDFLAGS: ${SRCDIR}/../cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread -lpcre2-8 +#cgo linux LDFLAGS: ${SRCDIR}/cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread -lpcre2-8 // Apple Silicon: Homebrew installs to /opt/homebrew; Intel Macs keep /usr/local. -#cgo darwin,arm64 LDFLAGS: ${SRCDIR}/../cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread /opt/homebrew/lib/libpcre2-8.a -#cgo darwin,amd64 LDFLAGS: ${SRCDIR}/../cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread /usr/local/lib/libpcre2-8.a +#cgo darwin,arm64 LDFLAGS: ${SRCDIR}/cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread /opt/homebrew/lib/libpcre2-8.a +#cgo darwin,amd64 LDFLAGS: ${SRCDIR}/cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread /usr/local/lib/libpcre2-8.a #include -#include "../cpp/rag_analyzer_c_api.h" +#include "cpp/rag_analyzer_c_api.h" */ import "C" import ( diff --git a/internal/common/format.go b/internal/common/format.go index 8059b685e3..b2bc02ac91 100644 --- a/internal/common/format.go +++ b/internal/common/format.go @@ -85,3 +85,8 @@ func DecodeFromBase64(encoded string) (string, error) { } return string(decoded), nil } + +func IsValidString(v interface{}) bool { + str, ok := v.(string) + return ok && str != "" +} diff --git a/internal/development.md b/internal/development.md index 8cfb38a6ca..732f0db1cc 100644 --- a/internal/development.md +++ b/internal/development.md @@ -31,7 +31,7 @@ They are **not** downloaded by `build.sh` — use the included download script: uv run ragflow_deps/download_deps.py ``` -This also requires `lld` on Linux (for Chromium-built pdfium): +This also requires `lld-20` on Linux (for Chromium-built pdfium): ```bash sudo apt install lld-20 && sudo ln -s /usr/bin/ld.lld-20 /usr/bin/ld.lld @@ -61,17 +61,17 @@ Note: admin server must be started first; otherwise, api server will encounter e ```bash # Start admin server -./bin/ragflow_main --admin +./bin/ragflow_server --admin ``` ```bash # Start RAGFlow server -./bin/ragflow_main --api +./bin/ragflow_server --api ``` ```bash # Start RAGFlow ingestor -./bin/ragflow_main --ingestor +./bin/ragflow_server --ingestor ``` ```bash diff --git a/internal/handler/datasets.go b/internal/handler/dataset.go similarity index 98% rename from internal/handler/datasets.go rename to internal/handler/dataset.go index 64bbb417ba..009989272e 100644 --- a/internal/handler/datasets.go +++ b/internal/handler/dataset.go @@ -40,6 +40,24 @@ type DatasetsHandler struct { searchDatasetService searchDatasetService } +// jsonResponse sends a JSON response with code and message +func jsonResponse(c *gin.Context, code common.ErrorCode, data interface{}, message string) { + c.JSON(http.StatusOK, gin.H{ + "code": code, + "data": data, + "message": message, + }) +} + +// jsonError sends a JSON error response +func jsonError(c *gin.Context, code common.ErrorCode, message string) { + c.JSON(http.StatusOK, gin.H{ + "code": code, + "data": nil, + "message": message, + }) +} + type searchDatasetsService interface { SearchDatasets(req *service.SearchDatasetsRequest, userID string) (*service.SearchDatasetsResponse, error) } diff --git a/internal/handler/datasets_aggregate_tags_test.go b/internal/handler/dataset_aggregate_tags_test.go similarity index 100% rename from internal/handler/datasets_aggregate_tags_test.go rename to internal/handler/dataset_aggregate_tags_test.go diff --git a/internal/handler/datasets_list_tags_test.go b/internal/handler/dataset_list_tags_test.go similarity index 100% rename from internal/handler/datasets_list_tags_test.go rename to internal/handler/dataset_list_tags_test.go diff --git a/internal/handler/datasets_rename_tag_test.go b/internal/handler/dataset_rename_tag_test.go similarity index 100% rename from internal/handler/datasets_rename_tag_test.go rename to internal/handler/dataset_rename_tag_test.go diff --git a/internal/handler/datasets_search_test.go b/internal/handler/dataset_search_test.go similarity index 100% rename from internal/handler/datasets_search_test.go rename to internal/handler/dataset_search_test.go diff --git a/internal/handler/dify_retrieval_handler.go b/internal/handler/dify_retrieval_handler.go index 366e8b9b92..2047c3d1e5 100644 --- a/internal/handler/dify_retrieval_handler.go +++ b/internal/handler/dify_retrieval_handler.go @@ -31,7 +31,7 @@ import ( "ragflow/internal/entity" modelModule "ragflow/internal/entity/models" "ragflow/internal/service" - "ragflow/internal/service/kg" + "ragflow/internal/service/graph" "ragflow/internal/service/nlp" "github.com/gin-gonic/gin" @@ -291,7 +291,7 @@ func (h *DifyRetrievalHandler) Retrieval(c *gin.Context) { if kgErr != nil { common.Warn("KG retrieval: failed to get chat model", zap.String("kbID", req.KnowledgeID), zap.Error(kgErr)) } else if chatModel != nil { - kgPipeline := kg.NewPipeline( + kgPipeline := graph.NewPipeline( h.docEngine, []string{req.KnowledgeID}, []string{kb.TenantID}, diff --git a/internal/handler/document.go b/internal/handler/document.go index b3401676b1..13f04af7c9 100644 --- a/internal/handler/document.go +++ b/internal/handler/document.go @@ -1189,61 +1189,6 @@ func stringValue(value *string) string { return *value } -// GetDocumentsByAuthorID get documents by author ID -// @Summary Get Author Documents -// @Description Get paginated document list by author ID -// @Tags documents -// @Accept json -// @Produce json -// @Param author_id path int true "author ID" -// @Param page query int false "page number" default(1) -// @Param page_size query int false "items per page" default(10) -// @Success 200 {object} map[string]interface{} -// @Router /api/v1/authors/{author_id}/documents [get] -func (h *DocumentHandler) GetDocumentsByAuthorID(c *gin.Context) { - _, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - authorIDStr := c.Param("author_id") - authorID, err := strconv.Atoi(authorIDStr) - if err != nil { - c.JSON(http.StatusBadRequest, gin.H{ - "error": "invalid author id", - }) - return - } - - page, _ := strconv.Atoi(c.DefaultQuery("page", "1")) - pageSize, _ := strconv.Atoi(c.DefaultQuery("page_size", "10")) - - if page < 1 { - page = 1 - } - if pageSize < 1 || pageSize > 100 { - pageSize = 10 - } - - documents, total, err := h.documentService.GetDocumentsByAuthorID(authorID, page, pageSize) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{ - "error": "failed to get documents", - }) - return - } - - c.JSON(http.StatusOK, gin.H{ - "data": gin.H{ - "items": documents, - "total": total, - "page": page, - "page_size": pageSize, - }, - }) -} - // MetadataSummary handles the metadata summary request func (h *DocumentHandler) MetadataSummary(c *gin.Context) { _, errorCode, errorMessage := GetUser(c) diff --git a/internal/handler/kb.go b/internal/handler/kb.go deleted file mode 100644 index e2997ec34b..0000000000 --- a/internal/handler/kb.go +++ /dev/null @@ -1,435 +0,0 @@ -// -// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -package handler - -import ( - "net/http" - "ragflow/internal/common" - "ragflow/internal/service" - "strings" - - "github.com/gin-gonic/gin" -) - -// KnowledgebaseHandler handles knowledge base HTTP requests -type KnowledgebaseHandler struct { - kbService *service.KnowledgebaseService - userService *service.UserService - documentService *service.DocumentService -} - -// NewKnowledgebaseHandler creates a new knowledge base handler -func NewKnowledgebaseHandler(kbService *service.KnowledgebaseService, userService *service.UserService, documentService *service.DocumentService) *KnowledgebaseHandler { - return &KnowledgebaseHandler{ - kbService: kbService, - userService: userService, - documentService: documentService, - } -} - -// jsonResponse sends a JSON response with code and message -func jsonResponse(c *gin.Context, code common.ErrorCode, data interface{}, message string) { - c.JSON(http.StatusOK, gin.H{ - "code": code, - "data": data, - "message": message, - }) -} - -// jsonError sends a JSON error response -func jsonError(c *gin.Context, code common.ErrorCode, message string) { - c.JSON(http.StatusOK, gin.H{ - "code": code, - "data": nil, - "message": message, - }) -} - -// HTTPError represents an HTTP error -type HTTPError struct { - Code common.ErrorCode - Message string -} - -// Error implements the error interface -func (e *HTTPError) Error() string { - return e.Message -} - -var ( - // ErrMissingAuth indicates missing authorization header - ErrMissingAuth = &HTTPError{Code: common.CodeUnauthorized, Message: "Missing Authorization header"} - // ErrInvalidToken indicates invalid access token - ErrInvalidToken = &HTTPError{Code: common.CodeUnauthorized, Message: "Invalid access token"} - ErrForbidden = &HTTPError{Code: common.CodeForbidden, Message: "Forbidden user"} -) - -// @Summary Update Knowledge Base -// @Description Update an existing knowledge base -// @Tags knowledgebase -// @Accept json -// @Produce json -// @Security ApiKeyAuth -// @Param request body service.UpdateKBRequest true "knowledge base update info" -// @Success 200 {object} map[string]interface{} -// @Router /v1/kb/update [post] -func (h *KnowledgebaseHandler) UpdateKB(c *gin.Context) { - user, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - var req service.UpdateKBRequest - if err := c.ShouldBindJSON(&req); err != nil { - jsonError(c, common.CodeDataError, err.Error()) - return - } - - result, code, err := h.kbService.UpdateKB(&req, user.ID) - if err != nil { - if strings.Contains(err.Error(), "authorization") { - jsonError(c, common.CodeAuthenticationError, err.Error()) - return - } - jsonError(c, code, err.Error()) - return - } - - jsonResponse(c, common.CodeSuccess, result, "success") -} - -// UpdateMetadataSetting handles the update metadata setting request -// @Summary Update Metadata Setting -// @Description Update metadata settings for a knowledge base -// @Tags knowledgebase -// @Accept json -// @Produce json -// @Security ApiKeyAuth -// @Param request body service.UpdateMetadataSettingRequest true "metadata setting info" -// @Success 200 {object} map[string]interface{} -// @Router /v1/kb/update_metadata_setting [post] -func (h *KnowledgebaseHandler) UpdateMetadataSetting(c *gin.Context) { - user, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - var req service.UpdateMetadataSettingRequest - if err := c.ShouldBindJSON(&req); err != nil { - jsonError(c, common.CodeDataError, err.Error()) - return - } - - if !h.kbService.Accessible(req.KBID, user.ID) { - jsonError(c, common.CodeAuthenticationError, "No authorization.") - return - } - - result, code, err := h.kbService.UpdateMetadataSetting(&req, user.ID) - if err != nil { - if strings.Contains(err.Error(), "authorized") { - jsonError(c, common.CodeAuthenticationError, err.Error()) - return - } - jsonError(c, code, err.Error()) - return - } - - jsonResponse(c, common.CodeSuccess, result, "success") -} - -// GetDetail handles the get knowledge base detail request -// @Summary Get Knowledge Base Detail -// @Description Get detailed information about a knowledge base -// @Tags knowledgebase -// @Accept json -// @Produce json -// @Security ApiKeyAuth -// @Param kb_id query string true "Knowledge Base ID" -// @Success 200 {object} map[string]interface{} -// @Router /v1/kb/detail [get] -func (h *KnowledgebaseHandler) GetDetail(c *gin.Context) { - user, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - kbID := c.Query("kb_id") - if kbID == "" { - jsonError(c, common.CodeDataError, "kb_id is required") - return - } - - result, code, err := h.kbService.GetDetail(kbID, user.ID) - if err != nil { - if strings.Contains(err.Error(), "authorized") { - jsonError(c, common.CodeOperatingError, err.Error()) - return - } - jsonError(c, code, err.Error()) - return - } - - jsonResponse(c, common.CodeSuccess, result, "success") -} - -// ListTags handles the list tags request for a knowledge base -// @Summary List Tags -// @Description List tags for a knowledge base -// @Tags knowledgebase -// @Accept json -// @Produce json -// @Security ApiKeyAuth -// @Param kb_id path string true "Knowledge Base ID" -// @Success 200 {object} map[string]interface{} -// @Router /v1/kb/{kb_id}/tags [get] -func (h *KnowledgebaseHandler) ListTags(c *gin.Context) { - user, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - kbID := c.Param("kb_id") - if kbID == "" { - jsonError(c, common.CodeDataError, "kb_id is required") - return - } - - if !h.kbService.Accessible(kbID, user.ID) { - jsonError(c, common.CodeAuthenticationError, "No authorization.") - return - } - - jsonResponse(c, common.CodeSuccess, []string{}, "success") -} - -// ListTagsFromKbs handles the list tags from multiple knowledge bases request -// @Summary List Tags from Knowledge Bases -// @Description List tags from multiple knowledge bases -// @Tags knowledgebase -// @Accept json -// @Produce json -// @Security ApiKeyAuth -// @Param kb_ids query string true "Comma-separated Knowledge Base IDs" -// @Success 200 {object} map[string]interface{} -// @Router /v1/kb/tags [get] -func (h *KnowledgebaseHandler) ListTagsFromKbs(c *gin.Context) { - user, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - kbIDsStr := c.Query("kb_ids") - if kbIDsStr == "" { - jsonError(c, common.CodeDataError, "kb_ids is required") - return - } - - kbIDs := strings.Split(kbIDsStr, ",") - for _, kbID := range kbIDs { - if !h.kbService.Accessible(kbID, user.ID) { - jsonError(c, common.CodeAuthenticationError, "No authorization.") - return - } - } - - jsonResponse(c, common.CodeSuccess, []string{}, "success") -} - -// RenameTag handles the rename tag request -// @Summary Rename Tag -// @Description Rename a tag in a knowledge base -// @Tags knowledgebase -// @Accept json -// @Produce json -// @Security ApiKeyAuth -// @Param kb_id path string true "Knowledge Base ID" -// @Param request body object{from_tag string, to_tag string} true "tag rename info" -// @Success 200 {object} map[string]interface{} -// @Router /v1/kb/{kb_id}/rename_tag [post] -func (h *KnowledgebaseHandler) RenameTag(c *gin.Context) { - user, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - kbID := c.Param("kb_id") - if kbID == "" { - jsonError(c, common.CodeDataError, "kb_id is required") - return - } - - if !h.kbService.Accessible(kbID, user.ID) { - jsonError(c, common.CodeAuthenticationError, "No authorization.") - return - } - - var req struct { - FromTag string `json:"from_tag" binding:"required"` - ToTag string `json:"to_tag" binding:"required"` - } - if err := c.ShouldBindJSON(&req); err != nil { - jsonError(c, common.CodeDataError, err.Error()) - return - } - - jsonResponse(c, common.CodeSuccess, true, "success") -} - -// KnowledgeGraph handles the get knowledge graph request -// @Summary Get Knowledge Graph -// @Description Get knowledge graph for a knowledge base -// @Tags knowledgebase -// @Accept json -// @Produce json -// @Security ApiKeyAuth -// @Param kb_id path string true "Knowledge Base ID" -// @Success 200 {object} map[string]interface{} -// @Router /v1/kb/{kb_id}/knowledge_graph [get] -func (h *KnowledgebaseHandler) KnowledgeGraph(c *gin.Context) { - user, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - kbID := c.Param("kb_id") - if kbID == "" { - jsonError(c, common.CodeDataError, "kb_id is required") - return - } - - if !h.kbService.Accessible(kbID, user.ID) { - jsonError(c, common.CodeAuthenticationError, "No authorization.") - return - } - - result := map[string]interface{}{ - "graph": map[string]interface{}{}, - "mind_map": map[string]interface{}{}, - } - - jsonResponse(c, common.CodeSuccess, result, "success") -} - -// DeleteKnowledgeGraph handles the delete knowledge graph request -// @Summary Delete Knowledge Graph -// @Description Delete knowledge graph for a knowledge base -// @Tags knowledgebase -// @Accept json -// @Produce json -// @Security ApiKeyAuth -// @Param kb_id path string true "Knowledge Base ID" -// @Success 200 {object} map[string]interface{} -// @Router /v1/kb/{kb_id}/knowledge_graph [delete] -func (h *KnowledgebaseHandler) DeleteKnowledgeGraph(c *gin.Context) { - user, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - kbID := c.Param("kb_id") - if kbID == "" { - jsonError(c, common.CodeDataError, "kb_id is required") - return - } - - if !h.kbService.Accessible(kbID, user.ID) { - jsonError(c, common.CodeAuthenticationError, "No authorization.") - return - } - - jsonResponse(c, common.CodeSuccess, true, "success") -} - -// GetMeta handles the get metadata request -// @Summary Get Metadata -// @Description Get metadata for knowledge bases -// @Tags knowledgebase -// @Accept json -// @Produce json -// @Security ApiKeyAuth -// @Param kb_ids query string true "Comma-separated Knowledge Base IDs" -// @Success 200 {object} map[string]interface{} -// @Router /v1/kb/get_meta [get] -func (h *KnowledgebaseHandler) GetMeta(c *gin.Context) { - user, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - kbIDsStr := c.Query("kb_ids") - if kbIDsStr == "" { - jsonError(c, common.CodeDataError, "kb_ids is required") - return - } - - kbIDs := strings.Split(kbIDsStr, ",") - for _, kbID := range kbIDs { - if !h.kbService.Accessible(kbID, user.ID) { - jsonError(c, common.CodeAuthenticationError, "No authorization.") - return - } - } - - meta, err := h.documentService.GetMetadataByKBs(kbIDs) - if err != nil { - jsonError(c, common.CodeExceptionError, err.Error()) - return - } - - jsonResponse(c, common.CodeSuccess, meta, "success") -} - -// GetBasicInfo handles the get basic info request -// @Summary Get Basic Info -// @Description Get basic information for a knowledge base -// @Tags knowledgebase -// @Accept json -// @Produce json -// @Security ApiKeyAuth -// @Param kb_id query string true "Knowledge Base ID" -// @Success 200 {object} map[string]interface{} -// @Router /v1/kb/basic_info [get] -func (h *KnowledgebaseHandler) GetBasicInfo(c *gin.Context) { - user, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - kbID := c.Query("kb_id") - if kbID == "" { - jsonError(c, common.CodeDataError, "kb_id is required") - return - } - - if !h.kbService.Accessible(kbID, user.ID) { - jsonError(c, common.CodeAuthenticationError, "No authorization.") - return - } - - jsonResponse(c, common.CodeSuccess, map[string]interface{}{}, "success") -} diff --git a/internal/handler/tenant.go b/internal/handler/tenant.go index 225ceda768..7cc135ffb0 100644 --- a/internal/handler/tenant.go +++ b/internal/handler/tenant.go @@ -30,63 +30,20 @@ import ( // TenantHandler tenant handler type TenantHandler struct { - tenantService *service.TenantService - userService *service.UserService - kbService *service.KnowledgebaseService + tenantService *service.TenantService + userService *service.UserService + datasetService *service.DatasetService } // NewTenantHandler create tenant handler -func NewTenantHandler(tenantService *service.TenantService, userService *service.UserService, kbService *service.KnowledgebaseService) *TenantHandler { +func NewTenantHandler(tenantService *service.TenantService, userService *service.UserService, datasetService *service.DatasetService) *TenantHandler { return &TenantHandler{ - tenantService: tenantService, - userService: userService, - kbService: kbService, + tenantService: tenantService, + userService: userService, + datasetService: datasetService, } } -func (h *TenantHandler) GetModels(c *gin.Context) { - user, errorCode, errorMessage := GetUser(c) - if errorCode != common.CodeSuccess { - jsonError(c, errorCode, errorMessage) - return - } - - defaultModels, err := h.tenantService.ListTenantDefaultModels(user.ID) - if err != nil { - c.JSON(http.StatusOK, gin.H{ - "code": common.CodeExceptionError, - "message": err.Error(), - "data": false, - }) - return - } - - // Always return success with an array. The previous contract returned - // code=102 "No default models" for an empty list, which (a) tripped the - // global error toast in web/src/utils/next-request.ts:141 and (b) was - // inconsistent with the Python counterpart in - // api/apps/restful_apis/models_api.py:30 which returns - // get_result(data=[]) on the no-rows path. Frontend hooks (e.g. - // useFetchAllAddedModels) coerce `null` to `[]` already, so `[]` is - // strictly safer. - if defaultModels == nil { - defaultModels = []service.ModelItem{} - } - c.JSON(http.StatusOK, gin.H{ - "code": common.CodeSuccess, - "message": "success", - "data": defaultModels, - }) -} - -type SetModelRequest struct { - ModelProvider string `json:"model_provider"` - ModelInstance string `json:"model_instance"` - ModelName string `json:"model_name"` - ModelID string `json:"model_id"` - ModelType string `json:"model_type" binding:"required"` -} - func (h *TenantHandler) SetModels(c *gin.Context) { h.setDefaultModels(c, false) } @@ -95,6 +52,14 @@ func (h *TenantHandler) SetDefaultModels(c *gin.Context) { h.setDefaultModels(c, true) } +type SetModelRequest struct { + ModelProvider string `json:"model_provider"` + ModelInstance string `json:"model_instance"` + ModelName string `json:"model_name"` + ModelID string `json:"model_id"` + ModelType string `json:"model_type" binding:"required"` +} + func (h *TenantHandler) setDefaultModels(c *gin.Context, wrapModels bool) { user, errorCode, errorMessage := GetUser(c) if errorCode != common.CodeSuccess { @@ -344,7 +309,7 @@ func (h *TenantHandler) CreateChunkStore(c *gin.Context) { } // Check authorization - user must have access to this kb - if !h.kbService.Accessible(req.KBID, user.ID) { + if !h.datasetService.Accessible(req.KBID, user.ID) { jsonError(c, common.CodeAuthenticationError, "No authorization.") return } @@ -395,7 +360,7 @@ func (h *TenantHandler) DeleteChunkStore(c *gin.Context) { } // Check authorization - if !h.kbService.Accessible(req.KBID, user.ID) { + if !h.datasetService.Accessible(req.KBID, user.ID) { jsonError(c, common.CodeAuthenticationError, "No authorization.") return } diff --git a/internal/ingestion/compilation/extractor/ner.go b/internal/ingestion/compilation/extractor/ner.go index ac3b49bc16..4618dfd35a 100644 --- a/internal/ingestion/compilation/extractor/ner.go +++ b/internal/ingestion/compilation/extractor/ner.go @@ -23,12 +23,12 @@ package extractor // #cgo CXXFLAGS: -std=c++20 -I${SRCDIR}/../../.. -// #cgo linux LDFLAGS: ${SRCDIR}/../../../cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread -lpcre2-8 -// #cgo darwin LDFLAGS: ${SRCDIR}/../../../cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread -lpcre2-8 +// #cgo linux LDFLAGS: ${SRCDIR}/../../../binding/cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread -lpcre2-8 +// #cgo darwin LDFLAGS: ${SRCDIR}/../../../binding/cpp/cmake-build-release/librag_tokenizer_c_api.a -lstdc++ -lm -lpthread -lpcre2-8 // // #include -// #include "../../../cpp/rag_analyzer_c_api.h" -// #include "../../../cpp/thinc_parser.h" +// #include "../../../binding/cpp/rag_analyzer_c_api.h" +// #include "../../../binding/cpp/thinc_parser.h" import "C" import ( "encoding/json" diff --git a/internal/ingestion/compilation/extractor/ner_extractor.go b/internal/ingestion/compilation/extractor/ner_extractor.go index 3c8045c839..f5452877f9 100644 --- a/internal/ingestion/compilation/extractor/ner_extractor.go +++ b/internal/ingestion/compilation/extractor/ner_extractor.go @@ -18,7 +18,7 @@ // pipeline. It mirrors the Python rag/graphrag/ner package so that both // code paths produce identical output. // -// The C++ ThincNER engine (internal/cpp/) loads spaCy model.ckpt+model.bin +// The C++ ThincNER engine (internal/binding/cpp/) loads spaCy model.ckpt+model.bin // directly for NER inference. Relation extraction is pure Go regex. // // Usage: diff --git a/internal/ingestion/compilation/extractor/parser_go.go b/internal/ingestion/compilation/extractor/parser_go.go index f23af87f1e..9994c2a94f 100644 --- a/internal/ingestion/compilation/extractor/parser_go.go +++ b/internal/ingestion/compilation/extractor/parser_go.go @@ -2,7 +2,7 @@ package extractor /* #include -#include "../../../cpp/thinc_parser.h" +#include "../../../binding/cpp/thinc_parser.h" */ import "C" import ( diff --git a/internal/router/router.go b/internal/router/router.go index dfe79240e8..56b956296f 100644 --- a/internal/router/router.go +++ b/internal/router/router.go @@ -30,7 +30,6 @@ type Router struct { documentHandler *handler.DocumentHandler datasetsHandler *handler.DatasetsHandler systemHandler *handler.SystemHandler - knowledgebaseHandler *handler.KnowledgebaseHandler chunkHandler *handler.ChunkHandler llmHandler *handler.LLMHandler chatHandler *handler.ChatHandler @@ -64,7 +63,6 @@ func NewRouter( documentHandler *handler.DocumentHandler, datasetsHandler *handler.DatasetsHandler, systemHandler *handler.SystemHandler, - knowledgebaseHandler *handler.KnowledgebaseHandler, chunkHandler *handler.ChunkHandler, llmHandler *handler.LLMHandler, chatHandler *handler.ChatHandler, @@ -96,7 +94,6 @@ func NewRouter( documentHandler: documentHandler, datasetsHandler: datasetsHandler, systemHandler: systemHandler, - knowledgebaseHandler: knowledgebaseHandler, chunkHandler: chunkHandler, llmHandler: llmHandler, chatHandler: chatHandler, @@ -188,6 +185,8 @@ func (r *Router) Setup(engine *gin.Engine) { apiNoAuth.POST("/auth/password/forgot/otp", r.userHandler.ForgotSendOTP) apiNoAuth.POST("/auth/password/forgot/otp/verify", r.userHandler.ForgotVerifyOTP) apiNoAuth.POST("/auth/password/reset", r.userHandler.ForgotResetPassword) + + apiNoAuth.GET("/dify/retrieval/health", r.difyRetrievalHandler.HealthCheck) } // Beta-token routes. Mirrors python's @@ -269,7 +268,17 @@ func (r *Router) Setup(engine *gin.Engine) { tenants.DELETE("/:tenant_id/users", r.tenantHandler.RemoveTenantMember) } - v1.GET("/tenant/list", r.tenantHandler.TenantList) + // Tenant routes (per-tenant resources) + tenant := v1.Group("/tenant") + { + tenant.GET("/list", r.tenantHandler.TenantList) + tenant.POST("/chunk_store", r.tenantHandler.CreateChunkStore) // Internal API only for GO + tenant.DELETE("/chunk_store", r.tenantHandler.DeleteChunkStore) // Internal API only for GO + tenant.POST("/metadata_store", r.tenantHandler.CreateMetadataStore) // Internal API only for GO + tenant.DELETE("/metadata_store", r.tenantHandler.DeleteMetadataStore) // Internal API only for GO + tenant.POST("/insert_chunks_from_file", r.tenantHandler.InsertChunksFromFile) // Internal API only for GO + tenant.POST("/insert_metadata_from_file", r.tenantHandler.InsertMetadataFromFile) // Internal API only for GO + } // Document routes documents := v1.Group("/documents") @@ -285,8 +294,6 @@ func (r *Router) Setup(engine *gin.Engine) { } // Chat routes - v1.POST("/chat/mindmap", r.chatHandler.MindMap) - v1.POST("/chat/recommendation", r.chatHandler.Recommendation) chats := v1.Group("/chats") { chats.GET("", r.chatHandler.ListChats) @@ -307,15 +314,11 @@ func (r *Router) Setup(engine *gin.Engine) { chat := v1.Group("/chat") { - // Chat completions route chat.POST("/completions", r.chatSessionHandler.ChatCompletions) + chat.POST("/mindmap", r.chatHandler.MindMap) + chat.POST("/recommendation", r.chatHandler.Recommendation) } - - // OpenAI-compatible chat completions route - openai := v1.Group("/openai") - { - openai.POST("/:chat_id/chat/completions", r.openaiChatHandler.OpenAIChatCompletions) - } + v1.POST("/openai/:chat_id/chat/completions", r.openaiChatHandler.OpenAIChatCompletions) // Dataset routes datasets := v1.Group("/datasets") @@ -391,17 +394,25 @@ func (r *Router) Setup(engine *gin.Engine) { searches.POST("/:search_id/completions", r.searchHandler.Completion) } - file := v1.Group("/files") + files := v1.Group("/files") { - file.POST("", r.fileHandler.UploadFile) - file.GET("", r.fileHandler.ListFiles) - file.DELETE("", r.fileHandler.DeleteFiles) - file.POST("/move", r.fileHandler.MoveFiles) - file.POST("/link-to-datasets", r.fileHandler.LinkToDatasets) - file.GET("/:id/ancestors", r.fileHandler.GetFileAncestors) - file.GET("/:id/parent", r.fileHandler.GetParentFolder) - file.GET("/:id", r.fileHandler.Download) - file.GET("/:id/versions", r.fileCommitHandler.GetFileVersionHistory) + files.POST("", r.fileHandler.UploadFile) + files.GET("", r.fileHandler.ListFiles) + files.DELETE("", r.fileHandler.DeleteFiles) + files.POST("/move", r.fileHandler.MoveFiles) + files.POST("/link-to-datasets", r.fileHandler.LinkToDatasets) + files.GET("/:id/ancestors", r.fileHandler.GetFileAncestors) + files.GET("/:id/parent", r.fileHandler.GetParentFolder) + files.GET("/:id", r.fileHandler.Download) + files.GET("/:id/versions", r.fileCommitHandler.GetFileVersionHistory) + } + + // File routes + file := authorized.Group("/v1/file") + { + file.GET("/root_folder", r.fileHandler.GetRootFolder) + file.GET("/parent_folder", r.fileHandler.GetParentFolder) + file.GET("/all_parent_folder", r.fileHandler.GetAllParentFolders) } // File commit routes — /folders/ takes folder_id directly @@ -444,12 +455,6 @@ func (r *Router) Setup(engine *gin.Engine) { commitDatasets.GET("/changes", r.fileCommitHandler.GetUncommittedChanges) } - // Author routes - authors := v1.Group("/authors") - { - authors.GET("/:author_id/documents", r.documentHandler.GetDocumentsByAuthorID) - } - // Memory routes memory := v1.Group("/memories") { @@ -529,19 +534,10 @@ func (r *Router) Setup(engine *gin.Engine) { model := v1.Group("/models") { // GET /models returns the tenant's added models across - // all instances, matching Python's - // models_api_service.list_tenant_added_models. Front-end - // useFetchAllAddedModels consumes this. Routed to the - // provider handler because that's where the - // modelProviderService is wired. + // all instances. Front-end useFetchAllAddedModels consumes this. model.GET("/", r.providerHandler.ListTenantAddedModels) - - // TODO: list default models? - //model.GET("/", r.tenantHandler.GetModels) model.PATCH("/", r.tenantHandler.SetModels) - // Tenant default-model selection (used by the agent - // page's useFetchDefaultModels hook). Mirrors the - // Python contract at api/apps/restful_apis/models_api.py:84. + // Tenant default-model selection (used by the agent page's useFetchDefaultModels hook) model.GET("/default", r.tenantHandler.GetDefaultModels) model.PATCH("/default", r.tenantHandler.SetDefaultModels) } @@ -568,27 +564,31 @@ func (r *Router) Setup(engine *gin.Engine) { admin := v1.Group("/admin") RegisterAdminRuntimeRoutes(admin, r.adminRuntimeHandler) - connector := v1.Group("/connectors") + connectors := v1.Group("/connectors") { - connector.GET("/", r.connectorHandler.ListConnectors) - connector.POST("/", r.connectorHandler.CreateConnector) - connector.POST("/google/oauth/web/start", r.connectorHandler.StartGoogleWebOAuth) - connector.POST("/google/oauth/web/result", r.connectorHandler.PollGoogleWebOAuthResult) - connector.POST("/box/oauth/web/start", r.connectorHandler.StartBoxWebOAuth) - connector.POST("/box/oauth/web/result", r.connectorHandler.PollBoxWebOAuthResult) - connector.GET("/:connector_id", r.connectorHandler.GetConnector) - connector.PATCH("/:connector_id", r.connectorHandler.UpdateConnector) - connector.GET("/:connector_id/logs", r.connectorHandler.ListLogs) - connector.DELETE("/:connector_id", r.connectorHandler.DeleteConnector) - connector.POST("/:connector_id/rebuild", r.connectorHandler.RebuildConnector) - connector.POST("/:connector_id/test", r.connectorHandler.TestConnector) + connectors.GET("/", r.connectorHandler.ListConnectors) + connectors.POST("/", r.connectorHandler.CreateConnector) + connectors.POST("/google/oauth/web/start", r.connectorHandler.StartGoogleWebOAuth) + connectors.POST("/google/oauth/web/result", r.connectorHandler.PollGoogleWebOAuthResult) + connectors.POST("/box/oauth/web/start", r.connectorHandler.StartBoxWebOAuth) + connectors.POST("/box/oauth/web/result", r.connectorHandler.PollBoxWebOAuthResult) + connectors.GET("/:connector_id", r.connectorHandler.GetConnector) + connectors.PATCH("/:connector_id", r.connectorHandler.UpdateConnector) + connectors.GET("/:connector_id/logs", r.connectorHandler.ListLogs) + connectors.DELETE("/:connector_id", r.connectorHandler.DeleteConnector) + connectors.POST("/:connector_id/rebuild", r.connectorHandler.RebuildConnector) + connectors.POST("/:connector_id/test", r.connectorHandler.TestConnector) } - // MCP server routes. Per-server CRUD ships via separate PRs that - // share the same handler/service: GET list (#15253), GET by id - // (#15254), POST create (#15260, merged), PUT (#15261), DELETE - // (#15262, merged). This PR adds only the non-overlapping - // endpoints: import and test. + // Connector routes + connector := authorized.Group("/v1/connector") + { + connector.GET("/list", r.connectorHandler.ListConnectors) + connector.GET("/:connector_id", r.connectorHandler.GetConnector) + connector.POST("/:connector_id/rebuild", r.connectorHandler.RebuildConnector) + } + + // MCP server routes. mcp := v1.Group("/mcp") { mcp.POST("/servers", r.mcpHandler.CreateMCPServer) @@ -620,14 +620,6 @@ func (r *Router) Setup(engine *gin.Engine) { // Environments system.GET("/environments", r.systemHandler.ListEnvironments) - //log := system.Group("/log") - //{ - // // /api/v1/system/log GET - // log.GET("", r.systemHandler.GetLogLevel) - // // /api/v1/system/log PUT - // log.PUT("", r.systemHandler.SetLogLevel) - //} - tokens := system.Group("/tokens") { // list tokens /api/v1/system/tokens GET @@ -648,100 +640,51 @@ func (r *Router) Setup(engine *gin.Engine) { keys.DELETE("/:key", r.systemHandler.DeleteKey) } } - } - // Knowledge base routes - kb := v1.Group("/kb") - { - kb.POST("/update", r.knowledgebaseHandler.UpdateKB) - kb.POST("/update_metadata_setting", r.knowledgebaseHandler.UpdateMetadataSetting) - kb.GET("/detail", r.knowledgebaseHandler.GetDetail) - kb.GET("/tags", r.knowledgebaseHandler.ListTagsFromKbs) - kb.GET("/get_meta", r.knowledgebaseHandler.GetMeta) - kb.GET("/basic_info", r.knowledgebaseHandler.GetBasicInfo) - - // KB ID specific routes - kbByID := kb.Group("/:kb_id") + // Document routes + doc := v1.Group("/document") { - kbByID.GET("/tags", r.knowledgebaseHandler.ListTags) - kbByID.POST("/rename_tag", r.knowledgebaseHandler.RenameTag) - kbByID.GET("/knowledge_graph", r.knowledgebaseHandler.KnowledgeGraph) - kbByID.DELETE("/knowledge_graph", r.knowledgebaseHandler.DeleteKnowledgeGraph) + doc.POST("/list", r.documentHandler.ListDocuments) + doc.POST("/metadata/summary", r.documentHandler.MetadataSummary) + doc.POST("/set_meta", r.documentHandler.SetMeta) + doc.POST("/delete_meta", r.documentHandler.DeleteMeta) // Internal API only for GO + } + + // Chunk routes + chunk := v1.Group("/chunk") + { + chunk.POST("/list", r.chunkHandler.List) + chunk.POST("/update", r.chunkHandler.UpdateChunk) // Internal API only for GO + } + + // Chat Channel + chanChannel := v1.Group("/chat-channels") + { + chanChannel.POST("", r.chatChannelHandler.CreateChatChannel) + chanChannel.GET("", r.chatChannelHandler.ListChatChannel) + chanChannel.GET("/:channel_id", r.chatChannelHandler.GetChatChannel) + chanChannel.PATCH("/:channel_id", r.chatChannelHandler.UpdateChatChannel) + chanChannel.DELETE("/:channel_id", r.chatChannelHandler.DeleteChatChannel) + } + + // Langfuse tracing keys + langfuse := v1.Group("/langfuse") + { + langfuse.POST("/api-key", r.langfuseHandler.SetAPIKey) + langfuse.PUT("/api-key", r.langfuseHandler.SetAPIKey) + langfuse.GET("/api-key", r.langfuseHandler.GetAPIKey) + langfuse.DELETE("/api-key", r.langfuseHandler.DeleteAPIKey) + } + + // Dify retrieval routes + dify := v1.Group("/dify") + { + dify.POST("/retrieval", r.difyRetrievalHandler.Retrieval) + dify.GET("/retrieval", r.difyRetrievalHandler.Retrieval) } } - - // Tenant routes (per-tenant resources) - tenant := v1.Group("/tenant") - { - tenant.POST("/chunk_store", r.tenantHandler.CreateChunkStore) // Internal API only for GO - tenant.DELETE("/chunk_store", r.tenantHandler.DeleteChunkStore) // Internal API only for GO - tenant.POST("/metadata_store", r.tenantHandler.CreateMetadataStore) // Internal API only for GO - tenant.DELETE("/metadata_store", r.tenantHandler.DeleteMetadataStore) // Internal API only for GO - tenant.POST("/insert_chunks_from_file", r.tenantHandler.InsertChunksFromFile) // Internal API only for GO - tenant.POST("/insert_metadata_from_file", r.tenantHandler.InsertMetadataFromFile) // Internal API only for GO - } - - // Document routes - doc := v1.Group("/document") - { - doc.POST("/list", r.documentHandler.ListDocuments) - doc.POST("/metadata/summary", r.documentHandler.MetadataSummary) - doc.POST("/set_meta", r.documentHandler.SetMeta) - doc.POST("/delete_meta", r.documentHandler.DeleteMeta) // Internal API only for GO - } - - // Chunk routes - chunk := v1.Group("/chunk") - { - chunk.POST("/list", r.chunkHandler.List) - chunk.POST("/update", r.chunkHandler.UpdateChunk) // Internal API only for GO - } - - // Chat Channel - chanChannel := v1.Group("/chat-channels") - { - chanChannel.POST("", r.chatChannelHandler.CreateChatChannel) - chanChannel.GET("", r.chatChannelHandler.ListChatChannel) - chanChannel.GET("/:channel_id", r.chatChannelHandler.GetChatChannel) - chanChannel.PATCH("/:channel_id", r.chatChannelHandler.UpdateChatChannel) - chanChannel.DELETE("/:channel_id", r.chatChannelHandler.DeleteChatChannel) - } - - // Langfuse tracing keys - langfuse := v1.Group("/langfuse") - { - langfuse.POST("/api-key", r.langfuseHandler.SetAPIKey) - langfuse.PUT("/api-key", r.langfuseHandler.SetAPIKey) - langfuse.GET("/api-key", r.langfuseHandler.GetAPIKey) - langfuse.DELETE("/api-key", r.langfuseHandler.DeleteAPIKey) - } - - // Connector routes - connector := authorized.Group("/v1/connector") - { - connector.GET("/list", r.connectorHandler.ListConnectors) - connector.GET("/:connector_id", r.connectorHandler.GetConnector) - connector.POST("/:connector_id/rebuild", r.connectorHandler.RebuildConnector) - } - - // File routes - file := authorized.Group("/v1/file") - { - file.GET("/root_folder", r.fileHandler.GetRootFolder) - file.GET("/parent_folder", r.fileHandler.GetParentFolder) - file.GET("/all_parent_folder", r.fileHandler.GetAllParentFolders) - } - } - // Dify retrieval routes - dify := authorized.Group("/api/v1/dify") - { - dify.POST("/retrieval", r.difyRetrievalHandler.Retrieval) - dify.GET("/retrieval", r.difyRetrievalHandler.Retrieval) - } - apiNoAuth.GET("/dify/retrieval/health", r.difyRetrievalHandler.HealthCheck) - // Handle undefined routes engine.NoRoute(handler.HandleNoRoute) } diff --git a/internal/service/chat_pipeline.go b/internal/service/chat_pipeline.go index 0be17e0348..d535e2696a 100644 --- a/internal/service/chat_pipeline.go +++ b/internal/service/chat_pipeline.go @@ -26,7 +26,7 @@ import ( "ragflow/internal/engine" "ragflow/internal/entity" modelModule "ragflow/internal/entity/models" - "ragflow/internal/service/kg" + "ragflow/internal/service/graph" "ragflow/internal/service/nlp" "regexp" "sort" @@ -48,7 +48,7 @@ import ( type ChatPipelineService struct { ModelProviderSvc *ModelProviderService MetadataSvc *MetadataService - KbService *KnowledgebaseService + datasetService *DatasetService } // NewChatPipelineService creates a new ChatPipelineService with all required dependencies. @@ -56,7 +56,7 @@ func NewChatPipelineService() *ChatPipelineService { return &ChatPipelineService{ ModelProviderSvc: NewModelProviderService(), MetadataSvc: NewMetadataService(), - KbService: NewKnowledgebaseService(), + datasetService: NewDatasetService(), } } @@ -339,7 +339,7 @@ func (s *ChatPipelineService) AsyncChat( // === Phase 6: SQL Retrieval === // Retrieve field_map for SQL retrieval (preferred over vector search) promptConfig := chat.PromptConfig - fieldMap, fmErr := s.KbService.GetFieldMap(kbIDStrings(kbs)) + fieldMap, fmErr := s.datasetService.GetFieldMap(kbIDStrings(kbs)) if fmErr != nil { common.Warn("get_field_map failed; proceeding without field_map", zap.Error(fmErr)) fieldMap = nil @@ -788,7 +788,7 @@ func (s *ChatPipelineService) AsyncChat( if useKG, _ := chat.PromptConfig["use_kg"].(bool); useKG && chatModel != nil && len(kbs) > 0 { kgIDs := kbIDStrings(kbs) if len(kgIDs) > 0 { - kgPipeline := kg.NewPipeline(engine.Get(), kgIDs, kbTenantIDStrings(kbs), searchQuestion) + kgPipeline := graph.NewPipeline(engine.Get(), kgIDs, kbTenantIDStrings(kbs), searchQuestion) kgPipeline.SetChatModel(chatModel) if embModel != nil { kgPipeline.SetEmbModel(embModel) @@ -2367,7 +2367,7 @@ func (s *ChatPipelineService) kbPrompt(kbinfos map[string]interface{}, maxTokens usedTokenCount := 0 chunksNum := 0 for _, cc := range contents { - usedTokenCount += kg.NumTokensFromString(cc.content) + usedTokenCount += graph.NumTokensFromString(cc.content) chunksNum++ if float64(maxTokens)*0.97 < float64(usedTokenCount) { common.Warn("Not all the retrieval into prompt", @@ -2462,28 +2462,28 @@ func (s *ChatPipelineService) messageFitIn(messages []map[string]interface{}, ma } // Step 3: trim content to fit. - ll := kg.NumTokensFromString(s.stringContent(result[0])) - ll2 := kg.NumTokensFromString(s.stringContent(result[len(result)-1])) + ll := graph.NumTokensFromString(s.stringContent(result[0])) + ll2 := graph.NumTokensFromString(s.stringContent(result[len(result)-1])) total := ll + ll2 if total <= 0 { return 0, result } if len(result) == 1 { - result[0]["content"] = kg.TrimContentToTokenLimit(s.stringContent(result[0]), maxTokens) + result[0]["content"] = graph.TrimContentToTokenLimit(s.stringContent(result[0]), maxTokens) return s.countAllTokens(result), result } if float64(ll)/float64(total) > 0.8 { preservedLast := min(ll2, maxTokens) - result[len(result)-1]["content"] = kg.TrimContentToTokenLimit(s.stringContent(result[len(result)-1]), preservedLast) + result[len(result)-1]["content"] = graph.TrimContentToTokenLimit(s.stringContent(result[len(result)-1]), preservedLast) remaining := max(0, maxTokens-preservedLast) - result[0]["content"] = kg.TrimContentToTokenLimit(s.stringContent(result[0]), remaining) + result[0]["content"] = graph.TrimContentToTokenLimit(s.stringContent(result[0]), remaining) } else { preservedSystem := min(ll, maxTokens) - result[0]["content"] = kg.TrimContentToTokenLimit(s.stringContent(result[0]), preservedSystem) + result[0]["content"] = graph.TrimContentToTokenLimit(s.stringContent(result[0]), preservedSystem) remaining := max(0, maxTokens-preservedSystem) - result[len(result)-1]["content"] = kg.TrimContentToTokenLimit(s.stringContent(result[len(result)-1]), remaining) + result[len(result)-1]["content"] = graph.TrimContentToTokenLimit(s.stringContent(result[len(result)-1]), remaining) } return s.countAllTokens(result), result @@ -2493,7 +2493,7 @@ func (s *ChatPipelineService) messageFitIn(messages []map[string]interface{}, ma func (s *ChatPipelineService) countAllTokens(messages []map[string]interface{}) int { total := 0 for _, m := range messages { - total += kg.NumTokensFromString(s.stringContent(m)) + total += graph.NumTokensFromString(s.stringContent(m)) } return total } @@ -2816,7 +2816,7 @@ func (s *ChatPipelineService) decorateAnswer( // token-count / token-speed lines that the existing OpenAI endpoint // already exposes. Total wall-clock is rounded to ms. totalMs := timer.Total().Seconds() * 1000 - tkNum := kg.NumTokensFromString(think + ans) + tkNum := graph.NumTokensFromString(think + ans) prompt += fmt.Sprintf("\n\n### Query:\n%s", strings.Join(questions, " ")) diff --git a/internal/service/dataset.go b/internal/service/dataset.go index aade8907de..facb928452 100644 --- a/internal/service/dataset.go +++ b/internal/service/dataset.go @@ -141,15 +141,15 @@ func NewDatasetService() *DatasetService { } } -func (s *DatasetService) UpdateDocumentMetadataConfig(userID, datasetID, documentID string, req map[string]interface{}) (*entity.Document, common.ErrorCode, error) { - if _, err := s.kbDAO.GetByIDAndTenantID(datasetID, userID); err != nil { +func (d *DatasetService) UpdateDocumentMetadataConfig(userID, datasetID, documentID string, req map[string]interface{}) (*entity.Document, common.ErrorCode, error) { + if _, err := d.kbDAO.GetByIDAndTenantID(datasetID, userID); err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeDataError, errors.New("You don't own the dataset.") } return nil, common.CodeServerError, errors.New("Database operation failed") } - doc, err := s.documentDAO.GetByDocumentIDAndDatasetID(documentID, datasetID) + doc, err := d.documentDAO.GetByDocumentIDAndDatasetID(documentID, datasetID) if err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeDataError, fmt.Errorf("Document %s not found in dataset %s", documentID, datasetID) @@ -168,11 +168,11 @@ func (s *DatasetService) UpdateDocumentMetadataConfig(userID, datasetID, documen } parserConfig["metadata"] = metadata - if err := s.documentDAO.UpdateByID(doc.ID, map[string]interface{}{"parser_config": parserConfig}); err != nil { + if err := d.documentDAO.UpdateByID(doc.ID, map[string]interface{}{"parser_config": parserConfig}); err != nil { return nil, common.CodeExceptionError, err } - updatedDoc, err := s.documentDAO.GetByID(doc.ID) + updatedDoc, err := d.documentDAO.GetByID(doc.ID) if err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeDataError, errors.New("Document not found!") @@ -194,7 +194,7 @@ func checkType(indexType string) bool { return haveType } -func (s *DatasetService) newRaptorOrGraphRagTask(sampleDoc *entity.Document, taskType string, taskDocID string, queueDocID string, docIDs []string) (*entity.Task, map[string]interface{}, error) { +func (d *DatasetService) newRaptorOrGraphRagTask(sampleDoc *entity.Document, taskType string, taskDocID string, queueDocID string, docIDs []string) (*entity.Task, map[string]interface{}, error) { if docIDs == nil || len(docIDs) == 0 { docIDs = make([]string, 0) } @@ -202,7 +202,7 @@ func (s *DatasetService) newRaptorOrGraphRagTask(sampleDoc *entity.Document, tas return nil, nil, errors.New("type should be graphrag, raptor or mindmap") } - chunkingConfig, err := s.documentDAO.GetChunkingConfig(sampleDoc.ID) + chunkingConfig, err := d.documentDAO.GetChunkingConfig(sampleDoc.ID) if err != nil { return nil, nil, err } @@ -431,7 +431,7 @@ func clearGraphPhaseMarkers(redisClient *redisengine.RedisClient, datasetID stri } // RunIndex Run an indexing task (graph/raptor/mindmap) for a dataset. -func (s *DatasetService) RunIndex(userID, datasetID, indexType string) (map[string]interface{}, common.ErrorCode, error) { +func (d *DatasetService) RunIndex(userID, datasetID, indexType string) (map[string]interface{}, common.ErrorCode, error) { if !checkType(indexType) { return nil, common.CodeDataError, fmt.Errorf("Invalid index type '%s'. Must be one of %v", indexType, validIndexTypes) } @@ -439,11 +439,11 @@ func (s *DatasetService) RunIndex(userID, datasetID, indexType string) (map[stri if datasetID == "" { return nil, common.CodeDataError, errors.New(`Lack of "Dataset ID"`) } - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return nil, common.CodeDataError, errors.New("No authorization.") } - kb, err := s.kbDAO.GetByID(datasetID) + kb, err := d.kbDAO.GetByID(datasetID) if err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeDataError, errors.New("Invalid Dataset ID") @@ -454,7 +454,7 @@ func (s *DatasetService) RunIndex(userID, datasetID, indexType string) (map[stri taskType := indexTypeToTaskType[indexType] displayName := indexTypeToDisplayName[indexType] - documents, code, err := s.getDocumentsByDatasetForIndex(datasetID) + documents, code, err := d.getDocumentsByDatasetForIndex(datasetID) if err != nil { return nil, code, err } @@ -467,7 +467,7 @@ func (s *DatasetService) RunIndex(userID, datasetID, indexType string) (map[stri documentIDs[i] = doc.ID } - task, queueMessage, err := s.newRaptorOrGraphRagTask(sampleDocument, taskType, sampleDocument.ID, graphRaptorQueueDocID, documentIDs) + task, queueMessage, err := d.newRaptorOrGraphRagTask(sampleDocument, taskType, sampleDocument.ID, graphRaptorQueueDocID, documentIDs) if err != nil { common.Warn("Failed to build dataset index task", zap.String("dataset_id", datasetID), zap.String("task_type", taskType), zap.Error(err)) return nil, common.CodeDataError, errors.New("Internal server error") @@ -523,8 +523,8 @@ func (s *DatasetService) RunIndex(userID, datasetID, indexType string) (map[stri return map[string]interface{}{"task_id": task.ID}, common.CodeSuccess, nil } -func (s *DatasetService) getDocumentsByDatasetForIndex(datasetID string) ([]*entity.Document, common.ErrorCode, error) { - documents, _, err := s.documentDAO.GetByKBID(datasetID) +func (d *DatasetService) getDocumentsByDatasetForIndex(datasetID string) ([]*entity.Document, common.ErrorCode, error) { + documents, _, err := d.documentDAO.GetByKBID(datasetID) if err != nil { common.Warn("Failed to load dataset documents for index", zap.String("dataset_id", datasetID), zap.Error(err)) return nil, common.CodeDataError, errors.New("Internal server error") @@ -540,7 +540,7 @@ type TraceIndexRequest struct { } // TraceIndex Trace an indexing task (graph/raptor/mindmap) for a dataset. -func (s *DatasetService) TraceIndex(datasetID, userID, indexType string) (*entity.Task, common.ErrorCode, error) { +func (d *DatasetService) TraceIndex(datasetID, userID, indexType string) (*entity.Task, common.ErrorCode, error) { if !checkType(indexType) { return nil, common.CodeDataError, fmt.Errorf("Invalid index type '%s'. Must be one of %v", indexType, validIndexTypes) } @@ -548,11 +548,11 @@ func (s *DatasetService) TraceIndex(datasetID, userID, indexType string) (*entit if datasetID == "" { return nil, common.CodeDataError, errors.New(`Lack of "Dataset ID"`) } - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return nil, common.CodeDataError, errors.New("No authorization.") } - kb, err := s.kbDAO.GetByID(datasetID) + kb, err := d.kbDAO.GetByID(datasetID) if err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeDataError, errors.New("Invalid Dataset ID") @@ -564,7 +564,7 @@ func (s *DatasetService) TraceIndex(datasetID, userID, indexType string) (*entit var task *entity.Task if taskID != "" { - task, err = s.taskDAO.GetByID(taskID) + task, err = d.taskDAO.GetByID(taskID) if err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeSuccess, nil @@ -630,15 +630,15 @@ type datasetParsePageRange struct { } // RunEmbedding runs embedding for all documents in a dataset. -func (s *DatasetService) RunEmbedding(userID, datasetID string) (map[string]interface{}, common.ErrorCode, error) { +func (d *DatasetService) RunEmbedding(userID, datasetID string) (map[string]interface{}, common.ErrorCode, error) { if datasetID == "" { return nil, common.CodeDataError, errors.New(`Lack of "Dataset ID"`) } - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return nil, common.CodeDataError, errors.New("No authorization.") } - kb, err := s.kbDAO.GetByID(datasetID) + kb, err := d.kbDAO.GetByID(datasetID) if err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeDataError, errors.New("Invalid Dataset ID") @@ -646,7 +646,7 @@ func (s *DatasetService) RunEmbedding(userID, datasetID string) (map[string]inte return nil, common.CodeServerError, errors.New("Internal server error") } - documents, _, err := s.documentDAO.GetByKBID(datasetID) + documents, _, err := d.documentDAO.GetByKBID(datasetID) if err != nil { return nil, common.CodeServerError, errors.New("Internal server error") } @@ -660,7 +660,7 @@ func (s *DatasetService) RunEmbedding(userID, datasetID string) (map[string]inte if doc == nil { continue } - if err := s.runEmbeddingDocument(kb, doc, tableDoneCountByKB); err != nil { + if err := d.runEmbeddingDocument(kb, doc, tableDoneCountByKB); err != nil { common.Warn("Failed to schedule dataset embedding document", zap.String("datasetID", datasetID), zap.String("docID", doc.ID), @@ -675,22 +675,22 @@ func (s *DatasetService) RunEmbedding(userID, datasetID string) (map[string]inte }, common.CodeSuccess, nil } -func (s *DatasetService) runEmbeddingDocument(kb *entity.Knowledgebase, doc *entity.Document, tableDoneCountByKB map[string]int64) error { +func (d *DatasetService) runEmbeddingDocument(kb *entity.Knowledgebase, doc *entity.Document, tableDoneCountByKB map[string]int64) error { if doc.PipelineID != nil && strings.TrimSpace(*doc.PipelineID) != "" { - return s.queueDatasetDataflowTask(kb, doc, strings.TrimSpace(*doc.PipelineID), 0) + return d.queueDatasetDataflowTask(kb, doc, strings.TrimSpace(*doc.PipelineID), 0) } if doc.ParserID == string(entity.ParserTypeTable) { doneCount, ok := tableDoneCountByKB[doc.KbID] if !ok { - count, err := s.countDoneDocuments(doc.KbID) + count, err := d.countDoneDocuments(doc.KbID) if err != nil { return err } doneCount = count tableDoneCountByKB[doc.KbID] = doneCount if doneCount <= 0 { - if err := s.kbDAO.DeleteFieldMap(doc.KbID); err != nil && !dao.IsNotFoundErr(err) { + if err := d.kbDAO.DeleteFieldMap(doc.KbID); err != nil && !dao.IsNotFoundErr(err) { return err } } @@ -698,12 +698,12 @@ func (s *DatasetService) runEmbeddingDocument(kb *entity.Knowledgebase, doc *ent } indexName := fmt.Sprintf("ragflow_%s", kb.TenantID) - if s.docEngine != nil { - if _, err := s.docEngine.DeleteChunks(context.Background(), map[string]interface{}{"doc_id": doc.ID}, indexName, doc.KbID); err != nil { + if d.docEngine != nil { + if _, err := d.docEngine.DeleteChunks(context.Background(), map[string]interface{}{"doc_id": doc.ID}, indexName, doc.KbID); err != nil { return err } } - if _, err := s.taskDAO.DeleteByDocIDs([]string{doc.ID}); err != nil { + if _, err := d.taskDAO.DeleteByDocIDs([]string{doc.ID}); err != nil { return err } @@ -711,11 +711,11 @@ func (s *DatasetService) runEmbeddingDocument(kb *entity.Knowledgebase, doc *ent if err != nil { return err } - if err := s.queueDatasetParseTasks(doc, bucket, objectName, 0); err != nil { + if err := d.queueDatasetParseTasks(doc, bucket, objectName, 0); err != nil { return err } - if err := s.beginDatasetParseDocument(doc.ID); err != nil { - if _, delErr := s.taskDAO.DeleteByDocIDs([]string{doc.ID}); delErr != nil { + if err := d.beginDatasetParseDocument(doc.ID); err != nil { + if _, delErr := d.taskDAO.DeleteByDocIDs([]string{doc.ID}); delErr != nil { common.Warn("Failed to clean parse tasks after document state update failure", zap.String("docID", doc.ID), zap.Error(delErr)) @@ -725,11 +725,11 @@ func (s *DatasetService) runEmbeddingDocument(kb *entity.Knowledgebase, doc *ent return nil } -func (s *DatasetService) queueDatasetDataflowTask(kb *entity.Knowledgebase, doc *entity.Document, flowID string, priority int64) error { - if _, err := s.taskDAO.DeleteByDocIDs([]string{doc.ID}); err != nil { +func (d *DatasetService) queueDatasetDataflowTask(kb *entity.Knowledgebase, doc *entity.Document, flowID string, priority int64) error { + if _, err := d.taskDAO.DeleteByDocIDs([]string{doc.ID}); err != nil { return err } - if err := s.beginDatasetParseDocument(doc.ID); err != nil { + if err := d.beginDatasetParseDocument(doc.ID); err != nil { return err } @@ -744,7 +744,7 @@ func (s *DatasetService) queueDatasetDataflowTask(kb *entity.Knowledgebase, doc BeginAt: &now, Progress: 0, } - if err := s.taskDAO.CreateMany([]*entity.Task{task}); err != nil { + if err := d.taskDAO.CreateMany([]*entity.Task{task}); err != nil { return err } @@ -760,7 +760,7 @@ func (s *DatasetService) queueDatasetDataflowTask(kb *entity.Knowledgebase, doc return nil } -func (s *DatasetService) countDoneDocuments(datasetID string) (int64, error) { +func (d *DatasetService) countDoneDocuments(datasetID string) (int64, error) { var count int64 err := dao.GetDB().Model(&entity.Document{}). Where("kb_id = ? AND run = ?", datasetID, string(entity.TaskStatusDone)). @@ -768,15 +768,15 @@ func (s *DatasetService) countDoneDocuments(datasetID string) (int64, error) { return count, err } -func (s *DatasetService) queueDatasetParseTasks(doc *entity.Document, bucket, objectName string, priority int64) error { - tasks, err := s.buildDatasetParseTasks(doc, bucket, objectName, priority) +func (d *DatasetService) queueDatasetParseTasks(doc *entity.Document, bucket, objectName string, priority int64) error { + tasks, err := d.buildDatasetParseTasks(doc, bucket, objectName, priority) if err != nil { return err } if len(tasks) == 0 { return nil } - if err := s.taskDAO.CreateMany(tasks); err != nil { + if err := d.taskDAO.CreateMany(tasks); err != nil { return err } queueName := datasetParseQueueName(doc, priority) @@ -785,7 +785,7 @@ func (s *DatasetService) queueDatasetParseTasks(doc *entity.Document, bucket, ob continue } if redisClient := redisengine.Get(); redisClient == nil || !redisClient.QueueProduct(queueName, datasetParseTaskMessage(task)) { - if _, delErr := s.taskDAO.DeleteByDocIDs([]string{doc.ID}); delErr != nil { + if _, delErr := d.taskDAO.DeleteByDocIDs([]string{doc.ID}); delErr != nil { common.Warn("Failed to clean parse tasks after Redis enqueue failure", zap.String("docID", doc.ID), zap.Error(delErr)) @@ -796,7 +796,7 @@ func (s *DatasetService) queueDatasetParseTasks(doc *entity.Document, bucket, ob return nil } -func (s *DatasetService) buildDatasetParseTasks(doc *entity.Document, bucket, objectName string, priority int64) ([]*entity.Task, error) { +func (d *DatasetService) buildDatasetParseTasks(doc *entity.Document, bucket, objectName string, priority int64) ([]*entity.Task, error) { ranges, err := datasetParseTaskRanges(doc, bucket, objectName) if err != nil { return nil, err @@ -824,7 +824,7 @@ func (s *DatasetService) buildDatasetParseTasks(doc *entity.Document, bucket, ob return tasks, nil } -func (s *DatasetService) beginDatasetParseDocument(docID string) error { +func (d *DatasetService) beginDatasetParseDocument(docID string) error { now := time.Now() return dao.GetDB().Model(&entity.Document{}).Where("id = ?", docID).Updates(map[string]interface{}{ "progress_msg": "Task is queued...", @@ -837,15 +837,15 @@ func (s *DatasetService) beginDatasetParseDocument(docID string) error { } // CheckEmbedding checks whether a new embedding model is compatible with stored vectors. -func (s *DatasetService) CheckEmbedding(userID, datasetID string, req *CheckEmbeddingRequest) (*EmbeddingCheckResponse, common.ErrorCode, error) { +func (d *DatasetService) CheckEmbedding(userID, datasetID string, req *CheckEmbeddingRequest) (*EmbeddingCheckResponse, common.ErrorCode, error) { if datasetID == "" { return nil, common.CodeDataError, errors.New(`Lack of "Dataset ID"`) } - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return nil, common.CodeDataError, errors.New("No authorization.") } - kb, err := s.kbDAO.GetByID(datasetID) + kb, err := d.kbDAO.GetByID(datasetID) if err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeDataError, errors.New("Invalid Dataset ID") @@ -857,10 +857,10 @@ func (s *DatasetService) CheckEmbedding(userID, datasetID string, req *CheckEmbe return nil, common.CodeDataError, errors.New("`embd_id` is required.") } embeddingID := strings.TrimSpace(req.EmbeddingID) - if ok, message := s.verifyEmbeddingAvailability(embeddingID, userID); !ok { + if ok, message := d.verifyEmbeddingAvailability(embeddingID, userID); !ok { return nil, common.CodeDataError, errors.New(message) } - if s.docEngine == nil { + if d.docEngine == nil { return nil, common.CodeServerError, errors.New("doc engine not initialized") } @@ -878,7 +878,7 @@ func (s *DatasetService) CheckEmbedding(userID, datasetID string, req *CheckEmbe checkNum = defaultEmbeddingCheckNum } - samples, err := s.sampleRandomChunksWithVectors(context.Background(), kb.TenantID, datasetID, checkNum) + samples, err := d.sampleRandomChunksWithVectors(context.Background(), kb.TenantID, datasetID, checkNum) if err != nil { return nil, common.CodeServerError, err } @@ -949,9 +949,9 @@ func (s *DatasetService) CheckEmbedding(userID, datasetID string, req *CheckEmbe return response, common.CodeNotEffective, errors.New("Embedding model switch failed: the average similarity between old and new vectors is below 0.9, indicating incompatible vector spaces.") } -func (s *DatasetService) sampleRandomChunksWithVectors(ctx context.Context, tenantID, datasetID string, n int) ([]embeddingCheckSample, error) { +func (d *DatasetService) sampleRandomChunksWithVectors(ctx context.Context, tenantID, datasetID string, n int) ([]embeddingCheckSample, error) { indexName := fmt.Sprintf("ragflow_%s", tenantID) - totalResult, err := s.docEngine.Search(ctx, &enginetypes.SearchRequest{ + totalResult, err := d.docEngine.Search(ctx, &enginetypes.SearchRequest{ IndexNames: []string{indexName}, KbIDs: []string{datasetID}, Offset: 0, @@ -1000,7 +1000,7 @@ func (s *DatasetService) sampleRandomChunksWithVectors(ctx context.Context, tena // (embeddingCheckSample is a small struct). samples := make([]embeddingCheckSample, 0, n) for _, offset := range offsets { - searchResult, err := s.docEngine.Search(ctx, &enginetypes.SearchRequest{ + searchResult, err := d.docEngine.Search(ctx, &enginetypes.SearchRequest{ IndexNames: []string{indexName}, KbIDs: []string{datasetID}, Offset: offset, @@ -1021,7 +1021,7 @@ func (s *DatasetService) sampleRandomChunksWithVectors(ctx context.Context, tena if chunkID == "" { continue } - fullChunk, err := s.docEngine.GetChunk(ctx, indexName, chunkID, []string{datasetID}) + fullChunk, err := d.docEngine.GetChunk(ctx, indexName, chunkID, []string{datasetID}) if err != nil { return nil, err } @@ -1613,7 +1613,7 @@ func datasetCountWorksheetRows(file *zip.File) (int, error) { return rows, nil } -func (s *DatasetService) DeleteIndex(userID, datasetID, indexType string, wipe bool) (common.ErrorCode, error) { +func (d *DatasetService) DeleteIndex(userID, datasetID, indexType string, wipe bool) (common.ErrorCode, error) { if !checkType(indexType) { return common.CodeArgumentError, fmt.Errorf("Invalid index type '%s'", indexType) } @@ -1622,11 +1622,11 @@ func (s *DatasetService) DeleteIndex(userID, datasetID, indexType string, wipe b return common.CodeDataError, errors.New(`Lack of "Dataset ID"`) } - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return common.CodeDataError, errors.New("No authorization.") } - kb, err := s.kbDAO.GetByID(datasetID) + kb, err := d.kbDAO.GetByID(datasetID) if err != nil { if dao.IsNotFoundErr(err) { return common.CodeDataError, errors.New("Invalid Dataset ID") @@ -1652,11 +1652,11 @@ func (s *DatasetService) DeleteIndex(userID, datasetID, indexType string, wipe b } if wipe && indexType == "graph" { - if s.docEngine == nil { + if d.docEngine == nil { return common.CodeServerError, errors.New("Document engine is not initialized") } indexName := fmt.Sprintf("ragflow_%s", kb.TenantID) - _, err = s.docEngine.DeleteChunks(context.Background(), map[string]interface{}{ + _, err = d.docEngine.DeleteChunks(context.Background(), map[string]interface{}{ "knowledge_graph_kwd": interfaceSlice("graph", "subgraph", "entity", "relation", "community_report"), "kb_id": datasetID, }, indexName, datasetID) @@ -1667,11 +1667,11 @@ func (s *DatasetService) DeleteIndex(userID, datasetID, indexType string, wipe b clearGraphPhaseMarkers(redisengine.Get(), datasetID) common.Info("delete_index: cleared GraphRAG artefacts and phase markers", zap.String("dataset_id", datasetID)) } else if wipe && indexType == "raptor" { - if s.docEngine == nil { + if d.docEngine == nil { return common.CodeServerError, errors.New("Document engine is not initialized") } indexName := fmt.Sprintf("ragflow_%s", kb.TenantID) - _, err = s.docEngine.DeleteChunks(context.Background(), map[string]interface{}{ + _, err = d.docEngine.DeleteChunks(context.Background(), map[string]interface{}{ "raptor_kwd": interfaceSlice("raptor"), "kb_id": datasetID, }, indexName, datasetID) @@ -1760,17 +1760,17 @@ func (req *SearchDatasetRequest) ToSearchDatasetsRequest(datasetID string) *Sear } // SearchDataset searches chunks within one knowledge base based on a question. -func (s *DatasetService) SearchDataset(datasetID, userID string, req *SearchDatasetRequest) (*SearchDatasetsResponse, error) { +func (d *DatasetService) SearchDataset(datasetID, userID string, req *SearchDatasetRequest) (*SearchDatasetsResponse, error) { if datasetID == "" { return nil, fmt.Errorf("dataset_id is required") } - return s.SearchDatasets(req.ToSearchDatasetsRequest(datasetID), userID) + return d.SearchDatasets(req.ToSearchDatasetsRequest(datasetID), userID) } // SearchDatasets searches chunks across one or more knowledge bases based on a question. // It retrieves relevant chunks using embedding and optional reranking, applying filters, // cross-language translation, and keyword extraction as configured. -func (s *DatasetService) SearchDatasets(req *SearchDatasetsRequest, userID string) (*SearchDatasetsResponse, error) { +func (d *DatasetService) SearchDatasets(req *SearchDatasetsRequest, userID string) (*SearchDatasetsResponse, error) { if req.Question == "" { return nil, fmt.Errorf("question is required") } @@ -1854,12 +1854,12 @@ func (s *DatasetService) SearchDatasets(req *SearchDatasetsRequest, userID strin var kbRecords []*entity.Knowledgebase seenTenants := make(map[string]bool) for _, datasetID := range datasetIDs { - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { common.Warn("SearchDatasets access denied", zap.String("datasetID", datasetID), zap.String("userID", userID)) return nil, fmt.Errorf("only owner of dataset %s is authorized for this operation", datasetID) } - kb, err := s.kbDAO.GetByID(datasetID) + kb, err := d.kbDAO.GetByID(datasetID) if err != nil || kb == nil { common.Warn("SearchDatasets dataset not found", zap.String("datasetID", datasetID)) return nil, fmt.Errorf("dataset %s not found", datasetID) @@ -1884,11 +1884,11 @@ func (s *DatasetService) SearchDatasets(req *SearchDatasetsRequest, userID strin // Override request fields with values from saved search config (if search_id is provided) var chatID string if searchID != "" { - if s.searchService == nil { + if d.searchService == nil { common.Warn("Search service is not initialized for search_id", zap.String("searchID", searchID)) return nil, fmt.Errorf("Invalid search_id") } - searchDetail, err := s.searchService.GetDetail(searchID) + searchDetail, err := d.searchService.GetDetail(searchID) if err != nil || searchDetail == nil || len(searchDetail) == 0 { common.Warn("Invalid search_id", zap.String("searchID", searchID), zap.Error(err)) return nil, fmt.Errorf("Invalid search_id") @@ -2097,7 +2097,7 @@ func (s *DatasetService) SearchDatasets(req *SearchDatasetsRequest, userID strin EmbeddingModel: embeddingModel, } - retrievalResult, err := nlp.NewRetrievalService(s.docEngine, s.documentDAO).Retrieval(ctx, retrievalReq) + retrievalResult, err := nlp.NewRetrievalService(d.docEngine, d.documentDAO).Retrieval(ctx, retrievalReq) if err != nil { return nil, fmt.Errorf("retrieval search failed: %w", err) } @@ -2108,7 +2108,7 @@ func (s *DatasetService) SearchDatasets(req *SearchDatasetsRequest, userID strin common.Warn("use_kg is not yet implemented in Go - skipping KG retrieval") } - filteredChunks = nlp.RetrievalByChildren(filteredChunks, tenantIDs, s.docEngine, ctx) + filteredChunks = nlp.RetrievalByChildren(filteredChunks, tenantIDs, d.docEngine, ctx) for i := range filteredChunks { delete(filteredChunks[i], "vector") @@ -2172,7 +2172,7 @@ type CreateDatasetRequest struct { } // ListDatasets lists datasets with pagination and filtering. -func (s *DatasetService) ListDatasets(id, name string, page, pageSize int, orderby string, desc bool, keywords string, ownerIDs []string, parserID, userID string) ([]map[string]interface{}, int64, common.ErrorCode, error) { +func (d *DatasetService) ListDatasets(id, name string, page, pageSize int, orderby string, desc bool, keywords string, ownerIDs []string, parserID, userID string) ([]map[string]interface{}, int64, common.ErrorCode, error) { id = strings.TrimSpace(id) if id != "" { normalizedID, err := normalizeDatasetID(id) @@ -2181,7 +2181,7 @@ func (s *DatasetService) ListDatasets(id, name string, page, pageSize int, order } id = normalizedID - kbs, err := s.kbDAO.GetKBByIDAndUserID(id, userID) + kbs, err := d.kbDAO.GetKBByIDAndUserID(id, userID) if err != nil { return nil, 0, common.CodeServerError, errors.New("Database operation failed") } @@ -2192,7 +2192,7 @@ func (s *DatasetService) ListDatasets(id, name string, page, pageSize int, order name = strings.TrimSpace(name) if name != "" { - kbs, err := s.kbDAO.GetKBByNameAndUserID(name, userID) + kbs, err := d.kbDAO.GetKBByNameAndUserID(name, userID) if err != nil { return nil, 0, common.CodeServerError, errors.New("Database operation failed") } @@ -2225,7 +2225,7 @@ func (s *DatasetService) ListDatasets(id, name string, page, pageSize int, order } } if len(tenantIDs) == 0 { - joinedTenants, err := s.tenantDAO.GetJoinedTenantsByUserID(userID) + joinedTenants, err := d.tenantDAO.GetJoinedTenantsByUserID(userID) if err != nil { return nil, 0, common.CodeServerError, errors.New("Database operation failed") } @@ -2237,7 +2237,7 @@ func (s *DatasetService) ListDatasets(id, name string, page, pageSize int, order } } - kbs, total, err := s.kbDAO.GetByTenantIDs(tenantIDs, userID, page, pageSize, orderby, desc, keywords, parserID) + kbs, total, err := d.kbDAO.GetByTenantIDs(tenantIDs, userID, page, pageSize, orderby, desc, keywords, parserID) if err != nil { return nil, 0, common.CodeServerError, errors.New("Database operation failed") } @@ -2254,8 +2254,8 @@ func (s *DatasetService) ListDatasets(id, name string, page, pageSize int, order } // CreateDataset creates a new dataset. -func (s *DatasetService) CreateDataset(req *CreateDatasetRequest, tenantID string) (map[string]interface{}, common.ErrorCode, error) { - if !isValidString(req.Name) { +func (d *DatasetService) CreateDataset(req *CreateDatasetRequest, tenantID string) (map[string]interface{}, common.ErrorCode, error) { + if !common.IsValidString(req.Name) { return nil, common.CodeDataError, errors.New("Dataset name must be string.") } @@ -2267,7 +2267,7 @@ func (s *DatasetService) CreateDataset(req *CreateDatasetRequest, tenantID strin return nil, common.CodeDataError, fmt.Errorf("Dataset name length is %d which is large than %d", len(name), entity.DatasetNameLimit) } - tenant, err := s.tenantDAO.GetByID(tenantID) + tenant, err := d.tenantDAO.GetByID(tenantID) if err != nil || tenant == nil { return nil, common.CodeDataError, errors.New("Tenant not found.") } @@ -2447,7 +2447,7 @@ func (s *DatasetService) CreateDataset(req *CreateDatasetRequest, tenantID strin embdID := tenant.EmbdID if embeddingModel != "" { - ok, message := s.verifyEmbeddingAvailability(embeddingModel, tenantID) + ok, message := d.verifyEmbeddingAvailability(embeddingModel, tenantID) if !ok { return nil, common.CodeDataError, errors.New(message) } @@ -2459,7 +2459,7 @@ func (s *DatasetService) CreateDataset(req *CreateDatasetRequest, tenantID strin status := string(entity.StatusValid) // Deduplicate name within tenant duplicateName, err := common.DuplicateName(func(n, tid string) bool { - existing, err := s.kbDAO.GetByName(n, tid) + existing, err := d.kbDAO.GetByName(n, tid) return err == nil && existing != nil }, name, tenantID) if err != nil { @@ -2489,11 +2489,11 @@ func (s *DatasetService) CreateDataset(req *CreateDatasetRequest, tenantID strin kb.Language = language } - if err = s.kbDAO.Create(kb); err != nil { + if err = d.kbDAO.Create(kb); err != nil { return nil, common.CodeServerError, errors.New("Failed to save dataset") } - createdKB, err := s.kbDAO.GetByID(kbID) + createdKB, err := d.kbDAO.GetByID(kbID) if err != nil || createdKB == nil { return nil, common.CodeServerError, errors.New("Dataset created failed") } @@ -2502,7 +2502,7 @@ func (s *DatasetService) CreateDataset(req *CreateDatasetRequest, tenantID strin } // DeleteDatasets deletes multiple datasets. -func (s *DatasetService) DeleteDatasets(ids []string, deleteAll bool, tenantID string) (map[string]interface{}, common.ErrorCode, error) { +func (d *DatasetService) DeleteDatasets(ids []string, deleteAll bool, tenantID string) (map[string]interface{}, common.ErrorCode, error) { normalizedIDs := make([]string, 0, len(ids)) seenIDs := make(map[string]struct{}, len(ids)) @@ -2524,7 +2524,7 @@ func (s *DatasetService) DeleteDatasets(ids []string, deleteAll bool, tenantID s return map[string]interface{}{"success_count": 0}, common.CodeSuccess, nil } - kbs, err := s.kbDAO.Query(map[string]interface{}{"tenant_id": tenantID}) + kbs, err := d.kbDAO.Query(map[string]interface{}{"tenant_id": tenantID}) if err != nil { return nil, common.CodeServerError, errors.New("Database operation failed") } @@ -2536,7 +2536,7 @@ func (s *DatasetService) DeleteDatasets(ids []string, deleteAll bool, tenantID s kbs := make([]*entity.Knowledgebase, 0, len(normalizedIDs)) unauthorizedIDs := make([]string, 0) for _, id := range normalizedIDs { - kb, err := s.kbDAO.GetByIDAndTenantID(id, tenantID) + kb, err := d.kbDAO.GetByIDAndTenantID(id, tenantID) if err != nil || kb == nil { unauthorizedIDs = append(unauthorizedIDs, id) continue @@ -2550,7 +2550,7 @@ func (s *DatasetService) DeleteDatasets(ids []string, deleteAll bool, tenantID s errorsList := make([]string, 0) successCount := 0 for _, kb := range kbs { - if err := s.deleteDataset(tenantID, kb); err != nil { + if err := d.deleteDataset(tenantID, kb); err != nil { errorsList = append(errorsList, err.Error()) continue } @@ -2582,7 +2582,7 @@ func (s *DatasetService) DeleteDatasets(ids []string, deleteAll bool, tenantID s } // GetDataset gets a single dataset with its size and linked connectors. -func (s *DatasetService) GetDataset(datasetID, userID string) (map[string]interface{}, common.ErrorCode, error) { +func (d *DatasetService) GetDataset(datasetID, userID string) (map[string]interface{}, common.ErrorCode, error) { datasetID = strings.TrimSpace(datasetID) if datasetID == "" { return nil, common.CodeDataError, errors.New("Lack of \"Dataset ID\"") @@ -2594,24 +2594,24 @@ func (s *DatasetService) GetDataset(datasetID, userID string) (map[string]interf } datasetID = normalizedID - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return nil, common.CodeDataError, fmt.Errorf("User '%s' lacks permission for dataset '%s'", userID, datasetID) } - kb, err := s.kbDAO.GetByID(datasetID) + kb, err := d.kbDAO.GetByID(datasetID) if err != nil || kb == nil { return nil, common.CodeDataError, errors.New("Invalid Dataset ID") } data := datasetToMap(kb) - size, err := s.documentDAO.SumSizeByDatasetID(datasetID) + size, err := d.documentDAO.SumSizeByDatasetID(datasetID) if err != nil { return nil, common.CodeServerError, errors.New("Database operation failed") } data["size"] = size - connectors, err := s.connectorDAO.ListByDatasetID(datasetID) + connectors, err := d.connectorDAO.ListByDatasetID(datasetID) if err != nil { return nil, common.CodeServerError, errors.New("Database operation failed") } @@ -2644,8 +2644,8 @@ type UpdateDatasetRequest struct { } // UpdateDataset Update a dataset -func (s *DatasetService) UpdateDataset(datasetID, tenantID string, req UpdateDatasetRequest) (map[string]interface{}, common.ErrorCode, error) { - kb, err := s.kbDAO.GetByID(datasetID) +func (d *DatasetService) UpdateDataset(datasetID, tenantID string, req UpdateDatasetRequest) (map[string]interface{}, common.ErrorCode, error) { + kb, err := d.kbDAO.GetByID(datasetID) if err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeDataError, errors.New("Dataset not found") @@ -2765,7 +2765,7 @@ func (s *DatasetService) UpdateDataset(datasetID, tenantID string, req UpdateDat if embdID == "" { embdID = kb.EmbdID } - ok, message := s.verifyEmbeddingAvailability(embdID, tenantID) + ok, message := d.verifyEmbeddingAvailability(embdID, tenantID) if !ok { return nil, common.CodeDataError, errors.New(message) } @@ -2789,14 +2789,14 @@ func (s *DatasetService) UpdateDataset(datasetID, tenantID string, req UpdateDat if *req.Pagerank < 0 || *req.Pagerank > 100 { return nil, common.CodeDataError, errors.New("Input should be less than or equal to 100") } - if s.engineType == server.EngineInfinity { + if d.engineType == server.EngineInfinity { return nil, common.CodeDataError, errors.New("'pagerank' can only be set when doc_engine is elasticsearch") } indexName := fmt.Sprintf("ragflow_%s", kb.TenantID) if *req.Pagerank > 0 { - err = s.docEngine.UpdateChunks(context.Background(), map[string]interface{}{"kb_id": kb.ID}, map[string]interface{}{common.PAGERANK_FLD: *req.Pagerank}, indexName, kb.ID) + err = d.docEngine.UpdateChunks(context.Background(), map[string]interface{}{"kb_id": kb.ID}, map[string]interface{}{common.PAGERANK_FLD: *req.Pagerank}, indexName, kb.ID) } else { - err = s.docEngine.UpdateChunks(context.Background(), map[string]interface{}{"exists": common.PAGERANK_FLD}, map[string]interface{}{"remove": common.PAGERANK_FLD}, indexName, kb.ID) + err = d.docEngine.UpdateChunks(context.Background(), map[string]interface{}{"exists": common.PAGERANK_FLD}, map[string]interface{}{"remove": common.PAGERANK_FLD}, indexName, kb.ID) } if err != nil { return nil, common.CodeServerError, err @@ -2816,7 +2816,7 @@ func (s *DatasetService) UpdateDataset(datasetID, tenantID string, req UpdateDat } if nameValue, ok := updates["name"].(string); ok && strings.ToLower(nameValue) != strings.ToLower(kb.Name) { - existing, lookupErr := s.kbDAO.GetByName(nameValue, tenantID) + existing, lookupErr := d.kbDAO.GetByName(nameValue, tenantID) if lookupErr != nil && !dao.IsNotFoundErr(lookupErr) { return nil, common.CodeServerError, errors.New("Database operation failed") } @@ -2830,7 +2830,7 @@ func (s *DatasetService) UpdateDataset(datasetID, tenantID string, req UpdateDat } if len(updates) > 0 { - if err = s.kbDAO.UpdateByID(kb.ID, updates); err != nil { + if err = d.kbDAO.UpdateByID(kb.ID, updates); err != nil { return nil, common.CodeServerError, errors.New("Update dataset error.(Database error)") } } @@ -2847,18 +2847,18 @@ func (s *DatasetService) UpdateDataset(datasetID, tenantID string, req UpdateDat AutoParse: connector.AutoParse, }) } - if err = s.connectorDAO.LinkDatasetConnectors(kb.ID, connectorLinks); err != nil { + if err = d.connectorDAO.LinkDatasetConnectors(kb.ID, connectorLinks); err != nil { return nil, common.CodeServerError, errors.New("Database operation failed") } } - updatedKB, err := s.kbDAO.GetByID(kb.ID) + updatedKB, err := d.kbDAO.GetByID(kb.ID) if err != nil { return nil, common.CodeDataError, errors.New("Dataset updated failed") } data := datasetToMap(updatedKB) - linkedConnectors, err := s.connectorDAO.ListByDatasetID(kb.ID) + linkedConnectors, err := d.connectorDAO.ListByDatasetID(kb.ID) if err != nil { return nil, common.CodeServerError, errors.New("Database operation failed") } @@ -2988,8 +2988,8 @@ func datasetBoolValue(value interface{}) bool { } // GetMetadataConfig gets the auto-metadata configuration for a dataset. -func (s *DatasetService) GetMetadataConfig(datasetID, tenantID string) (map[string]interface{}, common.ErrorCode, error) { - kb, err := s.kbDAO.GetByIDAndTenantID(datasetID, tenantID) +func (d *DatasetService) GetMetadataConfig(datasetID, tenantID string) (map[string]interface{}, common.ErrorCode, error) { + kb, err := d.kbDAO.GetByIDAndTenantID(datasetID, tenantID) if err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeDataError, fmt.Errorf("User '%s' lacks permission for dataset '%s'", tenantID, datasetID) @@ -3007,11 +3007,11 @@ func (s *DatasetService) GetMetadataConfig(datasetID, tenantID string) (map[stri } // UpdateMetadataConfig updates the auto-metadata configuration for a dataset. -func (s *DatasetService) UpdateMetadataConfig(datasetID, tenantID string, req *MetadataConfigRequest) (map[string]interface{}, common.ErrorCode, error) { +func (d *DatasetService) UpdateMetadataConfig(datasetID, tenantID string, req *MetadataConfigRequest) (map[string]interface{}, common.ErrorCode, error) { datasetID = strings.TrimSpace(datasetID) tenantID = strings.TrimSpace(tenantID) - kb, err := s.kbDAO.GetByIDAndTenantID(datasetID, tenantID) + kb, err := d.kbDAO.GetByIDAndTenantID(datasetID, tenantID) if err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeDataError, fmt.Errorf("User '%s' lacks permission for dataset '%s'", tenantID, datasetID) @@ -3042,7 +3042,7 @@ func (s *DatasetService) UpdateMetadataConfig(datasetID, tenantID string, req *M parserConfig["metadata"] = metadata parserConfig["built_in_metadata"] = builtInMetadata - if err := s.kbDAO.UpdateByID(kb.ID, map[string]interface{}{"parser_config": parserConfig}); err != nil { + if err = d.kbDAO.UpdateByID(kb.ID, map[string]interface{}{"parser_config": parserConfig}); err != nil { return nil, common.CodeServerError, errors.New("Update auto-metadata error.(Database error)") } @@ -3053,13 +3053,17 @@ func (s *DatasetService) UpdateMetadataConfig(datasetID, tenantID string, req *M } // Accessible checks if a knowledge base is accessible by a user -func (s *DatasetService) Accessible(kbID, userID string) bool { - return s.kbDAO.Accessible(kbID, userID) +func (d *DatasetService) Accessible(kbID, userID string) bool { + return d.kbDAO.Accessible(kbID, userID) +} + +func (d *DatasetService) GetByID(kbID string) (*entity.Knowledgebase, error) { + return d.kbDAO.GetByID(kbID) } // GetKnowledgebaseByID resolves a dataset entity without applying permission // checks. Upload needs the same existence-then-auth ordering as Python. -func (s *DatasetService) GetKnowledgebaseByID(datasetID string) (*entity.Knowledgebase, error) { +func (d *DatasetService) GetKnowledgebaseByID(datasetID string) (*entity.Knowledgebase, error) { datasetID = strings.TrimSpace(datasetID) if datasetID == "" { return nil, errors.New("Lack of \"Dataset ID\"") @@ -3068,19 +3072,19 @@ func (s *DatasetService) GetKnowledgebaseByID(datasetID string) (*entity.Knowled if err != nil { return nil, err } - return s.kbDAO.GetByID(normalizedID) + return d.kbDAO.GetByID(normalizedID) } // CheckKBTeamPermission mirrors Python check_kb_team_permission. -func (s *DatasetService) CheckKBTeamPermission(kb *entity.Knowledgebase, userID string) bool { - return hasKBTeamPermission(kb, userID, s.tenantDAO) +func (d *DatasetService) CheckKBTeamPermission(kb *entity.Knowledgebase, userID string) bool { + return hasKBTeamPermission(kb, userID, d.tenantDAO) } -func (s *DatasetService) AggregateTags(datasetIDs []string, userID string) ([]map[string]interface{}, common.ErrorCode, error) { +func (d *DatasetService) AggregateTags(datasetIDs []string, userID string) ([]map[string]interface{}, common.ErrorCode, error) { if len(datasetIDs) == 0 { return nil, common.CodeDataError, errors.New("Lack of dataset_ids in query parameters") } - if s.docEngine == nil { + if d.docEngine == nil { return nil, common.CodeServerError, errors.New("Document engine is not initialized") } @@ -3094,10 +3098,10 @@ func (s *DatasetService) AggregateTags(datasetIDs []string, userID string) ([]ma if err != nil { return nil, common.CodeDataError, err } - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return nil, common.CodeDataError, fmt.Errorf("No authorization for dataset '%s'", datasetID) } - kb, err := s.kbDAO.GetByID(datasetID) + kb, err := d.kbDAO.GetByID(datasetID) if err != nil { if dao.IsNotFoundErr(err) { return nil, common.CodeDataError, fmt.Errorf("Invalid Dataset ID '%s'", datasetID) @@ -3114,7 +3118,7 @@ func (s *DatasetService) AggregateTags(datasetIDs []string, userID string) ([]ma merged := make(map[string]int) for tenantID, kbIDs := range datasetIDsByTenant { for offset := 0; ; offset += pageSize { - searchResp, err := s.docEngine.Search(context.Background(), &types.SearchRequest{ + searchResp, err := d.docEngine.Search(context.Background(), &types.SearchRequest{ IndexNames: []string{fmt.Sprintf("ragflow_%s", tenantID)}, KbIDs: kbIDs, Offset: offset, @@ -3124,7 +3128,7 @@ func (s *DatasetService) AggregateTags(datasetIDs []string, userID string) ([]ma if err != nil { return nil, common.CodeServerError, fmt.Errorf("failed to aggregate tags: %w", err) } - for _, agg := range s.docEngine.GetAggregation(searchResp.Chunks, "tag_kwd") { + for _, agg := range d.docEngine.GetAggregation(searchResp.Chunks, "tag_kwd") { tag, _ := agg["key"].(string) if tag == "" { continue @@ -3160,7 +3164,7 @@ func (s *DatasetService) AggregateTags(datasetIDs []string, userID string) ([]ma return result, common.CodeSuccess, nil } -func (s *DatasetService) ListTags(datasetID, userID string) ([]map[string]interface{}, common.ErrorCode, error) { +func (d *DatasetService) ListTags(datasetID, userID string) ([]map[string]interface{}, common.ErrorCode, error) { datasetID = strings.TrimSpace(datasetID) if datasetID == "" { return nil, common.CodeDataError, errors.New("Lack of \"Dataset ID\"") @@ -3172,14 +3176,14 @@ func (s *DatasetService) ListTags(datasetID, userID string) ([]map[string]interf } datasetID = normalizedID - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return nil, common.CodeDataError, errors.New("No authorization.") } - if s.docEngine == nil { + if d.docEngine == nil { return nil, common.CodeServerError, errors.New("Document engine is not initialized") } - kb, err := s.kbDAO.GetByID(datasetID) + kb, err := d.kbDAO.GetByID(datasetID) if err != nil || kb == nil { return nil, common.CodeDataError, errors.New("Invalid Dataset ID") } @@ -3189,7 +3193,7 @@ func (s *DatasetService) ListTags(datasetID, userID string) ([]map[string]interf ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - exists, err := s.docEngine.ChunkStoreExists(ctx, indexName, datasetID) + exists, err := d.docEngine.ChunkStoreExists(ctx, indexName, datasetID) if err != nil { return nil, common.CodeServerError, fmt.Errorf("failed to inspect chunk store: %w", err) } @@ -3200,11 +3204,11 @@ func (s *DatasetService) ListTags(datasetID, userID string) ([]map[string]interf const pageSize = 10000 counts := make(map[string]int) for offset := 0; ; offset += pageSize { - if err := ctx.Err(); err != nil { + if err = ctx.Err(); err != nil { return nil, common.CodeServerError, fmt.Errorf("list tags timeout or canceled: %w", err) } - searchResp, err := s.docEngine.Search(ctx, &types.SearchRequest{ + searchResp, err := d.docEngine.Search(ctx, &types.SearchRequest{ IndexNames: []string{indexName}, KbIDs: []string{datasetID}, Offset: offset, @@ -3215,7 +3219,7 @@ func (s *DatasetService) ListTags(datasetID, userID string) ([]map[string]interf return nil, common.CodeServerError, fmt.Errorf("failed to list tags: %w", err) } - for _, agg := range s.docEngine.GetAggregation(searchResp.Chunks, "tag_kwd") { + for _, agg := range d.docEngine.GetAggregation(searchResp.Chunks, "tag_kwd") { tag, _ := agg["key"].(string) if tag == "" { continue @@ -3270,22 +3274,22 @@ func (s *DatasetService) ListTags(datasetID, userID string) ([]map[string]interf // GetIngestionSummary returns dataset-level ingestion counters together with // the aggregated document parsing status, mirroring // dataset_api_service.get_ingestion_summary. -func (s *DatasetService) GetIngestionSummary(datasetID, userID string) (map[string]interface{}, common.ErrorCode, error) { +func (d *DatasetService) GetIngestionSummary(datasetID, userID string) (map[string]interface{}, common.ErrorCode, error) { datasetID = strings.TrimSpace(datasetID) if datasetID == "" { return nil, common.CodeDataError, errors.New("Lack of \"Dataset ID\"") } - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return nil, common.CodeDataError, fmt.Errorf("User '%s' lacks permission for dataset '%s'", userID, datasetID) } - kb, err := s.kbDAO.GetByID(datasetID) + kb, err := d.kbDAO.GetByID(datasetID) if err != nil || kb == nil { return nil, common.CodeDataError, errors.New("Invalid Dataset ID") } - status, err := s.documentDAO.GetParsingStatusByKBID(datasetID) + status, err := d.documentDAO.GetParsingStatusByKBID(datasetID) if err != nil { return nil, common.CodeServerError, errors.New("Database operation failed") } @@ -3301,13 +3305,13 @@ func (s *DatasetService) GetIngestionSummary(datasetID, userID string) (map[stri // ListIngestionLogs lists ingestion logs for a dataset, mirroring // dataset_api_service.list_ingestion_logs. log_type selects between // dataset-level logs ("dataset") and per-file logs ("file"). -func (s *DatasetService) ListIngestionLogs(datasetID, userID string, page, pageSize int, orderby string, desc bool, operationStatus []string, createDateFrom, createDateTo, logType, keywords string) (map[string]interface{}, common.ErrorCode, error) { +func (d *DatasetService) ListIngestionLogs(datasetID, userID string, page, pageSize int, orderby string, desc bool, operationStatus []string, createDateFrom, createDateTo, logType, keywords string) (map[string]interface{}, common.ErrorCode, error) { datasetID = strings.TrimSpace(datasetID) if datasetID == "" { return nil, common.CodeDataError, errors.New("Lack of \"Dataset ID\"") } - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return nil, common.CodeDataError, errors.New("No authorization.") } @@ -3321,9 +3325,9 @@ func (s *DatasetService) ListIngestionLogs(datasetID, userID string, page, pageS err error ) if logType == "file" { - logs, total, err = s.pipelineLogDAO.GetFileLogsByKBID(datasetID, page, pageSize, orderby, desc, keywords, operationStatus, createDateFrom, createDateTo) + logs, total, err = d.pipelineLogDAO.GetFileLogsByKBID(datasetID, page, pageSize, orderby, desc, keywords, operationStatus, createDateFrom, createDateTo) } else { - logs, total, err = s.pipelineLogDAO.GetDatasetLogsByKBID(datasetID, page, pageSize, orderby, desc, operationStatus, createDateFrom, createDateTo, keywords) + logs, total, err = d.pipelineLogDAO.GetDatasetLogsByKBID(datasetID, page, pageSize, orderby, desc, operationStatus, createDateFrom, createDateTo, keywords) } if err != nil { return nil, common.CodeServerError, errors.New("Database operation failed") @@ -3353,17 +3357,17 @@ func (s *DatasetService) ListIngestionLogs(datasetID, userID string, page, pageS // dataflow-result page can render the pipeline timeline and chunks. The // file-level converter is a superset of the dataset-level fields, so it is // correct for both dataset-level (graph/raptor/mindmap) and per-file logs. -func (s *DatasetService) GetIngestionLog(datasetID, userID, logID string) (map[string]interface{}, common.ErrorCode, error) { +func (d *DatasetService) GetIngestionLog(datasetID, userID, logID string) (map[string]interface{}, common.ErrorCode, error) { datasetID = strings.TrimSpace(datasetID) if datasetID == "" { return nil, common.CodeDataError, errors.New("Lack of \"Dataset ID\"") } - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return nil, common.CodeDataError, errors.New("No authorization.") } - log, err := s.pipelineLogDAO.GetByIDAndKBID(logID, datasetID) + log, err := d.pipelineLogDAO.GetByIDAndKBID(logID, datasetID) if err != nil { if errors.Is(err, gorm.ErrRecordNotFound) { return nil, common.CodeDataError, errors.New("Log not found") @@ -3451,7 +3455,7 @@ func jsonMapValue(m entity.JSONMap) interface{} { return m } -func (s *DatasetService) deleteDataset(tenantID string, kb *entity.Knowledgebase) error { +func (d *DatasetService) deleteDataset(tenantID string, kb *entity.Knowledgebase) error { return dao.DB.Transaction(func(tx *gorm.DB) error { if taskIDs := datasetIndexTaskIDs(kb); len(taskIDs) > 0 { if err := tx.Where("id IN ?", taskIDs).Delete(&entity.Task{}).Error; err != nil { @@ -3609,7 +3613,7 @@ func normalizeDatasetID(id string) (string, error) { return strings.ReplaceAll(parsedUUID.String(), "-", ""), nil } -func (s *DatasetService) verifyEmbeddingAvailability(embdID string, tenantID string) (bool, string) { +func (d *DatasetService) verifyEmbeddingAvailability(embdID string, tenantID string) (bool, string) { _, _, _, _, err := NewModelProviderService().GetModelConfigFromProviderInstance(tenantID, entity.ModelTypeEmbedding, embdID) if err != nil { return false, err.Error() @@ -3781,7 +3785,7 @@ func limitStrings(values []string, limit int) []string { return values[:limit] } -func (s *DatasetService) RenameTag(datasetID, userID, fromTag, toTag string) (map[string]interface{}, common.ErrorCode, error) { +func (d *DatasetService) RenameTag(datasetID, userID, fromTag, toTag string) (map[string]interface{}, common.ErrorCode, error) { fromTag = strings.TrimSpace(fromTag) toTag = strings.TrimSpace(toTag) @@ -3792,14 +3796,14 @@ func (s *DatasetService) RenameTag(datasetID, userID, fromTag, toTag string) (ma if strings.TrimSpace(datasetID) == "" { return nil, common.CodeDataError, errors.New("Lack of \"Dataset ID\"") } - if !s.kbDAO.Accessible(datasetID, userID) { + if !d.kbDAO.Accessible(datasetID, userID) { return nil, common.CodeDataError, errors.New("No authorization.") } - if s.docEngine == nil { + if d.docEngine == nil { return nil, common.CodeServerError, errors.New("Document engine is not initialized") } - kb, err := s.kbDAO.GetByID(datasetID) + kb, err := d.kbDAO.GetByID(datasetID) if err != nil || kb == nil { return nil, common.CodeDataError, errors.New("Invalid Dataset ID") } @@ -3818,7 +3822,7 @@ func (s *DatasetService) RenameTag(datasetID, userID, fromTag, toTag string) (ma }, } - err = s.docEngine.UpdateChunks(context.Background(), condition, newValue, indexName, datasetID) + err = d.docEngine.UpdateChunks(context.Background(), condition, newValue, indexName, datasetID) if err != nil { return nil, common.CodeServerError, fmt.Errorf("failed to rename tag: %w", err) } @@ -3828,3 +3832,7 @@ func (s *DatasetService) RenameTag(datasetID, userID, fromTag, toTag string) (ma "to": toTag, }, common.CodeSuccess, nil } + +func (d *DatasetService) GetFieldMap(ids []string) (map[string]interface{}, error) { + return d.kbDAO.GetFieldMap(ids) +} diff --git a/internal/service/deep_researcher.go b/internal/service/deep_researcher.go index 33243d26ac..e6139e170f 100644 --- a/internal/service/deep_researcher.go +++ b/internal/service/deep_researcher.go @@ -31,7 +31,7 @@ import ( "ragflow/internal/common" "ragflow/internal/engine" modelModule "ragflow/internal/entity/models" - "ragflow/internal/service/kg" + "ragflow/internal/service/graph" "ragflow/internal/service/nlp" "ragflow/internal/tokenizer" @@ -386,7 +386,7 @@ func (dr *DeepResearcher) _retrieve_information(ctx context.Context, query strin // 3. Knowledge graph retrieval if useKG, _ := dr.PromptConfig["use_kg"].(bool); useKG && dr.ChatModel != nil && len(dr.KbIDs) > 0 { - kgPipeline := kg.NewPipeline(dr.DocEngine, dr.KbIDs, dr.TenantIDs, query) + kgPipeline := graph.NewPipeline(dr.DocEngine, dr.KbIDs, dr.TenantIDs, query) kgPipeline.SetChatModel(dr.ChatModel) if dr.EmbModel != nil { kgPipeline.SetEmbModel(dr.EmbModel) diff --git a/internal/service/kg/pipeline.go b/internal/service/graph/pipeline.go similarity index 99% rename from internal/service/kg/pipeline.go rename to internal/service/graph/pipeline.go index 300dc51cc8..07a1b5506c 100644 --- a/internal/service/kg/pipeline.go +++ b/internal/service/graph/pipeline.go @@ -14,7 +14,7 @@ // limitations under the License. // -package kg +package graph import ( "context" diff --git a/internal/service/kg/retrieval.go b/internal/service/graph/retrieval.go similarity index 99% rename from internal/service/kg/retrieval.go rename to internal/service/graph/retrieval.go index 3fcf357948..abc9ca843f 100644 --- a/internal/service/kg/retrieval.go +++ b/internal/service/graph/retrieval.go @@ -1,4 +1,4 @@ -package kg +package graph import ( "context" diff --git a/internal/service/kg/retrieval_test.go b/internal/service/graph/retrieval_test.go similarity index 99% rename from internal/service/kg/retrieval_test.go rename to internal/service/graph/retrieval_test.go index 51ee73d680..1b827aa1b9 100644 --- a/internal/service/kg/retrieval_test.go +++ b/internal/service/graph/retrieval_test.go @@ -14,7 +14,7 @@ // limitations under the License. // -package kg +package graph import ( "context" diff --git a/internal/service/kg/scoring.go b/internal/service/graph/scoring.go similarity index 99% rename from internal/service/kg/scoring.go rename to internal/service/graph/scoring.go index b6e4c339a8..825439cadd 100644 --- a/internal/service/kg/scoring.go +++ b/internal/service/graph/scoring.go @@ -14,7 +14,7 @@ // limitations under the License. // -package kg +package graph import ( "bytes" diff --git a/internal/service/kg/search.go b/internal/service/graph/search.go similarity index 99% rename from internal/service/kg/search.go rename to internal/service/graph/search.go index ff43302fe0..369ca7974a 100644 --- a/internal/service/kg/search.go +++ b/internal/service/graph/search.go @@ -1,4 +1,4 @@ -package kg +package graph import ( "context" diff --git a/internal/service/kg/search_test.go b/internal/service/graph/search_test.go similarity index 99% rename from internal/service/kg/search_test.go rename to internal/service/graph/search_test.go index 2c289ccbe6..eb4398c248 100644 --- a/internal/service/kg/search_test.go +++ b/internal/service/graph/search_test.go @@ -14,7 +14,7 @@ // limitations under the License. // -package kg +package graph import ( "context" diff --git a/internal/service/kg/testutil_test.go b/internal/service/graph/testutil_test.go similarity index 76% rename from internal/service/kg/testutil_test.go rename to internal/service/graph/testutil_test.go index 015e1d4198..f0c1567bde 100644 --- a/internal/service/kg/testutil_test.go +++ b/internal/service/graph/testutil_test.go @@ -1,3 +1,3 @@ -package kg +package graph func strPtr(s string) *string { return &s } diff --git a/internal/service/kg/types.go b/internal/service/graph/types.go similarity index 99% rename from internal/service/kg/types.go rename to internal/service/graph/types.go index 2c502c3e09..4d76971886 100644 --- a/internal/service/kg/types.go +++ b/internal/service/graph/types.go @@ -1,4 +1,4 @@ -package kg +package graph // KGEntity represents a knowledge graph entity. type KGEntity struct { diff --git a/internal/service/kb.go b/internal/service/kb.go deleted file mode 100644 index e97922b2a3..0000000000 --- a/internal/service/kb.go +++ /dev/null @@ -1,377 +0,0 @@ -// -// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -package service - -import ( - "context" - "errors" - "fmt" - "ragflow/internal/common" - "ragflow/internal/dao" - "ragflow/internal/engine" - "ragflow/internal/entity" - - "ragflow/internal/utility" - "strings" -) - -// KnowledgebaseService service class for managing dataset operations -type KnowledgebaseService struct { - kbDAO *dao.KnowledgebaseDAO - userTenantDAO *dao.UserTenantDAO - userDAO *dao.UserDAO - tenantDAO *dao.TenantDAO - connectorDAO *dao.ConnectorDAO - docEngine engine.DocEngine -} - -// NewKnowledgebaseService creates a new knowledge base service -func NewKnowledgebaseService() *KnowledgebaseService { - return &KnowledgebaseService{ - kbDAO: dao.NewKnowledgebaseDAO(), - userTenantDAO: dao.NewUserTenantDAO(), - userDAO: dao.NewUserDAO(), - tenantDAO: dao.NewTenantDAO(), - connectorDAO: dao.NewConnectorDAO(), - docEngine: engine.Get(), - } -} - -// UpdateKBRequest represents the request for updating a knowledge base -type UpdateKBRequest struct { - KBID string `json:"kb_id" binding:"required"` - Name string `json:"name" binding:"required"` - Description *string `json:"description"` - ParserID string `json:"parser_id" binding:"required"` - Permission *string `json:"permission,omitempty"` - Language *string `json:"language,omitempty"` - Avatar *string `json:"avatar,omitempty"` - Pagerank *int64 `json:"pagerank,omitempty"` - ParserConfig map[string]interface{} `json:"parser_config,omitempty"` - Connectors []string `json:"connectors,omitempty"` -} - -// UpdateMetadataSettingRequest represents the request for updating metadata settings -type UpdateMetadataSettingRequest struct { - KBID string `json:"kb_id" binding:"required"` - Metadata map[string]interface{} `json:"metadata" binding:"required"` - EnableMetadata *bool `json:"enable_metadata,omitempty"` -} - -// ListKbsResponse represents the response for listing knowledge bases -type ListKbsResponse struct { - KBs []map[string]interface{} `json:"kbs"` - Total int64 `json:"total"` -} - -// UpdateKB updates an existing knowledge base -// This matches the Python update endpoint in kb_app.py -func (s *KnowledgebaseService) UpdateKB(req *UpdateKBRequest, userID string) (map[string]interface{}, common.ErrorCode, error) { - // Validate name is a string - if !isValidString(req.Name) { - return nil, common.CodeDataError, errors.New("Dataset name must be string.") - } - - // Trim and validate name - name := strings.TrimSpace(req.Name) - if name == "" { - return nil, common.CodeDataError, errors.New("Dataset name can't be empty.") - } - - // Check name length - if len(name) > entity.DatasetNameLimit { - return nil, common.CodeDataError, fmt.Errorf("Dataset name length is %d which is large than %d", len(name), entity.DatasetNameLimit) - } - - // Check authorization - if !s.kbDAO.Accessible4Deletion(req.KBID, userID) { - return nil, common.CodeAuthenticationError, errors.New("No authorization.") - } - - // Verify ownership - kbs, err := s.kbDAO.Query(map[string]interface{}{"created_by": userID, "id": req.KBID}) - if err != nil || len(kbs) == 0 { - return nil, common.CodeOperatingError, errors.New("only owner of dataset authorized for this operation") - } - - // Get existing KB - kb, err := s.kbDAO.GetByID(req.KBID) - if err != nil { - return nil, common.CodeDataError, errors.New("can't find this dataset") - } - - // Check for duplicate name - if strings.ToLower(name) != strings.ToLower(kb.Name) { - existingKB, _ := s.kbDAO.GetByName(name, userID) - if existingKB != nil { - return nil, common.CodeDataError, errors.New("duplicated dataset name") - } - } - - // Build updates - updates := map[string]interface{}{ - "name": name, - "parser_id": req.ParserID, - } - - if req.Description != nil { - updates["description"] = *req.Description - } - if req.Permission != nil { - updates["permission"] = *req.Permission - } - if req.Language != nil { - updates["language"] = *req.Language - } - if req.Avatar != nil { - updates["avatar"] = *req.Avatar - } - if req.Pagerank != nil { - updates["pagerank"] = *req.Pagerank - } - if req.ParserConfig != nil { - updates["parser_config"] = req.ParserConfig - } - - // Update in database - if err := s.kbDAO.UpdateByID(req.KBID, updates); err != nil { - return nil, common.CodeServerError, fmt.Errorf("failed to update knowledge base: %w", err) - } - - // Get updated KB - updatedKB, err := s.kbDAO.GetByID(req.KBID) - if err != nil { - return nil, common.CodeDataError, errors.New("database error (knowledgebase rename)") - } - - result := updatedKB.ToMap() - result["connectors"] = req.Connectors - - return result, common.CodeSuccess, nil -} - -// UpdateMetadataSetting updates the metadata settings for a knowledge base. -// The userID must be a member of the owning tenant; this is the same authorization -// boundary applied by GetDetail and the handler-level guard, duplicated here so -// the security check cannot be regressed by future handler refactors that drop it. -func (s *KnowledgebaseService) UpdateMetadataSetting(req *UpdateMetadataSettingRequest, userID string) (map[string]interface{}, common.ErrorCode, error) { - if !s.kbDAO.Accessible(req.KBID, userID) { - return nil, common.CodeOperatingError, errors.New("only owner of dataset authorized for this operation") - } - - kb, err := s.kbDAO.GetByID(req.KBID) - if err != nil { - return nil, common.CodeDataError, errors.New("database error (knowledgebase not found)") - } - - parserConfig := kb.ParserConfig - if parserConfig == nil { - parserConfig = make(map[string]interface{}) - } - - parserConfig["metadata"] = req.Metadata - enableMetadata := true - if req.EnableMetadata != nil { - enableMetadata = *req.EnableMetadata - } - parserConfig["enable_metadata"] = enableMetadata - - if err := s.kbDAO.UpdateParserConfig(req.KBID, parserConfig); err != nil { - return nil, common.CodeServerError, fmt.Errorf("failed to update metadata setting: %w", err) - } - - result := kb.ToMap() - result["parser_config"] = parserConfig - - return result, common.CodeSuccess, nil -} - -// GetDetail retrieves detailed information about a knowledge base -// This matches the Python kb_detail endpoint in kb_app.py -func (s *KnowledgebaseService) GetDetail(kbID, userID string) (*entity.KnowledgebaseDetail, common.ErrorCode, error) { - // Check authorization - if !s.kbDAO.Accessible(kbID, userID) { - return nil, common.CodeOperatingError, errors.New("only owner of dataset authorized for this operation") - } - - // Get detail - detail, err := s.kbDAO.GetDetail(kbID) - if err != nil { - return nil, common.CodeDataError, errors.New("can't find this dataset") - } - - // Set connectors (empty for now) - detail.Connectors = []string{} - - return detail, common.CodeSuccess, nil -} - -// Accessible checks if a knowledge base is accessible by a user -func (s *KnowledgebaseService) Accessible(kbID, userID string) bool { - return s.kbDAO.Accessible(kbID, userID) -} - -// RemoveTag removes a tag from documents in a dataset -func (s *KnowledgebaseService) RemoveTag(condition map[string]interface{}, newValue map[string]interface{}, indexName, kbID string) error { - return s.docEngine.UpdateChunks(context.Background(), condition, newValue, indexName, kbID) -} - -// GetByID retrieves a knowledge base by ID -func (s *KnowledgebaseService) GetByID(kbID string) (*entity.Knowledgebase, error) { - return s.kbDAO.GetByID(kbID) -} - -// GetKBIDsByTenantID retrieves all knowledge base IDs for a tenant -func (s *KnowledgebaseService) GetKBIDsByTenantID(tenantID string) ([]string, error) { - return s.kbDAO.GetKBIDsByTenantID(tenantID) -} - -// isValidString checks if a value is a non-empty string -func isValidString(v interface{}) bool { - str, ok := v.(string) - return ok && str != "" -} - -// getParserConfig returns the parser configuration with defaults -// This matches the Python get_parser_config function -func getParserConfig(parserID string, customConfig map[string]interface{}) map[string]interface{} { - config := map[string]interface{}{ - "pages": [][]int{{1, 1000000}}, - "table_context_size": 0, - "image_context_size": 0, - } - - switch parserID { - case "table": - config["layout_recognize"] = false - config["chunk_token_num"] = 128 - config["delimiter"] = "\n!?;。;!?" - config["html4excel"] = false - case "naive": - config["chunk_token_num"] = 128 - config["delimiter"] = "\n!?;。;!?" - config["html4excel"] = false - default: - config["raptor"] = map[string]interface{}{ - "use_raptor": false, - } - } - - // Merge custom config if provided - if customConfig != nil { - config = mergeParserConfig(config, customConfig) - } - - return config -} - -// mergeParserConfig merges two parser configurations -func mergeParserConfig(base, override map[string]interface{}) map[string]interface{} { - result := make(map[string]interface{}) - for k, v := range base { - result[k] = v - } - - for k, v := range override { - if existing, ok := result[k]; ok { - if existingMap, ok := existing.(map[string]interface{}); ok { - if newMap, ok := v.(map[string]interface{}); ok { - result[k] = mergeParserConfig(existingMap, newMap) - continue - } - } - } - result[k] = v - } - - return result -} - -// GetUserByToken gets user by authorization token -func (s *KnowledgebaseService) GetUserByToken(authorization string) (*entity.User, common.ErrorCode, error) { - userService := NewUserService() - return userService.GetUserByToken(authorization) -} - -// GetUserByID gets user by ID -func (s *KnowledgebaseService) GetUserByID(id string) (*entity.User, error) { - return s.userDAO.GetByAccessToken(id) -} - -// GetTenantIDsByUserID gets tenant IDs for a user -func (s *KnowledgebaseService) GetTenantIDsByUserID(userID string) ([]string, error) { - return s.userTenantDAO.GetTenantIDsByUserID(userID) -} - -// GetConnectorsByTenantID gets connectors for a tenant -func (s *KnowledgebaseService) GetConnectorsByTenantID(tenantID string) ([]*dao.ConnectorListItem, error) { - return s.connectorDAO.ListByTenantID(tenantID) -} - -// GetKBList retrieves knowledge bases with ID and name filtering -func (s *KnowledgebaseService) GetKBList(tenantIDs []string, userID string, page, pageSize int, orderby string, desc bool, id, name string) ([]*entity.Knowledgebase, int64, common.ErrorCode, error) { - kbs, total, err := s.kbDAO.GetList(tenantIDs, userID, page, pageSize, orderby, desc, id, name) - if err != nil { - return nil, 0, common.CodeServerError, err - } - return kbs, total, common.CodeSuccess, nil -} - -// GetKBByIDAndUserID retrieves a knowledge base by ID and user ID -func (s *KnowledgebaseService) GetKBByIDAndUserID(kbID, userID string) ([]*entity.Knowledgebase, error) { - return s.kbDAO.GetKBByIDAndUserID(kbID, userID) -} - -// GetKBByNameAndUserID retrieves a knowledge base by name and user ID -func (s *KnowledgebaseService) GetKBByNameAndUserID(kbName, userID string) ([]*entity.Knowledgebase, error) { - return s.kbDAO.GetKBByNameAndUserID(kbName, userID) -} - -// AtomicIncreaseDocNumByID atomically increments the document count -func (s *KnowledgebaseService) AtomicIncreaseDocNumByID(kbID string) error { - return s.kbDAO.AtomicIncreaseDocNumByID(kbID) -} - -// DecreaseDocumentNum decreases document, chunk, and token counts -func (s *KnowledgebaseService) DecreaseDocumentNum(kbID string, docNum, chunkNum, tokenNum int64) error { - return s.kbDAO.DecreaseDocumentNum(kbID, docNum, chunkNum, tokenNum) -} - -// UpdateParserConfig updates the parser configuration -func (s *KnowledgebaseService) UpdateParserConfig(id string, config map[string]interface{}) error { - return s.kbDAO.UpdateParserConfig(id, config) -} - -// DeleteFieldMap removes the field_map from parser_config -func (s *KnowledgebaseService) DeleteFieldMap(id string) error { - return s.kbDAO.DeleteFieldMap(id) -} - -// GetFieldMap retrieves field mappings from multiple knowledge bases -func (s *KnowledgebaseService) GetFieldMap(ids []string) (map[string]interface{}, error) { - return s.kbDAO.GetFieldMap(ids) -} - -// GetAllIDs retrieves all knowledge base IDs -func (s *KnowledgebaseService) GetAllIDs() ([]string, error) { - return s.kbDAO.GetAllIDs() -} - -// ExtractAccessToken extracts access token from authorization header -func ExtractAccessToken(authorization, secretKey string) (string, error) { - return utility.ExtractAccessToken(authorization, secretKey) -} diff --git a/web/vite.config.ts b/web/vite.config.ts index 4bc3f1fcad..e81a5db1d2 100644 --- a/web/vite.config.ts +++ b/web/vite.config.ts @@ -75,7 +75,7 @@ export default defineConfig(({ mode }) => { }, }, hybrid: { - '^(/v1/kb)|^(/v1/document)|^(/v1/llm/list)|^(/api/v1/datasets)|^(/api/v1/memories)|^(/v1/user)|^(/v1/user/tenant_info)|^(/v1/tenant/list)|^(/v1/system/config)|^(/v1/user/login)|^(/v1/user/logout)|^(/api/v1/files)': + '^(/v1/document)|^(/v1/llm/list)|^(/api/v1/datasets)|^(/api/v1/memories)|^(/v1/user)|^(/v1/user/tenant_info)|^(/v1/tenant/list)|^(/v1/system/config)|^(/v1/user/login)|^(/v1/user/logout)|^(/api/v1/files)': { target: 'http://127.0.0.1:9384/', changeOrigin: true,