diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index 2b3ba46151..3a12f78266 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -778,6 +778,14 @@ async def run_raptor_for_kb(row, kb_parser_config, chat_mdl, embd_mdl, vector_si res = [] tk_count = 0 max_errors = int(os.environ.get("RAPTOR_MAX_ERRORS", 3)) + doc_name_by_id = {} + for doc_id in set(doc_ids): + ok, source_doc = DocumentService.get_by_id(doc_id) + if not ok or not source_doc: + continue + source_name = getattr(source_doc, "name", "") + if source_name: + doc_name_by_id[doc_id] = source_name async def generate(chunks, did): nonlocal tk_count, res @@ -792,11 +800,12 @@ async def run_raptor_for_kb(row, kb_parser_config, chat_mdl, embd_mdl, vector_si ) original_length = len(chunks) chunks = await raptor(chunks, kb_parser_config["raptor"]["random_seed"], callback, row["id"]) + effective_doc_name = row["name"] if did == fake_doc_id else doc_name_by_id.get(did, row["name"]) doc = { "doc_id": did, "kb_id": [str(row["kb_id"])], - "docnm_kwd": row["name"], - "title_tks": rag_tokenizer.tokenize(row["name"]), + "docnm_kwd": effective_doc_name, + "title_tks": rag_tokenizer.tokenize(effective_doc_name), "raptor_kwd": "raptor" } if row["pagerank"]: @@ -1047,7 +1056,7 @@ async def do_handle_task(task): return # bind LLM for raptor - chat_model_config = get_model_config_by_type_and_name(task_dataset_id, LLMType.CHAT, kb_task_llm_id) + chat_model_config = get_model_config_by_type_and_name(task_tenant_id, LLMType.CHAT, kb_task_llm_id) chat_model = LLMBundle(task_tenant_id, chat_model_config, lang=task_language) # run RAPTOR async with kg_limiter: