mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-03 01:01:56 +08:00
Fix: guard missing task language (#15136)
### What problem does this PR solve?

Guard against a missing task language.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -799,6 +799,7 @@ def chunk(filename, binary=None, from_page=0, to_page=MAXIMUM_PAGE_NUMBER, lang=
|
||||
urls = set()
|
||||
url_res = []
|
||||
|
||||
+ lang = lang or "Chinese"
|
||||
is_english = lang.lower() == "english" # is_english(cks)
|
||||
parser_config = kwargs.get("parser_config", {"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": "DeepDOC", "analyze_hyperlink": True})
|
||||
|
||||
|
||||
@@ -299,13 +299,14 @@ async def build_chunks(task, progress_callback):
|
||||
|
||||
try:
|
||||
async with chunk_limiter:
|
||||
+ task_language = task.get("language") or "Chinese"
|
||||
cks = await thread_pool_exec(
|
||||
chunker.chunk,
|
||||
task["name"],
|
||||
binary=binary,
|
||||
from_page=task["from_page"],
|
||||
to_page=task["to_page"],
|
||||
- lang=task["language"],
|
||||
+ lang=task_language,
|
||||
callback=progress_callback,
|
||||
kb_id=task["kb_id"],
|
||||
parser_config=parser_config_for_chunk,
|
||||
@@ -1286,7 +1287,9 @@ async def do_handle_task(task):
|
||||
task_to_page = task["to_page"]
|
||||
task_tenant_id = task["tenant_id"]
|
||||
task_embedding_id = task["embd_id"]
|
||||
- task_language = task["language"]
|
||||
+ task_language = task.get("language") or "Chinese"
|
||||
+ if not task.get("language"):
|
||||
+ logging.warning("Task %s has no language set, falling back to Chinese", task_id)
|
||||
doc_task_llm_id = task["parser_config"].get("llm_id") or task["llm_id"]
|
||||
kb_task_llm_id = task['kb_parser_config'].get("llm_id") or task["llm_id"]
|
||||
task['llm_id'] = kb_task_llm_id
|
||||
|
||||
Reference in New Issue
Block a user