mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-05 10:58:34 +08:00
fix: use context manager for ThreadPoolExecutor in file_service.py (#14144)
## Summary

- Wrap 2 `ThreadPoolExecutor` instances in `file_service.py` with a `with` statement
- Ensures threads are properly shut down after all futures complete

## Problem

`parse_docs()` (line 532) and the file processing method (line 694) create `ThreadPoolExecutor` instances that are never shut down. In a long-running server process, this leaks thread resources on every invocation — threads remain alive consuming memory even after all submitted work is complete.

## Fix

Replace the bare `ThreadPoolExecutor()` with the `with ThreadPoolExecutor() as exe:` context manager, which calls `executor.shutdown(wait=True)` on exit.

## Test plan

- [x] Verified both call sites use the `with` statement after the fix
- [x] No remaining bare `ThreadPoolExecutor` in `file_service.py`
- [x] `document_service.py:1066` is a module-level executor (different pattern, not changed in this PR)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
@@ -562,8 +562,13 @@ class FileService(CommonService):
|
||||
@staticmethod
|
||||
def parse_docs(file_objs, user_id):
|
||||
with ThreadPoolExecutor(max_workers=12) as exe:
|
||||
threads = [exe.submit(FileService.parse, file.filename, file.read(), False) for file in file_objs]
|
||||
res = [th.result() for th in threads]
|
||||
threads = []
|
||||
for file in file_objs:
|
||||
threads.append(exe.submit(FileService.parse, file.filename, file.read(), False))
|
||||
|
||||
res = []
|
||||
for th in threads:
|
||||
res.append(th.result())
|
||||
|
||||
return "\n\n".join(res)
|
||||
|
||||
@@ -788,9 +793,9 @@ class FileService(CommonService):
|
||||
def image_to_base64(file):
|
||||
return "data:{};base64,{}".format(file["mime_type"],
|
||||
base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8"))
|
||||
threads = []
|
||||
imgs = []
|
||||
with ThreadPoolExecutor(max_workers=5) as exe:
|
||||
threads = []
|
||||
imgs = []
|
||||
for file in files:
|
||||
if file["mime_type"].find("image") >=0:
|
||||
if raw:
|
||||
@@ -800,9 +805,7 @@ class FileService(CommonService):
|
||||
continue
|
||||
threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"], layout_recognize))
|
||||
|
||||
results = [th.result() for th in threads]
|
||||
|
||||
if raw:
|
||||
return results, imgs
|
||||
else:
|
||||
return results
|
||||
if raw:
|
||||
return [th.result() for th in threads], imgs
|
||||
else:
|
||||
return [th.result() for th in threads]
|
||||
|
||||
Reference in New Issue
Block a user