fix: use context manager for ThreadPoolExecutor in file_service.py (#14144)

## Summary
- Wrap the two `ThreadPoolExecutor` instances in `file_service.py` in `with`
statements
- Ensures threads are properly shut down after all futures complete

## Problem

`parse_docs()` (line 532) and the file processing method (line 694)
create `ThreadPoolExecutor` instances that are never shut down. In a
long-running server process, this leaks thread resources on every
invocation — threads remain alive consuming memory even after all
submitted work is complete.

## Fix

Replace each bare `ThreadPoolExecutor(...)` with a
`with ThreadPoolExecutor(...) as exe:` context manager, which calls
`exe.shutdown(wait=True)` on exit.

## Test plan
- [x] Verified both call sites use `with` statement after fix
- [x] No remaining bare `ThreadPoolExecutor` in `file_service.py`
- [x] `document_service.py:1066` is a module-level executor (different
pattern, not changed in this PR)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
Ricardo-M-L
2026-05-11 14:02:45 +08:00
committed by GitHub
parent a03b95f8c4
commit 5ef7f50eef

View File

@@ -562,8 +562,13 @@ class FileService(CommonService):
@staticmethod
def parse_docs(file_objs, user_id):
with ThreadPoolExecutor(max_workers=12) as exe:
threads = [exe.submit(FileService.parse, file.filename, file.read(), False) for file in file_objs]
res = [th.result() for th in threads]
threads = []
for file in file_objs:
threads.append(exe.submit(FileService.parse, file.filename, file.read(), False))
res = []
for th in threads:
res.append(th.result())
return "\n\n".join(res)
@@ -788,9 +793,9 @@ class FileService(CommonService):
def image_to_base64(file):
return "data:{};base64,{}".format(file["mime_type"],
base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8"))
threads = []
imgs = []
with ThreadPoolExecutor(max_workers=5) as exe:
threads = []
imgs = []
for file in files:
if file["mime_type"].find("image") >=0:
if raw:
@@ -800,9 +805,7 @@ class FileService(CommonService):
continue
threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"], layout_recognize))
results = [th.result() for th in threads]
if raw:
return results, imgs
else:
return results
if raw:
return [th.result() for th in threads], imgs
else:
return [th.result() for th in threads]