From 5ef7f50eef15fbe74e566649fe92e43b865e0070 Mon Sep 17 00:00:00 2001 From: Ricardo-M-L <69202550+Ricardo-M-L@users.noreply.github.com> Date: Mon, 11 May 2026 14:02:45 +0800 Subject: [PATCH] fix: use context manager for ThreadPoolExecutor in file_service.py (#14144) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Wrap 2 `ThreadPoolExecutor` instances in `file_service.py` with a `with` statement - Ensures threads are properly shut down after all futures complete ## Problem `parse_docs()` (line 532) and the file processing method (line 694) create `ThreadPoolExecutor` instances that are never shut down. In a long-running server process, this leaks thread resources on every invocation — threads remain alive consuming memory even after all submitted work is complete. ## Fix Replace bare `ThreadPoolExecutor()` with a `with ThreadPoolExecutor() as exe:` context manager, which calls `executor.shutdown(wait=True)` on exit. ## Test plan - [x] Verified both call sites use a `with` statement after the fix - [x] No remaining bare `ThreadPoolExecutor` in `file_service.py` - [x] `document_service.py:1066` is a module-level executor (different pattern, not changed in this PR) Co-authored-by: Claude Opus 4.6 (1M context) Co-authored-by: Kevin Hu --- api/db/services/file_service.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/api/db/services/file_service.py b/api/db/services/file_service.py index 34776a6797..511624799f 100644 --- a/api/db/services/file_service.py +++ b/api/db/services/file_service.py @@ -562,8 +562,13 @@ class FileService(CommonService): @staticmethod def parse_docs(file_objs, user_id): with ThreadPoolExecutor(max_workers=12) as exe: - threads = [exe.submit(FileService.parse, file.filename, file.read(), False) for file in file_objs] - res = [th.result() for th in threads] + threads = [] + for file in file_objs: + threads.append(exe.submit(FileService.parse, file.filename, 
file.read(), False)) + + res = [] + for th in threads: + res.append(th.result()) return "\n\n".join(res) @@ -788,9 +793,9 @@ class FileService(CommonService): def image_to_base64(file): return "data:{};base64,{}".format(file["mime_type"], base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8")) - threads = [] - imgs = [] with ThreadPoolExecutor(max_workers=5) as exe: + threads = [] + imgs = [] for file in files: if file["mime_type"].find("image") >=0: if raw: @@ -800,9 +805,7 @@ class FileService(CommonService): continue threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"], layout_recognize)) - results = [th.result() for th in threads] - - if raw: - return results, imgs - else: - return results + if raw: + return [th.result() for th in threads], imgs + else: + return [th.result() for th in threads]