From c22811f0968cda6347c6f8782901f4c4a09f26a0 Mon Sep 17 00:00:00 2001
From: Ricardo-M-L <69202550+Ricardo-M-L@users.noreply.github.com>
Date: Tue, 14 Apr 2026 11:43:58 +0800
Subject: [PATCH] fix: close file handles in json.load() calls in resume parser (#14061)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary
- Replace `json.load(open(...))` with `with open(...) as f: json.load(f)` in 2 resume parser files
- Fixes 4 leaked file descriptors in `corporations.py` (3) and `schools.py` (1)

## Why
In a long-running server process like RAGFlow, leaked file handles can accumulate and hit the OS file descriptor limit (`OSError: [Errno 24] Too many open files`).
The other instances mentioned in the issue (`infinity_conn_base.py` and `init_data.py`) have already been fixed.

## Test plan
- [x] Verified affected files use `with` statement after fix
- [x] Grep confirms no remaining `json.load(open(` patterns in codebase

Fixes #13996

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 (1M context)
---
 deepdoc/parser/resume/entities/corporations.py | 11 ++++++-----
 deepdoc/parser/resume/entities/schools.py      |  3 ++-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/deepdoc/parser/resume/entities/corporations.py b/deepdoc/parser/resume/entities/corporations.py
index 0396281dee..5035967303 100644
--- a/deepdoc/parser/resume/entities/corporations.py
+++ b/deepdoc/parser/resume/entities/corporations.py
@@ -29,11 +29,12 @@ GOODS = pd.read_csv(
 ).fillna(0)
 GOODS["cid"] = GOODS["cid"].astype(str)
 GOODS = GOODS.set_index(["cid"])
-CORP_TKS = json.load(
-    open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r",encoding="utf-8")
-)
-GOOD_CORP = json.load(open(os.path.join(current_file_path, "res/good_corp.json"), "r",encoding="utf-8"))
-CORP_TAG = json.load(open(os.path.join(current_file_path, "res/corp_tag.json"), "r",encoding="utf-8"))
+with open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r", encoding="utf-8") as f:
+    CORP_TKS = json.load(f)
+with open(os.path.join(current_file_path, "res/good_corp.json"), "r", encoding="utf-8") as f:
+    GOOD_CORP = json.load(f)
+with open(os.path.join(current_file_path, "res/corp_tag.json"), "r", encoding="utf-8") as f:
+    CORP_TAG = json.load(f)
 
 
 def baike(cid, default_v=0):
diff --git a/deepdoc/parser/resume/entities/schools.py b/deepdoc/parser/resume/entities/schools.py
index 4425236beb..5763ca48be 100644
--- a/deepdoc/parser/resume/entities/schools.py
+++ b/deepdoc/parser/resume/entities/schools.py
@@ -25,7 +25,8 @@ TBL = pd.read_csv(
     os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0
 ).fillna("")
 TBL["name_en"] = TBL["name_en"].map(lambda x: x.lower().strip())
-GOOD_SCH = json.load(open(os.path.join(current_file_path, "res/good_sch.json"), "r",encoding="utf-8"))
+with open(os.path.join(current_file_path, "res/good_sch.json"), "r", encoding="utf-8") as f:
+    GOOD_SCH = json.load(f)
 GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH])
 
 