From c22811f0968cda6347c6f8782901f4c4a09f26a0 Mon Sep 17 00:00:00 2001
From: Ricardo-M-L <69202550+Ricardo-M-L@users.noreply.github.com>
Date: Tue, 14 Apr 2026 11:43:58 +0800
Subject: [PATCH] fix: close file handles in json.load() calls in resume parser
 (#14061)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary
- Replace `json.load(open(...))` with a `with open(...) as f:` block that
calls `json.load(f)` in 2 resume parser files
- Fixes 4 leaked file descriptors in `corporations.py` (3) and
`schools.py` (1)

## Why
In a long-running server process like RAGFlow, leaked file handles can
accumulate until the process hits the OS file descriptor limit
(`OSError: [Errno 24] Too many open files`). The other instances mentioned
in the issue (`infinity_conn_base.py` and `init_data.py`) have already been
fixed.

## Test plan
- [x] Verified affected files use `with` statement after fix
- [x] Grep confirms no remaining `json.load(open(` patterns in codebase

Fixes #13996

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 deepdoc/parser/resume/entities/corporations.py | 11 ++++++-----
 deepdoc/parser/resume/entities/schools.py      |  3 ++-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/deepdoc/parser/resume/entities/corporations.py b/deepdoc/parser/resume/entities/corporations.py
index 0396281dee..5035967303 100644
--- a/deepdoc/parser/resume/entities/corporations.py
+++ b/deepdoc/parser/resume/entities/corporations.py
@@ -29,11 +29,12 @@ GOODS = pd.read_csv(
 ).fillna(0)
 GOODS["cid"] = GOODS["cid"].astype(str)
 GOODS = GOODS.set_index(["cid"])
-CORP_TKS = json.load(
-    open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r",encoding="utf-8")
-)
-GOOD_CORP = json.load(open(os.path.join(current_file_path, "res/good_corp.json"), "r",encoding="utf-8"))
-CORP_TAG = json.load(open(os.path.join(current_file_path, "res/corp_tag.json"), "r",encoding="utf-8"))
+with open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r", encoding="utf-8") as f:
+    CORP_TKS = json.load(f)
+with open(os.path.join(current_file_path, "res/good_corp.json"), "r", encoding="utf-8") as f:
+    GOOD_CORP = json.load(f)
+with open(os.path.join(current_file_path, "res/corp_tag.json"), "r", encoding="utf-8") as f:
+    CORP_TAG = json.load(f)
 
 
 def baike(cid, default_v=0):
diff --git a/deepdoc/parser/resume/entities/schools.py b/deepdoc/parser/resume/entities/schools.py
index 4425236beb..5763ca48be 100644
--- a/deepdoc/parser/resume/entities/schools.py
+++ b/deepdoc/parser/resume/entities/schools.py
@@ -25,7 +25,8 @@ TBL = pd.read_csv(
     os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0
 ).fillna("")
 TBL["name_en"] = TBL["name_en"].map(lambda x: x.lower().strip())
-GOOD_SCH = json.load(open(os.path.join(current_file_path, "res/good_sch.json"), "r",encoding="utf-8"))
+with open(os.path.join(current_file_path, "res/good_sch.json"), "r", encoding="utf-8") as f:
+    GOOD_SCH = json.load(f)
 GOOD_SCH = set([re.sub(r"[,. &（）()]+", "", c) for c in GOOD_SCH])