mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
fix: use context manager for pdfplumber to prevent resource leak (#13512)
## Summary - Convert `pdfplumber.open()` to use `with` context manager in `api/utils/file_utils.py` (`thumbnail_img` function) - If any exception occurs between `open()` and `close()`, the PDF file handle leaks - The rest of the codebase (e.g. `read_potential_broken_pdf` in the same file) already uses `with pdfplumber.open(...)` correctly ## Test plan - [x] PDF thumbnail generation works correctly with context manager - [x] Resources properly cleaned up on exceptions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -107,23 +107,21 @@ def thumbnail_img(filename, blob):
|
||||
if re.match(r".*\.pdf$", filename):
|
||||
try:
|
||||
with sys.modules[LOCK_KEY_pdfplumber]:
|
||||
pdf = pdfplumber.open(BytesIO(blob))
|
||||
if not pdf.pages:
|
||||
pdf.close()
|
||||
return None
|
||||
buffered = BytesIO()
|
||||
resolution = 32
|
||||
img = None
|
||||
for _ in range(10):
|
||||
pdf.pages[0].to_image(resolution=resolution).annotated.save(buffered, format="png")
|
||||
img = buffered.getvalue()
|
||||
if len(img) >= 64000 and resolution >= 2:
|
||||
resolution = resolution / 2
|
||||
buffered = BytesIO()
|
||||
else:
|
||||
break
|
||||
pdf.close()
|
||||
return img
|
||||
with pdfplumber.open(BytesIO(blob)) as pdf:
|
||||
if not pdf.pages:
|
||||
return None
|
||||
buffered = BytesIO()
|
||||
resolution = 32
|
||||
img = None
|
||||
for _ in range(10):
|
||||
pdf.pages[0].to_image(resolution=resolution).annotated.save(buffered, format="png")
|
||||
img = buffered.getvalue()
|
||||
if len(img) >= 64000 and resolution >= 2:
|
||||
resolution = resolution / 2
|
||||
buffered = BytesIO()
|
||||
else:
|
||||
break
|
||||
return img
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user