fix: use context manager for pdfplumber to prevent resource leak (#13512)

## Summary
- Wrap the `pdfplumber.open()` call in a `with` context manager in
`api/utils/file_utils.py` (`thumbnail_img` function)
- Previously, if any exception occurred between `pdfplumber.open()` and
`pdf.close()`, `close()` was never reached and the opened PDF's resources leaked
- The rest of the codebase (e.g. `read_potential_broken_pdf` in the same
file) already uses `with pdfplumber.open(...)` correctly

## Test plan
- [x] PDF thumbnail generation works correctly with context manager
- [x] Resources properly cleaned up on exceptions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Ethan T.
2026-05-13 21:09:51 +08:00
committed by GitHub
parent e994051eb9
commit 8c5845f6ca

View File

@@ -107,23 +107,21 @@ def thumbnail_img(filename, blob):
if re.match(r".*\.pdf$", filename):
try:
with sys.modules[LOCK_KEY_pdfplumber]:
pdf = pdfplumber.open(BytesIO(blob))
if not pdf.pages:
pdf.close()
return None
buffered = BytesIO()
resolution = 32
img = None
for _ in range(10):
pdf.pages[0].to_image(resolution=resolution).annotated.save(buffered, format="png")
img = buffered.getvalue()
if len(img) >= 64000 and resolution >= 2:
resolution = resolution / 2
buffered = BytesIO()
else:
break
pdf.close()
return img
with pdfplumber.open(BytesIO(blob)) as pdf:
if not pdf.pages:
return None
buffered = BytesIO()
resolution = 32
img = None
for _ in range(10):
pdf.pages[0].to_image(resolution=resolution).annotated.save(buffered, format="png")
img = buffered.getvalue()
if len(img) >= 64000 and resolution >= 2:
resolution = resolution / 2
buffered = BytesIO()
else:
break
return img
except Exception:
return None