mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Fix WebDriver resource leak in HTML-to-PDF conversion (#14310)
### What problem does this PR solve?

In `api/utils/web_utils.py`, `__get_pdf_from_html()` creates a Chrome WebDriver but only calls `driver.quit()` inside the `TimeoutException` handler. If the page element becomes stale before the timeout (no exception raised), the WebDriver is never quit, leaking the Chrome browser process and returning `None`.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

### Changes

- Move the PDF printing logic and `driver.quit()` outside the `except` block so they execute on all code paths
- Use `try/finally` to ensure `driver.quit()` is always called, even if the `Page.printToPDF` DevTools call fails

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -173,6 +173,9 @@ def __get_pdf_from_html(path: str, timeout: int, install_driver: bool, print_opt
|
||||
try:
|
||||
WebDriverWait(driver, timeout).until(staleness_of(driver.find_element(by=By.TAG_NAME, value="html")))
|
||||
except TimeoutException:
|
||||
pass
|
||||
|
||||
try:
|
||||
calculated_print_options = {
|
||||
"landscape": False,
|
||||
"displayHeaderFooter": False,
|
||||
@@ -181,8 +184,9 @@ def __get_pdf_from_html(path: str, timeout: int, install_driver: bool, print_opt
|
||||
}
|
||||
calculated_print_options.update(print_options)
|
||||
result = __send_devtools(driver, "Page.printToPDF", calculated_print_options)
|
||||
driver.quit()
|
||||
return base64.b64decode(result["data"])
|
||||
finally:
|
||||
driver.quit()
|
||||
|
||||
|
||||
def is_valid_url(url: str) -> bool:
|
||||
|
||||
Reference in New Issue
Block a user