fix: correct typos in code comments, docstrings and docs (#13931)

## Summary

- Fix `a image` → `an image` in README and log message
- Fix `colomn` → `column` in table structure recognizer comment
- Fix `formated` → `formatted` in confluence connector docstring
- Fix `tabel of content` → `table of contents` in TOC prompt

## Test plan

- [ ] Documentation and comment changes, no functional impact

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: yuj <yuj@ztjzsoft.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Jin Hai <haijin.chn@gmail.com>
2026-06-29 15:31:05 +08:00 · 2026-04-07 13:05:39 +08:00
parent 29cf8aba48
commit 424aee5bec
5 changed files with 5 additions and 5 deletions
--- a/common/data_source/confluence_connector.py
+++ b/common/data_source/confluence_connector.py
@@ -920,7 +920,7 @@ def extract_text_from_confluence_html(
        confluence_client (Confluence): Confluence client
        fetched_titles (set[str]): The titles of the pages that have already been fetched
    Returns:
-        str: loaded and formated Confluence page
+        str: loaded and formatted Confluence page
    """
    body = confluence_object["body"]
    object_html = body.get("storage", body.get("view", {})).get("value")
--- a/deepdoc/README.md
+++ b/deepdoc/README.md
@@ -98,7 +98,7 @@ We use vision information to resolve problems as human being.
     ```bash
        python deepdoc/vision/t_recognizer.py --inputs=path_to_images_or_pdfs --threshold=0.2 --mode=tsr --output_dir=path_to_store_result
     ```
-    The inputs could be directory to images or PDF, or a image or PDF. 
+    The inputs could be directory to images or PDF, or an image or PDF. 
    You can look into the folder 'path_to_store_result' where has both images and html pages which demonstrate the detection results as following:
    <div align="center" style="margin-top:20px;margin-bottom:20px;">
    <img src="https://github.com/infiniflow/ragflow/assets/12318111/cb24e81b-f2ba-49f3-ac09-883d75606f4c" width="1000"/>
--- a/deepdoc/parser/pdf_parser.py
+++ b/deepdoc/parser/pdf_parser.py
@@ -708,7 +708,7 @@ class RAGFlowPdfParser:
    def __ocr(self, pagenum, img, chars, ZM=3, device_id: int | None = None):
        start = timer()
        bxs = self.ocr.detect(np.array(img), device_id)
-        logging.info(f"__ocr detecting boxes of a image cost ({timer() - start}s)")
+        logging.info(f"__ocr detecting boxes of an image cost ({timer() - start}s)")

        start = timer()
        if not bxs:
--- a/deepdoc/vision/table_structure_recognizer.py
+++ b/deepdoc/vision/table_structure_recognizer.py
@@ -394,7 +394,7 @@ class TableStructureRecognizer(Recognizer):

    @staticmethod
    def __desc_table(cap, hdr_rowno, tbl, is_english):
-        # get text of every colomn in header row to become header text
+        # get text of every column in header row to become header text
        clmno = len(tbl[0])
        rowno = len(tbl)
        headers = {}
--- a/rag/prompts/assign_toc_levels.md
+++ b/rag/prompts/assign_toc_levels.md
@@ -1,4 +1,4 @@
-You are given a JSON array of TOC(tabel of content) items. Each item has at least {"title": string} and may include an existing title hierarchical level.
+You are given a JSON array of TOC(table of contents) items. Each item has at least {"title": string} and may include an existing title hierarchical level.

 Task
 - For each item, assign a depth label using Arabic numerals only: top-level = 1, second-level = 2, third-level = 3, etc.