diff --git a/Dockerfile b/Dockerfile index d168341255..95a9d54b75 100644 --- a/Dockerfile +++ b/Dockerfile @@ -78,12 +78,12 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ # A modern version of cargo is needed for the latest version of the Rust compiler. RUN apt update && apt install -y curl build-essential \ && if [ "$NEED_MIRROR" == "1" ]; then \ - # Use TUNA mirrors for rustup/rust dist files + # Use TUNA mirrors for rustup/rust dist files \ export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \ export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \ echo "Using TUNA mirrors for Rustup."; \ fi; \ - # Force curl to use HTTP/1.1 + # Force curl to use HTTP/1.1 \ curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \ && echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc diff --git a/common/data_source/discord_connector.py b/common/data_source/discord_connector.py index 46b23443c3..4c19a6d5ec 100644 --- a/common/data_source/discord_connector.py +++ b/common/data_source/discord_connector.py @@ -33,7 +33,7 @@ def _convert_message_to_document( metadata: dict[str, str | list[str]] = {} semantic_substring = "" - # Only messages from TextChannels will make it here but we have to check for it anyways + # Only messages from TextChannels will make it here, but we have to check for it anyway if isinstance(message.channel, TextChannel) and (channel_name := message.channel.name): metadata["Channel"] = channel_name semantic_substring += f" in Channel: #{channel_name}" @@ -176,7 +176,7 @@ def _manage_async_retrieval( # parse requested_start_date_string to datetime pull_date: datetime | None = datetime.strptime(requested_start_date_string, "%Y-%m-%d").replace(tzinfo=timezone.utc) if requested_start_date_string else None - # Set start_time to the later of start and pull_date, or whichever is provided + # Set start_time to the most recent of start and pull_date, or whichever is provided start_time = max(filter(None, [start, pull_date])) if start or pull_date else None end_time: datetime | None = end diff --git a/deepdoc/parser/html_parser.py b/deepdoc/parser/html_parser.py index 7e4467c168..dcf33a8bbd 100644 --- a/deepdoc/parser/html_parser.py +++ b/deepdoc/parser/html_parser.py @@ -151,7 +151,7 @@ class RAGFlowHtmlParser: block_content = [] current_content = "" table_info_list = [] - lask_block_id = None + last_block_id = None for item in parser_result: content = item.get("content") tag_name = item.get("tag_name") @@ -160,11 +160,11 @@ class RAGFlowHtmlParser: if block_id: if title_flag: content = f"{TITLE_TAGS[tag_name]} {content}" - if lask_block_id != block_id: - if lask_block_id is not None: + if last_block_id != block_id: + if last_block_id is not None: block_content.append(current_content) current_content = content - lask_block_id = block_id + last_block_id = block_id else: current_content += (" " if current_content else "") + content else: diff --git a/deepdoc/vision/ocr.py b/deepdoc/vision/ocr.py index 207fb0e840..afa6921272 100644 --- a/deepdoc/vision/ocr.py +++ b/deepdoc/vision/ocr.py @@ -582,7 +582,7 @@ class OCR: self.crop_image_res_index = 0 def get_rotate_crop_image(self, img, points): - ''' + """ img_height, img_width = img.shape[0:2] left = int(np.min(points[:, 0])) right = int(np.max(points[:, 0])) @@ -591,7 +591,7 @@ class OCR: img_crop = img[top:bottom, left:right, :].copy() points[:, 0] = points[:, 0] - left points[:, 1] = points[:, 1] - top - ''' + """ assert len(points) == 4, "shape of points must be 4*2" img_crop_width = int( max( diff --git a/deepdoc/vision/postprocess.py b/deepdoc/vision/postprocess.py index a61464382b..7704bc5826 100644 --- a/deepdoc/vision/postprocess.py +++ b/deepdoc/vision/postprocess.py @@ -67,10 +67,10 @@ class DBPostProcess: [[1, 1], [1, 1]]) def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height): - ''' + """ _bitmap: single map with shape (1, H, W), whose values are binarized as {0, 1} - ''' + """ bitmap = _bitmap height, width = bitmap.shape @@ -114,10 +114,10 @@ class DBPostProcess: return boxes, scores def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): - ''' + """ _bitmap: single map with shape (1, H, W), whose values are binarized as {0, 1} - ''' + """ bitmap = _bitmap height, width = bitmap.shape @@ -192,9 +192,9 @@ class DBPostProcess: return box, min(bounding_box[1]) def box_score_fast(self, bitmap, _box): - ''' + """ box_score_fast: use bbox mean score as the mean score - ''' + """ h, w = bitmap.shape[:2] box = _box.copy() xmin = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1) @@ -209,9 +209,9 @@ class DBPostProcess: return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] def box_score_slow(self, bitmap, contour): - ''' - box_score_slow: use polyon mean score as the mean score - ''' + """ + box_score_slow: use polygon mean score as the mean score + """ h, w = bitmap.shape[:2] contour = contour.copy() contour = np.reshape(contour, (-1, 2)) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index adb3375115..b851687a51 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -25,7 +25,7 @@ services: # - --no-transport-streamable-http-enabled # Disable Streamable HTTP transport (/mcp endpoint) # - --no-json-response # Disable JSON response mode in Streamable HTTP transport (instead of SSE over HTTP) - # Example configration to start Admin server: + # Example configuration to start Admin server: # command: # - --enable-adminserver ports: @@ -74,7 +74,7 @@ services: # - --no-transport-streamable-http-enabled # Disable Streamable HTTP transport (/mcp endpoint) # - --no-json-response # Disable JSON response mode in Streamable HTTP transport (instead of SSE over HTTP) - # Example configration to start Admin server: + # Example configuration to start Admin server: # command: # - --enable-adminserver ports: diff --git a/docs/faq.mdx b/docs/faq.mdx index 55997e1c34..10c6bc57c9 100644 --- a/docs/faq.mdx +++ b/docs/faq.mdx @@ -151,7 +151,7 @@ See [Build a RAGFlow Docker image](./develop/build_docker_image.mdx). ### Cannot access https://huggingface.co -A locally deployed RAGflow downloads OCR models from [Huggingface website](https://huggingface.co) by default. If your machine is unable to access this site, the following error occurs and PDF parsing fails: +A locally deployed RAGFlow downloads OCR models from [Huggingface website](https://huggingface.co) by default. If your machine is unable to access this site, the following error occurs and PDF parsing fails: ``` FileNotFoundError: [Errno 2] No such file or directory: '/root/.cache/huggingface/hub/models--InfiniFlow--deepdoc/snapshots/be0c1e50eef6047b412d1800aa89aba4d275f997/ocr.res' diff --git a/docs/guides/agent/best_practices/accelerate_agent_question_answering.md b/docs/guides/agent/best_practices/accelerate_agent_question_answering.md index 76de06068c..1161588bdd 100644 --- a/docs/guides/agent/best_practices/accelerate_agent_question_answering.md +++ b/docs/guides/agent/best_practices/accelerate_agent_question_answering.md @@ -45,13 +45,13 @@ Click the light bulb icon above the *current* dialogue and scroll down the popup | Item name | Description | -| ----------------- | --------------------------------------------------------------------------------------------- | +| ----------------- |-----------------------------------------------------------------------------------------------| | Total | Total time spent on this conversation round, including chunk retrieval and answer generation. | | Check LLM | Time to validate the specified LLM. | | Create retriever | Time to create a chunk retriever. | | Bind embedding | Time to initialize an embedding model instance. | | Bind LLM | Time to initialize an LLM instance. | -| Tune question | Time to optimize the user query using the context of the mult-turn conversation. | +| Tune question | Time to optimize the user query using the context of the multi-turn conversation. | | Bind reranker | Time to initialize an reranker model instance for chunk retrieval. | | Generate keywords | Time to extract keywords from the user query. | | Retrieval | Time to retrieve the chunks. | diff --git a/docs/guides/chat/best_practices/accelerate_question_answering.mdx b/docs/guides/chat/best_practices/accelerate_question_answering.mdx index e404c1c2a4..af4d2521b1 100644 --- a/docs/guides/chat/best_practices/accelerate_question_answering.mdx +++ b/docs/guides/chat/best_practices/accelerate_question_answering.mdx @@ -37,7 +37,7 @@ Please note that rerank models are essential in certain scenarios. There is alwa | Create retriever | Time to create a chunk retriever. | | Bind embedding | Time to initialize an embedding model instance. | | Bind LLM | Time to initialize an LLM instance. | -| Tune question | Time to optimize the user query using the context of the mult-turn conversation. | +| Tune question | Time to optimize the user query using the context of the multi-turn conversation. | | Bind reranker | Time to initialize an reranker model instance for chunk retrieval. | | Generate keywords | Time to extract keywords from the user query. | | Retrieval | Time to retrieve the chunks. | diff --git a/docs/guides/manage_users_and_services.md b/docs/guides/manage_users_and_services.md index 94b933ec2a..6c06c40f85 100644 --- a/docs/guides/manage_users_and_services.md +++ b/docs/guides/manage_users_and_services.md @@ -8,7 +8,7 @@ slug: /manage_users_and_services -The Admin CLI and Admin Service form a client-server architectural suite for RAGflow system administration. The Admin CLI serves as an interactive command-line interface that receives instructions and displays execution results from the Admin Service in real-time. This duo enables real-time monitoring of system operational status, supporting visibility into RAGflow Server services and dependent components including MySQL, Elasticsearch, Redis, and MinIO. In administrator mode, they provide user management capabilities that allow viewing users and performing critical operations—such as user creation, password updates, activation status changes, and comprehensive user data deletion—even when corresponding web interface functionalities are disabled. +The Admin CLI and Admin Service form a client-server architectural suite for RAGFlow system administration. The Admin CLI serves as an interactive command-line interface that receives instructions and displays execution results from the Admin Service in real-time. This duo enables real-time monitoring of system operational status, supporting visibility into RAGFlow Server services and dependent components including MySQL, Elasticsearch, Redis, and MinIO. In administrator mode, they provide user management capabilities that allow viewing users and performing critical operations—such as user creation, password updates, activation status changes, and comprehensive user data deletion—even when corresponding web interface functionalities are disabled. diff --git a/docs/guides/models/deploy_local_llm.mdx b/docs/guides/models/deploy_local_llm.mdx index dfee3fc785..997e526f31 100644 --- a/docs/guides/models/deploy_local_llm.mdx +++ b/docs/guides/models/deploy_local_llm.mdx @@ -305,7 +305,7 @@ With the Ollama service running, open a new terminal and run `./ollama pull -### 4. Configure RAGflow +### 4. Configure RAGFlow To enable IPEX-LLM accelerated Ollama in RAGFlow, you must also complete the configurations in RAGFlow. The steps are identical to those outlined in the *Deploy a local model using Ollama* section: diff --git a/example/sdk/dataset_example.py b/example/sdk/dataset_example.py index 3a0504d8d8..a3931f1432 100644 --- a/example/sdk/dataset_example.py +++ b/example/sdk/dataset_example.py @@ -14,9 +14,9 @@ # limitations under the License. # -''' +""" The example is about CRUD operations (Create, Read, Update, Delete) on a dataset. -''' +""" from ragflow_sdk import RAGFlow import sys diff --git a/sandbox/executor_manager/core/container.py b/sandbox/executor_manager/core/container.py index f953886c11..36cdded28f 100644 --- a/sandbox/executor_manager/core/container.py +++ b/sandbox/executor_manager/core/container.py @@ -122,15 +122,15 @@ async def create_container(name: str, language: SupportLanguage) -> bool: logger.info(f"Sandbox config:\n\t {create_args}") try: - returncode, _, stderr = await async_run_command(*create_args, timeout=10) - if returncode != 0: + return_code, _, stderr = await async_run_command(*create_args, timeout=10) + if return_code != 0: logger.error(f"❌ Container creation failed {name}: {stderr}") return False if language == SupportLanguage.NODEJS: copy_cmd = ["docker", "exec", name, "bash", "-c", "cp -a /app/node_modules /workspace/"] - returncode, _, stderr = await async_run_command(*copy_cmd, timeout=10) - if returncode != 0: + return_code, _, stderr = await async_run_command(*copy_cmd, timeout=10) + if return_code != 0: logger.error(f"❌ Failed to prepare dependencies for {name}: {stderr}") return False @@ -185,7 +185,7 @@ async def allocate_container_blocking(language: SupportLanguage, timeout=10) -> async def container_is_running(name: str) -> bool: """Asynchronously check the container status""" try: - returncode, stdout, _ = await async_run_command("docker", "inspect", "-f", "{{.State.Running}}", name, timeout=2) - return returncode == 0 and stdout.strip() == "true" + return_code, stdout, _ = await async_run_command("docker", "inspect", "-f", "{{.State.Running}}", name, timeout=2) + return return_code == 0 and stdout.strip() == "true" except Exception: return False