mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Refa: GraphRAG to use async chat methods instead of thread pool execution (#14002)
### What problem does this PR solve?

GraphRAG extractors now call `_async_chat` directly instead of running chat calls through thread pool execution.

### Type of change

- [x] Refactoring
- [x] Performance Improvement

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **Refactor**
  * Unified chat calls to an async invocation across extractors, improving timeout handling and ensuring task IDs propagate reliably.
* **Tests**
  * Added and expanded unit tests and mocks to cover extractor behavior, timeout scenarios, and safe test-package imports, reducing regression risk.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -28,6 +28,10 @@ _modules_to_mock = [
|
||||
"common.settings",
|
||||
"common.doc_store",
|
||||
"common.doc_store.doc_store_base",
|
||||
"api.db.services",
|
||||
"api.db.services.task_service",
|
||||
"rag.graphrag.general.leiden",
|
||||
"rag.llm.chat_model",
|
||||
"rag.nlp",
|
||||
"rag.nlp.search",
|
||||
"rag.nlp.rag_tokenizer",
|
||||
@@ -40,3 +44,7 @@ for mod_name in _modules_to_mock:
|
||||
|
||||
# Ensure `from common.connection_utils import timeout` returns a no-op decorator
sys.modules["common.connection_utils"].timeout = lambda *a, **kw: (lambda fn: fn)
# Cancellation check stub: always answers False, whatever arguments it receives.
sys.modules["api.db.services.task_service"].has_canceled = lambda *_a, **_kw: False
# Leiden stubs: `run` yields an empty dict and `add_community_info2graph`
# does nothing; individual tests may override them with richer fakes.
sys.modules["rag.graphrag.general.leiden"].run = lambda *_a, **_kw: {}
sys.modules["rag.graphrag.general.leiden"].add_community_info2graph = lambda *_a, **_kw: None
# Plain `object` stands in for the chat model `Base` attribute.
sys.modules["rag.llm.chat_model"].Base = object
|
||||
|
||||
96
test/unit_test/rag/graphrag/test_graphrag_extractors.py
Normal file
96
test/unit_test/rag/graphrag/test_graphrag_extractors.py
Normal file
@@ -0,0 +1,96 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
|
||||
import networkx as nx
|
||||
import pytest
|
||||
|
||||
import rag.graphrag.general.community_reports_extractor as community_reports_module
|
||||
from rag.graphrag.general.community_reports_extractor import CommunityReportsExtractor
|
||||
from rag.graphrag.general.graph_extractor import GraphExtractor
|
||||
|
||||
|
||||
def _build_llm_stub():
|
||||
return SimpleNamespace(llm_name="test-llm", max_length=4096)
|
||||
|
||||
|
||||
class TestGraphExtractor:
    """Unit tests for ``GraphExtractor``."""

    @pytest.mark.p2
    @pytest.mark.asyncio
    async def test_process_single_content_passes_task_id_to_gleaning_calls(self, monkeypatch):
        """Every chat call made by ``_process_single_content`` must carry the task id.

        ``_async_chat`` is replaced by a fake that records the ``task_id`` it
        receives and replies from a fixed three-item script; the test then
        checks that all three recorded ids equal the one passed in.
        """
        extractor = GraphExtractor(_build_llm_stub(), entity_types=["person"])
        extractor.callback = None
        seen_task_ids = []
        # Scripted replies, consumed one per chat call in this order.
        responses = iter(["seed-response", "glean-response", "N"])

        async def fake_async_chat(_system, _history, _gen_conf=None, task_id=""):
            # Record the task id of each call so the assertion below can
            # verify it was forwarded on every invocation.
            seen_task_ids.append(task_id)
            return next(responses)

        monkeypatch.setattr(extractor, "_async_chat", fake_async_chat)
        # Replace parsing with a stub returning two empty dicts; only the
        # chat-call arguments are under test here.
        monkeypatch.setattr(extractor, "_entities_and_relations", lambda *_args, **_kwargs: ({}, {}))

        out_results = []
        await extractor._process_single_content(("chunk-1", "alpha beta"), 0, 1, out_results, task_id="task-123")

        assert seen_task_ids == ["task-123", "task-123", "task-123"]
|
||||
|
||||
|
||||
class TestCommunityReportsExtractor:
    """Unit tests for ``CommunityReportsExtractor``."""

    @pytest.mark.p2
    @pytest.mark.asyncio
    async def test_call_does_not_use_outer_timeout_shorter_than_llm_timeout(self, monkeypatch):
        """Calling the extractor succeeds even though the patched ``timeout`` is shorter than the chat call.

        The module-level ``timeout`` is swapped for a decorator factory that
        enforces a 0.01 s limit, while the fake chat coroutine sleeps for
        0.02 s. The test passes only if the call still yields the single
        parsed community report.
        """
        extractor = CommunityReportsExtractor(_build_llm_stub())
        # Two described nodes joined by one described edge.
        graph = nx.Graph()
        graph.add_node("A", description="alpha")
        graph.add_node("B", description="beta")
        graph.add_edge("A", "B", description="related")

        monkeypatch.setenv("ENABLE_TIMEOUT_ASSERTION", "1")

        # Keep a reference to the real wait_for for use inside the fake below.
        original_wait_for = asyncio.wait_for

        def fake_timeout(_seconds, _attempts=2, **_kwargs):
            # Ignores the requested limit: any coroutine function decorated
            # with this is awaited under a fixed 0.01 s timeout.
            def decorator(fn):
                async def wrapper(*args, **kwargs):
                    return await original_wait_for(fn(*args, **kwargs), timeout=0.01)

                return wrapper

            return decorator

        async def slow_async_chat(*_args, **_kwargs):
            # Sleeps longer than the 0.01 s limit used by fake_timeout,
            # then returns a JSON report as a string.
            await asyncio.sleep(0.02)
            return (
                '{"title":"Community","summary":"Summary","findings":[],'
                '"rating":1.0,"rating_explanation":"Clear"}'
            )

        # raising=False: do not fail if the module has no `timeout` attribute.
        monkeypatch.setattr(community_reports_module, "timeout", fake_timeout, raising=False)
        # Fake leiden result describing one community containing both nodes.
        monkeypatch.setattr(
            community_reports_module.leiden,
            "run",
            lambda *_args, **_kwargs: {0: {"0": {"weight": 1.0, "nodes": ["A", "B"]}}},
        )
        monkeypatch.setattr(community_reports_module, "add_community_info2graph", lambda *_args, **_kwargs: None)
        monkeypatch.setattr(extractor, "_async_chat", slow_async_chat)

        result = await extractor(graph)

        assert len(result.structured_output) == 1
        assert result.structured_output[0]["title"] == "Community"
|
||||
21
test/unit_test/test_test_chunk_feedback_package.py
Normal file
21
test/unit_test/test_test_chunk_feedback_package.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import importlib
|
||||
import sys
|
||||
from types import ModuleType
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.p2
|
||||
|
||||
|
||||
def test_chunk_feedback_package_import_is_safe_when_common_is_shadowed(monkeypatch):
    """Import the chunk-feedback test package while ``common`` is shadowed.

    ``sys.modules["common"]`` is replaced by an empty module, any cached copy
    of the target package is dropped so the import runs afresh, and the test
    passes if the import completes and returns a module.
    """
    shadow_common = ModuleType("common")
    monkeypatch.setitem(sys.modules, "common", shadow_common)
    # raising=False: the package may not have been imported yet.
    monkeypatch.delitem(
        sys.modules,
        "test.testcases.test_web_api.test_chunk_feedback",
        raising=False,
    )

    module = importlib.import_module("test.testcases.test_web_api.test_chunk_feedback")

    assert module is not None
|
||||
Reference in New Issue
Block a user