mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Refactor: Task Executor (#15154)
### What problem does this PR solve?
1. Break huge function into smaller pieces
2. Add unit test for the smaller pieces function
3. Layer-ed design
a. infra layer - task_context.py, recording_context.py,
write_operation_interceptor.py, ...
b. service layer - *_service.py
c. business layer - task_handler.py
4. Default behavior: use "refactor-ed version" - can switch to original
version by change env variable
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Refactoring
- [x] Performance Improvement
---------
Co-authored-by: Liu An <asiro@qq.com>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
This commit is contained in:
@@ -246,7 +246,7 @@ class ForgettingPolicy(StrEnum):
|
||||
# ENV_TRACE_MALLOC_ENABLED = "TRACE_MALLOC_ENABLED"
|
||||
|
||||
PAGERANK_FLD = "pagerank_fea"
|
||||
SVR_QUEUE_NAME = "rag_flow_svr_queue"
|
||||
SVR_QUEUE_NAME = "te"
|
||||
SVR_CONSUMER_GROUP_NAME = "rag_flow_svr_task_broker"
|
||||
TAG_FLD = "tag_feas"
|
||||
|
||||
|
||||
@@ -13,7 +13,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import functools
|
||||
import inspect
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
||||
|
||||
def singleton(cls, *args, **kw):
|
||||
instances = {}
|
||||
@@ -24,4 +29,58 @@ def singleton(cls, *args, **kw):
|
||||
instances[key] = cls(*args, **kw)
|
||||
return instances[key]
|
||||
|
||||
return _singleton
|
||||
return _singleton
|
||||
|
||||
|
||||
def timing(func=None, *, name=None, context=None):
|
||||
"""Decorator that records function execution time.
|
||||
|
||||
Usage:
|
||||
@timing
|
||||
async def my_func(): ...
|
||||
|
||||
@timing(name="custom_name")
|
||||
def my_func(): ...
|
||||
|
||||
@timing(context=recording_ctx)
|
||||
async def my_func(): ...
|
||||
|
||||
Args:
|
||||
func: The function to decorate (auto-passed when used as @timing)
|
||||
name: Custom name for the timing record, defaults to function name
|
||||
context: A RecordingContext-like object to record timing data into.
|
||||
If not provided, will try to use global recording_context from task_executor.
|
||||
Timing data will be recorded as "{name}_time".
|
||||
"""
|
||||
if func is None:
|
||||
return functools.partial(timing, name=name, context=context)
|
||||
|
||||
func_name = name or func.__name__
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
if inspect.iscoroutinefunction(func):
|
||||
@functools.wraps(func)
|
||||
async def async_wrapper(*args, **kwargs):
|
||||
start = time.perf_counter()
|
||||
try:
|
||||
result = await func(*args, **kwargs)
|
||||
return result
|
||||
finally:
|
||||
elapsed = time.perf_counter() - start
|
||||
log.debug(f"[TIMING] {func_name} took {elapsed:.3f}s")
|
||||
if context is not None:
|
||||
context.record(f"{func_name}_time", elapsed)
|
||||
return async_wrapper
|
||||
else:
|
||||
@functools.wraps(func)
|
||||
def sync_wrapper(*args, **kwargs):
|
||||
start = time.perf_counter()
|
||||
try:
|
||||
result = func(*args, **kwargs)
|
||||
return result
|
||||
finally:
|
||||
elapsed = time.perf_counter() - start
|
||||
log.debug(f"[TIMING] {func_name} took {elapsed:.3f}s")
|
||||
if context is not None:
|
||||
context.record(f"{func_name}_time", elapsed)
|
||||
return sync_wrapper
|
||||
@@ -133,13 +133,30 @@ PARALLEL_DEVICES: int = 0
|
||||
STORAGE_IMPL_TYPE = os.getenv('STORAGE_IMPL', 'MINIO')
|
||||
STORAGE_IMPL = None
|
||||
|
||||
def get_svr_queue_name(priority: int) -> str:
|
||||
if priority == 0:
|
||||
return SVR_QUEUE_NAME
|
||||
return f"{SVR_QUEUE_NAME}_{priority}"
|
||||
def get_svr_queue_name(priority: int, suffix: str = "common") -> str:
|
||||
"""
|
||||
Generate queue name with two dimensions: priority and suffix.
|
||||
|
||||
Args:
|
||||
priority: Task priority (0=low, 1=high)
|
||||
suffix: Task type suffix (common/resume/graphrag/raptor/mindmap)
|
||||
Currently only "common" is used, other suffixes are reserved.
|
||||
|
||||
Returns:
|
||||
Queue name string
|
||||
|
||||
Examples:
|
||||
get_svr_queue_name(0, "common") -> "te.0.common"
|
||||
get_svr_queue_name(1, "common") -> "te.1.common"
|
||||
get_svr_queue_name(0) -> "te.0.common" # default suffix="common"
|
||||
|
||||
def get_svr_queue_names():
|
||||
return [get_svr_queue_name(priority) for priority in [1, 0]]
|
||||
"""
|
||||
return f"{SVR_QUEUE_NAME}.{priority}.common"
|
||||
|
||||
|
||||
def get_svr_queue_names(suffix:str):
|
||||
"""Return queue names sorted by priority (high to low)."""
|
||||
return [get_svr_queue_name(priority, suffix) for priority in [1, 0]]
|
||||
|
||||
def init_secret_key():
|
||||
secret_key = os.environ.get("RAGFLOW_SECRET_KEY")
|
||||
|
||||
Reference in New Issue
Block a user