Refactor: Task Executor (#15154)

### What problem does this PR solve?

1. Break huge function into smaller pieces
2. Add unit test for the smaller pieces function
3. Layer-ed design
a. infra layer - task_context.py, recording_context.py,
write_operation_interceptor.py, ...
    b. service layer - *_service.py
    c. business layer - task_handler.py
4. Default behavior: use "refactor-ed version" - can switch to original
version by change env variable

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Refactoring
- [x] Performance Improvement

---------

Co-authored-by: Liu An <asiro@qq.com>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
This commit is contained in:
Jack
2026-05-27 21:54:17 +08:00
committed by GitHub
parent 0071e98c11
commit f0cb7a544b
55 changed files with 12707 additions and 465 deletions

View File

@@ -246,7 +246,7 @@ class ForgettingPolicy(StrEnum):
# ENV_TRACE_MALLOC_ENABLED = "TRACE_MALLOC_ENABLED"
PAGERANK_FLD = "pagerank_fea"
SVR_QUEUE_NAME = "rag_flow_svr_queue"
SVR_QUEUE_NAME = "te"
SVR_CONSUMER_GROUP_NAME = "rag_flow_svr_task_broker"
TAG_FLD = "tag_feas"

View File

@@ -13,7 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import functools
import inspect
import logging
import os
import time
def singleton(cls, *args, **kw):
instances = {}
@@ -24,4 +29,58 @@ def singleton(cls, *args, **kw):
instances[key] = cls(*args, **kw)
return instances[key]
return _singleton
return _singleton
def timing(func=None, *, name=None, context=None):
"""Decorator that records function execution time.
Usage:
@timing
async def my_func(): ...
@timing(name="custom_name")
def my_func(): ...
@timing(context=recording_ctx)
async def my_func(): ...
Args:
func: The function to decorate (auto-passed when used as @timing)
name: Custom name for the timing record, defaults to function name
context: A RecordingContext-like object to record timing data into.
If not provided, will try to use global recording_context from task_executor.
Timing data will be recorded as "{name}_time".
"""
if func is None:
return functools.partial(timing, name=name, context=context)
func_name = name or func.__name__
log = logging.getLogger(__name__)
if inspect.iscoroutinefunction(func):
@functools.wraps(func)
async def async_wrapper(*args, **kwargs):
start = time.perf_counter()
try:
result = await func(*args, **kwargs)
return result
finally:
elapsed = time.perf_counter() - start
log.debug(f"[TIMING] {func_name} took {elapsed:.3f}s")
if context is not None:
context.record(f"{func_name}_time", elapsed)
return async_wrapper
else:
@functools.wraps(func)
def sync_wrapper(*args, **kwargs):
start = time.perf_counter()
try:
result = func(*args, **kwargs)
return result
finally:
elapsed = time.perf_counter() - start
log.debug(f"[TIMING] {func_name} took {elapsed:.3f}s")
if context is not None:
context.record(f"{func_name}_time", elapsed)
return sync_wrapper

View File

@@ -133,13 +133,30 @@ PARALLEL_DEVICES: int = 0
STORAGE_IMPL_TYPE = os.getenv('STORAGE_IMPL', 'MINIO')
STORAGE_IMPL = None
def get_svr_queue_name(priority: int) -> str:
if priority == 0:
return SVR_QUEUE_NAME
return f"{SVR_QUEUE_NAME}_{priority}"
def get_svr_queue_name(priority: int, suffix: str = "common") -> str:
"""
Generate queue name with two dimensions: priority and suffix.
Args:
priority: Task priority (0=low, 1=high)
suffix: Task type suffix (common/resume/graphrag/raptor/mindmap)
Currently only "common" is used, other suffixes are reserved.
Returns:
Queue name string
Examples:
get_svr_queue_name(0, "common") -> "te.0.common"
get_svr_queue_name(1, "common") -> "te.1.common"
get_svr_queue_name(0) -> "te.0.common" # default suffix="common"
def get_svr_queue_names():
return [get_svr_queue_name(priority) for priority in [1, 0]]
"""
return f"{SVR_QUEUE_NAME}.{priority}.common"
def get_svr_queue_names(suffix:str):
"""Return queue names sorted by priority (high to low)."""
return [get_svr_queue_name(priority, suffix) for priority in [1, 0]]
def init_secret_key():
secret_key = os.environ.get("RAGFLOW_SECRET_KEY")