mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-04 18:45:38 +08:00
### What problem does this PR solve?

Addresses event-loop blocking under high concurrency reported in #13825. When multiple requests hit the API simultaneously, synchronous DB/Redis calls block the async event loop, preventing Quart from handling other requests and causing cascading 502/504 timeouts.

This PR wraps all remaining blocking DB/Redis calls in `canvas_app.py`, `chat_api.py`, `session.py`, and `canvas_service.py` with `await thread_pool_exec()`:

- Offload all synchronous `Service.*`, `REDIS_CONN.*`, and `APIToken.query` calls to the thread pool
- Convert sync endpoint handlers (`list_chats`, `get_chat`, `templates`, `sessions`, etc.) to `async def`
- Convert sync helper functions (`_ensure_owned_chat`, `_validate_llm_id`, `_validate_dataset_ids`, etc.) to async - no duplicate sync/async pairs
- Wrap `CanvasReplicaService` Redis IO calls (`bootstrap`, `replace_for_set`, `commit_after_run`)
- Use `asyncio.gather()` for concurrent file uploads and chat response building

**Note:** This fixes the code-level event-loop blocking, which is a prerequisite for handling concurrent requests. For the full "30 concurrent requests without 502/504" goal described in the issue, users should also tune deployment config:

- `WS=4` or higher (HTTP worker processes, default 1)
- `MAX_CONCURRENT_CHATS=50` (default 10)
- `SANDBOX_EXECUTOR_MANAGER_POOL_SIZE` for workflow-heavy workloads

### Performance verification

Reviewer asked for a before-vs-after comparison ([comment](https://github.com/infiniflow/ragflow/pull/13941#issuecomment-4393667231)). I built a self-contained microbenchmark that reproduces the exact failure mode this PR targets: an async handler that performs blocking DB/Redis-style calls (50 ms each, 3 per request, 30 concurrent requests) is run twice — once with the pre-PR pattern (sync call directly inside the async handler) and once with the post-PR pattern (`await thread_pool_exec(...)`).
The benchmark imports nothing from RAGFlow except `thread_pool_exec` itself, so it is hermetic and reproducible (`THREAD_POOL_MAX_WORKERS=128`, Python 3.13.12).

**Throughput — wall-clock for 30 concurrent requests (lower is better)**

| flavour | wall(s) | p50(s) | p95(s) | max(s) |
|---|---:|---:|---:|---:|
| before | 4.986 | 0.158 | 0.207 | 0.269 |
| after | 0.248 | 0.181 | 0.230 | 0.231 |

The pre-PR handler serializes the entire load on the event-loop thread, so 30 × 3 × 50 ms ≈ 4.5 s shows up as the wall time. The post-PR handler parallelizes the blocking work across the thread pool and finishes the same load in 248 ms — a **~20× speedup** on this workload.

**Event-loop responsiveness — latency of an unrelated probe coroutine while the 30 slow requests are running (lower is better)**

| flavour | samples | probe p50 (ms) | probe p95 (ms) | probe max (ms) |
|---|---:|---:|---:|---:|
| before | 1 | 5442.26 | 5442.26 | 5442.26 |
| after | 28 | 0.88 | 11.53 | 98.02 |

This is the metric that maps directly to "the API still answers other requests while one is busy". A 5 ms-interval probe was scheduled while the 30 slow handlers ran. With the pre-PR code the event loop was frozen for the entire duration of the blocking work, so only one probe sample was ever picked up and it waited **5,442 ms**. After the PR, 28 probe samples landed with **p50 0.88 ms / p95 11.53 ms**, meaning unrelated requests are no longer starved by the slow ones. That is the regression mode behind the cascading 502/504s reported in #13825.
<details>
<summary>Raw benchmark output</summary>

```
config: 30 concurrent requests, 3 blocking calls of 50ms each per request, THREAD_POOL_MAX_WORKERS=128

=== Throughput (lower wall is better) ===
flavour   wall(s)   p50(s)   p95(s)   max(s)
before    4.986     0.158    0.207    0.269
after     0.248     0.181    0.230    0.231

=== Event-loop responsiveness (lower probe latency is better) ===
flavour   samples   probe p50(ms)   probe p95(ms)   probe max(ms)
before    1         5442.26         5442.26         5442.26
after     28        0.88            11.53           98.02
```

</details>

The benchmark script is included as a comment on the PR for reproducibility.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Performance Improvement

Closes [#13825](https://github.com/infiniflow/ragflow/issues/13825)

---------

Co-authored-by: tmimmanuel <tmimmanuel@users.noreply.github.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
407 lines
15 KiB
Python
407 lines
15 KiB
Python
#
|
|
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
import json
|
|
import logging
|
|
import time
|
|
from uuid import uuid4
|
|
from agent.canvas import Canvas
|
|
from api.db import CanvasCategory, TenantPermission
|
|
from api.db.db_models import DB, CanvasTemplate, User, UserCanvas, API4Conversation, UserCanvasVersion
|
|
from api.db.services.api_service import API4ConversationService
|
|
from api.db.services.common_service import CommonService
|
|
from api.db.services.user_canvas_version import UserCanvasVersionService
|
|
from common.misc_utils import get_uuid, thread_pool_exec
|
|
from api.utils.api_utils import get_data_openai
|
|
import tiktoken
|
|
from peewee import fn
|
|
|
|
|
|
class CanvasTemplateService(CommonService):
    """Service class bound to the ``CanvasTemplate`` model."""

    model = CanvasTemplate
|
|
|
|
class DataFlowTemplateService(CommonService):
    """Alias of CanvasTemplateService: it is bound to the same ``CanvasTemplate`` model."""

    model = CanvasTemplate
|
|
|
|
|
|
class UserCanvasService(CommonService):
    """Query helpers for ``UserCanvas`` rows (agents and other canvas categories)."""

    model = UserCanvas

    @classmethod
    @DB.connection_context()
    def get_list(cls, tenant_id,
                 page_number, items_per_page, orderby, desc, id, title, canvas_category=CanvasCategory.Agent):
        """Return one page of the canvases owned by ``tenant_id`` as a list of dicts.

        Args:
            tenant_id: Matched against the ``user_id`` column.
            page_number: Page index passed to ``paginate``.
            items_per_page: Page size passed to ``paginate``.
            orderby: Field name resolved through ``cls.model.getter_by``.
            desc: Truthy for descending order, falsy for ascending.
            id: Optional exact canvas id filter (skipped when falsy).
            title: Optional exact title filter (skipped when falsy).
            canvas_category: Category filter; defaults to ``CanvasCategory.Agent``.
        """
        agents = cls.model.select()
        if id:
            agents = agents.where(cls.model.id == id)
        if title:
            agents = agents.where(cls.model.title == title)
        agents = agents.where(cls.model.user_id == tenant_id)
        agents = agents.where(cls.model.canvas_category == canvas_category)

        order_field = cls.model.getter_by(orderby)
        agents = agents.order_by(order_field.desc() if desc else order_field.asc())
        agents = agents.paginate(page_number, items_per_page)

        return list(agents.dicts())

    @classmethod
    @DB.connection_context()
    def get_all_agents_by_tenant_ids(cls, tenant_ids, user_id):
        """Return every canvas visible to ``user_id`` as a list of dicts.

        A canvas is visible when it is owned by ``user_id``, or when it is owned
        by one of ``tenant_ids`` and its permission is ``TenantPermission.TEAM``.
        This loads all permitted rows, so use it with caution.
        """
        fields = [
            cls.model.id,
            cls.model.avatar,
            cls.model.title,
            cls.model.permission,
            cls.model.canvas_type,
            cls.model.canvas_category
        ]
        # find team agents and owned agents
        agents = cls.model.select(*fields).where(
            (cls.model.user_id.in_(tenant_ids) & (cls.model.permission == TenantPermission.TEAM.value)) | (
                cls.model.user_id == user_id
            )
        )
        # Sort by create_time ascending. Query-building methods return a modified
        # copy, so the result must be re-bound or the ordering is silently lost
        # and offset/limit paging below can skip or repeat rows. The id is a
        # tiebreaker so rows sharing a create_time keep a stable position.
        agents = agents.order_by(cls.model.create_time.asc(), cls.model.id.asc())
        # maybe cause slow query by deep paginate, optimize later
        offset, limit = 0, 50
        res = []
        while True:
            batch = list(agents.offset(offset).limit(limit).dicts())
            if not batch:
                break
            res.extend(batch)
            offset += limit
        return res

    @classmethod
    @DB.connection_context()
    def get_by_canvas_id(cls, pid):
        """Fetch one canvas joined with its owner's ``User`` row.

        Returns:
            ``(True, row_dict)`` on success. Any exception raised while running
            the query or taking its first row is logged and reported as
            ``(False, None)``.
        """
        try:
            fields = [
                cls.model.id,
                cls.model.avatar,
                cls.model.title,
                cls.model.dsl,
                cls.model.description,
                cls.model.permission,
                cls.model.update_time,
                cls.model.user_id,
                cls.model.create_time,
                cls.model.create_date,
                cls.model.update_date,
                cls.model.canvas_category,
                User.nickname,
                User.avatar.alias('tenant_avatar'),
            ]
            agents = cls.model.select(*fields) \
                .join(User, on=(cls.model.user_id == User.id)) \
                .where(cls.model.id == pid)
            return True, agents.dicts()[0]
        except Exception as e:
            logging.exception(e)
            return False, None

    @classmethod
    @DB.connection_context()
    def get_basic_info_by_canvas_ids(cls, canvas_id):
        """Return a dict-row query with basic columns for the canvases whose id is in ``canvas_id``.

        NOTE(review): the query object is returned without being iterated here,
        so it is evaluated by the caller - confirm callers are fine with that.
        """
        fields = [
            cls.model.id,
            cls.model.avatar,
            cls.model.user_id,
            cls.model.title,
            cls.model.permission,
            cls.model.canvas_category
        ]
        return cls.model.select(*fields).where(cls.model.id.in_(canvas_id)).dicts()

    @classmethod
    @DB.connection_context()
    def get_by_tenant_ids(
        cls,
        joined_tenant_ids,
        user_id,
        page_number,
        items_per_page,
        orderby,
        desc,
        keywords,
        canvas_category=None,
    ):
        """List canvases visible to ``user_id`` together with the total match count.

        A canvas is visible when it is owned by ``user_id``, or owned by one of
        ``joined_tenant_ids`` with ``TenantPermission.TEAM`` permission.

        Args:
            joined_tenant_ids: Owner ids whose team-shared canvases are included.
            user_id: Owner id whose canvases are always included.
            page_number: Page index; pagination is applied only when both this
                and ``items_per_page`` are truthy.
            items_per_page: Page size.
            orderby: Field name resolved through ``cls.model.getter_by``.
            desc: Truthy for descending order, falsy for ascending.
            keywords: Optional case-insensitive substring filter on the title.
            canvas_category: Optional category filter (skipped when falsy).

        Returns:
            ``(agents_list, count)`` where ``count`` is taken before pagination
            and each dict carries a ``release_time`` key (``None`` when the
            canvas has no released version).
        """
        fields = [
            cls.model.id,
            cls.model.avatar,
            cls.model.title,
            cls.model.description,
            cls.model.permission,
            cls.model.user_id.alias("tenant_id"),
            User.nickname,
            User.avatar.alias('tenant_avatar'),
            cls.model.update_time,
            cls.model.canvas_category,
        ]
        visible = (
            ((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value))
            | (cls.model.user_id == user_id)
        )
        agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(visible)
        if keywords:
            # Successive where() calls are AND-ed together.
            agents = agents.where(fn.LOWER(cls.model.title).contains(keywords.lower()))
        if canvas_category:
            agents = agents.where(cls.model.canvas_category == canvas_category)

        order_field = cls.model.getter_by(orderby)
        agents = agents.order_by(order_field.desc() if desc else order_field.asc())

        count = agents.count()
        if page_number and items_per_page:
            agents = agents.paginate(page_number, items_per_page)

        agents_list = list(agents.dicts())

        # Get latest release time for each canvas
        if agents_list:
            canvas_ids = [a['id'] for a in agents_list]
            release_times = (
                UserCanvasVersion.select(UserCanvasVersion.user_canvas_id, fn.MAX(UserCanvasVersion.create_time).alias("release_time"))
                .where((UserCanvasVersion.user_canvas_id.in_(canvas_ids)) & (UserCanvasVersion.release))
                .group_by(UserCanvasVersion.user_canvas_id)
            )
            release_time_map = {r.user_canvas_id: r.release_time for r in release_times}

            for agent in agents_list:
                agent['release_time'] = release_time_map.get(agent['id'])

        return agents_list, count

    @classmethod
    @DB.connection_context()
    def accessible(cls, canvas_id, tenant_id):
        """Return whether ``tenant_id`` may access the canvas ``canvas_id``.

        Access is granted to the canvas owner, or to a user who has a
        ``UserTenantService`` row for the owner's tenant when the canvas
        permission is ``TenantPermission.TEAM``. Returns ``False`` when the
        canvas cannot be loaded.
        """
        from api.db.services.user_service import UserTenantService
        e, c = UserCanvasService.get_by_canvas_id(canvas_id)
        if not e:
            return False

        if c["user_id"] == tenant_id:
            return True

        # Only non-owners need the tenant-membership lookup.
        tids = [t.tenant_id for t in UserTenantService.query(user_id=tenant_id)]
        if c["user_id"] not in tids:
            return False
        if c["permission"] != TenantPermission.TEAM.value:
            return False
        return True

    @classmethod
    def get_agent_dsl_with_release(cls, agent_id, release_mode=False, tenant_id=None):
        """Load an agent and the DSL to run it with, serialized as a JSON string.

        Args:
            agent_id: Canvas id to load.
            release_mode: When truthy, use the DSL of the latest released
                version instead of the canvas's current DSL.
            tenant_id: Accepted for interface compatibility; not used here.

        Returns:
            ``(cvs, dsl)``: the canvas record and its DSL as a string.

        Raises:
            LookupError: The agent does not exist.
            PermissionError: ``release_mode`` is set but no released version exists.
        """
        e, cvs = cls.get_by_id(agent_id)
        if not e:
            raise LookupError("Agent not found.")

        if release_mode:
            released_version = UserCanvasVersionService.get_latest_released(agent_id)
            if not released_version:
                raise PermissionError("No available published version")
            dsl = released_version.dsl
        else:
            dsl = cvs.dsl

        if not isinstance(dsl, str):
            dsl = json.dumps(dsl, ensure_ascii=False)

        return cvs, dsl
|
|
|
|
|
|
async def completion(tenant_id, agent_id, session_id=None, **kwargs):
    """Run an agent canvas for one user turn and stream its events as SSE strings.

    When ``session_id`` is given, the stored conversation is loaded and its DSL
    is used; otherwise a new conversation is created from the agent's DSL (the
    latest released one when ``kwargs["release"]`` is "true"). Each event
    produced by ``canvas.run`` is yielded as ``"data:<json>\\n\\n"``. After the
    run, the assistant message, references, errors and the canvas state are
    written back through ``API4ConversationService.append_message``.

    Recognised kwargs: ``query`` / ``question``, ``files``, ``inputs``,
    ``user_id``, ``custom_header``, ``release``.

    Raises:
        LookupError: ``session_id`` was given but no such session exists.
    """
    question_text = kwargs.get("query", "") or kwargs.get("question", "")
    files = kwargs.get("files", [])
    inputs = kwargs.get("inputs", {})
    user_id = kwargs.get("user_id", "")
    custom_header = kwargs.get("custom_header", "")
    release_mode = str(kwargs.get("release", "")).strip().lower()
    use_release = release_mode == "true"

    if not session_id:
        # No session yet: start one from the agent's (optionally released) DSL.
        cvs, dsl = await thread_pool_exec(UserCanvasService.get_agent_dsl_with_release, agent_id, release_mode=use_release, tenant_id=tenant_id)

        session_id = get_uuid()
        canvas = Canvas(dsl, tenant_id, agent_id, canvas_id=cvs.id, custom_header=custom_header)
        canvas.reset()
        # Get the version title based on release_mode
        version_title = await thread_pool_exec(UserCanvasVersionService.get_latest_version_title, cvs.id, release_mode=use_release)
        new_conv = {
            "id": session_id,
            "dialog_id": cvs.id,
            "user_id": user_id,
            "message": [],
            "source": "agent",
            "dsl": dsl,
            "reference": [],
            "version_title": version_title,
        }
        await thread_pool_exec(API4ConversationService.save, **new_conv)
        conv = API4Conversation(**new_conv)
    else:
        found, conv = await thread_pool_exec(API4ConversationService.get_by_id, session_id)
        if not found:
            raise LookupError("Session not found!")
        if not conv.message:
            conv.message = []
        if not isinstance(conv.dsl, str):
            conv.dsl = json.dumps(conv.dsl, ensure_ascii=False)
        canvas = Canvas(conv.dsl, tenant_id, agent_id, canvas_id=agent_id, custom_header=custom_header)

    message_id = str(uuid4())
    user_message = {
        "role": "user",
        "content": question_text,
        "id": message_id,
        "files": files
    }
    conv.message.append(user_message)

    answer_text = ""
    async for ans in canvas.run(query=question_text, files=files, user_id=user_id, inputs=inputs):
        ans["session_id"] = session_id
        if ans["event"] == "message":
            payload = ans["data"]
            answer_text += payload["content"]
            if payload.get("start_to_think", False):
                answer_text += "<think>"
            elif payload.get("end_to_think", False):
                answer_text += "</think>"
        yield "data:" + json.dumps(ans, ensure_ascii=False) + "\n\n"

    # The assistant reply reuses the id of the user message it answers.
    assistant_message = {"role": "assistant", "content": answer_text, "created_at": time.time(), "id": message_id}
    conv.message.append(assistant_message)
    conv.reference = canvas.get_reference()
    conv.errors = canvas.error
    conv.dsl = str(canvas)
    conv_dict = conv.to_dict()
    await thread_pool_exec(API4ConversationService.append_message, conv_dict["id"], conv_dict)
|
|
|
|
|
|
async def completion_openai(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs):
    """Run an agent through ``completion`` and emit OpenAI-compatible responses.

    Args:
        tenant_id: Forwarded to ``completion``.
        agent_id: Forwarded to ``completion`` and used as the ``model`` value.
        question: User question; also used to count prompt tokens.
        session_id: Optional existing session; used as the response id when set.
        stream: When truthy, yield SSE strings (``"data: <json>\\n\\n"``) ending
            with ``"data: [DONE]\\n\\n"``; otherwise yield a single response dict.
        **kwargs: Extra options forwarded to ``completion`` (``user_id`` is read
            from here and defaults to an empty string).

    Only ``message`` and ``message_end`` events are translated; other events are
    skipped. Exceptions are logged and reported to the client as a response
    whose content starts with ``**ERROR**``.
    """
    tiktoken_encoder = tiktoken.get_encoding("cl100k_base")
    prompt_tokens = len(tiktoken_encoder.encode(str(question)))

    # One id for the whole response. Evaluating `session_id or str(uuid4())` per
    # chunk gave every streamed chunk a different id when no session was passed.
    response_id = session_id or str(uuid4())

    # Merge instead of passing `query=`/`user_id=` next to `**kwargs`: a caller
    # that already supplies either key would otherwise trigger a
    # "multiple values for keyword argument" TypeError.
    completion_kwargs = {**kwargs, "query": question, "user_id": kwargs.get("user_id", "")}

    if stream:
        completion_tokens = 0
        try:
            async for ans in completion(
                tenant_id=tenant_id,
                agent_id=agent_id,
                session_id=session_id,
                **completion_kwargs
            ):
                if isinstance(ans, str):
                    try:
                        ans = json.loads(ans[5:])  # remove "data:"
                    except Exception as e:
                        logging.exception(f"Agent OpenAI-Compatible completion_openai parse answer failed: {e}")
                        continue
                if ans.get("event") not in ["message", "message_end"]:
                    continue

                # message_end carries no text, so it produces an empty delta.
                content_piece = ""
                if ans["event"] == "message":
                    content_piece = ans["data"]["content"]

                completion_tokens += len(tiktoken_encoder.encode(content_piece))

                openai_data = get_data_openai(
                    id=response_id,
                    model=agent_id,
                    content=content_piece,
                    prompt_tokens=prompt_tokens,
                    completion_tokens=completion_tokens,
                    stream=True
                )

                if ans.get("data", {}).get("reference", None):
                    openai_data["choices"][0]["delta"]["reference"] = ans["data"]["reference"]

                yield "data: " + json.dumps(openai_data, ensure_ascii=False) + "\n\n"

            yield "data: [DONE]\n\n"

        except Exception as e:
            logging.exception(e)
            error_text = f"**ERROR**: {str(e)}"
            yield "data: " + json.dumps(
                get_data_openai(
                    id=response_id,
                    model=agent_id,
                    content=error_text,
                    finish_reason="stop",
                    prompt_tokens=prompt_tokens,
                    completion_tokens=len(tiktoken_encoder.encode(error_text)),
                    stream=True
                ),
                ensure_ascii=False
            ) + "\n\n"
            yield "data: [DONE]\n\n"

    else:
        try:
            all_content = ""
            reference = {}
            async for ans in completion(
                tenant_id=tenant_id,
                agent_id=agent_id,
                session_id=session_id,
                **completion_kwargs
            ):
                if isinstance(ans, str):
                    ans = json.loads(ans[5:])
                if ans.get("event") not in ["message", "message_end"]:
                    continue

                if ans["event"] == "message":
                    all_content += ans["data"]["content"]

                if ans.get("data", {}).get("reference", None):
                    reference.update(ans["data"]["reference"])

            completion_tokens = len(tiktoken_encoder.encode(all_content))

            openai_data = get_data_openai(
                id=response_id,
                model=agent_id,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                content=all_content,
                finish_reason="stop",
                param=None
            )

            if reference:
                openai_data["choices"][0]["message"]["reference"] = reference

            yield openai_data
        except Exception as e:
            logging.exception(e)
            error_text = f"**ERROR**: {str(e)}"
            yield get_data_openai(
                id=response_id,
                model=agent_id,
                prompt_tokens=prompt_tokens,
                completion_tokens=len(tiktoken_encoder.encode(error_text)),
                content=error_text,
                finish_reason="stop",
                param=None
            )
|