mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-05 10:58:34 +08:00
feat: Auto-adjust chunk recall weights based on user feedback (#12689)
### What problem does this PR solve?

Implements automatic adjustment of knowledge base chunk recall weights based on user feedback (upvotes/downvotes). When users upvote or downvote a response, the system locates the corresponding knowledge snippets and adjusts their recall weight to improve future retrieval quality.

**Closes #12670**

**How it works:**

1. User upvotes/downvotes a response via `POST /thumbup`
2. System extracts chunk IDs from the conversation reference
3. For each referenced chunk:
   - Reads current `pagerank_fea` value from document store
   - Increments (+1) for upvote or decrements (-1) for downvote
   - Clamps weight to [0, 100] range
   - Updates chunk in ES/Infinity/OceanBase
4. Future retrievals score these chunks higher/lower based on accumulated feedback

**Files changed:**

- `api/db/services/chunk_feedback_service.py` - New service for updating chunk pagerank weights
- `api/apps/conversation_app.py` - Integrated feedback service into thumbup endpoint
- `test/testcases/test_web_api/test_chunk_feedback/` - Unit tests

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Chat message feedback now updates per-chunk relevance weights (feature-flag gated), with configurable weighting and atomic updates across storage backends.
* **Bug Fixes**
  * Stricter validation for message feedback inputs and more robust handling of feedback transitions.
* **Tests**
  * Expanded test coverage for chunk-feedback behavior, weighting strategies, storage backends, and thumb-flip scenarios.
* **Chores**
  * CI workflow extended to run the new chunk-feedback web API tests.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: mkdev11 <YOUR_GITHUB_ID+MkDev11@users.noreply.github.com>
Co-authored-by: mkdev11 <MkDev11@users.noreply.github.com>
This commit is contained in:
@@ -28,6 +28,7 @@ from api.db.joint_services.tenant_model_service import (
|
||||
get_model_config_by_type_and_name,
|
||||
get_tenant_default_model_by_type,
|
||||
)
|
||||
from api.db.services.chunk_feedback_service import ChunkFeedbackService
|
||||
from api.db.services.conversation_service import ConversationService, structure_answer
|
||||
from api.db.services.dialog_service import DialogService, async_ask, async_chat, gen_mindmap
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
@@ -769,28 +770,64 @@ async def delete_session_message(chat_id, session_id, msg_id):
|
||||
@manager.route("/chats/<chat_id>/sessions/<session_id>/messages/<msg_id>/feedback", methods=["PUT"]) # noqa: F821
|
||||
@login_required
|
||||
async def update_message_feedback(chat_id, session_id, msg_id):
|
||||
if not _ensure_owned_chat(chat_id):
|
||||
owned = _ensure_owned_chat(chat_id)
|
||||
if not owned:
|
||||
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
|
||||
try:
|
||||
req = await get_request_json()
|
||||
ok, conv = ConversationService.get_by_id(session_id)
|
||||
if not ok or conv.dialog_id != chat_id:
|
||||
return get_data_error_result(message="Session not found!")
|
||||
up_down = req.get("thumbup")
|
||||
thumb_raw = req.get("thumbup")
|
||||
if not isinstance(thumb_raw, bool):
|
||||
return get_data_error_result(message="thumbup must be a boolean")
|
||||
feedback = req.get("feedback", "")
|
||||
conv = conv.to_dict()
|
||||
for msg in conv["message"]:
|
||||
conv_dict = conv.to_dict()
|
||||
message_index = None
|
||||
apply_chunk_feedback = False
|
||||
prior_thumb = None
|
||||
for i, msg in enumerate(conv_dict["message"]):
|
||||
if msg_id == msg.get("id", "") and msg.get("role", "") == "assistant":
|
||||
if up_down:
|
||||
prior_thumb = msg.get("thumbup")
|
||||
if thumb_raw is True:
|
||||
msg["thumbup"] = True
|
||||
msg.pop("feedback", None)
|
||||
apply_chunk_feedback = prior_thumb is not True
|
||||
else:
|
||||
msg["thumbup"] = False
|
||||
if feedback:
|
||||
msg["feedback"] = feedback
|
||||
apply_chunk_feedback = prior_thumb is not False
|
||||
message_index = i
|
||||
break
|
||||
ConversationService.update_by_id(conv["id"], conv)
|
||||
return get_json_result(data=_build_session_response(conv))
|
||||
|
||||
if message_index is not None and apply_chunk_feedback:
|
||||
try:
|
||||
ref_index = (message_index - 1) // 2
|
||||
if 0 <= ref_index < len(conv_dict.get("reference", [])):
|
||||
reference = conv_dict["reference"][ref_index]
|
||||
if reference:
|
||||
if isinstance(prior_thumb, bool) and prior_thumb != thumb_raw:
|
||||
ChunkFeedbackService.apply_feedback(
|
||||
tenant_id=current_user.id,
|
||||
reference=reference,
|
||||
is_positive=not prior_thumb,
|
||||
)
|
||||
feedback_result = ChunkFeedbackService.apply_feedback(
|
||||
tenant_id=current_user.id,
|
||||
reference=reference,
|
||||
is_positive=thumb_raw is True,
|
||||
)
|
||||
logging.debug(
|
||||
"Chunk feedback applied: %s succeeded, %s failed",
|
||||
feedback_result["success_count"],
|
||||
feedback_result["fail_count"],
|
||||
)
|
||||
except Exception as e:
|
||||
logging.warning("Failed to apply chunk feedback: %s", e)
|
||||
|
||||
ConversationService.update_by_id(conv_dict["id"], conv_dict)
|
||||
return get_json_result(data=_build_session_response(conv_dict))
|
||||
except Exception as ex:
|
||||
return server_error_response(ex)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user