mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-03 01:01:56 +08:00
QueryRewrite prompt builder and response parser. Zero external dependencies.

### Functions

- `BuildQueryRewritePrompt`: Renders the `minirag_query2kwd` prompt with the query and type pool
- `ParseQueryRewriteResponse`: Parses the LLM JSON response, with fallbacks for markdown code blocks and surrounding extra text

### Testing

```
=== RUN TestBuildQueryRewritePrompt
--- PASS
=== RUN TestParseQueryRewriteResponse_ValidJSON
--- PASS
=== RUN TestParseQueryRewriteResponse_MarkdownBlock
--- PASS
=== RUN TestParseQueryRewriteResponse_ExtraText
--- PASS
=== RUN TestParseQueryRewriteResponse_Invalid
--- PASS
=== RUN TestParseQueryRewriteResponse_EmptyEntities
--- PASS
```

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
173 lines
5.8 KiB
Go
173 lines
5.8 KiB
Go
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package common
|
|
|
|
import (
|
|
"encoding/json"
|
|
"strings"
|
|
)
|
|
|
|
// QueryRewriteResult holds the parsed result of a query rewrite: the two
// keyword lists the rewrite prompt asks the LLM to return as a JSON object.
type QueryRewriteResult struct {
	// TypeKeywords is decoded from the "answer_type_keywords" JSON key.
	TypeKeywords []string `json:"answer_type_keywords"`

	// Entities is decoded from the "entities_from_query" JSON key.
	Entities []string `json:"entities_from_query"`
}
|
|
|
|
// queryRewritePromptTmpl is the system prompt template for query rewriting.
// Matches Python: rag/graphrag/query_analyze_prompt.py::PROMPTS["minirag_query2kwd"]
//
// The template carries two placeholders, {query} and {TYPE_POOL}, which
// BuildQueryRewritePrompt substitutes literally. Every other brace is sent to
// the model as-is, so the example dictionaries and JSON objects use single,
// balanced braces (no format-string style "{{" / "}}" escaping).
//
// NOTE(review): the instructions say "three keys" but list two; presumably
// inherited from the reference prompt, so the wording is left unchanged.
const queryRewritePromptTmpl = `---Role---

You are a helpful assistant tasked with identifying both answer-type and low-level keywords in the user's query.

---Goal---

Given the query, list both answer-type and low-level keywords.
answer_type_keywords focus on the type of the answer to the certain query, while low-level keywords focus on specific entities, details, or concrete terms.
The answer_type_keywords must be selected from Answer type pool.
This pool is in the form of a dictionary, where the key represents the Type you should choose from and the value represents the example samples.

---Instructions---

- Output the keywords in JSON format.
- The JSON should have three keys:
- "answer_type_keywords" for the types of the answer. In this list, the types with the highest likelihood should be placed at the forefront. No more than 3.
- "entities_from_query" for specific entities or details. It must be extracted from the query.
######################
-Examples-
######################
Example 1:

Query: "How does international trade influence global economic stability?"
Answer type pool: {
'PERSONAL LIFE': ['FAMILY TIME', 'HOME MAINTENANCE'],
'STRATEGY': ['MARKETING PLAN', 'BUSINESS EXPANSION'],
'SERVICE FACILITATION': ['ONLINE SUPPORT', 'CUSTOMER SERVICE TRAINING'],
'PERSON': ['JANE DOE', 'JOHN SMITH'],
'FOOD': ['PASTA', 'SUSHI'],
'EMOTION': ['HAPPINESS', 'ANGER'],
'PERSONAL EXPERIENCE': ['TRAVEL ABROAD', 'STUDYING ABROAD'],
'INTERACTION': ['TEAM MEETING', 'NETWORKING EVENT'],
'BEVERAGE': ['COFFEE', 'TEA'],
'PLAN': ['ANNUAL BUDGET', 'PROJECT TIMELINE'],
'GEO': ['NEW YORK CITY', 'SOUTH AFRICA'],
'GEAR': ['CAMPING TENT', 'CYCLING HELMET'],
'EMOJI': ['🎉', '🚀'],
'BEHAVIOR': ['POSITIVE FEEDBACK', 'NEGATIVE CRITICISM'],
'TONE': ['FORMAL', 'INFORMAL'],
'LOCATION': ['DOWNTOWN', 'SUBURBS']
}
################
Output:
{
"answer_type_keywords": ["STRATEGY","PERSONAL LIFE"],
"entities_from_query": ["Trade agreements", "Tariffs", "Currency exchange", "Imports", "Exports"]
}
#############################
Example 2:

Query: "Where is the capital of the United States?"
Answer type pool: {
'ORGANIZATION': ['GREENPEACE', 'RED CROSS'],
'PERSONAL LIFE': ['DAILY WORKOUT', 'HOME COOKING'],
'STRATEGY': ['FINANCIAL INVESTMENT', 'BUSINESS EXPANSION'],
'SERVICE FACILITATION': ['ONLINE SUPPORT', 'CUSTOMER SERVICE TRAINING'],
'PERSON': ['ALBERTA SMITH', 'BENJAMIN JONES'],
'FOOD': ['PASTA CARBONARA', 'SUSHI PLATTER'],
'EMOTION': ['HAPPINESS', 'SADNESS'],
'PERSONAL EXPERIENCE': ['TRAVEL ADVENTURE', 'BOOK CLUB'],
'INTERACTION': ['TEAM BUILDING', 'NETWORKING MEETUP'],
'BEVERAGE': ['LATTE', 'GREEN TEA'],
'PLAN': ['WEIGHT LOSS', 'CAREER DEVELOPMENT'],
'GEO': ['PARIS', 'NEW YORK'],
'GEAR': ['CAMERA', 'HEADPHONES'],
'EMOJI': ['🏢', '🌍'],
'BEHAVIOR': ['POSITIVE THINKING', 'STRESS MANAGEMENT'],
'TONE': ['FRIENDLY', 'PROFESSIONAL'],
'LOCATION': ['DOWNTOWN', 'SUBURBS']
}
################
Output:
{
"answer_type_keywords": ["LOCATION"],
"entities_from_query": ["capital of the United States", "Washington", "New York"]
}
#############################

-Real Data-
######################
Query: {query}
Answer type pool:{TYPE_POOL}
######################
Output:
`
|
|
|
|
// BuildQueryRewritePrompt builds the system prompt for query rewrite.
|
|
func BuildQueryRewritePrompt(question string, ty2entsJSON string) string {
|
|
r := strings.NewReplacer(
|
|
"{query}", question,
|
|
"{TYPE_POOL}", ty2entsJSON,
|
|
)
|
|
return r.Replace(queryRewritePromptTmpl)
|
|
}
|
|
|
|
// ParseQueryRewriteResponse parses the LLM response and returns structured keywords.
|
|
// Handles JSON parsing with fallback logic matching Python's json_repair behavior.
|
|
func ParseQueryRewriteResponse(response string) (*QueryRewriteResult, error) {
|
|
// Try direct JSON parsing first
|
|
result, err := tryParseJSON(response)
|
|
if err == nil {
|
|
return result, nil
|
|
}
|
|
|
|
// Fallback: try to extract JSON from markdown code blocks
|
|
cleaned := strings.TrimSpace(response)
|
|
if idx := strings.Index(cleaned, "```"); idx >= 0 {
|
|
rest := cleaned[idx+3:]
|
|
if end := strings.Index(rest, "```"); end >= 0 {
|
|
code := strings.TrimSpace(rest[:end])
|
|
code = strings.TrimPrefix(code, "json")
|
|
code = strings.TrimSpace(code)
|
|
result, err := tryParseJSON(code)
|
|
if err == nil {
|
|
return result, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// Fallback: extract first JSON object
|
|
start := strings.Index(cleaned, "{")
|
|
end := strings.LastIndex(cleaned, "}")
|
|
if start >= 0 && end > start {
|
|
candidate := cleaned[start : end+1]
|
|
result, err := tryParseJSON(candidate)
|
|
if err == nil {
|
|
return result, nil
|
|
}
|
|
}
|
|
|
|
return nil, err // return the original error
|
|
}
|
|
|
|
// tryParseJSON attempts to parse a JSON string into QueryRewriteResult.
|
|
func tryParseJSON(data string) (*QueryRewriteResult, error) {
|
|
var result QueryRewriteResult
|
|
if err := json.Unmarshal([]byte(data), &result); err != nil {
|
|
return nil, err
|
|
}
|
|
return &result, nil
|
|
}
|