Fix special characters in matching text of search() (#13852)

### What problem does this PR solve?

Fix the handling of special characters in the matching text passed to
search(). Special characters (such as `?`, `*`, and `:`) must be escaped or
stripped before the text is passed as the matching_text argument of search().

Fix https://github.com/infiniflow/ragflow/issues/13729

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
qinling0210
2026-03-30 18:47:10 +08:00
committed by GitHub
parent 0d85a8e7aa
commit 0462c20113
3 changed files with 9 additions and 2 deletions

View File

@@ -32,7 +32,7 @@ class QueryBase(ABC):
@staticmethod
def sub_special_char(line):
return re.sub(r"([:\{\}/\[\]\-\*\"\(\)\|\+~\^])", r"\\\1", line).strip()
return re.sub(r"([:\{\}/\[\]\-\*\?\"\(\)\|\+~\^])", r"\\\1", line).strip()
@staticmethod
def rmWWW(txt):

View File

@@ -41,8 +41,14 @@ class FulltextQueryer(QueryBase):
def question(self, txt, tbl="qa", min_match: float = 0.6):
original_query = txt
txt = self.add_space_between_eng_zh(txt)
# Strip Infinity ESCAPABLE characters from the query.
#
# Infinity's search_lexer.l defines ESCAPABLE characters [\x20()^"'~*?:\\]
# If these characters appear unescaped in a query, Infinity's lexer will
# interpret them as special tokens, causing parsing errors.
txt = re.sub(
r"[ :|\r\n\t,,。??/`!&^%%()\[\]{}<>]+",
r"[ :|\r\n\t,,。??/`!&^%%()\[\]{}<>*~'\"\\]+",
" ",
rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(txt.lower())),
).strip()

View File

@@ -243,6 +243,7 @@ class InfinityConnection(InfinityConnectionBase):
for matchExpr in match_expressions:
if isinstance(matchExpr, MatchTextExpr):
fields = ",".join(matchExpr.fields)
self.logger.info(f"INFINITY search match_text: {matchExpr.matching_text}")
builder = builder.match_text(
fields,
matchExpr.matching_text,