From 0462c20113d4565844c56e54affb92b8d0f29690 Mon Sep 17 00:00:00 2001 From: qinling0210 <88864212+qinling0210@users.noreply.github.com> Date: Mon, 30 Mar 2026 18:47:10 +0800 Subject: [PATCH] Fix special characters in matching text of search() (#13852) ### What problem does this PR solve? Fix handling of special characters in the matching text of search(). Special characters (such as ?, *, and :) must be escaped or stripped before the text is passed as matching_text to search(). Fix https://github.com/infiniflow/ragflow/issues/13729 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- common/query_base.py | 2 +- rag/nlp/query.py | 8 +++++++- rag/utils/infinity_conn.py | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/common/query_base.py b/common/query_base.py index eae44514f1..c728304639 100644 --- a/common/query_base.py +++ b/common/query_base.py @@ -32,7 +32,7 @@ class QueryBase(ABC): @staticmethod def sub_special_char(line): - return re.sub(r"([:\{\}/\[\]\-\*\"\(\)\|\+~\^])", r"\\\1", line).strip() + return re.sub(r"([:\{\}/\[\]\-\*\?\"\(\)\|\+~\^])", r"\\\1", line).strip() @staticmethod def rmWWW(txt): diff --git a/rag/nlp/query.py b/rag/nlp/query.py index 39b6b439d0..2f5807147c 100644 --- a/rag/nlp/query.py +++ b/rag/nlp/query.py @@ -41,8 +41,14 @@ class FulltextQueryer(QueryBase): def question(self, txt, tbl="qa", min_match: float = 0.6): original_query = txt txt = self.add_space_between_eng_zh(txt) + + # Strip Infinity ESCAPABLE characters from the query. + # + # Infinity's search_lexer.l defines ESCAPABLE characters [\x20()^"'~*?:\\] + # If these characters appear unescaped in a query, Infinity's lexer will + # interpret them as special tokens, causing parsing errors.
txt = re.sub( - r"[ :|\r\n\t,,。??/`!!&^%%()\[\]{}<>]+", + r"[ :|\r\n\t,,。??/`!!&^%%()\[\]{}<>*~'\"\\]+", " ", rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(txt.lower())), ).strip() diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py index 1976e14270..c039a7f7c1 100644 --- a/rag/utils/infinity_conn.py +++ b/rag/utils/infinity_conn.py @@ -243,6 +243,7 @@ class InfinityConnection(InfinityConnectionBase): for matchExpr in match_expressions: if isinstance(matchExpr, MatchTextExpr): fields = ",".join(matchExpr.fields) + self.logger.info(f"INFINITY search match_text: {matchExpr.matching_text}") builder = builder.match_text( fields, matchExpr.matching_text,