mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Fix special characters in matching text of search() (#13852)
### What problem does this PR solve?

Fix special characters in the matching text of search(). We should escape some special characters (such as `?`, `*`, and `:`) before passing them to the matching_text of search().

Fixes https://github.com/infiniflow/ragflow/issues/13729

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -32,7 +32,7 @@ class QueryBase(ABC):
|
||||
|
||||
@staticmethod
|
||||
def sub_special_char(line):
|
||||
return re.sub(r"([:\{\}/\[\]\-\*\"\(\)\|\+~\^])", r"\\\1", line).strip()
|
||||
return re.sub(r"([:\{\}/\[\]\-\*\?\"\(\)\|\+~\^])", r"\\\1", line).strip()
|
||||
|
||||
@staticmethod
|
||||
def rmWWW(txt):
|
||||
|
||||
@@ -41,8 +41,14 @@ class FulltextQueryer(QueryBase):
|
||||
def question(self, txt, tbl="qa", min_match: float = 0.6):
|
||||
original_query = txt
|
||||
txt = self.add_space_between_eng_zh(txt)
|
||||
|
||||
# Strip Infinity ESCAPABLE characters from the query.
|
||||
#
|
||||
# Infinity's search_lexer.l defines ESCAPABLE characters [\x20()^"'~*?:\\]
|
||||
# If these characters appear unescaped in a query, Infinity's lexer will
|
||||
# interpret them as special tokens, causing parsing errors.
|
||||
txt = re.sub(
|
||||
r"[ :|\r\n\t,,。??/`!!&^%%()\[\]{}<>]+",
|
||||
r"[ :|\r\n\t,,。??/`!!&^%%()\[\]{}<>*~'\"\\]+",
|
||||
" ",
|
||||
rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(txt.lower())),
|
||||
).strip()
|
||||
|
||||
@@ -243,6 +243,7 @@ class InfinityConnection(InfinityConnectionBase):
|
||||
for matchExpr in match_expressions:
|
||||
if isinstance(matchExpr, MatchTextExpr):
|
||||
fields = ",".join(matchExpr.fields)
|
||||
self.logger.info(f"INFINITY search match_text: {matchExpr.matching_text}")
|
||||
builder = builder.match_text(
|
||||
fields,
|
||||
matchExpr.matching_text,
|
||||
|
||||
Reference in New Issue
Block a user