mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
fix(rag/nlp): treat string input as one phrase in is_english (#16308)
This commit is contained in:
@@ -240,7 +240,7 @@ def is_english(texts):
|
||||
pattern = re.compile(r"[`a-zA-Z0-9\s.,':;/\"?<>!\(\)\-]+")
|
||||
|
||||
if isinstance(texts, str):
|
||||
- texts = list(texts)
|
||||
+ texts = [texts]
|
||||
elif isinstance(texts, list):
|
||||
texts = [t for t in texts if isinstance(t, str) and t.strip()]
|
||||
else:
|
||||
|
||||
@@ -55,6 +55,12 @@ def test_is_english_single_english_answer_in_list():
|
||||
assert is_english(["This is a normal English answer."]) is True
|
||||
|
||||
|
||||
+ @pytest.mark.p2
|
||||
+ def test_is_english_multi_word_phrase():
|
||||
+ # Regression: splitting a string into characters made short spaced phrases fail.
|
||||
+ assert is_english("I am good") is True
|
||||
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_is_english_chinese_list_is_false():
|
||||
assert is_english(["这是中文段落。", "另一个中文段落。"]) is False
|
||||
|
||||
Reference in New Issue
Block a user