fix(rag/nlp): treat string input as one phrase in is_english (#16308)

This commit is contained in:
Harsh Kashyap
2026-06-25 17:37:09 +05:30
committed by GitHub
parent 5defb4e7d6
commit c7052f4dd1
2 changed files with 7 additions and 1 deletion

View File

@@ -240,7 +240,7 @@ def is_english(texts):
pattern = re.compile(r"[`a-zA-Z0-9\s.,':;/\"?<>!\(\)\-]+")
if isinstance(texts, str):
texts = list(texts)
texts = [texts]
elif isinstance(texts, list):
texts = [t for t in texts if isinstance(t, str) and t.strip()]
else:

View File

@@ -55,6 +55,12 @@ def test_is_english_single_english_answer_in_list():
assert is_english(["This is a normal English answer."]) is True
@pytest.mark.p2
def test_is_english_multi_word_phrase():
    """A plain string holding a short, space-separated phrase is reported as English.

    Regression guard: splitting a string into characters made short spaced
    phrases fail.
    """
    phrase = "I am good"
    verdict = is_english(phrase)
    # Identity check on purpose: the result must be the bool True itself,
    # not merely a truthy value.
    assert verdict is True
@pytest.mark.p2
def test_is_english_chinese_list_is_false():
assert is_english(["这是中文段落。", "另一个中文段落。"]) is False