From cf5cca5cbbe785f24f014a9f20232de09e6ba263 Mon Sep 17 00:00:00 2001 From: Yingfeng Date: Tue, 9 Jun 2026 22:48:33 +0800 Subject: [PATCH] Fix wrong unit test path (#15864) --- .../common}/test_think_stream_parser.py | 0 .../parser}/test_naive_markdown_merge.py | 55 +++++++++++-------- 2 files changed, 31 insertions(+), 24 deletions(-) rename test/{testcases/unit => unit_test/common}/test_think_stream_parser.py (100%) rename {tests => test/unit_test/deepdoc/parser}/test_naive_markdown_merge.py (64%) diff --git a/test/testcases/unit/test_think_stream_parser.py b/test/unit_test/common/test_think_stream_parser.py similarity index 100% rename from test/testcases/unit/test_think_stream_parser.py rename to test/unit_test/common/test_think_stream_parser.py diff --git a/tests/test_naive_markdown_merge.py b/test/unit_test/deepdoc/parser/test_naive_markdown_merge.py similarity index 64% rename from tests/test_naive_markdown_merge.py rename to test/unit_test/deepdoc/parser/test_naive_markdown_merge.py index 10aa3c3edb..af7540e23e 100644 --- a/tests/test_naive_markdown_merge.py +++ b/test/unit_test/deepdoc/parser/test_naive_markdown_merge.py @@ -3,100 +3,107 @@ Unit tests for markdown chunk merging logic in rag/app/naive.py. Tests the _is_short_header() helper function to ensure short markdown headers are correctly identified and will be force-merged with the next section. + +Uses lazy import via fixture to avoid triggering deepdoc model loading +at pytest collection time (which would fail in CI without model files). """ import sys -import os +from pathlib import Path -# Add project root to path for imports -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) +import pytest -from rag.app.naive import _is_short_header +_REPO = Path(__file__).parents[4] class TestIsShortHeader: """Test cases for _is_short_header() function.""" + @pytest.fixture(autouse=True) + def _lazy_import(self): + sys.path.insert(0, str(_REPO)) + from rag.app.naive import _is_short_header + + self._is_short_header = _is_short_header + def test_short_header_h1(self): """Short level-1 header should return True.""" text = "# Quick Start" - result = _is_short_header(text) + result = self._is_short_header(text) assert result is True def test_short_header_h2(self): """Short level-2 header should return True.""" text = "## Quick Travel" - result = _is_short_header(text) + result = self._is_short_header(text) assert result is True def test_short_header_h3(self): """Short level-3 header should return True.""" text = "### Setup" - result = _is_short_header(text) + result = self._is_short_header(text) assert result is True def test_long_header(self): """Long header (> 50 tokens) should return False.""" text = "# " + "Very long header " * 20 # ~100 tokens - result = _is_short_header(text) + result = self._is_short_header(text) assert result is False def test_non_header_short_text(self): """Short text without header pattern should return False.""" text = "This is short" - result = _is_short_header(text) + result = self._is_short_header(text) assert result is False def test_empty_text(self): """Empty text should return False.""" text = "" - result = _is_short_header(text) + result = self._is_short_header(text) assert result is False def test_whitespace_only(self): """Whitespace-only text should return False.""" text = " " - result = _is_short_header(text) + result = self._is_short_header(text) assert result is False def test_header_exactly_50_tokens(self): """Header with exactly 50 tokens should return False (strict <).""" - # Construct a header with exactly 50 tokens - words = ["word"] * 49 # 49 words = 49 tokens, plus "# " = 1 token + words = ["word"] * 49 text = "# " + " ".join(words) - result = _is_short_header(text, max_tokens=50) - # 50 tokens = not < 50, so should return False + result = self._is_short_header(text, max_tokens=50) assert result is False def test_header_49_tokens(self): """Header with 49 tokens should return True (< 50).""" - words = ["word"] * 48 # 48 words = 48 tokens, plus "# " = 1 token = 49 tokens + words = ["word"] * 48 text = "# " + " ".join(words) - result = _is_short_header(text, max_tokens=50) + result = self._is_short_header(text, max_tokens=50) assert result is True def test_custom_max_tokens(self): """Should respect custom max_tokens parameter.""" + # "# Short" = 2 tokens in cl100k_base encoding text = "# Short" - result = _is_short_header(text, max_tokens=5) - assert result is False # "# Short" is ~2 tokens, but wait... + result = self._is_short_header(text, max_tokens=5) + assert result is True # 2 < 5 → short - result = _is_short_header(text, max_tokens=10) - assert result is True + result = self._is_short_header(text, max_tokens=2) + assert result is False # 2 < 2 → not short def test_header_with_special_chars(self): """Header with special characters should still be recognized.""" text = "## API Endpoint: /api/v1/users" - result = _is_short_header(text) + result = self._is_short_header(text) assert result is True def test_header_with_cjk_chars(self): """Header with CJK characters should be recognized.""" text = "## 快速旅行" - result = _is_short_header(text) + result = self._is_short_header(text) assert result is True if __name__ == "__main__": - import pytest pytest.main([__file__, "-v"])