diff --git a/rag/nlp/search.py b/rag/nlp/search.py
index e20cece79b..8201bb370c 100644
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -560,6 +560,31 @@ class Dealer:
                                            rag_tokenizer.tokenize(ans).split(),
                                            rag_tokenizer.tokenize(inst).split())
 
+    @staticmethod
+    def _rerank_window(page_size: int, top: int = 0) -> int:
+        """Candidate-window size shared by retrieval's block fetch and slice.
+
+        ``retrieval`` reuses this value BOTH as the backend block size and as
+        the modulus for extracting a single page from a (re)ranked block::
+
+            req["page"] = global_offset // window   # which block to fetch
+            begin       = global_offset %  window   # where the page starts
+
+        For those two to agree the window MUST be an exact multiple of
+        ``page_size``; otherwise blocks and pages drift apart and deep
+        pagination silently drops results and returns short pages.
+
+        The window targets a provider-friendly pool of ~64 candidates, bounded
+        by ``top`` when given (i.e. when an external reranker is active), and is
+        always rounded UP to a whole number of pages to preserve the invariant.
+        """
+        if page_size <= 1:
+            return min(30, top) if top > 0 else 30
+        window = math.ceil(64 / page_size) * page_size
+        if top > 0:
+            window = min(window, math.ceil(top / page_size) * page_size)
+        return window
+
     async def retrieval(
             self,
             question,
@@ -582,12 +607,12 @@ class Dealer:
         if not question:
             return ranks
 
-        # Keep the historical windowing strategy by default, but when an external
-        # reranker is enabled cap candidate count by both top_k and provider-safe 64.
-        RERANK_LIMIT = math.ceil(64 / page_size) * page_size if page_size > 1 else 1
-        RERANK_LIMIT = max(30, RERANK_LIMIT)
-        if rerank_mdl and top > 0:
-            RERANK_LIMIT = min(RERANK_LIMIT, top, 64)
+        # Candidate window for block-based pagination. It MUST stay a multiple
+        # of page_size so the block fetched (global_offset // RERANK_LIMIT) and
+        # the in-block page slice (global_offset % RERANK_LIMIT) stay aligned;
+        # see _rerank_window. When an external reranker is active the pool is
+        # also bounded by top.
+        RERANK_LIMIT = self._rerank_window(page_size, top if rerank_mdl else 0)
         page = max(page, 1)
         global_offset = (page - 1) * page_size
         req = {
diff --git a/test/unit_test/rag/test_search_pagination.py b/test/unit_test/rag/test_search_pagination.py
new file mode 100644
index 0000000000..75a33d1adb
--- /dev/null
+++ b/test/unit_test/rag/test_search_pagination.py
@@ -0,0 +1,135 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+"""Unit tests for the block-based pagination window used by Dealer.retrieval.
+
+retrieval reuses RERANK_LIMIT both as the backend block size
+(req["page"] = global_offset // RERANK_LIMIT) and as the modulus for slicing a
+page out of a (re)ranked block (begin = global_offset % RERANK_LIMIT). If the
+window is not an exact multiple of page_size, blocks and pages drift apart, so
+deep pages silently drop results and come back short. These tests pin that
+invariant and verify cross-block pagination loses nothing.
+"""
+import math
+import sys
+import types
+
+import pytest
+
+# Stub the heavy / circular-importing dependencies before importing search,
+# mirroring test_rank_feature_scores.py so the module imports in isolation.
+_fake_query = types.ModuleType("rag.nlp.query")
+
+
+class _DummyFulltextQueryer:
+    pass
+
+
+_fake_query.FulltextQueryer = _DummyFulltextQueryer
+sys.modules.setdefault("rag.nlp.query", _fake_query)
+sys.modules.setdefault("common.settings", types.ModuleType("common.settings"))
+
+from rag.nlp.search import Dealer  # noqa: E402
+
+_rerank_window = Dealer._rerank_window
+
+# (page_size, top) combinations, including the common page sizes (10, 30) that
+# do NOT divide 64 -- the exact case the old `min(..., 64)` clamp broke -- plus
+# tiny / large / page-aligned tops.
+GRID = [
+    (page_size, top)
+    for page_size in (1, 5, 7, 10, 30, 50, 64)
+    for top in (0, 5, 30, 50, 55, 64, 100, 1024)
+]
+
+
+def _paginate(total, page_size, top, rerank):
+    """Replay retrieval's block-fetch + in-block slice over `total` candidates.
+
+    Returns the concatenated global positions actually surfaced across every
+    page, exactly as Dealer.retrieval would emit them.
+    """
+    window = _rerank_window(page_size, top if rerank else 0)
+    # The backend caps the candidate pool at `top` when an external reranker is
+    # active; otherwise the whole result set is windowed.
+    cap = min(total, top) if (rerank and top > 0) else total
+    surfaced = []
+    page = 1
+    while (page - 1) * page_size < cap:
+        global_offset = (page - 1) * page_size
+        block_index = global_offset // window
+        block_start = block_index * window
+        block = list(range(block_start, min(block_start + window, cap)))
+        begin = global_offset % window
+        surfaced.extend(block[begin:begin + page_size])
+        page += 1
+    return window, cap, surfaced
+
+
+@pytest.mark.parametrize("page_size,top", GRID)
+def test_window_is_page_aligned(page_size, top):
+    """The window must be a positive whole multiple of page_size."""
+    for rerank in (False, True):
+        window = _rerank_window(page_size, top if rerank else 0)
+        assert window >= 1
+        if page_size > 1:
+            assert window % page_size == 0, (page_size, top, rerank, window)
+
+
+@pytest.mark.parametrize("page_size,top", GRID)
+def test_pagination_loses_nothing(page_size, top):
+    """Walking every page reconstructs the candidate pool exactly: in order,
+    no gaps, no duplicates, and no short interior pages."""
+    total = 250
+    for rerank in (False, True):
+        window, cap, surfaced = _paginate(total, page_size, top, rerank)
+        assert surfaced == list(range(cap)), (
+            f"page_size={page_size} top={top} rerank={rerank} window={window} "
+            f"cap={cap}: missing={sorted(set(range(cap)) - set(surfaced))[:10]} "
+            f"dups={len(surfaced) != len(set(surfaced))}"
+        )
+
+
+@pytest.mark.p1
+def test_reported_regression_page7_not_short():
+    """The reported case: page_size=10, top=1024, reranker on. Page 7 (global
+    offset 60) used to return only 4 of 10 results because the window was
+    clamped to 64 (not a multiple of 10)."""
+    page_size, top = 10, 1024
+    window = _rerank_window(page_size, top)
+    assert window % page_size == 0
+    assert window >= 64  # still a provider-friendly ~64 candidate pool
+
+    total = 250
+    _, cap, surfaced = _paginate(total, page_size, top, rerank=True)
+    # Page 7 spans global positions 60..69 and must be full and contiguous.
+    assert surfaced[60:70] == list(range(60, 70))
+    assert len(surfaced) == cap
+
+
+@pytest.mark.p1
+def test_matches_legacy_window_on_non_buggy_paths():
+    """Where the old formula already produced a page-aligned value, the new
+    window is unchanged (no behavioral regression on the non-buggy paths)."""
+    def legacy(page_size, top, rerank):
+        limit = math.ceil(64 / page_size) * page_size if page_size > 1 else 1
+        limit = max(30, limit)
+        if rerank and top > 0:
+            limit = min(limit, top, 64)
+        return limit
+
+    for page_size in (1, 5, 7, 10, 30, 50, 64):
+        # The non-rerank path was always page-aligned already -> must match.
+        assert _rerank_window(page_size, 0) == legacy(page_size, 0, False)