mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Fix: remove duplicate document preview access check (#15625)
### What problem does this PR solve? remove duplicate document preview access check ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -1925,18 +1925,6 @@ async def get(doc_id):
|
||||
e, doc = DocumentService.get_by_id(doc_id)
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
if not DocumentService.accessible(doc_id, current_user.id):
|
||||
# Issue #15501: PR #15146 dropped this check, letting any
|
||||
# authenticated caller download any tenant's document bytes by
|
||||
# guessing/knowing the doc_id. Return the same "Document not
|
||||
# found!" shape used for missing docs so the response is
|
||||
# indistinguishable to a cross-tenant probe (avoids ID
|
||||
# enumeration).
|
||||
logging.warning(
|
||||
"Rejected /documents/<doc_id>/preview cross-tenant access: "
|
||||
"caller_user=%s doc_id=%s", current_user.id, doc_id,
|
||||
)
|
||||
return get_data_error_result(message="Document not found!")
|
||||
|
||||
b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
|
||||
data = await thread_pool_exec(settings.STORAGE_IMPL.get, b, n)
|
||||
|
||||
@@ -1,234 +0,0 @@
|
||||
#
|
||||
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
"""Regression test for `/api/v1/documents/<doc_id>/preview` (#15501).
|
||||
|
||||
PR #15146 dropped the `DocumentService.accessible(doc_id, user_id)` check
|
||||
from the REST preview handler. Any authenticated user could then download
|
||||
any tenant's document bytes by guessing / knowing its doc_id. This test
|
||||
locks in the restored check by asserting that a caller whose tenant does
|
||||
NOT own the document gets the same `Document not found!` response a missing
|
||||
doc would produce, and that the storage backend is never touched.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from types import ModuleType, SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class _PassthroughManager:
|
||||
def route(self, *_args, **_kwargs):
|
||||
return lambda func: func
|
||||
|
||||
|
||||
def _stub(monkeypatch, name, **attrs):
|
||||
mod = ModuleType(name)
|
||||
for key, value in attrs.items():
|
||||
setattr(mod, key, value)
|
||||
monkeypatch.setitem(sys.modules, name, mod)
|
||||
return mod
|
||||
|
||||
|
||||
def _load_document_api(
|
||||
monkeypatch,
|
||||
*,
|
||||
doc_get_by_id,
|
||||
accessible_fn,
|
||||
storage_get,
|
||||
):
|
||||
"""Load api/apps/restful_apis/document_api.py with the minimum stubs."""
|
||||
|
||||
async def _make_response(payload):
|
||||
return SimpleNamespace(payload=payload, headers={})
|
||||
|
||||
def _login_required(func=None, **_kw):
|
||||
if func is not None:
|
||||
return func
|
||||
return lambda f: f
|
||||
|
||||
_stub(
|
||||
monkeypatch, "api.apps",
|
||||
AUTH_JWT="JWT", AUTH_API="API", AUTH_BETA="BETA",
|
||||
current_user=SimpleNamespace(id="caller-tenant"),
|
||||
login_required=_login_required,
|
||||
)
|
||||
_stub(monkeypatch, "api.constants", FILE_NAME_LEN_LIMIT=128, IMG_BASE64_PREFIX="data:image/")
|
||||
_stub(
|
||||
monkeypatch, "api.apps.services.document_api_service",
|
||||
validate_document_update_fields=lambda *_a, **_k: None,
|
||||
map_doc_keys=lambda *_a, **_k: None,
|
||||
map_doc_keys_with_run_status=lambda *_a, **_k: None,
|
||||
update_document_name_only=lambda *_a, **_k: None,
|
||||
update_chunk_method=lambda *_a, **_k: None,
|
||||
update_document_status_only=lambda *_a, **_k: None,
|
||||
reset_document_for_reparse=lambda *_a, **_k: None,
|
||||
)
|
||||
_stub(monkeypatch, "api.db", VALID_FILE_TYPES=(), FileType=SimpleNamespace(VISUAL="visual"))
|
||||
_stub(monkeypatch, "api.db.services", duplicate_name=lambda *_a, **_k: "x")
|
||||
_stub(monkeypatch, "api.db.services.doc_metadata_service", DocMetadataService=SimpleNamespace())
|
||||
_stub(monkeypatch, "api.db.db_models", Task=SimpleNamespace())
|
||||
_stub(
|
||||
monkeypatch, "api.db.services.document_service",
|
||||
DocumentService=SimpleNamespace(get_by_id=lambda _id: doc_get_by_id, accessible=accessible_fn),
|
||||
)
|
||||
_stub(
|
||||
monkeypatch, "api.db.services.file2document_service",
|
||||
File2DocumentService=SimpleNamespace(get_storage_address=lambda **_k: ("bucket", "key")),
|
||||
)
|
||||
_stub(monkeypatch, "api.db.services.file_service", FileService=SimpleNamespace())
|
||||
_stub(monkeypatch, "api.db.services.knowledgebase_service", KnowledgebaseService=SimpleNamespace())
|
||||
_stub(monkeypatch, "api.common.check_team_permission", check_kb_team_permission=lambda *_a, **_k: True)
|
||||
_stub(monkeypatch, "api.db.services.task_service", TaskService=SimpleNamespace(), cancel_all_task_of=lambda *_a, **_k: None)
|
||||
_stub(
|
||||
monkeypatch, "api.utils.api_utils",
|
||||
construct_json_result=lambda **kw: {"kind": "json", **kw},
|
||||
get_data_error_result=lambda message="", code=0, data=False: {"kind": "data_error", "message": message},
|
||||
get_error_data_result=lambda *_a, **_k: {"kind": "error"},
|
||||
get_result=lambda *_a, **_k: {"kind": "result"},
|
||||
get_json_result=lambda *_a, **_k: {"kind": "json_result"},
|
||||
server_error_response=lambda e: {"kind": "server_error", "error": str(e)},
|
||||
add_tenant_id_to_kwargs=lambda *_a, **_k: None,
|
||||
get_request_json=lambda: {},
|
||||
get_error_argument_result=lambda *_a, **_k: {"kind": "error_argument"},
|
||||
check_duplicate_ids=lambda *_a, **_k: True,
|
||||
)
|
||||
_stub(monkeypatch, "api.utils.pagination_utils", validate_rest_api_page_size=lambda *_a, **_k: 10)
|
||||
_stub(
|
||||
monkeypatch, "api.utils.validation_utils",
|
||||
UpdateDocumentReq=type("UpdateDocumentReq", (), {}),
|
||||
DeleteDocumentReq=type("DeleteDocumentReq", (), {}),
|
||||
format_validation_error_message=lambda *_a, **_k: "",
|
||||
validate_and_parse_json_request=lambda *_a, **_k: ({}, None),
|
||||
)
|
||||
_stub(monkeypatch, "common", settings=SimpleNamespace(STORAGE_IMPL=SimpleNamespace(get=storage_get)))
|
||||
_stub(
|
||||
monkeypatch, "common.constants",
|
||||
ParserType=SimpleNamespace(NAIVE="naive"),
|
||||
RetCode=SimpleNamespace(AUTHENTICATION_ERROR=109, DATA_ERROR=102),
|
||||
TaskStatus=SimpleNamespace(RUNNING="1"),
|
||||
SANDBOX_ARTIFACT_BUCKET="sandbox",
|
||||
)
|
||||
_stub(
|
||||
monkeypatch, "common.metadata_utils",
|
||||
convert_conditions=lambda c: c, meta_filter=lambda *_a, **_k: [],
|
||||
turn2jsonschema=lambda *_a, **_k: {},
|
||||
)
|
||||
_stub(
|
||||
monkeypatch, "common.misc_utils",
|
||||
get_uuid=lambda: "uuid",
|
||||
thread_pool_exec=lambda fn, *a, **k: storage_get(*a, **k),
|
||||
)
|
||||
_stub(
|
||||
monkeypatch, "api.utils.file_utils",
|
||||
filename_type=lambda *_a, **_k: "doc",
|
||||
thumbnail=lambda *_a, **_k: b"",
|
||||
)
|
||||
_stub(
|
||||
monkeypatch, "api.utils.web_utils",
|
||||
CONTENT_TYPE_MAP={"pdf": "application/pdf"},
|
||||
html2pdf=lambda *_a, **_k: b"",
|
||||
is_valid_url=lambda *_a, **_k: True,
|
||||
apply_safe_file_response_headers=lambda *_a, **_k: None,
|
||||
)
|
||||
_stub(monkeypatch, "common.ssrf_guard", assert_url_is_safe=lambda *_a, **_k: None)
|
||||
_stub(
|
||||
monkeypatch, "rag.nlp",
|
||||
search=SimpleNamespace(index_name=lambda *_a, **_k: "index"),
|
||||
)
|
||||
_stub(monkeypatch, "rag.nlp.search", index_name=lambda *_a, **_k: "index")
|
||||
|
||||
quart_stub = ModuleType("quart")
|
||||
quart_stub.request = SimpleNamespace(method="GET", args={})
|
||||
quart_stub.make_response = _make_response
|
||||
quart_stub.send_file = lambda *_a, **_k: None
|
||||
monkeypatch.setitem(sys.modules, "quart", quart_stub)
|
||||
|
||||
# Third-party deps imported at module top — stub if not installed.
|
||||
if "peewee" not in sys.modules:
|
||||
_stub(monkeypatch, "peewee", OperationalError=type("OperationalError", (Exception,), {}))
|
||||
if "pydantic" not in sys.modules:
|
||||
_stub(monkeypatch, "pydantic", ValidationError=type("ValidationError", (Exception,), {}))
|
||||
|
||||
# api.apps.settings is reached as `settings.STORAGE_IMPL.get`.
|
||||
_stub(
|
||||
monkeypatch, "api.apps.settings",
|
||||
STORAGE_IMPL=SimpleNamespace(get=storage_get),
|
||||
)
|
||||
|
||||
# parents[5] = repo root (test/unit_test/api/apps/restful_apis/<file>)
|
||||
repo_root = Path(__file__).resolve().parents[5]
|
||||
module_path = repo_root / "api" / "apps" / "restful_apis" / "document_api.py"
|
||||
spec = importlib.util.spec_from_file_location("test_document_api_module", module_path)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
module.manager = _PassthroughManager()
|
||||
module.settings = SimpleNamespace(STORAGE_IMPL=SimpleNamespace(get=storage_get))
|
||||
monkeypatch.setitem(sys.modules, "test_document_api_module", module)
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
|
||||
@pytest.mark.p1
|
||||
class TestDocumentPreviewAccessCheck:
|
||||
"""Regression for #15501: cross-tenant document preview via /documents/<id>/preview."""
|
||||
|
||||
def test_cross_tenant_preview_is_denied(self, monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
"""A caller whose tenant does NOT own the document gets the same
|
||||
'Document not found!' response a missing doc would, and storage is
|
||||
never read."""
|
||||
doc = SimpleNamespace(id="DOC_VICTIM", name="secrets.pdf", type="application")
|
||||
storage_calls: list[tuple] = []
|
||||
|
||||
def _storage_get(*args, **kwargs):
|
||||
storage_calls.append((args, kwargs))
|
||||
return b"SECRET BYTES"
|
||||
|
||||
def _accessible_only_for_owner(_doc_id, user_id):
|
||||
return user_id == "tenant-owner"
|
||||
|
||||
module = _load_document_api(
|
||||
monkeypatch,
|
||||
doc_get_by_id=(True, doc),
|
||||
accessible_fn=_accessible_only_for_owner,
|
||||
storage_get=_storage_get,
|
||||
)
|
||||
|
||||
# current_user.id is "caller-tenant" via the api.apps stub above;
|
||||
# tenant-owner is "tenant-owner" → not owner → must be denied.
|
||||
result = asyncio.run(module.get("DOC_VICTIM"))
|
||||
|
||||
assert isinstance(result, dict) and result.get("kind") == "data_error", result
|
||||
assert "not found" in result["message"].lower()
|
||||
assert storage_calls == [], "storage was read despite cross-tenant request"
|
||||
|
||||
def test_missing_doc_returns_not_found(self, monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
"""Missing-doc behaviour is unchanged: same 'Document not found!' shape."""
|
||||
|
||||
def _accessible_returns_false(*_a, **_k):
|
||||
return False
|
||||
|
||||
module = _load_document_api(
|
||||
monkeypatch,
|
||||
doc_get_by_id=(False, None),
|
||||
accessible_fn=_accessible_returns_false,
|
||||
storage_get=lambda *_a, **_k: b"",
|
||||
)
|
||||
|
||||
result = asyncio.run(module.get("DOC_DOES_NOT_EXIST"))
|
||||
assert isinstance(result, dict) and result.get("kind") == "data_error"
|
||||
assert "not found" in result["message"].lower()
|
||||
Reference in New Issue
Block a user