fix(api): gate sandbox artifact download on agent session ownership (#16169)

Fixes #16168

## Summary
- Add session-scoped authorization for `GET
/api/v1/documents/artifact/<filename>`
- Allow download only when the artifact filename appears in the caller's
`api_4_conversation` message and
`UserCanvasService.accessible(dialog_id, user_id)` passes
- Deny with generic `"Artifact not found."` before storage access (no
cross-user enumeration)
- Return 4xx when the blob is missing (existing behavior preserved)

## Approach
Sandbox artifacts are runtime CodeExec outputs, not KB documents — this
uses the same session gate pattern as `agent_chat_completion`, not
`DocumentService.accessible`.

## Test plan
- [x] Unit: denied when filename not referenced in user sessions
- [x] Unit: denied when agent canvas is not accessible
- [x] Unit: authorized user receives bytes; missing blob returns
`"Artifact not found."`
- [ ] `pytest
test/testcases/test_web_api/test_document_app/test_document_metadata.py
-k get_artifact`

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
This commit is contained in:
kpdev
2026-06-27 22:24:15 -07:00
committed by GitHub
parent e16d1a0150
commit 4712fcb1b3
2 changed files with 103 additions and 1 deletions

View File

@@ -29,7 +29,8 @@ from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX
from api.apps.services.document_api_service import validate_document_update_fields, map_doc_keys, \
map_doc_keys_with_run_status, update_document_name_only, update_chunk_method, update_document_status_only, \
reset_document_for_reparse
from api.db import VALID_FILE_TYPES, FileType
from api.db import VALID_FILE_TYPES, FileType, DB
from api.db.db_models import API4Conversation
from api.db.services import duplicate_name
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.db_models import Task
@@ -37,6 +38,7 @@ from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.canvas_service import UserCanvasService
from api.common.check_team_permission import check_kb_team_permission
from api.db.services.task_service import TaskService, cancel_all_task_of
from api.utils.api_utils import construct_json_result, get_data_error_result, get_error_data_result, get_result, get_json_result, \
@@ -1749,6 +1751,31 @@ ARTIFACT_CONTENT_TYPES = {
}
@DB.connection_context()
def _sandbox_artifact_dialog_ids_for_user(filename: str, user_id: str) -> list[str]:
"""Return agent dialog IDs for sessions owned by *user_id* that reference *filename*."""
if not filename:
return []
artifact_ref = f"documents/artifact/{filename}"
rows = (
API4Conversation.select(API4Conversation.dialog_id)
.where(
((API4Conversation.user_id == user_id) | (API4Conversation.exp_user_id == user_id)),
(API4Conversation.message.contains(filename) | API4Conversation.message.contains(artifact_ref)),
)
.distinct()
)
return [row.dialog_id for row in rows if row.dialog_id]
def _sandbox_artifact_accessible(filename: str, user_id: str) -> bool:
"""True when a CodeExec sandbox artifact belongs to an agent session the user may access."""
for dialog_id in _sandbox_artifact_dialog_ids_for_user(filename, user_id):
if UserCanvasService.accessible(dialog_id, user_id):
return True
return False
@manager.route("/documents/artifact/<filename>", methods=["GET"]) # noqa: F821
@login_required
async def get_artifact(filename):
@@ -1785,6 +1812,8 @@ async def get_artifact(filename):
ext = os.path.splitext(basename)[1].lower()
if ext not in ARTIFACT_CONTENT_TYPES:
return get_data_error_result(message="Invalid file type.")
if not await thread_pool_exec(_sandbox_artifact_accessible, basename, current_user.id):
return get_data_error_result(message="Artifact not found.")
data = await thread_pool_exec(settings.STORAGE_IMPL.get, bucket, basename)
if not data:
return get_data_error_result(message="Artifact not found.")

View File

@@ -641,6 +641,79 @@ class TestDocumentMetadataUnit:
assert res["code"] == RetCode.DATA_ERROR
assert "Image not found" in res["message"]
@pytest.mark.p2
def test_get_artifact_denied_without_session_reference_unit(self, document_app_module, monkeypatch):
module = document_app_module
filename = "a1b2c3d4e5f6789012345678901234abcd.png"
monkeypatch.setattr(module, "_sandbox_artifact_dialog_ids_for_user", lambda *_args, **_kwargs: [])
res = _run(module.get_artifact(filename))
assert res["code"] == RetCode.DATA_ERROR
assert res["message"] == "Artifact not found."
@pytest.mark.p2
def test_get_artifact_denied_when_agent_not_accessible_unit(self, document_app_module, monkeypatch):
module = document_app_module
filename = "a1b2c3d4e5f6789012345678901234abcd.png"
monkeypatch.setattr(module, "_sandbox_artifact_dialog_ids_for_user", lambda *_args, **_kwargs: ["agent-1"])
monkeypatch.setattr(module.UserCanvasService, "accessible", lambda *_args, **_kwargs: False)
res = _run(module.get_artifact(filename))
assert res["code"] == RetCode.DATA_ERROR
assert res["message"] == "Artifact not found."
@pytest.mark.p2
def test_get_artifact_success_and_missing_blob_unit(self, document_app_module, monkeypatch):
module = document_app_module
filename = "a1b2c3d4e5f6789012345678901234abcd.png"
class _Headers(dict):
def set(self, key, value):
self[key] = value
class _ArtifactResponse:
def __init__(self, data):
self.data = data
self.headers = _Headers()
monkeypatch.setattr(module, "_sandbox_artifact_dialog_ids_for_user", lambda *_args, **_kwargs: ["agent-1"])
monkeypatch.setattr(module.UserCanvasService, "accessible", lambda *_args, **_kwargs: True)
async def fake_thread_pool_exec(fn, *args, **kwargs):
return fn(*args, **kwargs)
async def fake_make_response(data):
return _ArtifactResponse(data)
monkeypatch.setattr(module, "thread_pool_exec", fake_thread_pool_exec)
monkeypatch.setattr(module, "make_response", fake_make_response)
monkeypatch.setattr(
module,
"apply_safe_file_response_headers",
lambda response, content_type, extension: response.headers.update(
{"content_type": content_type, "extension": extension}
),
)
monkeypatch.setattr(
module.settings,
"STORAGE_IMPL",
SimpleNamespace(get=lambda *_args, **_kwargs: b"artifact-bytes"),
)
res = _run(module.get_artifact(filename))
assert isinstance(res, _ArtifactResponse)
assert res.data == b"artifact-bytes"
assert res.headers["content_type"] == "image/png"
monkeypatch.setattr(
module.settings,
"STORAGE_IMPL",
SimpleNamespace(get=lambda *_args, **_kwargs: None),
)
res = _run(module.get_artifact(filename))
assert res["code"] == RetCode.DATA_ERROR
assert res["message"] == "Artifact not found."
class TestDocumentBatchChangeStatus:
@pytest.mark.p2