Files
ragflow/test/testcases/conftest.py
Zhichang Yu 3fa15c0e2f feat(agent): Go port — canvas engine, 22 components, DSL v2, 13 endpoints (#15952)
Ports the agent canvas subsystem from Python to Go.

## What's included

### Canvas Engine (Phase 0/1)
- State engine, scheduler, variable resolver, Redis checkpoint store,
cancel protocol
- **209 tests** across canvas / component / io packages

### 22 Components (P0–P4)
| Tier | Components |
|---|---|
| P0 T1+T2+T3 | LLM, Agent, ExitLoop, Switch, Categorize, Begin,
Message, Invoke |
| P1 T3 | VariableAggregator, VariableAssigner, StringTransform,
ListOperations, DataOperations |
| P2 T3 | Iteration, IterationItem, Loop, LoopItem |
| P3 T3 | UserFillUp, Fillup |
| P4 T5 | Browser, ExcelProcessor, DocsGenerator |

### DSL v2 Schema (Phase 2.5)
- Typed v2 in-memory model with v1-to-v2 auto-detect converter
- v1 legacy field stripping per plan §2.11.7

### HTTP Endpoints & Bug Fixes (Plans PR1–PR3)
- **DELETE SQL bug fix**: gorm v2 `Where("id = ?", id).Delete(...)`
pattern
- **CreateAgent validation**: title/DSL required, duplicate check, 103
envelope
- **13 new endpoints**: templates, prompts, tags, sessions CRUD,
chat/completions (SSE + non-stream stubs), rerun, test_db_connection,
logs, webhook/logs
- **756 Go unit tests** (745 → 756, +18)
- **17 → 0 Python integration test failures** (test_agents.py +
test_session_management/)

### Tools
21 eino tools: HTTPHelper, search tools, financial/data tools, mandatory
stubs

### Infrastructure
OTel observability, NATS message queue, DeepDoc gRPC client, SSRF
guards, IDOR mitigation
2026-06-12 22:58:28 +08:00

440 lines
16 KiB
Python

#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import importlib
import sys
import types
def _make_stub_getattr(module_name):
def __getattr__(attr_name):
message = f"{module_name}.{attr_name} is stubbed in tests"
class _Stub:
def __init__(self, *_args, **_kwargs):
raise RuntimeError(message)
def __call__(self, *_args, **_kwargs):
raise RuntimeError(message)
def __getattr__(self, _name):
raise RuntimeError(message)
setattr(sys.modules[module_name], attr_name, _Stub)
return _Stub
return __getattr__
def _install_rag_llm_stubs():
    """Register stub modules for ``rag.llm`` and two of its submodules.

    Does nothing when ``sys.modules["rag.llm"]`` already carries the
    ``_rag_llm_stubbed`` marker. Otherwise it:

    * imports ``rag`` or, if that import raises, registers an empty
      placeholder package under that name;
    * registers a fresh ``rag.llm`` module whose missing attributes are
      served by ``_make_stub_getattr``;
    * does the same for ``rag.llm.cv_model`` and ``rag.llm.chat_model``,
      reusing a module already present in ``sys.modules`` when there is one;
    * sets the marker so a second call returns early.
    """
    current = sys.modules.get("rag.llm")
    if current is not None:
        if getattr(current, "_rag_llm_stubbed", False):
            return

    try:
        root_pkg = importlib.import_module("rag")
    except Exception:
        # The real package is unavailable: fall back to an empty placeholder.
        root_pkg = types.ModuleType("rag")
        root_pkg.__path__ = []
        root_pkg.__package__ = "rag"
        root_pkg.__file__ = __file__
        sys.modules["rag"] = root_pkg

    stub_pkg_name = "rag.llm"
    stub_pkg = types.ModuleType(stub_pkg_name)
    stub_pkg.__path__ = []
    stub_pkg.__package__ = stub_pkg_name
    stub_pkg.__file__ = __file__
    sys.modules[stub_pkg_name] = stub_pkg
    root_pkg.llm = stub_pkg
    stub_pkg.__getattr__ = _make_stub_getattr(stub_pkg_name)

    for short_name in ("cv_model", "chat_model"):
        qualified = f"rag.llm.{short_name}"
        child = sys.modules.get(qualified)
        if not isinstance(child, types.ModuleType):
            # Covers both "not registered" (None) and a non-module entry.
            child = types.ModuleType(qualified)
            sys.modules[qualified] = child
        child.__package__ = "rag.llm"
        child.__file__ = __file__
        child.__getattr__ = _make_stub_getattr(qualified)
        setattr(stub_pkg, short_name, child)

    stub_pkg._rag_llm_stubbed = True
def _install_scholarly_stub():
if "scholarly" in sys.modules:
return
stub = types.ModuleType("scholarly")
def _stub(*_args, **_kwargs):
raise RuntimeError("scholarly is stubbed in tests")
stub.scholarly = _stub
sys.modules["scholarly"] = stub
# Install the stub modules at import time, ahead of the imports that follow.
# NOTE(review): presumably the ordering matters so that anything imported
# later resolves `rag.llm` / `scholarly` to these stubs — confirm.
_install_rag_llm_stubs()
_install_scholarly_stub()
import pytest
import requests
from configs import EMAIL, HOST_ADDRESS, PASSWORD, VERSION, ZHIPU_AI_API_KEY, SILICONFLOW_API_KEY
# Maps each accepted `--level` value to the pytest marker expression that
# pytest_configure installs for it. Each level also selects the tests of the
# levels before it (p2 runs p1 tests, p3 runs p1 and p2 tests).
MARKER_EXPRESSIONS = {
"p1": "p1",
"p2": "p1 or p2",
"p3": "p1 or p2 or p3",
}
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the suite's custom command-line options.

    ``--level`` accepts one of the ``MARKER_EXPRESSIONS`` keys and defaults
    to ``p2``; ``--client-type`` accepts ``python_sdk``, ``http`` or ``web``
    and defaults to ``http``.
    """
    level_names = list(MARKER_EXPRESSIONS)
    level_summary = "/".join(level_names)
    parser.addoption(
        "--level",
        action="store",
        default="p2",
        choices=level_names,
        help=f"Test level ({level_summary}): p1=smoke, p2=core, p3=full",
    )

    client_types = ["python_sdk", "http", "web"]
    parser.addoption(
        "--client-type",
        action="store",
        default="http",
        choices=client_types,
        help="Test client type: 'python_sdk', 'http', 'web'",
    )
def pytest_configure(config: pytest.Config) -> None:
    """Apply the marker expression that corresponds to ``--level``.

    The expression is written to ``config.option.markexpr`` unconditionally,
    replacing whatever value that option held before. When pytest runs in
    verbose mode the active level is also printed.
    """
    selected_level = config.getoption("--level")
    config.option.markexpr = MARKER_EXPRESSIONS[selected_level]
    if config.option.verbose <= 0:
        return
    print(f"\n[CONFIG] Active test level: {selected_level}")
def register():
    """Register the test account, tolerating an already-registered reply.

    POSTs ``EMAIL`` / ``PASSWORD`` with the nickname ``"qa"`` to
    ``/api/{VERSION}/users``.

    Raises:
        Exception: when the reply code is non-zero and its message does not
            contain ``"has already registered"``. The exception carries the
            server message, or a generic description if none was sent.
    """
    url = HOST_ADDRESS + f"/api/{VERSION}/users"
    name = "qa"
    register_data = {"email": EMAIL, "nickname": name, "password": PASSWORD}
    res = requests.post(url=url, json=register_data).json()
    if res.get("code") == 0:
        return
    # The message may be absent or null; a bare `in` test against None would
    # raise TypeError and hide the real failure.
    message = res.get("message") or ""
    if "has already registered" not in message:
        raise Exception(message or f"registration failed with code {res.get('code')}")
def login():
    """Log in with the test account and return its Authorization header.

    POSTs ``EMAIL`` / ``PASSWORD`` to ``/api/{VERSION}/auth/login``.

    Returns:
        The value of the ``Authorization`` response header.

    Raises:
        Exception: carrying the server message when the reply code is not 0.
    """
    credentials = {"email": EMAIL, "password": PASSWORD}
    endpoint = HOST_ADDRESS + f"/api/{VERSION}/auth/login"
    response = requests.post(url=endpoint, json=credentials)
    body = response.json()
    if body.get("code") != 0:
        raise Exception(body.get("message"))
    return response.headers["Authorization"]
@pytest.fixture(scope="session")
def auth():
    """Session fixture yielding the Authorization value of the test account.

    Registration is attempted first; any exception it raises is printed and
    otherwise ignored, and the login result is returned regardless.
    """
    try:
        register()
    except Exception as err:
        # Best effort only: a failed registration is reported, not fatal.
        print(err)
    return login()
@pytest.fixture(scope="session")
def token(auth):
    """Session fixture that creates an API token for the logged-in account.

    POSTs to ``/api/{VERSION}/system/tokens`` using ``auth`` as the
    Authorization header.

    Returns:
        The ``token`` field of the reply's ``data`` object.

    Raises:
        Exception: describing the URL, code and message when the reply code
            is not 0.
    """
    endpoint = HOST_ADDRESS + f"/api/{VERSION}/system/tokens"
    headers = {"Authorization": auth}
    payload = requests.post(url=endpoint, headers=headers).json()
    code = payload.get("code")
    if code != 0:
        raise Exception(
            f"access: {endpoint}, POST method, error code: {code}, message: {payload.get('message')}"
        )
    return payload["data"].get("token")
def get_my_llms(auth, name):
    """Report whether ``name`` is a key of the legacy my_llms listing.

    Args:
        auth: Value sent as the Authorization header.
        name: Factory name to look for in the reply's ``data``.

    Returns:
        True when ``name`` is present in ``data``, otherwise False. A reply
        whose ``data`` is missing or null counts as "not present".

    Raises:
        Exception: carrying the server message when the reply code is not 0.
    """
    # todo deprecated
    url = HOST_ADDRESS + f"/{VERSION}/llm/my_llms"
    authorization = {"Authorization": auth}
    response = requests.get(url=url, headers=authorization)
    res = response.json()
    if res.get("code") != 0:
        raise Exception(res.get("message"))
    # Guard against a missing/null `data`, as get_tenant_llm_added does.
    return name in (res.get("data") or {})
def get_added_models(auth, factory_name):
    """Report whether ``factory_name`` appears in ``/api/v1/models``.

    Args:
        auth: Value sent as the Authorization header.
        factory_name: Provider name to look for.

    Returns:
        True when any listed model belongs to ``factory_name``, else False.
        A reply whose ``data`` is missing or null counts as an empty listing.

    Raises:
        Exception: carrying the server message when the reply code is not 0.
        KeyError: when a listed model has neither a truthy ``model_provider``
            nor a ``provider_name`` key.
    """
    url = HOST_ADDRESS + "/api/v1/models"
    authorization = {"Authorization": auth}
    response = requests.get(url=url, headers=authorization)
    res = response.json()
    if res.get("code") != 0:
        raise Exception(res.get("message"))
    # `.get("data", [])` only covers a missing key; an explicit JSON null
    # would come back as None and break the iteration below.
    models = res.get("data") or []
    # Go server (post-Python port) serializes this field as `model_provider`
    # in the RESTful `/api/v1/models` response. Fall back to the legacy
    # `provider_name` key so this conftest works against both.
    added_factory = {
        model.get("model_provider") or model["provider_name"]
        for model in models
    }
    return factory_name in added_factory
def get_tenant_llm_added(auth, factory_name, model_name, model_type="rerank"):
    """
    Tell whether the tenant already has a given model under a given factory.

    The legacy /v1/llm/my_llms reply is keyed by factory name:
    {
    "ZHIPU-AI": {"tags": ..., "llm": [{"name": ..., "type": ...}, ...]},
    "SILICONFLOW": {"tags": ..., "llm": [{"name": ..., "type": ...}, ...]},
    }
    so the factory entry is selected first and its `llm` list is scanned for
    an item whose name equals `model_name`. When `model_type` is not None the
    item's type must match as well. A non-zero reply code yields False rather
    than an exception.
    """
    endpoint = HOST_ADDRESS + f"/{VERSION}/llm/my_llms"
    headers = {"Authorization": auth}
    body = requests.get(url=endpoint, headers=headers).json()
    if body.get("code") != 0:
        return False
    factories = body.get("data") or {}
    factory_entry = factories.get(factory_name) or {}
    return any(
        entry.get("name") == model_name
        and (model_type is None or entry.get("type") == model_type)
        for entry in factory_entry.get("llm", []) or []
    )
def add_models(auth):
    """Set the ZHIPU-AI API key through the legacy endpoint when missing.

    For each configured factory that ``get_my_llms`` does not report, the
    key is POSTed to ``/{VERSION}/llm/set_api_key``. A non-zero reply code
    aborts the test session through ``pytest.exit``.
    """
    # todo deprecated
    endpoint = HOST_ADDRESS + f"/{VERSION}/llm/set_api_key"
    headers = {"Authorization": auth}
    factory_payloads = {
        "ZHIPU-AI": {"llm_factory": "ZHIPU-AI", "api_key": ZHIPU_AI_API_KEY},
    }
    for factory, payload in factory_payloads.items():
        if get_my_llms(auth, factory):
            continue
        result = requests.post(url=endpoint, headers=headers, json=payload).json()
        if result.get("code") != 0:
            pytest.exit(f"Critical error in add_models: {result.get('message')}")
# Make sure the ZHIPU-AI and SILICONFLOW providers, and their "CI" and
# "default" instances, are registered for the authenticated user.
#
# The steps per provider are: PUT /api/v1/providers (attempted when
# get_added_models() does not list the provider), POST each instance to
# /api/v1/providers/<provider>/instances, then re-check get_added_models().
# Replies that say the provider/instance already exists, or that the name
# "default" is reserved, are printed as notes and tolerated; any other
# non-zero reply aborts the whole session through pytest.exit().
#
# auth: value sent as the Authorization header on every request.
def add_model_instance(auth):
add_provider_api = HOST_ADDRESS + "/api/v1/providers"
authorization = {"Authorization": auth}
# Tracks providers that already existed in the catalog before this test
# run. Their user-tenant_llm binding is whatever was last configured for
# this user; the final assertion is downgraded to a warning in that
# case to keep the suite runnable in partially-seeded environments.
provider_already_existed = set()
# (provider name, API key) pairs taken from configs.
providers = [
("ZHIPU-AI", ZHIPU_AI_API_KEY),
("SILICONFLOW", SILICONFLOW_API_KEY),
]
for provider_name, api_key in providers:
if not get_added_models(auth, provider_name):
add_provider_response = requests.put(url=add_provider_api, headers=authorization, json={"provider_name": provider_name})
add_provider_res = add_provider_response.json()
if add_provider_res.get("code") != 0:
# NOTE(review): the "" default only applies when the key is absent; an
# explicit null message would make msg None and `.lower()` below raise —
# confirm the server never sends that.
msg = add_provider_res.get("message", "")
# Provider may already exist in the catalog from a prior run
# or admin setup but not yet appear in this tenant's
# `/api/v1/models` listing — treat as success and continue
# to the instance step. The final assertion below will be
# downgraded to a warning in that case so the test can run.
if "duplicated" in msg.lower() or "already exist" in msg.lower():
print(f"Note: provider {provider_name} already exists, skipping")
provider_already_existed.add(provider_name)
else:
pytest.exit(f"Critical error in add model provider: {msg}")
# Register both "CI" (used by glm-4-flash@CI@ZHIPU-AI in configs.py
# and BAAI/bge-reranker-v2-m3@CI@SILICONFLOW) and "default".
for instance_name in ("CI", "default"):
add_instance_api = HOST_ADDRESS + f"/api/v1/providers/{provider_name}/instances"
add_instance_response = requests.post(url=add_instance_api, headers=authorization, json={
"instance_name": instance_name,
"api_key": api_key,
"region": "default",
"base_url": ""
})
add_instance_res = add_instance_response.json()
if add_instance_res.get("code") != 0:
msg = add_instance_res.get("message", "")
# Instance may already exist with a different API key from a
# prior test run; that's fine — skip instead of failing.
# (The case-insensitive test also covers the exact-case one.)
if "Already exist instance" in msg or "already exist" in msg.lower():
print(f"Note: {provider_name}/{instance_name} already exists, skipping")
continue
# Python API blocks creating instances named "default".
# The test_retrieval_parity test handles this by inserting
# "default" directly into the DB for SILICONFLOW.
if "cannot be 'default'" in msg:
print(f"Note: {provider_name}/{instance_name} blocked by API (name reserved), skipping")
continue
pytest.exit(
f"Critical error in add model instance {provider_name}/{instance_name}: "
f"{msg}"
)
# Re-query the listing to confirm the provider is now visible.
add_success = get_added_models(auth, provider_name)
if not add_success:
if provider_name in provider_already_existed:
# The provider/instances were already there from a prior run
# but this user's tenant_llm binding is missing — the Go
# server (post-Python port) doesn't auto-create the binding
# on PUT. Downgrade to a warning so tests that don't depend
# on the model can still run; tests that do will fail with
# a real error rather than this opaque setup crash.
print(
f"WARNING: {provider_name} already exists in catalog but "
f"missing from this tenant's /api/v1/models. Tests that "
f"depend on {provider_name} may fail."
)
continue
pytest.exit(f"Critical error in check added model: {provider_name} add model failed")
def add_siliconflow_rerank_llm(auth):
    """
    Add the BAAI/bge-reranker-v2-m3 rerank model under the SILICONFLOW factory.

    Returns immediately when get_tenant_llm_added() already reports the model.
    Otherwise the model is POSTed to /{VERSION}/llm/add_llm with
    SILICONFLOW_API_KEY, and the tenant listing is queried again to confirm
    the row exists. A non-zero reply code, or a failed confirmation, aborts
    the test session through pytest.exit().

    Per the note this helper was written with, the model is referenced as
    `BAAI/bge-reranker-v2-m3@CI@SILICONFLOW` in test_retrieval_parity.py and
    the endpoint validates the key with a real rerank request, so network
    access to SiliconFlow and a valid key are needed (not verifiable here).
    """
    factory = "SILICONFLOW"
    model_name = "BAAI/bge-reranker-v2-m3"
    if get_tenant_llm_added(auth, factory, model_name, "rerank"):
        return

    endpoint = HOST_ADDRESS + f"/{VERSION}/llm/add_llm"
    headers = {"Authorization": auth}
    body = dict(
        llm_factory=factory,
        llm_name=model_name,
        model_type="rerank",
        api_key=SILICONFLOW_API_KEY,
        api_base="",
    )
    reply = requests.post(url=endpoint, headers=headers, json=body).json()
    code = reply.get("code")
    if code != 0:
        pytest.exit(
            f"Critical error adding {factory} rerank model {model_name}: "
            f"code={code} message={reply.get('message')} data={reply.get('data')}"
        )

    confirmed = get_tenant_llm_added(auth, factory, model_name, "rerank")
    if not confirmed:
        pytest.exit(f"Failed to confirm {factory}/{model_name} rerank row was added")
def get_tenant_info(auth):
    """Return the ``tenant_id`` reported by ``/api/{VERSION}/users/me/models``.

    Args:
        auth: Value sent as the Authorization header.

    Raises:
        Exception: carrying the server message when the reply code is not 0.
    """
    # todo deprecated
    endpoint = HOST_ADDRESS + f"/api/{VERSION}/users/me/models"
    payload = requests.get(url=endpoint, headers={"Authorization": auth}).json()
    if payload.get("code") != 0:
        raise Exception(payload.get("message"))
    return payload["data"].get("tenant_id")
def _patch_default_model(url, authorization, payload):
    """PATCH one default-model selection, warning instead of failing.

    ``payload["model_type"]`` is used to label the warning. A non-zero reply
    code, or a reply body that is not JSON, only prints a warning.
    """
    label = payload["model_type"]
    response = requests.patch(url=url, headers=authorization, json=payload)
    try:
        res = response.json()
    except ValueError:
        # requests raises a ValueError subclass for a non-JSON body (for
        # example a plain-text 404 from an unimplemented route).
        print(
            f"WARNING: failed to set default {label} LLM via {url}: "
            f"non-JSON response (HTTP {response.status_code}). Continuing."
        )
        return
    if res.get("code") != 0:
        print(
            f"WARNING: failed to set default {label} LLM via {url}: "
            f"{res.get('message')!r}. Continuing."
        )


@pytest.fixture(scope="session", autouse=True)
def set_tenant_info(auth):
    """Session-wide setup of model providers and default models.

    When ZHIPU-AI or SILICONFLOW is missing from ``get_added_models``,
    ``add_model_instance`` is run; any exception it raises aborts the session
    through ``pytest.exit``. Afterwards the default chat model
    (glm-4-flash on ZHIPU-AI/CI) and the default embedding model
    (BAAI/bge-small-en-v1.5 on Builtin/Local) are requested through
    ``PATCH /api/v1/models/default``; failures there only print a warning.
    """
    if not get_added_models(auth, "ZHIPU-AI") or not get_added_models(auth, "SILICONFLOW"):
        try:
            add_model_instance(auth)
        except Exception as e:
            pytest.exit(f"Error in set_tenant_info: {str(e)}")

    url = HOST_ADDRESS + "/api/v1/models/default"
    authorization = {"Authorization": auth}

    # The Go server (post-Python port) doesn't yet implement
    # PATCH /api/v1/models/default, so the chat/embedding default
    # can't be set via API. Failures are downgraded to warnings so tests
    # that don't rely on a default LLM can still run; tests that do
    # will fail with their own real error.

    # set chat model
    _patch_default_model(
        url,
        authorization,
        {
            "model_provider": "ZHIPU-AI",
            "model_instance": "CI",
            "model_type": "chat",
            "model_name": "glm-4-flash",
        },
    )

    # set embedding model
    _patch_default_model(
        url,
        authorization,
        {
            "model_provider": "Builtin",
            "model_instance": "Local",
            "model_type": "embedding",
            "model_name": "BAAI/bge-small-en-v1.5",
        },
    )
@pytest.fixture(scope="session", autouse=True)
def set_tenant_siliconflow_rerank(auth):
    """
    Best-effort registration of the SiliconFlow BAAI/bge-reranker-v2-m3
    rerank model for the test tenant (referenced by test_retrieval_parity.py
    as `BAAI/bge-reranker-v2-m3@CI@SILICONFLOW`).

    Any exception raised by add_siliconflow_rerank_llm() is caught and
    reported with two printed notes, so this fixture itself never fails.

    NOTE(review): this is intended to run after `set_tenant_info`, so that
    the SILICONFLOW provider and CI instance already exist, but that ordering
    is not declared as a fixture dependency here — confirm. The fallback to
    FACTORY_LLM_INFOS mentioned in the printed note happens outside this file.
    """
    try:
        add_siliconflow_rerank_llm(auth)
    except Exception as err:
        print(
            f"Note: Could not register SILICONFLOW rerank model via /add_llm: {err}",
            "The model config will be resolved from FACTORY_LLM_INFOS at runtime.",
            sep="\n",
        )