diff --git a/.github/workflows/sep-tests.yml b/.github/workflows/sep-tests.yml new file mode 100644 index 0000000000..611119154b --- /dev/null +++ b/.github/workflows/sep-tests.yml @@ -0,0 +1,1118 @@ +name: sep-tests +permissions: + contents: read + +on: + push: + branches: + - 'main' + - '*.*.*' + paths-ignore: + - 'docs/**' + - '*.md' + - '*.mdx' + # The only difference between pull_request and pull_request_target is the context in which the workflow runs: + # — pull_request_target workflows use the workflow files from the default branch, and secrets are available. + # — pull_request workflows use the workflow files from the pull request branch, and secrets are unavailable. + pull_request: + types: [opened, synchronize, reopened, ready_for_review, labeled] + paths-ignore: + - 'docs/**' + - '*.md' + - '*.mdx' + schedule: + - cron: '0 16 * * *' # This schedule runs every 16:00:00Z(00:00:00+08:00) + +# https://docs.github.com/en/actions/using-jobs/using-concurrency +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + ragflow_preflight: + name: ragflow_preflight + # https://docs.github.com/en/actions/using-jobs/using-conditions-to-control-job-execution + # https://github.com/orgs/community/discussions/26261 + if: ${{ github.event_name != 'pull_request' || (github.event.pull_request.draft == false && contains(github.event.pull_request.labels.*.name, 'ci') && (github.event.action != 'labeled' || github.event.label.name == 'ci')) }} + runs-on: [ "self-hosted", "ragflow-test" ] + permissions: # job-level override for the "Check workflow duplication" step, which runs gh pr list and gh run cancel; the workflow-wide default grants contents only + contents: read + actions: write + pull-requests: read + outputs: + http_api_test_level: ${{ steps.test_level.outputs.http_api_test_level }} + has_go_changes: ${{ steps.detect_changes.outputs.has_go_changes }} + has_python_changes: ${{ steps.detect_changes.outputs.has_python_changes }} + steps: + - name: Ensure workspace ownership + run: | + echo "Workflow triggered by ${{ github.event_name }}" + echo "chown -R ${USER} ${GITHUB_WORKSPACE}" && sudo chown -R ${USER} 
${GITHUB_WORKSPACE} + + # https://github.com/actions/checkout/issues/1781 + - name: Check out code + uses: actions/checkout@v6 + with: + ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.sha }} + fetch-depth: 0 + fetch-tags: true + + - name: Check workflow duplication + if: ${{ !cancelled() && !failure() }} + run: | + if [[ ${GITHUB_EVENT_NAME} != "pull_request" && ${GITHUB_EVENT_NAME} != "schedule" ]]; then + HEAD=$(git rev-parse HEAD) + # Find a PR that introduced a given commit + export GH_TOKEN="${{ secrets.GITHUB_TOKEN }}" # gh authenticates from GH_TOKEN for this process only; nothing is stored in the self-hosted runner's gh configuration + PR_NUMBER=$(gh pr list --search "${HEAD}" --state merged --json number --jq '.[0].number') # quoted so the jq filter is never subject to pathname expansion + echo "HEAD=${HEAD}" + echo "PR_NUMBER=${PR_NUMBER}" + if [[ -n "${PR_NUMBER}" ]]; then + PR_SHA_FP=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/PR_${PR_NUMBER} + if [[ -f "${PR_SHA_FP}" ]]; then + read -r PR_SHA PR_RUN_ID < "${PR_SHA_FP}" + # Calculate the hash of the current workspace content + HEAD_SHA=$(git rev-parse HEAD^{tree}) + if [[ "${HEAD_SHA}" == "${PR_SHA}" ]]; then + echo "Cancel myself since the workspace content hash is the same with PR #${PR_NUMBER} merged. See ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${PR_RUN_ID} for details." 
+ gh run cancel ${GITHUB_RUN_ID} + while true; do + status=$(gh run view ${GITHUB_RUN_ID} --json status -q .status) + [ "${status}" = "completed" ] && break + sleep 5 + done + exit 1 + fi + fi + fi + elif [[ ${GITHUB_EVENT_NAME} == "pull_request" ]]; then + PR_NUMBER=${{ github.event.pull_request.number }} + PR_SHA_FP=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/PR_${PR_NUMBER} + # Calculate the hash of the current workspace content + PR_SHA=$(git rev-parse HEAD^{tree}) + echo "PR #${PR_NUMBER} workspace content hash: ${PR_SHA}" + mkdir -p ${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY} + echo "${PR_SHA} ${GITHUB_RUN_ID}" > ${PR_SHA_FP} + fi + ARTIFACTS_DIR=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/${GITHUB_RUN_ID} + echo "ARTIFACTS_DIR=${ARTIFACTS_DIR}" >> ${GITHUB_ENV} + rm -rf ${ARTIFACTS_DIR} && mkdir -p ${ARTIFACTS_DIR} + +# - name: Check comments of changed Python files +# if: ${{ false }} +# run: | +# if [[ ${{ github.event_name }} == 'pull_request' || ${{ github.event_name }} == 'pull_request_target' ]]; then +# CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }} \ +# | grep -E '\.(py)$' || true) +# +# if [ -n "$CHANGED_FILES" ]; then +# echo "Check comments of changed Python files with check_comment_ascii.py" +# +# readarray -t files <<< "$CHANGED_FILES" +# HAS_ERROR=0 +# +# for file in "${files[@]}"; do +# if [ -f "$file" ]; then +# if python3 check_comment_ascii.py "$file"; then +# echo "✅ $file" +# else +# echo "❌ $file" +# HAS_ERROR=1 +# fi +# fi +# done +# +# if [ $HAS_ERROR -ne 0 ]; then +# exit 1 +# fi +# else +# echo "No Python files changed" +# fi +# fi + + - name: Run Lefthook on changed files + run: | + set -euo pipefail + if [[ "${GITHUB_EVENT_NAME}" == "pull_request" || "${GITHUB_EVENT_NAME}" == "pull_request_target" ]]; then + changed_files=$(mktemp) + trap 'rm -f "$changed_files"' EXIT + git diff --name-only ${{ 
github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }} \ + | while IFS= read -r file; do # IFS= keeps leading/trailing whitespace in file names intact + if [[ -f "$file" ]]; then printf '%s\0' "$file"; fi # if/fi instead of &&: a deleted path as the last entry must not make the loop (and the pipefail pipeline) return non-zero + done > "$changed_files" + echo "Changed files to run lefthook on:" + if [[ -s "$changed_files" ]]; then + tr '\0' '\n' < "$changed_files" | sed 's/^/ /' + else + echo " (none — lefthook will be a no-op)" + fi + lefthook run pre-commit --files-from-stdin --no-auto-install < "$changed_files" + fi + + - name: Set test level + id: test_level + run: | + set -euo pipefail + if [[ ${GITHUB_EVENT_NAME} == "schedule" ]]; then + export HTTP_API_TEST_LEVEL=p3 + else + export HTTP_API_TEST_LEVEL=p2 + fi + echo "HTTP_API_TEST_LEVEL=${HTTP_API_TEST_LEVEL}" >> ${GITHUB_ENV} + echo "http_api_test_level=${HTTP_API_TEST_LEVEL}" >> ${GITHUB_OUTPUT} + + - name: Detect changed file types + id: detect_changes + run: | + set -euo pipefail + has_go=false + has_python=false + + if [[ "${GITHUB_EVENT_NAME}" == "schedule" ]]; then + has_go=true + has_python=true + else + if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then + CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}) + else + CHANGED_FILES=$(git diff --name-only HEAD~1 HEAD) + fi + + while IFS= read -r file; do + case "$file" in + *.go|go.mod|go.sum) has_go=true ;; + *.py|pyproject.toml|requirements*.txt) has_python=true ;; + Dockerfile|docker-compose*.yml|build.sh) has_go=true; has_python=true ;; + esac + done <<< "$CHANGED_FILES" + + if [[ "$has_go" == "false" && "$has_python" == "false" ]]; then + has_go=true + has_python=true + fi + fi + + echo "has_go_changes=${has_go}" >> $GITHUB_OUTPUT + echo "has_python_changes=${has_python}" >> $GITHUB_OUTPUT + echo "Go: ${has_go}, Python: ${has_python}" + + - name: Prepare Python test environment + if: steps.detect_changes.outputs.has_python_changes == 'true' + run: | + uv sync --python 3.13 --group test --frozen + uv pip install -e sdk/python + + - name: Run 
unit test + if: steps.detect_changes.outputs.has_python_changes == 'true' + run: | + source .venv/bin/activate + which pytest || echo "pytest not in PATH" + echo "Start to run unit test" + python3 run_tests.py -i + + + + ragflow_tests_infinity: + name: ragflow_tests_infinity + needs: ragflow_preflight + if: ${{ github.event_name != 'pull_request' || (github.event.pull_request.draft == false && contains(github.event.pull_request.labels.*.name, 'ci') && (github.event.action != 'labeled' || github.event.label.name == 'ci')) }} + runs-on: [ "self-hosted", "ragflow-test" ] + env: + DOC_ENGINE: infinity + RAGFLOW_IMAGE: infiniflow/ragflow:${{ github.run_id }}-infinity + HTTP_API_TEST_LEVEL: ${{ needs.ragflow_preflight.outputs.http_api_test_level }} + HAS_GO: ${{ needs.ragflow_preflight.outputs.has_go_changes }} + HAS_PYTHON: ${{ needs.ragflow_preflight.outputs.has_python_changes }} + steps: + - name: Ensure workspace ownership + if: always() + run: | + echo "Workflow triggered by ${{ github.event_name }}" + echo "chown -R ${USER} ${GITHUB_WORKSPACE}" && sudo chown -R ${USER} ${GITHUB_WORKSPACE} + + - name: Check out code + if: always() + uses: actions/checkout@v6 + with: + ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.sha }} + fetch-depth: 0 + fetch-tags: true + + - name: Build ragflow go server + if: env.HAS_GO == 'true' + run: | + set -euo pipefail + BUILDER_CONTAINER=ragflow_build_${GITHUB_RUN_ID}_${DOC_ENGINE}_$(od -An -N4 -tx4 /dev/urandom | tr -d ' ') + cleanup_builder() { + if [[ -n "${BUILDER_CONTAINER:-}" ]]; then + sudo docker rm -f -v "${BUILDER_CONTAINER}" >/dev/null 2>&1 || true + fi + } + trap cleanup_builder EXIT + + TZ=${TZ:-$(readlink -f /etc/localtime | awk -F '/zoneinfo/' '{print $2}')} + sudo docker run --privileged -d --name "${BUILDER_CONTAINER}" \ + -e TZ="${TZ}" \ + -e UV_INDEX=https://mirrors.aliyun.com/pypi/simple \ + -v 
"${PWD}:/ragflow" \ + -v "${PWD}/internal/cpp/resource:/usr/share/infinity/resource" \ + infiniflow/infinity_builder:ubuntu22_clang20 + sudo docker exec "${BUILDER_CONTAINER}" bash -c 'git config --global safe.directory "*" && cd /ragflow && ./build.sh --cpp' + ./build.sh --go + + - name: Run Go unit tests + if: env.HAS_GO == 'true' + # Runs after `./build.sh --go`, which guarantees the C++ static + # library (librag_tokenizer_c_api.a) is present on disk. The Go + # test binaries link against it transitively through + # `internal/binding`, so running `go test` before the C++ build + # fails the link step. + # + # Excludes packages whose tests fail for environmental reasons + # unrelated to the diff: + # - internal/storage: TestMinioStorage_* needs a MinIO server + # at localhost:9000; not started by this job. + # - internal/tokenizer: tests need /usr/share/infinity/resource + # dict files, only mounted inside the docker builder, not + # in the Go test environment. + # - internal/handler: TestListAgentVersionsHandler_Success and + # sqlite setup (e.g. "no such table: user_tenant") are + # pre-existing flakes unrelated to the diff. + run: | + set -euo pipefail + PKGS=$(go list ./... 2>/dev/null \ + | grep -v '/internal/storage$' \ + | grep -v '/internal/tokenizer$' \ + | grep -v '/internal/handler$' || true) + if [ -z "$PKGS" ]; then + ./build.sh --test + else + ./build.sh --test -- $PKGS + fi + + - name: Build ragflow:nightly + if: env.HAS_PYTHON == 'true' + run: | + set -euo pipefail + sudo docker pull ubuntu:24.04 + sudo DOCKER_BUILDKIT=1 docker build --build-arg NEED_MIRROR=1 --build-arg HTTPS_PROXY=${HTTPS_PROXY} --build-arg HTTP_PROXY=${HTTP_PROXY} -f Dockerfile -t ${RAGFLOW_IMAGE} . 
+ + - name: Prepare Python test environment + if: env.HAS_PYTHON == 'true' + run: | + uv sync --python 3.13 --group test --frozen + uv pip install -e sdk/python + + - name: Prepare function test environment + if: env.HAS_PYTHON == 'true' + working-directory: docker + run: | + set -euo pipefail + # install ss + sudo apt update && sudo apt install -y iproute2 + RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-${HOME}} + COMPOSE_PROJECT_NAME="${GITHUB_RUN_ID}-${DOC_ENGINE}" + echo "COMPOSE_PROJECT_NAME=${COMPOSE_PROJECT_NAME}" >> ${GITHUB_ENV} + echo "RAGFLOW_CONTAINER=${COMPOSE_PROJECT_NAME}-ragflow-cpu-1" >> ${GITHUB_ENV} + ARTIFACTS_DIR=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/${GITHUB_RUN_ID}/${DOC_ENGINE} + echo "ARTIFACTS_DIR=${ARTIFACTS_DIR}" >> ${GITHUB_ENV} + rm -rf "${ARTIFACTS_DIR}" && mkdir -p "${ARTIFACTS_DIR}" + + # Determine runner number (default to 1 if not found) + RUNNER_NUM=$(sudo docker inspect $(hostname) --format '{{index .Config.Labels "com.docker.compose.container-number"}}' 2>/dev/null || true) + RUNNER_NUM=${RUNNER_NUM:-1} + + # Engine-specific offset partitions keep concurrent engine jobs from + # choosing the same host ports when they land on the same self-hosted runner. + # A lock plus reservation file closes the check/start race between parallel jobs. 
+ PORT_BASES=(1200 1201 23817 23820 5432 5455 9000 9001 6379 6380 6601 9380 9381 9382 9384 9383 9385 80 443 4222) + PARTITION_SIZE=6000 + case "${DOC_ENGINE}" in + elasticsearch) PARTITION_BASE=1000 ;; + infinity) PARTITION_BASE=31000 ;; + *) echo "Unsupported DOC_ENGINE=${DOC_ENGINE}" >&2; exit 1 ;; + esac + PORT_LOCK_DIR=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/port-locks + mkdir -p "${PORT_LOCK_DIR}" + + port_offset_available() { + local offset=$1 + local base port + for base in "${PORT_BASES[@]}"; do + port=$((base + offset)) + if ss -ltnH "sport = :${port}" | grep -q .; then + return 1 + fi + done + return 0 + } + + cleanup_stale_port_locks() { + local now stale_after lock lock_ts + now=$(date -u +%s) + stale_after=$((6 * 60 * 60)) + for lock in "${PORT_LOCK_DIR}"/*.lock; do + [[ -e "${lock}" ]] || continue + lock_ts=$(awk '{print $3}' "${lock}" 2>/dev/null || true) + if [[ "${lock_ts}" =~ ^[0-9]+$ ]] && (( now - lock_ts > stale_after )); then + rm -f "${lock}" + fi + done + } + + reserve_port_offset() { + local attempt candidate reservation + cleanup_stale_port_locks + for attempt in $(seq 0 59); do + candidate=$(( PARTITION_BASE + ((GITHUB_RUN_ID + RUNNER_NUM * 1000 + attempt * 97) % PARTITION_SIZE) )) + reservation="${PORT_LOCK_DIR}/${candidate}.lock" + if ( set -o noclobber; echo "${GITHUB_RUN_ID} ${DOC_ENGINE} $(date -u +%s)" > "${reservation}" ) 2>/dev/null; then + if port_offset_available "${candidate}"; then + PORT_OFFSET=${candidate} + PORT_RESERVATION=${reservation} + return 0 + fi + rm -f "${reservation}" + fi + done + return 1 + } + + if ! 
reserve_port_offset; then + echo "Failed to reserve a free host port range for ${DOC_ENGINE} docker compose" >&2 + exit 1 + fi + echo "PORT_RESERVATION=${PORT_RESERVATION}" >> ${GITHUB_ENV} + echo "Using ${DOC_ENGINE} host port offset ${PORT_OFFSET}" + ES_PORT=$((1200 + PORT_OFFSET)) + OS_PORT=$((1201 + PORT_OFFSET)) + INFINITY_THRIFT_PORT=$((23817 + PORT_OFFSET)) + INFINITY_HTTP_PORT=$((23820 + PORT_OFFSET)) + INFINITY_PSQL_PORT=$((5432 + PORT_OFFSET)) + EXPOSE_MYSQL_PORT=$((5455 + PORT_OFFSET)) + MINIO_PORT=$((9000 + PORT_OFFSET)) + MINIO_CONSOLE_PORT=$((9001 + PORT_OFFSET)) + REDIS_PORT=$((6379 + PORT_OFFSET)) + NATS_PORT=$((4222 + PORT_OFFSET)) + TEI_PORT=$((6380 + PORT_OFFSET)) + KIBANA_PORT=$((6601 + PORT_OFFSET)) + SVR_HTTP_PORT=$((9380 + PORT_OFFSET)) + ADMIN_SVR_HTTP_PORT=$((9381 + PORT_OFFSET)) + SVR_MCP_PORT=$((9382 + PORT_OFFSET)) + GO_HTTP_PORT=$((9384 + PORT_OFFSET)) + GO_ADMIN_PORT=$((9383 + PORT_OFFSET)) + SANDBOX_EXECUTOR_MANAGER_PORT=$((9385 + PORT_OFFSET)) + SVR_WEB_HTTP_PORT=$((80 + PORT_OFFSET)) + SVR_WEB_HTTPS_PORT=$((443 + PORT_OFFSET)) + + # Persist computed ports into .env so docker-compose uses the correct host bindings. + # Remove previous CI overrides first; docker compose uses the last duplicate key. 
+ sed -i '/^ES_PORT=/d;/^OS_PORT=/d;/^INFINITY_THRIFT_PORT=/d;/^INFINITY_HTTP_PORT=/d;/^INFINITY_PSQL_PORT=/d;/^EXPOSE_MYSQL_PORT=/d;/^MINIO_PORT=/d;/^MINIO_CONSOLE_PORT=/d;/^REDIS_PORT=/d;/^TEI_PORT=/d;/^KIBANA_PORT=/d;/^SVR_HTTP_PORT=/d;/^ADMIN_SVR_HTTP_PORT=/d;/^SVR_MCP_PORT=/d;/^GO_HTTP_PORT=/d;/^GO_ADMIN_PORT=/d;/^SANDBOX_EXECUTOR_MANAGER_PORT=/d;/^SVR_WEB_HTTP_PORT=/d;/^SVR_WEB_HTTPS_PORT=/d;/^NATS_PORT=/d;/^COMPOSE_PROFILES=/d;/^TEI_MODEL=/d;/^RAGFLOW_IMAGE=/d;/^DOC_ENGINE=/d' .env + { + echo "" + echo "ES_PORT=${ES_PORT}" + echo "OS_PORT=${OS_PORT}" + echo "INFINITY_THRIFT_PORT=${INFINITY_THRIFT_PORT}" + echo "INFINITY_HTTP_PORT=${INFINITY_HTTP_PORT}" + echo "INFINITY_PSQL_PORT=${INFINITY_PSQL_PORT}" + echo "EXPOSE_MYSQL_PORT=${EXPOSE_MYSQL_PORT}" + echo "MINIO_PORT=${MINIO_PORT}" + echo "MINIO_CONSOLE_PORT=${MINIO_CONSOLE_PORT}" + echo "REDIS_PORT=${REDIS_PORT}" + echo "NATS_PORT=${NATS_PORT}" + echo "TEI_PORT=${TEI_PORT}" + echo "KIBANA_PORT=${KIBANA_PORT}" + echo "SVR_HTTP_PORT=${SVR_HTTP_PORT}" + echo "ADMIN_SVR_HTTP_PORT=${ADMIN_SVR_HTTP_PORT}" + echo "SVR_MCP_PORT=${SVR_MCP_PORT}" + echo "GO_HTTP_PORT=${GO_HTTP_PORT}" + echo "GO_ADMIN_PORT=${GO_ADMIN_PORT}" + echo "SANDBOX_EXECUTOR_MANAGER_PORT=${SANDBOX_EXECUTOR_MANAGER_PORT}" + echo "SVR_WEB_HTTP_PORT=${SVR_WEB_HTTP_PORT}" + echo "SVR_WEB_HTTPS_PORT=${SVR_WEB_HTTPS_PORT}" + echo "COMPOSE_PROFILES=${DOC_ENGINE},cpu,tei-cpu,deepdoc" + echo "TEI_MODEL=BAAI/bge-small-en-v1.5" + echo "RAGFLOW_IMAGE=${RAGFLOW_IMAGE}" + echo "DOC_ENGINE=${DOC_ENGINE}" + } >> .env + echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV} + + # Patch entrypoint.sh for coverage + sed -i '/"\$PY" api\/ragflow_server.py \${INIT_SUPERUSER_ARGS} &/c\ echo "Ensuring coverage is installed..."\n "$PY" -m pip install coverage -i https://mirrors.aliyun.com/pypi/simple\n export COVERAGE_FILE=/ragflow/logs/.coverage\n echo "Starting ragflow_server with coverage..."\n "$PY" -m coverage run --source=./api/apps 
--omit="*/tests/*,*/migrations/*" -a api/ragflow_server.py ${INIT_SUPERUSER_ARGS} &' ./entrypoint.sh + + + - name: Start ragflow:nightly for Infinity + if: env.HAS_PYTHON == 'true' + run: | + sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} down -v || true + sudo docker ps -a --filter "label=com.docker.compose.project=${COMPOSE_PROJECT_NAME}" -q | xargs -r sudo docker rm -f + sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} up -d + + - name: Run sdk tests against Infinity + if: env.HAS_PYTHON == 'true' + run: | + export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY="" + svc_ready=0 + for i in $(seq 1 60); do + if sudo docker exec ${RAGFLOW_CONTAINER} curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then + svc_ready=1 + break + fi + echo "Waiting for service to be available... ($i/60)" + sleep 5 + done + if [ "$svc_ready" -ne 1 ]; then + echo "Service did not become ready after 5 minutes. Docker logs:" + sudo docker logs ${RAGFLOW_CONTAINER} + exit 1 + fi + echo "Start to run test sdk on Infinity" + source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} --junitxml=pytest-infinity-sdk.xml --cov=sdk/python/ragflow_sdk --cov-branch --cov-report=xml:coverage-infinity-sdk.xml test/testcases/test_sdk_api 2>&1 | tee infinity_sdk_test.log + + - name: Run New RESTFUL api tests against Infinity + if: env.HAS_PYTHON == 'true' + run: | + export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY="" + svc_ready=0 + for i in $(seq 1 60); do + if sudo docker exec ${RAGFLOW_CONTAINER} curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then + svc_ready=1 + break + fi + echo "Waiting for service to be available... 
($i/60)" + sleep 5 + done + if [ "$svc_ready" -ne 1 ]; then + echo "Service did not become ready after 5 minutes. Docker logs:" + sudo docker logs ${RAGFLOW_CONTAINER} + exit 1 + fi + source .venv/bin/activate && set -o pipefail; DOC_ENGINE=infinity pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/restful_api 2>&1 | tee infinity_restful_api_test.log + + - name: RAGFlow CLI retrieval test Infinity + if: env.HAS_PYTHON == 'true' + env: + PYTHONPATH: ${{ github.workspace }} + run: | + set -euo pipefail + source .venv/bin/activate + + export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY="" + + EMAIL="ci-${GITHUB_RUN_ID}@example.com" + PASS="ci-pass-${GITHUB_RUN_ID}" + DATASET="ci_dataset_${GITHUB_RUN_ID}" + + CLI="python admin/client/ragflow_cli.py" + + LOG_FILE="infinity_cli_test.log" + : > "${LOG_FILE}" + + ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:[[:space:]]*[1-9]' # POSIX class: the former "\\s" inside single quotes reached grep as a literal backslash followed by "s*", so non-zero codes never matched + run_cli() { # usage: run_cli LOGFILE [--allow REGEX] CMD...; runs CMD under a 500s timeout, appends its output to LOGFILE, returns non-zero on command failure or on an ERROR_RE match not covered by REGEX + local logfile="$1" + shift + local allow_re="" + if [[ "${1:-}" == "--allow" ]]; then + allow_re="$2" + shift 2 + fi + local cmd_display="$*" + echo "===== $(date -u +%Y-%m-%dT%H:%M:%SZ) CMD: ${cmd_display} =====" | tee -a "${logfile}" + local tmp_log + tmp_log="$(mktemp)" + set +e + timeout 500s "$@" 2>&1 | tee "${tmp_log}" + local status=${PIPESTATUS[0]} + set -e + cat "${tmp_log}" >> "${logfile}" + if grep -qiE "${ERROR_RE}" "${tmp_log}"; then + if [[ -n "${allow_re}" ]] && grep -qiE "${allow_re}" "${tmp_log}"; then + echo "Allowed CLI error markers in ${logfile}" + rm -f "${tmp_log}" + return 0 + fi + echo "Detected CLI error markers in ${logfile}" + rm -f "${tmp_log}" + return 1 # return, not exit: callers that poll with "if run_cli ..." can retry, while direct calls still abort the script through set -e + fi + rm -f "${tmp_log}" + return ${status} + } + + set -a + source docker/.env + set +a + + HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}" + USER_HOST="$(echo "${HOST_ADDRESS}" | sed -E 's#^https?://([^:/]+).*#\1#')" + USER_PORT="${SVR_HTTP_PORT}" + 
ADMIN_HOST="${USER_HOST}" + ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}" + + svc_ready=0 + for i in $(seq 1 60); do + if sudo docker exec ${RAGFLOW_CONTAINER} curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then + svc_ready=1 + break + fi + echo "Waiting for service to be available... ($i/60)" + sleep 5 + done + if [ "$svc_ready" -ne 1 ]; then + echo "Service did not become ready after 5 minutes. Docker logs:" + sudo docker logs ${RAGFLOW_CONTAINER} + exit 1 + fi + + admin_ready=0 + for i in $(seq 1 30); do + if run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "ping"; then + admin_ready=1 + break + fi + sleep 1 + done + if [[ "${admin_ready}" -ne 1 ]]; then + echo "Admin service did not become ready" + exit 1 + fi + + run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "show version" + ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?' 
+ run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "create user '$EMAIL' '$PASS'" + + user_ready=0 + for i in $(seq 1 30); do + if run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "ping"; then + user_ready=1 + break + fi + sleep 1 + done + if [[ "${user_ready}" -ne 1 ]]; then + echo "User service did not become ready" + exit 1 + fi + + run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "show version" + run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'" + run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'" + run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "parse dataset '$DATASET' sync" + run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'" + + - name: Stop ragflow to save coverage Infinity + if: ${{ !cancelled() && env.HAS_PYTHON == 'true' }} + run: | + # Send SIGINT to ragflow_server.py to trigger coverage save + PID=$(sudo docker exec ${RAGFLOW_CONTAINER} ps aux | grep "ragflow_server.py" | grep -v grep | awk '{print $2}' | head -n 1) + if [ -n "$PID" ]; then + echo "Sending SIGINT to ragflow_server.py (PID: $PID)..." 
+ sudo docker exec ${RAGFLOW_CONTAINER} kill -INT $PID + # Wait for process to exit and coverage file to be written + sleep 10 + else + echo "ragflow_server.py not found!" + fi + sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} stop + + - name: Generate server coverage report Infinity + if: ${{ !cancelled() && env.HAS_PYTHON == 'true' }} + run: | + # .coverage file should be in docker/ragflow-logs/.coverage + if [ -f docker/ragflow-logs/.coverage ]; then + echo "Found .coverage file" + cp docker/ragflow-logs/.coverage .coverage + source .venv/bin/activate + # Create .coveragerc to map container paths to host paths + echo "[paths]" > .coveragerc + echo "source =" >> .coveragerc + echo " ." >> .coveragerc + echo " /ragflow" >> .coveragerc + coverage xml -o coverage-infinity-server.xml + rm .coveragerc + else + echo ".coverage file not found!" + fi + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v5 + if: ${{ !cancelled() }} + with: + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: false + + - name: Collect ragflow log Infinity + if: ${{ !cancelled() && env.HAS_PYTHON == 'true' }} + run: | + if [ -d docker/ragflow-logs ]; then + cp -r docker/ragflow-logs ${ARTIFACTS_DIR}/ragflow-logs-infinity + echo "ragflow log" && tail -n 200 docker/ragflow-logs/ragflow_server.log || true + else + echo "No docker/ragflow-logs directory found; skipping log collection" + fi + sudo rm -rf docker/ragflow-logs || true + + - name: Stop ragflow:nightly for Infinity + if: ${{ always() && env.HAS_PYTHON == 'true' }} + run: | + # Sometimes `docker compose down` fail due to hang container, heavy load etc. Need to remove such containers to release resources(for example, listen ports). 
+ sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} down -v || true + sudo docker ps -a --filter "label=com.docker.compose.project=${COMPOSE_PROJECT_NAME}" -q | xargs -r sudo docker rm -f + if [[ -n ${RAGFLOW_IMAGE} ]]; then + sudo docker rmi -f ${RAGFLOW_IMAGE} + fi + if [[ -n ${PORT_RESERVATION:-} ]]; then + rm -f "${PORT_RESERVATION}" + fi + + + + ragflow_tests_elasticsearch: + name: ragflow_tests_elasticsearch + needs: ragflow_preflight + if: ${{ github.event_name != 'pull_request' || (github.event.pull_request.draft == false && contains(github.event.pull_request.labels.*.name, 'ci') && (github.event.action != 'labeled' || github.event.label.name == 'ci')) }} + runs-on: [ "self-hosted", "ragflow-test" ] + env: + DOC_ENGINE: elasticsearch + RAGFLOW_IMAGE: infiniflow/ragflow:${{ github.run_id }}-elasticsearch + HTTP_API_TEST_LEVEL: ${{ needs.ragflow_preflight.outputs.http_api_test_level }} + HAS_GO: ${{ needs.ragflow_preflight.outputs.has_go_changes }} + HAS_PYTHON: ${{ needs.ragflow_preflight.outputs.has_python_changes }} + steps: + - name: Ensure workspace ownership + if: always() + run: | + echo "Workflow triggered by ${{ github.event_name }}" + echo "chown -R ${USER} ${GITHUB_WORKSPACE}" && sudo chown -R ${USER} ${GITHUB_WORKSPACE} + + - name: Check out code + if: always() + uses: actions/checkout@v6 + with: + ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.sha }} + fetch-depth: 0 + fetch-tags: true + + - name: Build ragflow go server + if: env.HAS_GO == 'true' + run: | + set -euo pipefail + BUILDER_CONTAINER=ragflow_build_${GITHUB_RUN_ID}_${DOC_ENGINE}_$(od -An -N4 -tx4 /dev/urandom | tr -d ' ') + cleanup_builder() { + if [[ -n "${BUILDER_CONTAINER:-}" ]]; then + sudo docker rm -f -v "${BUILDER_CONTAINER}" >/dev/null 2>&1 || true + fi + } + trap cleanup_builder EXIT + + TZ=${TZ:-$(readlink -f /etc/localtime | awk 
-F '/zoneinfo/' '{print $2}')} + sudo docker run --privileged -d --name "${BUILDER_CONTAINER}" \ + -e TZ="${TZ}" \ + -e UV_INDEX=https://mirrors.aliyun.com/pypi/simple \ + -v "${PWD}:/ragflow" \ + -v "${PWD}/internal/cpp/resource:/usr/share/infinity/resource" \ + infiniflow/infinity_builder:ubuntu22_clang20 + sudo docker exec "${BUILDER_CONTAINER}" bash -c 'git config --global safe.directory "*" && cd /ragflow && ./build.sh --cpp' + ./build.sh --go + + - name: Run Go unit tests + if: env.HAS_GO == 'true' + # Runs after `./build.sh --go`, which guarantees the C++ static + # library (librag_tokenizer_c_api.a) is present on disk. The Go + # test binaries link against it transitively through + # `internal/binding`, so running `go test` before the C++ build + # fails the link step. + # + # Excludes packages whose tests fail for environmental reasons + # unrelated to the diff: + # - internal/storage: TestMinioStorage_* needs a MinIO server + # at localhost:9000; not started by this job. + # - internal/tokenizer: tests need /usr/share/infinity/resource + # dict files, only mounted inside the docker builder, not + # in the Go test environment. + # - internal/handler: TestListAgentVersionsHandler_Success and + # sqlite setup (e.g. "no such table: user_tenant") are + # pre-existing flakes unrelated to the diff. + run: | + set -euo pipefail + PKGS=$(go list ./... 2>/dev/null \ + | grep -v '/internal/storage$' \ + | grep -v '/internal/tokenizer$' \ + | grep -v '/internal/handler$' || true) + if [ -z "$PKGS" ]; then + ./build.sh --test + else + ./build.sh --test -- $PKGS + fi + + - name: Build ragflow:nightly + if: env.HAS_PYTHON == 'true' + run: | + set -euo pipefail + sudo docker pull ubuntu:24.04 + sudo DOCKER_BUILDKIT=1 docker build --build-arg NEED_MIRROR=1 --build-arg HTTPS_PROXY=${HTTPS_PROXY} --build-arg HTTP_PROXY=${HTTP_PROXY} -f Dockerfile -t ${RAGFLOW_IMAGE} . 
+
+      # Install the Python test dependencies and the local SDK in editable mode.
+      - name: Prepare Python test environment
+        if: env.HAS_PYTHON == 'true'
+        run: |
+          uv sync --python 3.13 --group test --frozen
+          uv pip install -e sdk/python
+
+      # Reserve a private host port range for this job, write the resulting
+      # port assignments into docker/.env, and patch entrypoint.sh so the
+      # server runs under coverage.
+      - name: Prepare function test environment
+        if: env.HAS_PYTHON == 'true'
+        working-directory: docker
+        run: |
+          set -euo pipefail
+          # iproute2 provides `ss`, used below to probe listening ports.
+          sudo apt update && sudo apt install -y iproute2
+          RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-${HOME}}
+          COMPOSE_PROJECT_NAME="${GITHUB_RUN_ID}-${DOC_ENGINE}"
+          echo "COMPOSE_PROJECT_NAME=${COMPOSE_PROJECT_NAME}" >> "${GITHUB_ENV}"
+          echo "RAGFLOW_CONTAINER=${COMPOSE_PROJECT_NAME}-ragflow-cpu-1" >> "${GITHUB_ENV}"
+          ARTIFACTS_DIR=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/${GITHUB_RUN_ID}/${DOC_ENGINE}
+          echo "ARTIFACTS_DIR=${ARTIFACTS_DIR}" >> "${GITHUB_ENV}"
+          rm -rf "${ARTIFACTS_DIR}" && mkdir -p "${ARTIFACTS_DIR}"
+
+          # Determine runner number (default to 1 if not found)
+          RUNNER_NUM=$(sudo docker inspect $(hostname) --format '{{index .Config.Labels "com.docker.compose.container-number"}}' 2>/dev/null || true)
+          RUNNER_NUM=${RUNNER_NUM:-1}
+
+          # Engine-specific offset partitions keep concurrent engine jobs from
+          # choosing the same host ports when they land on the same self-hosted runner.
+          # A lock plus reservation file closes the check/start race between parallel jobs.
+          # PORT_BASES must list every base port that is offset further below.
+          PORT_BASES=(1200 1201 23817 23820 5432 5455 9000 9001 6379 6380 6601 9380 9381 9382 9384 9383 9385 80 443 4222)
+          PARTITION_SIZE=6000
+          case "${DOC_ENGINE}" in
+            elasticsearch) PARTITION_BASE=1000 ;;
+            infinity) PARTITION_BASE=31000 ;;
+            *) echo "Unsupported DOC_ENGINE=${DOC_ENGINE}" >&2; exit 1 ;;
+          esac
+          PORT_LOCK_DIR=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/port-locks
+          mkdir -p "${PORT_LOCK_DIR}"
+
+          # Succeeds only if none of the base ports shifted by offset $1 has a
+          # listening TCP socket according to `ss`.
+          port_offset_available() {
+            local offset=$1
+            local base port
+            for base in "${PORT_BASES[@]}"; do
+              port=$((base + offset))
+              if ss -ltnH "sport = :${port}" | grep -q .; then
+                return 1
+              fi
+            done
+            return 0
+          }
+
+          # Remove lock files whose recorded timestamp (third field) is more
+          # than six hours old; files without a numeric timestamp are kept.
+          cleanup_stale_port_locks() {
+            local now stale_after lock lock_ts
+            now=$(date -u +%s)
+            stale_after=$((6 * 60 * 60))
+            for lock in "${PORT_LOCK_DIR}"/*.lock; do
+              # With no matches the glob stays literal; skip it.
+              [[ -e "${lock}" ]] || continue
+              lock_ts=$(awk '{print $3}' "${lock}" 2>/dev/null || true)
+              if [[ "${lock_ts}" =~ ^[0-9]+$ ]] && (( now - lock_ts > stale_after )); then
+                rm -f "${lock}"
+              fi
+            done
+          }
+
+          # Try up to 60 candidate offsets inside this engine's partition.
+          # On success sets PORT_OFFSET and PORT_RESERVATION and returns 0;
+          # returns 1 when no candidate could be claimed.
+          reserve_port_offset() {
+            local attempt candidate reservation
+            cleanup_stale_port_locks
+            for attempt in $(seq 0 59); do
+              candidate=$(( PARTITION_BASE + ((GITHUB_RUN_ID + RUNNER_NUM * 1000 + attempt * 97) % PARTITION_SIZE) ))
+              reservation="${PORT_LOCK_DIR}/${candidate}.lock"
+              # noclobber makes the redirection fail when the lock file already
+              # exists, so creating it doubles as claiming the offset.
+              # Lock content: "<run id> <engine> <epoch seconds>".
+              if ( set -o noclobber; echo "${GITHUB_RUN_ID} ${DOC_ENGINE} $(date -u +%s)" > "${reservation}" ) 2>/dev/null; then
+                if port_offset_available "${candidate}"; then
+                  PORT_OFFSET=${candidate}
+                  PORT_RESERVATION=${reservation}
+                  return 0
+                fi
+                # Ports are busy: give the lock back and try the next candidate.
+                rm -f "${reservation}"
+              fi
+            done
+            return 1
+          }
+
+          if ! reserve_port_offset; then
+            echo "Failed to reserve a free host port range for ${DOC_ENGINE} docker compose" >&2
+            exit 1
+          fi
+          # Exported so the final teardown step can release the reservation.
+          echo "PORT_RESERVATION=${PORT_RESERVATION}" >> "${GITHUB_ENV}"
+          echo "Using ${DOC_ENGINE} host port offset ${PORT_OFFSET}"
+          ES_PORT=$((1200 + PORT_OFFSET))
+          OS_PORT=$((1201 + PORT_OFFSET))
+          INFINITY_THRIFT_PORT=$((23817 + PORT_OFFSET))
+          INFINITY_HTTP_PORT=$((23820 + PORT_OFFSET))
+          INFINITY_PSQL_PORT=$((5432 + PORT_OFFSET))
+          EXPOSE_MYSQL_PORT=$((5455 + PORT_OFFSET))
+          MINIO_PORT=$((9000 + PORT_OFFSET))
+          MINIO_CONSOLE_PORT=$((9001 + PORT_OFFSET))
+          REDIS_PORT=$((6379 + PORT_OFFSET))
+          NATS_PORT=$((4222 + PORT_OFFSET))
+          TEI_PORT=$((6380 + PORT_OFFSET))
+          KIBANA_PORT=$((6601 + PORT_OFFSET))
+          SVR_HTTP_PORT=$((9380 + PORT_OFFSET))
+          ADMIN_SVR_HTTP_PORT=$((9381 + PORT_OFFSET))
+          SVR_MCP_PORT=$((9382 + PORT_OFFSET))
+          GO_HTTP_PORT=$((9384 + PORT_OFFSET))
+          GO_ADMIN_PORT=$((9383 + PORT_OFFSET))
+          SANDBOX_EXECUTOR_MANAGER_PORT=$((9385 + PORT_OFFSET))
+          SVR_WEB_HTTP_PORT=$((80 + PORT_OFFSET))
+          SVR_WEB_HTTPS_PORT=$((443 + PORT_OFFSET))
+
+          # Persist computed ports into .env so docker-compose uses the correct host bindings.
+          # Remove previous CI overrides first; docker compose uses the last duplicate key.
+          sed -i '/^ES_PORT=/d;/^OS_PORT=/d;/^INFINITY_THRIFT_PORT=/d;/^INFINITY_HTTP_PORT=/d;/^INFINITY_PSQL_PORT=/d;/^EXPOSE_MYSQL_PORT=/d;/^MINIO_PORT=/d;/^MINIO_CONSOLE_PORT=/d;/^REDIS_PORT=/d;/^TEI_PORT=/d;/^KIBANA_PORT=/d;/^SVR_HTTP_PORT=/d;/^ADMIN_SVR_HTTP_PORT=/d;/^SVR_MCP_PORT=/d;/^GO_HTTP_PORT=/d;/^GO_ADMIN_PORT=/d;/^SANDBOX_EXECUTOR_MANAGER_PORT=/d;/^SVR_WEB_HTTP_PORT=/d;/^SVR_WEB_HTTPS_PORT=/d;/^NATS_PORT=/d;/^COMPOSE_PROFILES=/d;/^TEI_MODEL=/d;/^RAGFLOW_IMAGE=/d;/^DOC_ENGINE=/d' .env
+          {
+            # Leading empty line guards against a .env without a trailing newline.
+            echo ""
+            echo "ES_PORT=${ES_PORT}"
+            echo "OS_PORT=${OS_PORT}"
+            echo "INFINITY_THRIFT_PORT=${INFINITY_THRIFT_PORT}"
+            echo "INFINITY_HTTP_PORT=${INFINITY_HTTP_PORT}"
+            echo "INFINITY_PSQL_PORT=${INFINITY_PSQL_PORT}"
+            echo "EXPOSE_MYSQL_PORT=${EXPOSE_MYSQL_PORT}"
+            echo "MINIO_PORT=${MINIO_PORT}"
+            echo "MINIO_CONSOLE_PORT=${MINIO_CONSOLE_PORT}"
+            echo "REDIS_PORT=${REDIS_PORT}"
+            echo "NATS_PORT=${NATS_PORT}"
+            echo "TEI_PORT=${TEI_PORT}"
+            echo "KIBANA_PORT=${KIBANA_PORT}"
+            echo "SVR_HTTP_PORT=${SVR_HTTP_PORT}"
+            echo "ADMIN_SVR_HTTP_PORT=${ADMIN_SVR_HTTP_PORT}"
+            echo "SVR_MCP_PORT=${SVR_MCP_PORT}"
+            echo "GO_HTTP_PORT=${GO_HTTP_PORT}"
+            echo "GO_ADMIN_PORT=${GO_ADMIN_PORT}"
+            echo "SANDBOX_EXECUTOR_MANAGER_PORT=${SANDBOX_EXECUTOR_MANAGER_PORT}"
+            echo "SVR_WEB_HTTP_PORT=${SVR_WEB_HTTP_PORT}"
+            echo "SVR_WEB_HTTPS_PORT=${SVR_WEB_HTTPS_PORT}"
+            echo "COMPOSE_PROFILES=${DOC_ENGINE},cpu,tei-cpu,deepdoc"
+            echo "TEI_MODEL=BAAI/bge-small-en-v1.5"
+            echo "RAGFLOW_IMAGE=${RAGFLOW_IMAGE}"
+            echo "DOC_ENGINE=${DOC_ENGINE}"
+          } >> .env
+          echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> "${GITHUB_ENV}"
+
+          # Patch entrypoint.sh for coverage: replace the line that launches
+          # api/ragflow_server.py with one that installs coverage and runs the
+          # server under `coverage run`, writing data to /ragflow/logs/.coverage.
+          sed -i '/"\$PY" api\/ragflow_server.py \${INIT_SUPERUSER_ARGS} &/c\ echo "Ensuring coverage is installed..."\n "$PY" -m pip install coverage -i https://mirrors.aliyun.com/pypi/simple\n export COVERAGE_FILE=/ragflow/logs/.coverage\n echo "Starting ragflow_server with coverage..."\n "$PY" -m coverage run --source=./api/apps --omit="*/tests/*,*/migrations/*" -a api/ragflow_server.py ${INIT_SUPERUSER_ARGS} &' ./entrypoint.sh
+
+      # Remove any leftovers of this compose project, then start it fresh.
+      - name: Start ragflow:nightly for Elasticsearch
+        if: env.HAS_PYTHON == 'true'
+        run: |
+          sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} down -v || true
+          sudo docker ps -a --filter "label=com.docker.compose.project=${COMPOSE_PROJECT_NAME}" -q | xargs -r sudo docker rm -f
+          sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} up -d
+
+      # Wait for the server to answer /api/v1/system/ping, then run the SDK
+      # test suite with coverage of sdk/python/ragflow_sdk.
+      - name: Run sdk tests against Elasticsearch
+        if: env.HAS_PYTHON == 'true'
+        run: |
+          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
+          # Poll from inside the ragflow container: 60 attempts, 5 s apart.
+          svc_ready=0
+          for i in $(seq 1 60); do
+            if sudo docker exec ${RAGFLOW_CONTAINER} curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then
+              svc_ready=1
+              break
+            fi
+            echo "Waiting for service to be available... ($i/60)"
+            sleep 5
+          done
+          if [ "$svc_ready" -ne 1 ]; then
+            echo "Service did not become ready after 5 minutes. Docker logs:"
+            sudo docker logs ${RAGFLOW_CONTAINER}
+            exit 1
+          fi
+          echo "Start to run test sdk on Elasticsearch"
+          # pipefail keeps a pytest failure from being masked by `tee`.
+          source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} --junitxml=pytest-es-sdk.xml --cov=sdk/python/ragflow_sdk --cov-branch --cov-report=xml:coverage-es-sdk.xml test/testcases/test_sdk_api 2>&1 | tee es_sdk_test.log
+
+      # Wait for the server to answer /api/v1/system/ping, then run the
+      # RESTful API test suite.
+      - name: Run New RESTFUL api tests against Elasticsearch
+        if: env.HAS_PYTHON == 'true'
+        run: |
+          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
+          # Poll from inside the ragflow container: 60 attempts, 5 s apart.
+          svc_ready=0
+          for i in $(seq 1 60); do
+            if sudo docker exec ${RAGFLOW_CONTAINER} curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then
+              svc_ready=1
+              break
+            fi
+            echo "Waiting for service to be available... ($i/60)"
+            sleep 5
+          done
+          if [ "$svc_ready" -ne 1 ]; then
+            echo "Service did not become ready after 5 minutes. Docker logs:"
+            sudo docker logs ${RAGFLOW_CONTAINER}
+            exit 1
+          fi
+          # pipefail keeps a pytest failure from being masked by `tee`.
+          source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/restful_api 2>&1 | tee es_restful_api_test.log
+
+      # Drive the admin and user CLI end to end: create a user and a dataset,
+      # import and parse two PDFs, then run a search benchmark.
+      - name: RAGFlow CLI retrieval test Elasticsearch
+        if: env.HAS_PYTHON == 'true'
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+        run: |
+          set -euo pipefail
+          source .venv/bin/activate
+
+          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
+
+          EMAIL="ci-${GITHUB_RUN_ID}@example.com"
+          PASS="ci-pass-${GITHUB_RUN_ID}"
+          DATASET="ci_dataset_${GITHUB_RUN_ID}"
+
+          CLI="python admin/client/ragflow_cli.py"
+
+          LOG_FILE="es_cli_test.log"
+          : > "${LOG_FILE}"
+
+          # Output matching this (case-insensitive) ERE marks a CLI call as failed.
+          # The last alternative flags a non-zero "code:" value; a POSIX class is
+          # used because '\\s' inside single quotes is a literal backslash to grep.
+          ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:[[:space:]]*[1-9]'
+
+          # run_cli LOGFILE [--allow REGEX] CMD [ARGS...]
+          #   Runs CMD under a 500 s timeout, echoing its output and appending
+          #   it to LOGFILE.
+          #   Returns 1 when the output matches ERROR_RE, unless it also matches
+          #   the --allow REGEX, in which case it returns 0.
+          #   Otherwise returns CMD's own exit status.
+          #   It returns instead of exiting so that `if run_cli ...` retry loops
+          #   can poll; a bare call still aborts the script through `set -e`.
+          run_cli() {
+            local logfile="$1"
+            shift
+            local allow_re=""
+            if [[ "${1:-}" == "--allow" ]]; then
+              allow_re="$2"
+              shift 2
+            fi
+            local cmd_display="$*"
+            echo "===== $(date -u +"%Y-%m-%dT%H:%M:%SZ") CMD: ${cmd_display} =====" | tee -a "${logfile}"
+            local tmp_log
+            tmp_log="$(mktemp)"
+            # Disable errexit so the command's status can be captured.
+            set +e
+            timeout 500s "$@" 2>&1 | tee "${tmp_log}"
+            local status=${PIPESTATUS[0]}
+            set -e
+            cat "${tmp_log}" >> "${logfile}"
+            if grep -qiE "${ERROR_RE}" "${tmp_log}"; then
+              if [[ -n "${allow_re}" ]] && grep -qiE "${allow_re}" "${tmp_log}"; then
+                echo "Allowed CLI error markers in ${logfile}"
+                rm -f "${tmp_log}"
+                return 0
+              fi
+              echo "Detected CLI error markers in ${logfile}"
+              rm -f "${tmp_log}"
+              return 1
+            fi
+            rm -f "${tmp_log}"
+            return ${status}
+          }
+
+          # Export every variable defined in docker/.env (ports and so on).
+          set -a
+          source docker/.env
+          set +a
+
+          HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}"
+          # Strip scheme, port and path to get the bare host name.
+          USER_HOST="$(echo "${HOST_ADDRESS}" | sed -E 's#^https?://([^:/]+).*#\1#')"
+          USER_PORT="${SVR_HTTP_PORT}"
+          ADMIN_HOST="${USER_HOST}"
+          ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}"
+
+          # Poll from inside the ragflow container: 60 attempts, 5 s apart.
+          svc_ready=0
+          for i in $(seq 1 60); do
+            if sudo docker exec ${RAGFLOW_CONTAINER} curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then
+              svc_ready=1
+              break
+            fi
+            echo "Waiting for service to be available... ($i/60)"
+            sleep 5
+          done
+          if [ "$svc_ready" -ne 1 ]; then
+            echo "Service did not become ready after 5 minutes. Docker logs:"
+            sudo docker logs ${RAGFLOW_CONTAINER}
+            exit 1
+          fi
+
+          # Wait until the admin service answers a CLI ping (up to 30 tries).
+          admin_ready=0
+          for i in $(seq 1 30); do
+            if run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "ping"; then
+              admin_ready=1
+              break
+            fi
+            sleep 1
+          done
+          if [[ "${admin_ready}" -ne 1 ]]; then
+            echo "Admin service did not become ready"
+            exit 1
+          fi
+
+          run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "show version"
+          # Tolerate "user already exists" style errors from create user.
+          # NOTE(review): the trailing 'exist(s)?' alternative also matches
+          # messages such as "does not exist" - confirm this breadth is intended.
+          ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?'
+          run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "create user '$EMAIL' '$PASS'"
+
+          # Wait until the new user can ping the user service (up to 30 tries).
+          user_ready=0
+          for i in $(seq 1 30); do
+            if run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "ping"; then
+              user_ready=1
+              break
+            fi
+            sleep 1
+          done
+          if [[ "${user_ready}" -ne 1 ]]; then
+            echo "User service did not become ready"
+            exit 1
+          fi
+
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "show version"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "parse dataset '$DATASET' sync"
+          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'"
+
+      # Interrupt the server so coverage data is flushed, then stop the stack.
+      - name: Stop ragflow to save coverage Elasticsearch
+        if: ${{ !cancelled() && env.HAS_PYTHON == 'true' }}
+        run: |
+          # Send SIGINT to ragflow_server.py to trigger coverage save
+          PID=$(sudo docker exec ${RAGFLOW_CONTAINER} ps aux | grep "ragflow_server.py" | grep -v grep | awk '{print $2}' | head -n 1)
+          if [ -n "$PID" ]; then
+            echo "Sending SIGINT to ragflow_server.py (PID: $PID)..."
+            sudo docker exec ${RAGFLOW_CONTAINER} kill -INT $PID
+            # Wait for process to exit and coverage file to be written
+            sleep 10
+          else
+            echo "ragflow_server.py not found!"
+          fi
+          sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} stop
+
+      # Convert the server's .coverage data file into an XML report.
+      - name: Generate server coverage report Elasticsearch
+        if: ${{ !cancelled() && env.HAS_PYTHON == 'true' }}
+        run: |
+          # .coverage file should be in docker/ragflow-logs/.coverage
+          if [ -f docker/ragflow-logs/.coverage ]; then
+            echo "Found .coverage file"
+            cp docker/ragflow-logs/.coverage .coverage
+            source .venv/bin/activate
+            # Create .coveragerc to map container paths to host paths
+            echo "[paths]" > .coveragerc
+            echo "source =" >> .coveragerc
+            echo " ." >> .coveragerc
+            echo " /ragflow" >> .coveragerc
+            coverage xml -o coverage-es-server.xml
+            rm .coveragerc
+            # Clean up for next run
+            sudo rm docker/ragflow-logs/.coverage
+          else
+            echo ".coverage file not found!"
+          fi
+
+      # Copy the server logs into the artifacts directory and print the tail.
+      - name: Collect ragflow log Elasticsearch
+        if: ${{ !cancelled() && env.HAS_PYTHON == 'true' }}
+        run: |
+          if [ -d docker/ragflow-logs ]; then
+            cp -r docker/ragflow-logs ${ARTIFACTS_DIR}/ragflow-logs-es
+            echo "ragflow log" && tail -n 200 docker/ragflow-logs/ragflow_server.log || true
+          else
+            echo "No docker/ragflow-logs directory found; skipping log collection"
+          fi
+          sudo rm -rf docker/ragflow-logs || true
+
+      # Tear down the compose project, remove the test image and release the
+      # host port reservation.
+      - name: Stop ragflow:nightly for Elasticsearch
+        if: ${{ always() && env.HAS_PYTHON == 'true' }}
+        run: |
+          # Release the port reservation from an EXIT trap so the lock file is
+          # removed even when one of the docker commands below fails.
+          release_port_reservation() {
+            if [[ -n ${PORT_RESERVATION:-} ]]; then
+              rm -f "${PORT_RESERVATION}"
+            fi
+          }
+          trap release_port_reservation EXIT
+          # Sometimes `docker compose down` fails because of a hung container,
+          # heavy load, etc. Such containers must be removed explicitly to
+          # release their resources (for example, listening ports).
+          sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} down -v || true
+          sudo docker ps -a --filter "label=com.docker.compose.project=${COMPOSE_PROJECT_NAME}" -q | xargs -r sudo docker rm -f
+          if [[ -n ${RAGFLOW_IMAGE} ]]; then
+            sudo docker rmi -f ${RAGFLOW_IMAGE}
+          fi