name: tests
permissions:
  contents: read

on:
  push:
    branches:
      - 'main'
      - '*.*.*'
    paths-ignore:
      - 'docs/**'
      - '*.md'
      - '*.mdx'
  # The only difference between pull_request and pull_request_target is the context in which the workflow runs:
  # — pull_request_target workflows use the workflow files from the default branch, and secrets are available.
  # — pull_request workflows use the workflow files from the pull request branch, and secrets are unavailable.
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review, labeled]
    paths-ignore:
      - 'docs/**'
      - '*.md'
      - '*.mdx'
  schedule:
    - cron: '0 16 * * *'  # Runs daily at 16:00:00Z (00:00:00+08:00)

# One run per PR (or per ref for non-PR events); a newer run cancels the older one.
# https://docs.github.com/en/actions/using-jobs/using-concurrency
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  # Static checks and unit tests. Also decides the API test level that the
  # engine-specific jobs read through `outputs.http_api_test_level`.
  ragflow_preflight:
    name: ragflow_preflight
    # Non-PR events always run. PR events run only for non-draft PRs labeled 'ci';
    # a 'labeled' event counts only when the label being added is 'ci' itself.
    # https://docs.github.com/en/actions/using-jobs/using-conditions-to-control-job-execution
    # https://github.com/orgs/community/discussions/26261
    if: ${{ github.event_name != 'pull_request' || (github.event.pull_request.draft == false && contains(github.event.pull_request.labels.*.name, 'ci') && (github.event.action != 'labeled' || github.event.label.name == 'ci')) }}
    runs-on: [ "self-hosted", "ragflow-test" ]
    outputs:
      http_api_test_level: ${{ steps.test_level.outputs.http_api_test_level }}
    steps:
      - name: Ensure workspace ownership
        run: |
          echo "Workflow triggered by ${GITHUB_EVENT_NAME}"
          echo "chown -R ${USER} ${GITHUB_WORKSPACE}" && sudo chown -R "${USER}" "${GITHUB_WORKSPACE}"

      # https://github.com/actions/checkout/issues/1781
      - name: Check out code
        uses: actions/checkout@v6
        with:
          # PR events test the merge result; everything else tests the triggering commit.
          ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.sha }}
          fetch-depth: 0
          fetch-tags: true

      - name: Check workflow duplication
        if: ${{ !cancelled() && !failure() }}
        env:
          # The gh CLI reads GH_TOKEN directly, so the token is neither spliced into
          # the script text nor written to gh's config on the self-hosted runner.
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          EVENT_PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          # Same fallback as the test jobs, so every job resolves the same artifacts root.
          RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-${HOME}}
          ARTIFACTS_ROOT=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}

          if [[ ${GITHUB_EVENT_NAME} != "pull_request" && ${GITHUB_EVENT_NAME} != "schedule" ]]; then
            HEAD=$(git rev-parse HEAD)
            # Find a PR that introduced a given commit
            PR_NUMBER=$(gh pr list --search "${HEAD}" --state merged --json number --jq '.[0].number // empty')
            echo "HEAD=${HEAD}"
            echo "PR_NUMBER=${PR_NUMBER}"
            if [[ -n "${PR_NUMBER}" ]]; then
              PR_SHA_FP=${ARTIFACTS_ROOT}/PR_${PR_NUMBER}
              if [[ -f "${PR_SHA_FP}" ]]; then
                # The file holds "<tree hash> <run id>", written by the PR branch below.
                read -r PR_SHA PR_RUN_ID < "${PR_SHA_FP}"
                # Calculate the hash of the current workspace content
                HEAD_SHA=$(git rev-parse 'HEAD^{tree}')
                if [[ "${HEAD_SHA}" == "${PR_SHA}" ]]; then
                  echo "Cancel myself since the workspace content hash is the same with PR #${PR_NUMBER} merged. See ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${PR_RUN_ID} for details."
                  # NOTE(review): cancelling a run normally needs `actions: write`, while this
                  # workflow grants only `contents: read` — confirm the token can do this.
                  gh run cancel "${GITHUB_RUN_ID}"
                  # Block until the cancellation lands so no later step starts.
                  while true; do
                    status=$(gh run view "${GITHUB_RUN_ID}" --json status -q .status)
                    [ "${status}" = "completed" ] && break
                    sleep 5
                  done
                  exit 1
                fi
              fi
            fi
          elif [[ ${GITHUB_EVENT_NAME} == "pull_request" ]]; then
            PR_NUMBER=${EVENT_PR_NUMBER}
            PR_SHA_FP=${ARTIFACTS_ROOT}/PR_${PR_NUMBER}
            # Calculate the hash of the current workspace content
            PR_SHA=$(git rev-parse 'HEAD^{tree}')
            echo "PR #${PR_NUMBER} workspace content hash: ${PR_SHA}"
            mkdir -p "${ARTIFACTS_ROOT}"
            echo "${PR_SHA} ${GITHUB_RUN_ID}" > "${PR_SHA_FP}"
          fi
          ARTIFACTS_DIR=${ARTIFACTS_ROOT}/${GITHUB_RUN_ID}
          echo "ARTIFACTS_DIR=${ARTIFACTS_DIR}" >> "${GITHUB_ENV}"
          rm -rf "${ARTIFACTS_DIR}" && mkdir -p "${ARTIFACTS_DIR}"

      # https://github.com/astral-sh/ruff-action
      - name: Static check with Ruff
        uses: astral-sh/ruff-action@v3
        with:
          version: ">=0.11.x"
          args: "check"

      # Disabled step, kept for reference.
      # - name: Check comments of changed Python files
      #   if: ${{ false }}
      #   run: |
      #     if [[ ${{ github.event_name }} == 'pull_request' || ${{ github.event_name }} == 'pull_request_target' ]]; then
      #       CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }} \
      #         | grep -E '\.(py)$' || true)
      #
      #       if [ -n "$CHANGED_FILES" ]; then
      #         echo "Check comments of changed Python files with check_comment_ascii.py"
      #
      #         readarray -t files <<< "$CHANGED_FILES"
      #         HAS_ERROR=0
      #
      #         for file in "${files[@]}"; do
      #           if [ -f "$file" ]; then
      #             if python3 check_comment_ascii.py "$file"; then
      #               echo "✅ $file"
      #             else
      #               echo "❌ $file"
      #               HAS_ERROR=1
      #             fi
      #           fi
      #         done
      #
      #         if [ $HAS_ERROR -ne 0 ]; then
      #           exit 1
      #         fi
      #       else
      #         echo "No Python files changed"
      #       fi
      #     fi

      - name: Check format of changed Go files
        if: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
        env:
          # Passed through the environment instead of being interpolated into the script.
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          # `|| true`: grep exits 1 when no Go file changed, which is not an error here.
          CHANGED_FILES=$(git diff --name-only "${BASE_SHA}...${HEAD_SHA}" \
            | grep -E '\.go$' || true)

          if [ -n "$CHANGED_FILES" ]; then
            echo "Check gofmt of changed Go files"

            readarray -t files <<< "$CHANGED_FILES"
            HAS_ERROR=0

            for file in "${files[@]}"; do
              # Files deleted by the PR still appear in the diff; skip them.
              if [ -f "$file" ]; then
                # gofmt -l prints the file name only when it needs reformatting.
                if [ -z "$(gofmt -l "$file")" ]; then
                  echo "✅ $file"
                else
                  echo "❌ $file (run: gofmt -w \"$file\")"
                  HAS_ERROR=1
                fi
              fi
            done

            if [ "$HAS_ERROR" -ne 0 ]; then
              exit 1
            fi
          else
            echo "No Go files changed"
          fi

      - name: Set test level
        id: test_level
        run: |
          set -euo pipefail
          # Scheduled runs use level p3; every other trigger uses p2.
          if [[ ${GITHUB_EVENT_NAME} == "schedule" ]]; then
            export HTTP_API_TEST_LEVEL=p3
          else
            export HTTP_API_TEST_LEVEL=p2
          fi
          echo "HTTP_API_TEST_LEVEL=${HTTP_API_TEST_LEVEL}" >> "${GITHUB_ENV}"
          echo "http_api_test_level=${HTTP_API_TEST_LEVEL}" >> "${GITHUB_OUTPUT}"

      - name: Prepare Python test environment
        run: |
          uv sync --python 3.13 --group test --frozen
          uv pip install -e sdk/python

      - name: Run unit test
        run: |
          source .venv/bin/activate
          which pytest || echo "pytest not in PATH"
          echo "Start to run unit test"
          python3 run_tests.py -i

  # Builds the image and runs the API/SDK/CLI tests with Infinity as the document engine.
  ragflow_tests_infinity:
    name: ragflow_tests_infinity
    needs: ragflow_preflight
    if: ${{ github.event_name != 'pull_request' || (github.event.pull_request.draft == false && contains(github.event.pull_request.labels.*.name, 'ci') && (github.event.action != 'labeled' || github.event.label.name == 'ci')) }}
    runs-on: [ "self-hosted", "ragflow-test" ]
    env:
      DOC_ENGINE: infinity
      # Tagged with the run id so concurrent runs never share an image.
      RAGFLOW_IMAGE: infiniflow/ragflow:${{ github.run_id }}-infinity
      HTTP_API_TEST_LEVEL: ${{ needs.ragflow_preflight.outputs.http_api_test_level }}
    steps:
      - name: Ensure workspace ownership
        run: |
          echo "Workflow triggered by ${GITHUB_EVENT_NAME}"
          echo "chown -R ${USER} ${GITHUB_WORKSPACE}" && sudo chown -R "${USER}" "${GITHUB_WORKSPACE}"

      - name: Check out code
        uses: actions/checkout@v6
        with:
          ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.sha }}
          fetch-depth: 0
          fetch-tags: true

      - name: Build ragflow go server
        run: |
          set -euo pipefail
          # Random suffix keeps the name unique even if a run id/engine pair is reused.
          BUILDER_CONTAINER=ragflow_build_${GITHUB_RUN_ID}_${DOC_ENGINE}_$(od -An -N4 -tx4 /dev/urandom | tr -d ' ')
          cleanup_builder() {
            if [[ -n "${BUILDER_CONTAINER:-}" ]]; then
              sudo docker rm -f -v "${BUILDER_CONTAINER}" >/dev/null 2>&1 || true
            fi
          }
          # Remove the builder container however the step ends.
          trap cleanup_builder EXIT
          # Default to the runner's own time zone, derived from /etc/localtime.
          TZ=${TZ:-$(readlink -f /etc/localtime | awk -F '/zoneinfo/' '{print $2}')}
          sudo docker run --privileged -d --name "${BUILDER_CONTAINER}" \
            -e TZ="${TZ}" \
            -e UV_INDEX=https://mirrors.aliyun.com/pypi/simple \
            -v "${PWD}:/ragflow" \
            -v "${PWD}/internal/cpp/resource:/usr/share/infinity/resource" \
            infiniflow/infinity_builder:ubuntu22_clang20
          # C++ part is built inside the builder container, Go part on the runner.
          sudo docker exec "${BUILDER_CONTAINER}" bash -c 'git config --global safe.directory "*" && cd /ragflow && ./build.sh --cpp'
          ./build.sh --go

      - name: Run Go unit tests
        # Runs after `./build.sh --go`, which guarantees the C++ static
        # library (librag_tokenizer_c_api.a) is present on disk. The Go
        # test binaries link against it transitively through
        # `internal/binding`, so running `go test` before the C++ build
        # fails the link step.
        #
        # Excludes packages whose tests fail for environmental reasons
        # unrelated to the diff:
        #   - internal/storage: TestMinioStorage_* needs a MinIO server
        #     at localhost:9000; not started by this job.
        #   - internal/tokenizer: tests need /usr/share/infinity/resource
        #     dict files, only mounted inside the docker builder, not
        #     in the Go test environment.
        #   - internal/handler: TestListAgentVersionsHandler_Success and
        #     sqlite setup (e.g. "no such table: user_tenant") are
        #     pre-existing flakes unrelated to the diff.
        #   - internal/deepdoc/parser/pdf and everything below it (this
        #     already covers pdfium and pdfoxide).
        #     NOTE(review): the reason for this exclusion is not recorded — confirm.
        run: |
          set -euo pipefail
          # One filter instead of a grep chain; the first three names are anchored
          # to the end of the import path, the pdf prefix is deliberately not.
          mapfile -t PKGS < <(go list ./... 2>/dev/null \
            | grep -vE '/internal/(storage|tokenizer|handler)$|/internal/deepdoc/parser/pdf' || true)
          if [ "${#PKGS[@]}" -eq 0 ]; then
            ./build.sh --test
          else
            ./build.sh --test -- "${PKGS[@]}"
          fi

      - name: Build ragflow:nightly
        run: |
          set -euo pipefail
          sudo docker pull ubuntu:24.04
          # `${VAR:-}` keeps `set -u` from aborting the step on a runner without proxy settings.
          sudo DOCKER_BUILDKIT=1 docker build \
            --build-arg NEED_MIRROR=1 \
            --build-arg HTTPS_PROXY="${HTTPS_PROXY:-}" \
            --build-arg HTTP_PROXY="${HTTP_PROXY:-}" \
            -f Dockerfile -t "${RAGFLOW_IMAGE}" .
# ragflow_tests_infinity steps: test environment, service start-up, SDK/REST/CLI tests.
      - name: Prepare Python test environment
        run: |
          uv sync --python 3.13 --group test --frozen
          uv pip install -e sdk/python

      - name: Prepare function test environment
        working-directory: docker
        run: |
          set -euo pipefail
          # `ss` (from iproute2) is used below to probe listening ports.
          if ! command -v ss >/dev/null 2>&1; then
            sudo apt-get update && sudo apt-get install -y iproute2
          fi

          RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-${HOME}}
          COMPOSE_PROJECT_NAME="${GITHUB_RUN_ID}-${DOC_ENGINE}"
          ARTIFACTS_DIR=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/${GITHUB_RUN_ID}/${DOC_ENGINE}
          # Exported for the later steps of this job.
          {
            echo "COMPOSE_PROJECT_NAME=${COMPOSE_PROJECT_NAME}"
            echo "RAGFLOW_CONTAINER=${COMPOSE_PROJECT_NAME}-ragflow-cpu-1"
            echo "ARTIFACTS_DIR=${ARTIFACTS_DIR}"
          } >> "${GITHUB_ENV}"
          rm -rf "${ARTIFACTS_DIR}" && mkdir -p "${ARTIFACTS_DIR}"

          # Determine runner number (default to 1 if not found)
          RUNNER_NUM=$(sudo docker inspect "$(hostname)" --format '{{index .Config.Labels "com.docker.compose.container-number"}}' 2>/dev/null || true)
          # Anything that is not a plain number would break the arithmetic below.
          [[ "${RUNNER_NUM}" =~ ^[0-9]+$ ]] || RUNNER_NUM=1

          # Single table of every host port the compose stack publishes, as
          # "<.env variable>=<default port>". The port probe, the computed
          # ports and the .env rewrite are all derived from this list.
          PORT_VARS=(
            ES_PORT=1200
            OS_PORT=1201
            INFINITY_THRIFT_PORT=23817
            INFINITY_HTTP_PORT=23820
            INFINITY_PSQL_PORT=5432
            EXPOSE_MYSQL_PORT=5455
            MINIO_PORT=9000
            MINIO_CONSOLE_PORT=9001
            REDIS_PORT=6379
            NATS_PORT=4222
            TEI_PORT=6380
            KIBANA_PORT=6601
            SVR_HTTP_PORT=9380
            ADMIN_SVR_HTTP_PORT=9381
            SVR_MCP_PORT=9382
            GO_HTTP_PORT=9384
            GO_ADMIN_PORT=9383
            SANDBOX_EXECUTOR_MANAGER_PORT=9385
            SVR_WEB_HTTP_PORT=80
            SVR_WEB_HTTPS_PORT=443
          )
          # Strip the "NAME=" prefix from every entry, leaving the bare default ports.
          PORT_BASES=("${PORT_VARS[@]#*=}")

          # Engine-specific offset partitions keep concurrent engine jobs from
          # choosing the same host ports when they land on the same self-hosted runner.
          # A lock plus reservation file closes the check/start race between parallel jobs.
          PARTITION_SIZE=6000
          case "${DOC_ENGINE}" in
            elasticsearch) PARTITION_BASE=1000 ;;
            infinity) PARTITION_BASE=31000 ;;
            *) echo "Unsupported DOC_ENGINE=${DOC_ENGINE}" >&2; exit 1 ;;
          esac
          PORT_LOCK_DIR=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/port-locks
          mkdir -p "${PORT_LOCK_DIR}"

          # Succeeds when none of the default ports, shifted by offset $1, has a TCP listener.
          port_offset_available() {
            local offset=$1
            local base port
            for base in "${PORT_BASES[@]}"; do
              port=$((base + offset))
              if ss -ltnH "sport = :${port}" | grep -q .; then
                return 1
              fi
            done
            return 0
          }

          # Deletes reservation files whose timestamp (third field) is older than six hours.
          cleanup_stale_port_locks() {
            local now stale_after lock lock_ts
            now=$(date -u +%s)
            stale_after=$((6 * 60 * 60))
            for lock in "${PORT_LOCK_DIR}"/*.lock; do
              # An unmatched glob stays literal; skip it.
              [[ -e "${lock}" ]] || continue
              lock_ts=$(awk '{print $3}' "${lock}" 2>/dev/null || true)
              if [[ "${lock_ts}" =~ ^[0-9]+$ ]] && (( now - lock_ts > stale_after )); then
                rm -f "${lock}"
              fi
            done
          }

          # Tries up to 60 candidate offsets inside this engine's partition. On success
          # sets PORT_OFFSET and PORT_RESERVATION (the lock file path) and returns 0.
          reserve_port_offset() {
            local attempt candidate reservation
            cleanup_stale_port_locks
            for attempt in $(seq 0 59); do
              candidate=$(( PARTITION_BASE + ((GITHUB_RUN_ID + RUNNER_NUM * 1000 + attempt * 97) % PARTITION_SIZE) ))
              reservation="${PORT_LOCK_DIR}/${candidate}.lock"
              # noclobber makes the redirect fail if the lock file already exists.
              if ( set -o noclobber; echo "${GITHUB_RUN_ID} ${DOC_ENGINE} $(date -u +%s)" > "${reservation}" ) 2>/dev/null; then
                if port_offset_available "${candidate}"; then
                  PORT_OFFSET=${candidate}
                  PORT_RESERVATION=${reservation}
                  return 0
                fi
                # Ports are busy: give the reservation back and try the next candidate.
                rm -f "${reservation}"
              fi
            done
            return 1
          }

          if ! reserve_port_offset; then
            echo "Failed to reserve a free host port range for ${DOC_ENGINE} docker compose" >&2
            exit 1
          fi
          echo "PORT_RESERVATION=${PORT_RESERVATION}" >> "${GITHUB_ENV}"
          echo "Using ${DOC_ENGINE} host port offset ${PORT_OFFSET}"

          # Shift every default port by the reserved offset.
          ENV_OVERRIDES=()
          for entry in "${PORT_VARS[@]}"; do
            name=${entry%%=*}
            port=$(( ${entry#*=} + PORT_OFFSET ))
            ENV_OVERRIDES+=("${name}=${port}")
            if [[ "${name}" == "SVR_HTTP_PORT" ]]; then
              # Needed below for HOST_ADDRESS.
              SVR_HTTP_PORT=${port}
            fi
          done
          ENV_OVERRIDES+=(
            "COMPOSE_PROFILES=${DOC_ENGINE},cpu,tei-cpu,deepdoc"
            "TEI_MODEL=BAAI/bge-small-en-v1.5"
            "RAGFLOW_IMAGE=${RAGFLOW_IMAGE}"
            "DOC_ENGINE=${DOC_ENGINE}"
          )

          # Persist computed ports into .env so docker-compose uses the correct host bindings.
          # Remove previous CI overrides first; docker compose uses the last duplicate key.
          SED_DELETE=""
          for entry in "${ENV_OVERRIDES[@]}"; do
            SED_DELETE+="/^${entry%%=*}=/d;"
          done
          sed -i "${SED_DELETE%;}" .env
          {
            # Blank line first, so the overrides never join an unterminated last line.
            echo ""
            printf '%s\n' "${ENV_OVERRIDES[@]}"
          } >> .env
          echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> "${GITHUB_ENV}"

          # Patch entrypoint.sh for coverage
          sed -i '/"\$PY" api\/ragflow_server.py \${INIT_SUPERUSER_ARGS} &/c\ echo "Ensuring coverage is installed..."\n "$PY" -m pip install coverage -i https://mirrors.aliyun.com/pypi/simple\n export COVERAGE_FILE=/ragflow/logs/.coverage\n echo "Starting ragflow_server with coverage..."\n "$PY" -m coverage run --source=./api/apps --omit="*/tests/*,*/migrations/*" -a api/ragflow_server.py ${INIT_SUPERUSER_ARGS} &' ./entrypoint.sh

      - name: Start ragflow:nightly for Infinity
        run: |
          # Clear leftovers from an earlier attempt with the same project name.
          sudo docker compose -f docker/docker-compose.yml -p "${COMPOSE_PROJECT_NAME}" down -v || true
          sudo docker ps -a --filter "label=com.docker.compose.project=${COMPOSE_PROJECT_NAME}" -q | xargs -r sudo docker rm -f
          sudo docker compose -f docker/docker-compose.yml -p "${COMPOSE_PROJECT_NAME}" up -d

      - name: Run sdk tests against Infinity
        run: |
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          # Poll the ping endpoint from inside the container: 60 tries, 5 s apart.
          svc_ready=0
          for i in $(seq 1 60); do
            if sudo docker exec "${RAGFLOW_CONTAINER}" curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then
              svc_ready=1
              break
            fi
            echo "Waiting for service to be available... ($i/60)"
            sleep 5
          done
          if [ "$svc_ready" -ne 1 ]; then
            echo "Service did not become ready after 5 minutes. Docker logs:"
            sudo docker logs "${RAGFLOW_CONTAINER}"
            exit 1
          fi
          echo "Start to run test sdk on Infinity"
          source .venv/bin/activate
          # pipefail: report pytest's exit status, not tee's.
          set -o pipefail
          DOC_ENGINE=infinity pytest -s --tb=short --level="${HTTP_API_TEST_LEVEL}" --junitxml=pytest-infinity-sdk.xml --cov=sdk/python/ragflow_sdk --cov-branch --cov-report=xml:coverage-infinity-sdk.xml test/testcases/test_sdk_api 2>&1 | tee infinity_sdk_test.log

      - name: Run New RESTFUL api tests against Infinity
        run: |
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          # Poll the ping endpoint from inside the container: 60 tries, 5 s apart.
          svc_ready=0
          for i in $(seq 1 60); do
            if sudo docker exec "${RAGFLOW_CONTAINER}" curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then
              svc_ready=1
              break
            fi
            echo "Waiting for service to be available... ($i/60)"
            sleep 5
          done
          if [ "$svc_ready" -ne 1 ]; then
            echo "Service did not become ready after 5 minutes. Docker logs:"
            sudo docker logs "${RAGFLOW_CONTAINER}"
            exit 1
          fi
          source .venv/bin/activate
          # pipefail: report pytest's exit status, not tee's.
          set -o pipefail
          DOC_ENGINE=infinity pytest -s --tb=short --level="${HTTP_API_TEST_LEVEL}" test/testcases/restful_api 2>&1 | tee infinity_restful_api_test.log

      - name: RAGFlow CLI retrieval test Infinity
        env:
          PYTHONPATH: ${{ github.workspace }}
        run: |
          set -euo pipefail
          source .venv/bin/activate
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""

          EMAIL="ci-${GITHUB_RUN_ID}@example.com"
          PASS="ci-pass-${GITHUB_RUN_ID}"
          DATASET="ci_dataset_${GITHUB_RUN_ID}"

          CLI="python admin/client/ragflow_cli.py"

          LOG_FILE="infinity_cli_test.log"
          : > "${LOG_FILE}"

          # Output markers treated as failure. The last alternative uses a POSIX class:
          # inside single quotes the former `\\s` reached grep -E as an escaped backslash
          # followed by `s*`, so a non-zero "code: N" was never detected.
          ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:[[:space:]]*[1-9]'

          # run_cli LOGFILE [--allow REGEX] COMMAND...
          # Runs COMMAND under a 500 s timeout, appending its output to LOGFILE.
          # Returns 1 when the output matches ERROR_RE, unless it also matches the
          # --allow regex (then 0); otherwise returns COMMAND's own exit status.
          # Failing with `return` rather than `exit` lets the `if run_cli …` readiness
          # loops retry; unguarded calls still abort the step through `set -e`.
          run_cli() {
            local logfile="$1"
            shift
            local allow_re=""
            if [[ "${1:-}" == "--allow" ]]; then
              allow_re="$2"
              shift 2
            fi
            local cmd_display="$*"
            echo "===== $(date -u +%Y-%m-%dT%H:%M:%SZ) CMD: ${cmd_display} =====" | tee -a "${logfile}"
            local tmp_log
            tmp_log="$(mktemp)"
            # errexit is suspended so a failing command can still be inspected.
            set +e
            timeout 500s "$@" 2>&1 | tee "${tmp_log}"
            local status=${PIPESTATUS[0]}
            set -e
            cat "${tmp_log}" >> "${logfile}"
            if grep -qiE "${ERROR_RE}" "${tmp_log}"; then
              if [[ -n "${allow_re}" ]] && grep -qiE "${allow_re}" "${tmp_log}"; then
                echo "Allowed CLI error markers in ${logfile}"
                rm -f "${tmp_log}"
                return 0
              fi
              echo "Detected CLI error markers in ${logfile}"
              rm -f "${tmp_log}"
              return 1
            fi
            rm -f "${tmp_log}"
            return "${status}"
          }

          # Export everything defined in docker/.env (ports written by the setup step).
          set -a
          source docker/.env
          set +a

          HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}"
          # Host name only: scheme, port and path stripped.
          USER_HOST="$(echo "${HOST_ADDRESS}" | sed -E 's#^https?://([^:/]+).*#\1#')"
          USER_PORT="${SVR_HTTP_PORT}"
          ADMIN_HOST="${USER_HOST}"
          ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}"

          # Poll the ping endpoint from inside the container: 60 tries, 5 s apart.
          svc_ready=0
          for i in $(seq 1 60); do
            if sudo docker exec "${RAGFLOW_CONTAINER}" curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then
              svc_ready=1
              break
            fi
            echo "Waiting for service to be available... ($i/60)"
            sleep 5
          done
          if [ "$svc_ready" -ne 1 ]; then
            echo "Service did not become ready after 5 minutes. Docker logs:"
            sudo docker logs "${RAGFLOW_CONTAINER}"
            exit 1
          fi

          # $CLI is left unquoted on purpose: it expands to "python <script>".
          admin_ready=0
          for i in $(seq 1 30); do
            if run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "ping"; then
              admin_ready=1
              break
            fi
            sleep 1
          done
          if [[ "${admin_ready}" -ne 1 ]]; then
            echo "Admin service did not become ready"
            exit 1
          fi

          run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "show version"
          # Error markers are tolerated here when the output says the user already exists.
          ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?'
          run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "create user '$EMAIL' '$PASS'"

          user_ready=0
          for i in $(seq 1 30); do
            if run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "ping"; then
              user_ready=1
              break
            fi
            sleep 1
          done
          if [[ "${user_ready}" -ne 1 ]]; then
            echo "User service did not become ready"
            exit 1
          fi

          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "show version"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "parse dataset '$DATASET' sync"
run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'" - name: Stop ragflow to save coverage Infinity if: ${{ !cancelled() }} run: | # Send SIGINT to ragflow_server.py to trigger coverage save PID=$(sudo docker exec ${RAGFLOW_CONTAINER} ps aux | grep "ragflow_server.py" | grep -v grep | awk '{print $2}' | head -n 1) if [ -n "$PID" ]; then echo "Sending SIGINT to ragflow_server.py (PID: $PID)..." sudo docker exec ${RAGFLOW_CONTAINER} kill -INT $PID # Wait for process to exit and coverage file to be written sleep 10 else echo "ragflow_server.py not found!" fi sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} stop - name: Generate server coverage report Infinity if: ${{ !cancelled() }} run: | # .coverage file should be in docker/ragflow-logs/.coverage if [ -f docker/ragflow-logs/.coverage ]; then echo "Found .coverage file" cp docker/ragflow-logs/.coverage .coverage source .venv/bin/activate # Create .coveragerc to map container paths to host paths echo "[paths]" > .coveragerc echo "source =" >> .coveragerc echo " ." >> .coveragerc echo " /ragflow" >> .coveragerc coverage xml -o coverage-infinity-server.xml rm .coveragerc else echo ".coverage file not found!" 
fi - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v5 if: ${{ !cancelled() }} with: token: ${{ secrets.CODECOV_TOKEN }} fail_ci_if_error: false - name: Collect ragflow log Infinity if: ${{ !cancelled() }} run: | if [ -d docker/ragflow-logs ]; then cp -r docker/ragflow-logs ${ARTIFACTS_DIR}/ragflow-logs-infinity echo "ragflow log" && tail -n 200 docker/ragflow-logs/ragflow_server.log || true else echo "No docker/ragflow-logs directory found; skipping log collection" fi sudo rm -rf docker/ragflow-logs || true - name: Stop ragflow:nightly for Infinity if: always() # always run this step even if previous steps failed run: | # Sometimes `docker compose down` fail due to hang container, heavy load etc. Need to remove such containers to release resources(for example, listen ports). sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} down -v || true sudo docker ps -a --filter "label=com.docker.compose.project=${COMPOSE_PROJECT_NAME}" -q | xargs -r sudo docker rm -f if [[ -n ${RAGFLOW_IMAGE} ]]; then sudo docker rmi -f ${RAGFLOW_IMAGE} fi if [[ -n ${PORT_RESERVATION:-} ]]; then rm -f "${PORT_RESERVATION}" fi ragflow_tests_elasticsearch: name: ragflow_tests_elasticsearch needs: ragflow_preflight if: ${{ github.event_name != 'pull_request' || (github.event.pull_request.draft == false && contains(github.event.pull_request.labels.*.name, 'ci') && (github.event.action != 'labeled' || github.event.label.name == 'ci')) }} runs-on: [ "self-hosted", "ragflow-test" ] env: DOC_ENGINE: elasticsearch RAGFLOW_IMAGE: infiniflow/ragflow:${{ github.run_id }}-elasticsearch HTTP_API_TEST_LEVEL: ${{ needs.ragflow_preflight.outputs.http_api_test_level }} steps: - name: Ensure workspace ownership run: | echo "Workflow triggered by ${{ github.event_name }}" echo "chown -R ${USER} ${GITHUB_WORKSPACE}" && sudo chown -R ${USER} ${GITHUB_WORKSPACE} - name: Check out code uses: actions/checkout@v6 with: ref: ${{ (github.event_name == 'pull_request' || 
github.event_name == 'pull_request_target') && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.sha }} fetch-depth: 0 fetch-tags: true - name: Build ragflow go server run: | set -euo pipefail BUILDER_CONTAINER=ragflow_build_${GITHUB_RUN_ID}_${DOC_ENGINE}_$(od -An -N4 -tx4 /dev/urandom | tr -d ' ') cleanup_builder() { if [[ -n "${BUILDER_CONTAINER:-}" ]]; then sudo docker rm -f -v "${BUILDER_CONTAINER}" >/dev/null 2>&1 || true fi } trap cleanup_builder EXIT TZ=${TZ:-$(readlink -f /etc/localtime | awk -F '/zoneinfo/' '{print $2}')} sudo docker run --privileged -d --name "${BUILDER_CONTAINER}" \ -e TZ="${TZ}" \ -e UV_INDEX=https://mirrors.aliyun.com/pypi/simple \ -v "${PWD}:/ragflow" \ -v "${PWD}/internal/cpp/resource:/usr/share/infinity/resource" \ infiniflow/infinity_builder:ubuntu22_clang20 sudo docker exec "${BUILDER_CONTAINER}" bash -c 'git config --global safe.directory "*" && cd /ragflow && ./build.sh --cpp' ./build.sh --go - name: Run Go unit tests # Runs after `./build.sh --go`, which guarantees the C++ static # library (librag_tokenizer_c_api.a) is present on disk. The Go # test binaries link against it transitively through # `internal/binding`, so running `go test` before the C++ build # fails the link step. # # Excludes packages whose tests fail for environmental reasons # unrelated to the diff: # - internal/storage: TestMinioStorage_* needs a MinIO server # at localhost:9000; not started by this job. # - internal/tokenizer: tests need /usr/share/infinity/resource # dict files, only mounted inside the docker builder, not # in the Go test environment. # - internal/handler: TestListAgentVersionsHandler_Success and # sqlite setup (e.g. "no such table: user_tenant") are # pre-existing flakes unrelated to the diff. run: | set -euo pipefail PKGS=$(go list ./... 
2>/dev/null \ | grep -v '/internal/storage$' \ | grep -v '/internal/tokenizer$' \ | grep -v '/internal/handler$' \ | grep -v '/internal/deepdoc/parser/pdf/pdfium' \ | grep -v '/internal/deepdoc/parser/pdf/pdfoxide' \ | grep -v '/internal/deepdoc/parser/pdf' || true) if [ -z "$PKGS" ]; then ./build.sh --test else ./build.sh --test -- $PKGS fi - name: Build ragflow:nightly run: | set -euo pipefail sudo docker pull ubuntu:24.04 sudo DOCKER_BUILDKIT=1 docker build --build-arg NEED_MIRROR=1 --build-arg HTTPS_PROXY=${HTTPS_PROXY} --build-arg HTTP_PROXY=${HTTP_PROXY} -f Dockerfile -t ${RAGFLOW_IMAGE} . - name: Prepare Python test environment run: | uv sync --python 3.13 --group test --frozen uv pip install -e sdk/python - name: Prepare function test environment working-directory: docker run: | set -euo pipefail # install ss sudo apt update && sudo apt install -y iproute2 RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-${HOME}} COMPOSE_PROJECT_NAME="${GITHUB_RUN_ID}-${DOC_ENGINE}" echo "COMPOSE_PROJECT_NAME=${COMPOSE_PROJECT_NAME}" >> ${GITHUB_ENV} echo "RAGFLOW_CONTAINER=${COMPOSE_PROJECT_NAME}-ragflow-cpu-1" >> ${GITHUB_ENV} ARTIFACTS_DIR=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/${GITHUB_RUN_ID}/${DOC_ENGINE} echo "ARTIFACTS_DIR=${ARTIFACTS_DIR}" >> ${GITHUB_ENV} rm -rf "${ARTIFACTS_DIR}" && mkdir -p "${ARTIFACTS_DIR}" # Determine runner number (default to 1 if not found) RUNNER_NUM=$(sudo docker inspect $(hostname) --format '{{index .Config.Labels "com.docker.compose.container-number"}}' 2>/dev/null || true) RUNNER_NUM=${RUNNER_NUM:-1} # Engine-specific offset partitions keep concurrent engine jobs from # choosing the same host ports when they land on the same self-hosted runner. # A lock plus reservation file closes the check/start race between parallel jobs. 
PORT_BASES=(1200 1201 23817 23820 5432 5455 9000 9001 6379 6380 6601 9380 9381 9382 9384 9383 9385 80 443 4222) PARTITION_SIZE=6000 case "${DOC_ENGINE}" in elasticsearch) PARTITION_BASE=1000 ;; infinity) PARTITION_BASE=31000 ;; *) echo "Unsupported DOC_ENGINE=${DOC_ENGINE}" >&2; exit 1 ;; esac PORT_LOCK_DIR=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/port-locks mkdir -p "${PORT_LOCK_DIR}" port_offset_available() { local offset=$1 local base port for base in "${PORT_BASES[@]}"; do port=$((base + offset)) if ss -ltnH "sport = :${port}" | grep -q .; then return 1 fi done return 0 } cleanup_stale_port_locks() { local now stale_after lock lock_ts now=$(date -u +%s) stale_after=$((6 * 60 * 60)) for lock in "${PORT_LOCK_DIR}"/*.lock; do [[ -e "${lock}" ]] || continue lock_ts=$(awk '{print $3}' "${lock}" 2>/dev/null || true) if [[ "${lock_ts}" =~ ^[0-9]+$ ]] && (( now - lock_ts > stale_after )); then rm -f "${lock}" fi done } reserve_port_offset() { local attempt candidate reservation cleanup_stale_port_locks for attempt in $(seq 0 59); do candidate=$(( PARTITION_BASE + ((GITHUB_RUN_ID + RUNNER_NUM * 1000 + attempt * 97) % PARTITION_SIZE) )) reservation="${PORT_LOCK_DIR}/${candidate}.lock" if ( set -o noclobber; echo "${GITHUB_RUN_ID} ${DOC_ENGINE} $(date -u +%s)" > "${reservation}" ) 2>/dev/null; then if port_offset_available "${candidate}"; then PORT_OFFSET=${candidate} PORT_RESERVATION=${reservation} return 0 fi rm -f "${reservation}" fi done return 1 } if ! 
reserve_port_offset; then echo "Failed to reserve a free host port range for ${DOC_ENGINE} docker compose" >&2 exit 1 fi echo "PORT_RESERVATION=${PORT_RESERVATION}" >> ${GITHUB_ENV} echo "Using ${DOC_ENGINE} host port offset ${PORT_OFFSET}" ES_PORT=$((1200 + PORT_OFFSET)) OS_PORT=$((1201 + PORT_OFFSET)) INFINITY_THRIFT_PORT=$((23817 + PORT_OFFSET)) INFINITY_HTTP_PORT=$((23820 + PORT_OFFSET)) INFINITY_PSQL_PORT=$((5432 + PORT_OFFSET)) EXPOSE_MYSQL_PORT=$((5455 + PORT_OFFSET)) MINIO_PORT=$((9000 + PORT_OFFSET)) MINIO_CONSOLE_PORT=$((9001 + PORT_OFFSET)) REDIS_PORT=$((6379 + PORT_OFFSET)) NATS_PORT=$((4222 + PORT_OFFSET)) TEI_PORT=$((6380 + PORT_OFFSET)) KIBANA_PORT=$((6601 + PORT_OFFSET)) SVR_HTTP_PORT=$((9380 + PORT_OFFSET)) ADMIN_SVR_HTTP_PORT=$((9381 + PORT_OFFSET)) SVR_MCP_PORT=$((9382 + PORT_OFFSET)) GO_HTTP_PORT=$((9384 + PORT_OFFSET)) GO_ADMIN_PORT=$((9383 + PORT_OFFSET)) SANDBOX_EXECUTOR_MANAGER_PORT=$((9385 + PORT_OFFSET)) SVR_WEB_HTTP_PORT=$((80 + PORT_OFFSET)) SVR_WEB_HTTPS_PORT=$((443 + PORT_OFFSET)) # Persist computed ports into .env so docker-compose uses the correct host bindings. # Remove previous CI overrides first; docker compose uses the last duplicate key. 
sed -i '/^ES_PORT=/d;/^OS_PORT=/d;/^INFINITY_THRIFT_PORT=/d;/^INFINITY_HTTP_PORT=/d;/^INFINITY_PSQL_PORT=/d;/^EXPOSE_MYSQL_PORT=/d;/^MINIO_PORT=/d;/^MINIO_CONSOLE_PORT=/d;/^REDIS_PORT=/d;/^TEI_PORT=/d;/^KIBANA_PORT=/d;/^SVR_HTTP_PORT=/d;/^ADMIN_SVR_HTTP_PORT=/d;/^SVR_MCP_PORT=/d;/^GO_HTTP_PORT=/d;/^GO_ADMIN_PORT=/d;/^SANDBOX_EXECUTOR_MANAGER_PORT=/d;/^SVR_WEB_HTTP_PORT=/d;/^SVR_WEB_HTTPS_PORT=/d;/^NATS_PORT=/d;/^COMPOSE_PROFILES=/d;/^TEI_MODEL=/d;/^RAGFLOW_IMAGE=/d;/^DOC_ENGINE=/d' .env { echo "" echo "ES_PORT=${ES_PORT}" echo "OS_PORT=${OS_PORT}" echo "INFINITY_THRIFT_PORT=${INFINITY_THRIFT_PORT}" echo "INFINITY_HTTP_PORT=${INFINITY_HTTP_PORT}" echo "INFINITY_PSQL_PORT=${INFINITY_PSQL_PORT}" echo "EXPOSE_MYSQL_PORT=${EXPOSE_MYSQL_PORT}" echo "MINIO_PORT=${MINIO_PORT}" echo "MINIO_CONSOLE_PORT=${MINIO_CONSOLE_PORT}" echo "REDIS_PORT=${REDIS_PORT}" echo "NATS_PORT=${NATS_PORT}" echo "TEI_PORT=${TEI_PORT}" echo "KIBANA_PORT=${KIBANA_PORT}" echo "SVR_HTTP_PORT=${SVR_HTTP_PORT}" echo "ADMIN_SVR_HTTP_PORT=${ADMIN_SVR_HTTP_PORT}" echo "SVR_MCP_PORT=${SVR_MCP_PORT}" echo "GO_HTTP_PORT=${GO_HTTP_PORT}" echo "GO_ADMIN_PORT=${GO_ADMIN_PORT}" echo "SANDBOX_EXECUTOR_MANAGER_PORT=${SANDBOX_EXECUTOR_MANAGER_PORT}" echo "SVR_WEB_HTTP_PORT=${SVR_WEB_HTTP_PORT}" echo "SVR_WEB_HTTPS_PORT=${SVR_WEB_HTTPS_PORT}" echo "COMPOSE_PROFILES=${DOC_ENGINE},cpu,tei-cpu,deepdoc" echo "TEI_MODEL=BAAI/bge-small-en-v1.5" echo "RAGFLOW_IMAGE=${RAGFLOW_IMAGE}" echo "DOC_ENGINE=${DOC_ENGINE}" } >> .env echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV} # Patch entrypoint.sh for coverage sed -i '/"\$PY" api\/ragflow_server.py \${INIT_SUPERUSER_ARGS} &/c\ echo "Ensuring coverage is installed..."\n "$PY" -m pip install coverage -i https://mirrors.aliyun.com/pypi/simple\n export COVERAGE_FILE=/ragflow/logs/.coverage\n echo "Starting ragflow_server with coverage..."\n "$PY" -m coverage run --source=./api/apps --omit="*/tests/*,*/migrations/*" -a api/ragflow_server.py 
${INIT_SUPERUSER_ARGS} &' ./entrypoint.sh

      # Bring up a clean compose project for the Elasticsearch test pass.
      - name: Start ragflow:nightly for Elasticsearch
        run: |
          # Tear down anything left over from an earlier run of this project;
          # `down` is best-effort, the forced removal below catches stragglers.
          sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} down -v || true
          sudo docker ps -a --filter "label=com.docker.compose.project=${COMPOSE_PROJECT_NAME}" -q | xargs -r sudo docker rm -f
          sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} up -d

      # Wait for the HTTP API, then run the Python SDK test suite with coverage.
      - name: Run sdk tests against Elasticsearch
        run: |
          # Clear every proxy variable so requests are not sent through a proxy.
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          # Poll the ping endpoint from inside the container: 60 attempts, 5 s apart.
          svc_ready=0
          for i in $(seq 1 60); do
            if sudo docker exec ${RAGFLOW_CONTAINER} curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then
              svc_ready=1
              break
            fi
            echo "Waiting for service to be available... ($i/60)"
            sleep 5
          done
          if [ "$svc_ready" -ne 1 ]; then
            echo "Service did not become ready after 5 minutes. Docker logs:"
            sudo docker logs ${RAGFLOW_CONTAINER}
            exit 1
          fi
          echo "Start to run test sdk on Elasticsearch"
          # pipefail makes the step fail when pytest fails, even though tee succeeds.
          source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} --junitxml=pytest-es-sdk.xml --cov=sdk/python/ragflow_sdk --cov-branch --cov-report=xml:coverage-es-sdk.xml test/testcases/test_sdk_api 2>&1 | tee es_sdk_test.log

      # Wait for the HTTP API, then run the RESTful API test suite.
      - name: Run New RESTFUL api tests against Elasticsearch
        run: |
          # Clear every proxy variable so requests are not sent through a proxy.
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          # Poll the ping endpoint from inside the container: 60 attempts, 5 s apart.
          svc_ready=0
          for i in $(seq 1 60); do
            if sudo docker exec ${RAGFLOW_CONTAINER} curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then
              svc_ready=1
              break
            fi
            echo "Waiting for service to be available... ($i/60)"
            sleep 5
          done
          if [ "$svc_ready" -ne 1 ]; then
            echo "Service did not become ready after 5 minutes. Docker logs:"
            sudo docker logs ${RAGFLOW_CONTAINER}
            exit 1
          fi
          # pipefail makes the step fail when pytest fails, even though tee succeeds.
          source .venv/bin/activate && set -o pipefail; pytest -s --tb=short --level=${HTTP_API_TEST_LEVEL} test/testcases/restful_api 2>&1 | tee es_restful_api_test.log

      # End-to-end check through the CLI: create a user and a dataset, import
      # and parse two PDFs, then run a search benchmark.
      - name: RAGFlow CLI retrieval test Elasticsearch
        env:
          PYTHONPATH: ${{ github.workspace }}
        run: |
          set -euo pipefail
          source .venv/bin/activate
          # Clear every proxy variable so requests are not sent through a proxy.
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""

          # Names are derived from the run id so each workflow run uses its own.
          EMAIL="ci-${GITHUB_RUN_ID}@example.com"
          PASS="ci-pass-${GITHUB_RUN_ID}"
          DATASET="ci_dataset_${GITHUB_RUN_ID}"

          # CLI is expanded unquoted on purpose so it splits into command + script.
          CLI="python admin/client/ragflow_cli.py"

          LOG_FILE="es_cli_test.log"
          : > "${LOG_FILE}"

          # Case-insensitive ERE of output markers that count as a CLI failure.
          # The last alternative flags a non-zero `code:` value; it uses a POSIX
          # class because `\\s` inside single quotes reaches grep as an escaped
          # backslash followed by `s*`, which never matches whitespace.
          ERROR_RE='Traceback|ModuleNotFoundError|ImportError|Parse error|Bad response|Fail to|code:[[:space:]]*[1-9]'

          # run_cli LOGFILE [--allow REGEX] CMD [ARG...]
          #   Runs CMD under a 500 s timeout, echoes its output, appends it to
          #   LOGFILE, and scans it for ERROR_RE.
          #   Returns 0 when error markers are present but the output also
          #   matches the --allow REGEX; 1 when error markers are present and
          #   not allowed; otherwise the exit status of the timed command.
          #   It returns rather than exits so that `if run_cli ...` retry loops
          #   can try again; a bare call still aborts the script under `set -e`.
          run_cli() {
            local logfile="$1"
            shift
            local allow_re=""
            if [[ "${1:-}" == "--allow" ]]; then
              allow_re="$2"
              shift 2
            fi
            local cmd_display="$*"
            # No escaped quotes inside $(...): they would become literal
            # characters of the date format and end up in the banner.
            echo "===== $(date -u +%Y-%m-%dT%H:%M:%SZ) CMD: ${cmd_display} =====" | tee -a "${logfile}"
            local tmp_log
            tmp_log="$(mktemp)"
            # Suspend errexit so a failing command is inspected, not fatal.
            set +e
            timeout 500s "$@" 2>&1 | tee "${tmp_log}"
            # Must directly follow the pipeline: any other command resets PIPESTATUS.
            local status=${PIPESTATUS[0]}
            set -e
            cat "${tmp_log}" >> "${logfile}"
            if grep -qiE "${ERROR_RE}" "${tmp_log}"; then
              if [[ -n "${allow_re}" ]] && grep -qiE "${allow_re}" "${tmp_log}"; then
                echo "Allowed CLI error markers in ${logfile}"
                rm -f "${tmp_log}"
                return 0
              fi
              echo "Detected CLI error markers in ${logfile}"
              rm -f "${tmp_log}"
              return 1
            fi
            rm -f "${tmp_log}"
            return ${status}
          }

          # Export everything defined in docker/.env (ports etc.) into this shell.
          set -a
          source docker/.env
          set +a

          HOST_ADDRESS="http://host.docker.internal:${SVR_HTTP_PORT}"
          # Strip scheme, port and path from HOST_ADDRESS to get the bare host name.
          USER_HOST="$(echo "${HOST_ADDRESS}" | sed -E 's#^https?://([^:/]+).*#\1#')"
          USER_PORT="${SVR_HTTP_PORT}"
          ADMIN_HOST="${USER_HOST}"
          ADMIN_PORT="${ADMIN_SVR_HTTP_PORT}"

          # Poll the ping endpoint from inside the container: 60 attempts, 5 s apart.
          svc_ready=0
          for i in $(seq 1 60); do
            if sudo docker exec ${RAGFLOW_CONTAINER} curl -sf --connect-timeout 5 "${HOST_ADDRESS}/api/v1/system/ping" > /dev/null 2>&1; then
              svc_ready=1
              break
            fi
            echo "Waiting for service to be available... ($i/60)"
            sleep 5
          done
          if [ "$svc_ready" -ne 1 ]; then
            echo "Service did not become ready after 5 minutes. Docker logs:"
            sudo docker logs ${RAGFLOW_CONTAINER}
            exit 1
          fi

          # Retry the admin ping up to 30 times, 1 s apart.
          admin_ready=0
          for i in $(seq 1 30); do
            if run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "ping"; then
              admin_ready=1
              break
            fi
            sleep 1
          done
          if [[ "${admin_ready}" -ne 1 ]]; then
            echo "Admin service did not become ready"
            exit 1
          fi

          run_cli "${LOG_FILE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "show version"

          # Error output that matches this pattern is tolerated for `create user`.
          ALLOW_USER_EXISTS_RE='already exists|already exist|duplicate|already.*registered|exist(s)?'
          run_cli "${LOG_FILE}" --allow "${ALLOW_USER_EXISTS_RE}" $CLI --type admin --host "$ADMIN_HOST" --port "$ADMIN_PORT" --username "admin@ragflow.io" --password "admin" command "create user '$EMAIL' '$PASS'"

          # Retry the user ping up to 30 times, 1 s apart.
          user_ready=0
          for i in $(seq 1 30); do
            if run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "ping"; then
              user_ready=1
              break
            fi
            sleep 1
          done
          if [[ "${user_ready}" -ne 1 ]]; then
            echo "User service did not become ready"
            exit 1
          fi

          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "show version"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "create dataset '$DATASET' with embedding 'BAAI/bge-small-en-v1.5@Builtin' parser 'auto'"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "import 'test/benchmark/test_docs/Doc1.pdf,test/benchmark/test_docs/Doc2.pdf' into dataset '$DATASET'"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "parse dataset '$DATASET' sync"
          run_cli "${LOG_FILE}" $CLI --type user --host "$USER_HOST" --port "$USER_PORT" --username "$EMAIL" --password "$PASS" command "Benchmark 16 100 search 'what are these documents about' on datasets '$DATASET'"

      # Interrupt the server so it can write its coverage data, then stop the stack.
      - name: Stop ragflow to save coverage Elasticsearch
        if: ${{ !cancelled() }}
        run: |
          # Send SIGINT to ragflow_server.py to trigger coverage save
          PID=$(sudo docker exec ${RAGFLOW_CONTAINER} ps aux | grep "ragflow_server.py" | grep -v grep | awk '{print $2}' | head -n 1)
          if [ -n "$PID" ]; then
            echo "Sending SIGINT to ragflow_server.py (PID: $PID)..."
            sudo docker exec ${RAGFLOW_CONTAINER} kill -INT $PID
            # Wait for process to exit and coverage file to be written
            sleep 10
          else
            echo "ragflow_server.py not found!"
          fi
          sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} stop

      # Convert the server's .coverage data file into an XML report.
      - name: Generate server coverage report Elasticsearch
        if: ${{ !cancelled() }}
        run: |
          # .coverage file should be in docker/ragflow-logs/.coverage
          if [ -f docker/ragflow-logs/.coverage ]; then
            echo "Found .coverage file"
            cp docker/ragflow-logs/.coverage .coverage
            source .venv/bin/activate
            # Create .coveragerc to map container paths to host paths
            # NOTE(review): coverage.py documents [paths] as being applied by
            # `coverage combine`; confirm `coverage xml` remaps /ragflow here.
            echo "[paths]" > .coveragerc
            echo "source =" >> .coveragerc
            echo " ." >> .coveragerc
            echo " /ragflow" >> .coveragerc
            coverage xml -o coverage-es-server.xml
            rm .coveragerc
            # Clean up for next run
            sudo rm docker/ragflow-logs/.coverage
          else
            echo ".coverage file not found!"
          fi

      # Copy the server logs to the artifacts directory and print the last 200 lines.
      - name: Collect ragflow log Elasticsearch
        if: ${{ !cancelled() }}
        run: |
          if [ -d docker/ragflow-logs ]; then
            cp -r docker/ragflow-logs ${ARTIFACTS_DIR}/ragflow-logs-es
            echo "ragflow log" && tail -n 200 docker/ragflow-logs/ragflow_server.log || true
          else
            echo "No docker/ragflow-logs directory found; skipping log collection"
          fi
          sudo rm -rf docker/ragflow-logs || true

      # Final teardown: containers, port reservation, then the test image.
      - name: Stop ragflow:nightly for Elasticsearch
        if: always()  # always run this step even if previous steps failed
        run: |
          # Sometimes `docker compose down` fails because of a hung container,
          # heavy load, etc. Force-remove any leftover containers to release
          # their resources (for example, listening ports).
          sudo docker compose -f docker/docker-compose.yml -p ${COMPOSE_PROJECT_NAME} down -v || true
          sudo docker ps -a --filter "label=com.docker.compose.project=${COMPOSE_PROJECT_NAME}" -q | xargs -r sudo docker rm -f
          # Release the port reservation as soon as the containers are gone, so
          # that a failing image removal below cannot leave it behind.
          if [[ -n ${PORT_RESERVATION:-} ]]; then
            rm -f "${PORT_RESERVATION}"
          fi
          if [[ -n ${RAGFLOW_IMAGE:-} ]]; then
            sudo docker rmi -f ${RAGFLOW_IMAGE}
          fi