mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-03 01:01:56 +08:00
Refactor: migrate pdf_parser.py to golang (#16323)
### What problem does this PR solve? Adds an HTTP API backed by ONNX models and migrates `pdf_parser.py` to Go. ### Type of change - [x] Refactoring
This commit is contained in:
12
docker/.env
12
docker/.env
@@ -25,7 +25,7 @@ DOC_ENGINE=${DOC_ENGINE:-elasticsearch}
|
||||
# - `gpu`
|
||||
DEVICE=${DEVICE:-cpu}
|
||||
|
||||
COMPOSE_PROFILES=${DOC_ENGINE},${DEVICE}
|
||||
COMPOSE_PROFILES=${DOC_ENGINE},${DEVICE},deepdoc
|
||||
|
||||
# The version of Elasticsearch.
|
||||
STACK_VERSION=${STACK_VERSION:-8.11.3}
|
||||
@@ -308,3 +308,13 @@ THREAD_POOL_MAX_WORKERS=128
|
||||
|
||||
# Option to disable the password login form (for SSO-only deployments)
|
||||
DISABLE_PASSWORD_LOGIN=false
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# DeepDoc OSS Vision Service
|
||||
# -----------------------------------------------------------------------------
|
||||
# URL for the deepdoc vision API (DLA, OCR, TSR) served by OSS ONNX models.
|
||||
# The `deepdoc` service defined in docker-compose.yml provides this endpoint.
|
||||
# When unset, the parser falls back to inline ONNX Runtime inference.
|
||||
DEEPDOC_URL=http://deepdoc:9390
|
||||
# Docker image for the OSS deepdoc service. CPU-only; uses ONNX Runtime.
|
||||
DEEPDOC_IMAGE=deepdoc_oss:latest
|
||||
|
||||
@@ -89,6 +89,17 @@ The [.env](./.env) file contains important environment variables for Docker.
|
||||
> - `RAGFLOW_IMAGE=swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow:nightly` or,
|
||||
> - `RAGFLOW_IMAGE=registry.cn-hangzhou.aliyuncs.com/infiniflow/ragflow:nightly`.
|
||||
|
||||
### DeepDoc Vision Service (OSS)
|
||||
|
||||
- `DEEPDOC_URL`
|
||||
URL for the deepdoc vision API serving DLA (layout analysis), OCR (text detection/recognition), and TSR (table structure recognition). The `deepdoc` service in `docker-compose.yml` provides this endpoint. Defaults to `http://deepdoc:9390`. When unset, the parser falls back to inline ONNX Runtime inference.
|
||||
|
||||
> The OSS deepdoc service runs on CPU using ONNX Runtime models. No GPU required.
|
||||
> API endpoints: `GET /health`, `GET /model`, `POST /predict/dla`, `POST /predict/tsr`, `POST /predict/ocr`.
|
||||
|
||||
- `DEEPDOC_IMAGE`
|
||||
Docker image for the OSS deepdoc service. Defaults to `deepdoc_oss:latest`.
|
||||
|
||||
### Timezone
|
||||
|
||||
- `TZ`
|
||||
@@ -167,6 +178,13 @@ Before setting `DOC_ENGINE=oceanbase`, make sure the host OS allows the file des
|
||||
- `host`: The API server's IP address inside the Docker container. Defaults to `0.0.0.0`.
|
||||
- `port`: The API server's serving port inside the Docker container. Defaults to `9380`.
|
||||
|
||||
- `deepdoc`
|
||||
The OSS DeepDoc vision service provides DLA, OCR, and TSR inference via ONNX Runtime.
|
||||
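Defined in `docker-compose.yml` under the `deepdoc` profile (enabled by default through `COMPOSE_PROFILES` in `.env`), it is started automatically as a dependency of `ragflow-cpu` and `ragflow-gpu`, which wait until it reports healthy.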
|
||||
- `image`: Docker image. Defaults to `deepdoc_oss:latest`.
|
||||
- `port`: Serving port inside the container. Defaults to `9390`.
|
||||
- Health check: `curl -f http://localhost:9390/health` every 10s (10s timeout, up to 60 retries).
|
||||
|
||||
- `mysql`
|
||||
- `name`: The MySQL database name. Defaults to `rag_flow`.
|
||||
- `user`: The username for MySQL.
|
||||
|
||||
@@ -2,10 +2,28 @@ include:
|
||||
- ./docker-compose-base.yml
|
||||
# To ensure that the container processes the locally modified `service_conf.yaml.template` instead of the one included in its image, you need to mount the local `service_conf.yaml.template` to the container.
|
||||
services:
|
||||
deepdoc:
|
||||
image: ${DEEPDOC_IMAGE:-deepdoc_oss:latest}
|
||||
profiles:
|
||||
- deepdoc
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: Dockerfile_deepdoc_oss
|
||||
networks:
|
||||
- ragflow
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9390/health"]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 60
|
||||
|
||||
ragflow-cpu:
|
||||
depends_on:
|
||||
mysql:
|
||||
condition: service_healthy
|
||||
deepdoc:
|
||||
condition: service_healthy
|
||||
profiles:
|
||||
- cpu
|
||||
image: ${RAGFLOW_IMAGE}
|
||||
@@ -57,6 +75,8 @@ services:
|
||||
depends_on:
|
||||
mysql:
|
||||
condition: service_healthy
|
||||
deepdoc:
|
||||
condition: service_healthy
|
||||
profiles:
|
||||
- gpu
|
||||
image: ${RAGFLOW_IMAGE}
|
||||
|
||||
Reference in New Issue
Block a user