mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-05 19:08:38 +08:00
Fix: use .a to replace .so for pdfium/pdf_oxide/office_oxide (#16496)
### Summary Use .a to replace .so for pdfium/pdf_oxide/office_oxide --------- Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
10
.github/workflows/tests.yml
vendored
10
.github/workflows/tests.yml
vendored
@@ -234,10 +234,7 @@ jobs:
|
||||
PKGS=$(go list ./... 2>/dev/null \
|
||||
| grep -v '/internal/storage$' \
|
||||
| grep -v '/internal/tokenizer$' \
|
||||
| grep -v '/internal/handler$' \
|
||||
| grep -v '/internal/deepdoc/parser/pdf/pdfium' \
|
||||
| grep -v '/internal/deepdoc/parser/pdf/pdfoxide' \
|
||||
| grep -v '/internal/deepdoc/parser/pdf' || true)
|
||||
| grep -v '/internal/handler$' || true)
|
||||
if [ -z "$PKGS" ]; then
|
||||
./build.sh --test
|
||||
else
|
||||
@@ -680,10 +677,7 @@ jobs:
|
||||
PKGS=$(go list ./... 2>/dev/null \
|
||||
| grep -v '/internal/storage$' \
|
||||
| grep -v '/internal/tokenizer$' \
|
||||
| grep -v '/internal/handler$' \
|
||||
| grep -v '/internal/deepdoc/parser/pdf/pdfium' \
|
||||
| grep -v '/internal/deepdoc/parser/pdf/pdfoxide' \
|
||||
| grep -v '/internal/deepdoc/parser/pdf' || true)
|
||||
| grep -v '/internal/handler$' || true)
|
||||
if [ -z "$PKGS" ]; then
|
||||
./build.sh --test
|
||||
else
|
||||
|
||||
324
build.sh
324
build.sh
@@ -22,17 +22,41 @@ RAGFLOW_CLI_BINARY="$PROJECT_ROOT/bin/ragflow-cli"
|
||||
# Strip symbols from Go binaries (set via --strip / -s)
|
||||
STRIP_SYMBOLS=""
|
||||
|
||||
# office_oxide native library settings
|
||||
OFFICE_OXIDE_PREFIX="${HOME}/.office_oxide"
|
||||
# Native static library settings. These are the user-cache paths (~/ragflow-native-libs/).
|
||||
# The user cache is checked first. If a library is missing there but present under
|
||||
# /opt/ragflow-native-libs/ (pre-seeded in the CI runner image), it is copied into the user cache, so no download_deps.py run is needed.
|
||||
SYSTEM_DEPS="/opt/ragflow-native-libs"
|
||||
|
||||
# office_oxide native library settings — static linking
|
||||
OFFICE_OXIDE_PREFIX="${HOME}/ragflow-native-libs/office_oxide"
|
||||
OFFICE_OXIDE_VERSION="0.1.2"
|
||||
|
||||
# pdfium native library settings (from pypdfium2_raw PyPI wheel)
|
||||
PDFIUM_PREFIX="${HOME}/.pdfium"
|
||||
PDFIUM_VERSION="0.5.0"
|
||||
# pdfium native library settings — static linking (kognitos/pdfium-static)
|
||||
PDFIUM_STATIC_PREFIX="${HOME}/ragflow-native-libs/pdfium-static"
|
||||
PDFIUM_STATIC_VERSION="7809"
|
||||
|
||||
# pdf_oxide native library settings (from GitHub Release)
|
||||
PDF_OXIDE_PREFIX="${HOME}/.pdf_oxide"
|
||||
PDF_OXIDE_VERSION="0.3.63"
|
||||
# pdf_oxide native library settings — static linking (go-ffi tarball)
|
||||
PDF_OXIDE_PREFIX="${HOME}/ragflow-native-libs/pdf_oxide"
|
||||
PDF_OXIDE_VERSION="0.3.67"
|
||||
|
||||
# Copy a dependency from the system pre-seed directory to the user cache.
#
# Globals:   HOME (read), SYSTEM_DEPS (read; falls back to /opt/ragflow-native-libs)
# Arguments: $1 - dependency directory name, e.g. "pdfium-static", "pdf_oxide",
#                 "office_oxide"
# Outputs:   one progress line on stdout when seeding; a diagnostic on stderr
#            when the copy fails
# Returns:   0 if the dep was copied or already exists in cache, 1 otherwise
#            (empty name, no system copy available, or the copy failed).
_seed_from_system() {
    local dep_name="$1"
    local dep_dir="${HOME}/ragflow-native-libs/${dep_name}"
    local sys_dir="${SYSTEM_DEPS:-/opt/ragflow-native-libs}/${dep_name}"
    local staging_dir="${dep_dir}.partial.$$"

    # An empty name would make both paths point at the cache/system roots and
    # report a bogus "already cached".
    if [ -z "$dep_name" ]; then
        return 1
    fi

    if [ -d "$dep_dir" ]; then
        return 0  # already cached
    fi

    if [ ! -d "$sys_dir" ]; then
        return 1  # nothing to seed from
    fi

    echo " ${dep_name} → ${sys_dir} (system)"
    mkdir -p "$(dirname "$dep_dir")" || return 1

    # Copy into a staging directory and rename it into place only on success.
    # An interrupted or failed copy must not leave a half-populated cache
    # directory behind, because its mere existence counts as "already cached".
    rm -rf -- "$staging_dir"
    if cp -r -- "$sys_dir" "$staging_dir" && mv -- "$staging_dir" "$dep_dir"; then
        return 0
    fi

    rm -rf -- "$staging_dir"
    echo " ${dep_name} → failed to copy from ${sys_dir}" >&2
    return 1
}
|
||||
|
||||
echo -e "${GREEN}=== RAGFlow Go Server Build Script ===${NC}"
|
||||
|
||||
@@ -138,14 +162,9 @@ _download_and_extract() {
|
||||
# Check / install office_oxide native library (Rust → C FFI library)
|
||||
check_office_oxide_deps() {
|
||||
print_section "Checking office_oxide native library"
|
||||
_seed_from_system "office_oxide"
|
||||
|
||||
local lib_file header_path
|
||||
case "$(uname -s)" in
|
||||
Linux) lib_file="liboffice_oxide.so" ;;
|
||||
Darwin) lib_file="liboffice_oxide.dylib" ;;
|
||||
*) echo -e "${RED}Unsupported OS for office_oxide${NC}"; return 1 ;;
|
||||
esac
|
||||
|
||||
local lib_file="liboffice_oxide.a"
|
||||
local lib_path="${OFFICE_OXIDE_PREFIX}/lib/${lib_file}"
|
||||
local header_path="${OFFICE_OXIDE_PREFIX}/include/office_oxide_c/office_oxide.h"
|
||||
|
||||
@@ -154,177 +173,65 @@ check_office_oxide_deps() {
|
||||
return 0
|
||||
fi
|
||||
|
||||
echo "office_oxide native library not found. Installing..."
|
||||
|
||||
# Map platform to the release asset name. Note: the GitHub release archives
|
||||
# omit the version number from the native-* asset filenames.
|
||||
local asset_name
|
||||
case "$(uname -s)" in
|
||||
Linux)
|
||||
case "$(uname -m)" in
|
||||
x86_64) asset_name="native-linux-x86_64" ;;
|
||||
aarch64|arm64) asset_name="native-linux-aarch64" ;;
|
||||
*) echo -e "${RED}Unsupported arch: $(uname -m)${NC}"; return 1 ;;
|
||||
esac
|
||||
;;
|
||||
Darwin)
|
||||
case "$(uname -m)" in
|
||||
x86_64) asset_name="native-macos-x86_64" ;;
|
||||
aarch64|arm64) asset_name="native-macos-aarch64" ;;
|
||||
*) echo -e "${RED}Unsupported arch: $(uname -m)${NC}"; return 1 ;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
|
||||
local release_url="https://github.com/yfedoseev/office_oxide/releases/download/v${OFFICE_OXIDE_VERSION}/${asset_name}.tar.gz"
|
||||
|
||||
mkdir -p "${OFFICE_OXIDE_PREFIX}"
|
||||
_download_and_extract "$release_url" "${OFFICE_OXIDE_PREFIX}"
|
||||
|
||||
if [ ! -f "$lib_path" ]; then
|
||||
echo -e "${YELLOW}Warning: Failed to install office_oxide native library (missing ${lib_path})${NC}"
|
||||
echo " Try: curl -fsSL ${release_url} | tar xzf - -C ${OFFICE_OXIDE_PREFIX}"
|
||||
return 1
|
||||
fi
|
||||
|
||||
echo -e "${GREEN}✓ office_oxide native library installed${NC}"
|
||||
echo -e "${RED}Error: office_oxide native library not found${NC}"
|
||||
echo " Expected: ${lib_path}"
|
||||
echo " Run: uv run download_deps.py"
|
||||
echo " Or manually download: https://github.com/yfedoseev/office_oxide/releases/download/v${OFFICE_OXIDE_VERSION}/native-linux-x86_64.tar.gz"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Check / install pdfium native library (libpdfium.so from pypdfium2_raw wheel).
|
||||
# Check pdfium static library (must be pre-installed via download_deps.py or CI image).
|
||||
check_pdfium_deps() {
|
||||
# 1. Check .venv (uv sync provides pypdfium2_raw).
|
||||
local venv_py="${PROJECT_ROOT}/.venv/bin/python3"
|
||||
if [ -x "$venv_py" ]; then
|
||||
local venv_so=$("$venv_py" -c "import pypdfium2_raw,os;print(os.path.join(os.path.dirname(pypdfium2_raw.__file__),'libpdfium.so'))" 2>/dev/null)
|
||||
if [ -n "$venv_so" ] && [ -f "$venv_so" ]; then
|
||||
echo " pdfium → ${venv_so} (.venv)"
|
||||
export CGO_LDFLAGS="$CGO_LDFLAGS -L$(dirname "$venv_so") -Wl,-rpath,$(dirname "$venv_so")"
|
||||
export LD_LIBRARY_PATH="$(dirname "$venv_so"):${LD_LIBRARY_PATH}"
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
_seed_from_system "pdfium-static"
|
||||
local lib_path="${PDFIUM_STATIC_PREFIX}/lib/libpdfium.a"
|
||||
|
||||
# 2. Check cache.
|
||||
local lib_path="${PDFIUM_PREFIX}/libpdfium.so"
|
||||
if [ -f "$lib_path" ]; then
|
||||
echo " pdfium → ${PDFIUM_PREFIX}"
|
||||
echo " pdfium (static) → ${PDFIUM_STATIC_PREFIX}"
|
||||
return 0
|
||||
fi
|
||||
|
||||
echo " pdfium not found, installing..."
|
||||
|
||||
# 3. Map platform to PyPI wheel platform tag.
|
||||
local whl_platform
|
||||
case "$(uname -s)" in
|
||||
Linux)
|
||||
case "$(uname -m)" in
|
||||
x86_64) whl_platform="manylinux_2_17_x86_64.manylinux2014_x86_64" ;;
|
||||
aarch64|arm64) whl_platform="manylinux_2_17_aarch64.manylinux2014_aarch64" ;;
|
||||
*) echo " pdfium → unsupported arch"; return 1 ;;
|
||||
esac
|
||||
;;
|
||||
Darwin)
|
||||
case "$(uname -m)" in
|
||||
x86_64) whl_platform="macosx_11_0_x86_64" ;;
|
||||
arm64) whl_platform="macosx_11_0_arm64" ;;
|
||||
*) echo " pdfium → unsupported arch"; return 1 ;;
|
||||
esac
|
||||
;;
|
||||
*) echo " pdfium → unsupported OS"; return 1 ;;
|
||||
esac
|
||||
|
||||
# 4. Download .whl from PyPI and extract libpdfium.so (zero pip dependency).
|
||||
local whl_url
|
||||
whl_url=$(curl -fsSL "https://pypi.org/pypi/pypdfium2_raw/${PDFIUM_VERSION}/json" 2>/dev/null \
|
||||
| grep -o '"url":"[^"]*'${whl_platform}'[^"]*"' | head -1 | cut -d'"' -f4)
|
||||
|
||||
if [ -n "$whl_url" ] && { command -v curl >/dev/null 2>&1 || command -v wget >/dev/null 2>&1; }; then
|
||||
local tmp_whl="$(mktemp)"
|
||||
if command -v curl >/dev/null 2>&1; then
|
||||
curl -fsSL "$whl_url" -o "$tmp_whl"
|
||||
else
|
||||
wget -q "$whl_url" -O "$tmp_whl"
|
||||
fi
|
||||
mkdir -p "${PDFIUM_PREFIX}"
|
||||
# Wheel is a zip; extract libpdfium.so via python3 or unzip.
|
||||
if command -v python3 >/dev/null 2>&1; then
|
||||
python3 -c "
|
||||
import zipfile, os, shutil
|
||||
with zipfile.ZipFile('$tmp_whl') as z:
|
||||
for n in z.namelist():
|
||||
if n.endswith('libpdfium.so'):
|
||||
z.extract(n, '${PDFIUM_PREFIX}')
|
||||
os.rename(os.path.join('${PDFIUM_PREFIX}', n), '$lib_path')
|
||||
# Remove empty pypdfium2_raw dir
|
||||
d = os.path.join('${PDFIUM_PREFIX}', 'pypdfium2_raw')
|
||||
if os.path.isdir(d): shutil.rmtree(d, ignore_errors=True)
|
||||
break
|
||||
" 2>/dev/null
|
||||
elif command -v unzip >/dev/null 2>&1; then
|
||||
unzip -q -o "$tmp_whl" -d "${PDFIUM_PREFIX}" 'pypdfium2_raw/libpdfium.so' 2>/dev/null
|
||||
[ -f "${PDFIUM_PREFIX}/pypdfium2_raw/libpdfium.so" ] && mv "${PDFIUM_PREFIX}/pypdfium2_raw/libpdfium.so" "$lib_path"
|
||||
rm -rf "${PDFIUM_PREFIX}/pypdfium2_raw"
|
||||
fi
|
||||
rm -f "$tmp_whl"
|
||||
fi
|
||||
|
||||
if [ -f "$lib_path" ]; then
|
||||
echo -e "${GREEN}✓ pdfium installed to ${PDFIUM_PREFIX}${NC}"
|
||||
else
|
||||
echo " pdfium → install failed (requires .venv, curl/wget + python3, or pre-cached ~/.pdfium)"
|
||||
return 1
|
||||
fi
|
||||
echo " pdfium (static) not found"
|
||||
echo " Expected: ${lib_path}"
|
||||
echo " Run: uv run download_deps.py"
|
||||
echo " Or: curl -fsSL https://github.com/kognitos/pdfium-static/releases/download/chromium%2F${PDFIUM_STATIC_VERSION}/pdfium-linux-x64-static.tgz | tar xz -C ${PDFIUM_STATIC_PREFIX}"
|
||||
return 1
|
||||
}
|
||||
|
||||
# Check / install pdf_oxide native library (Rust -> C FFI library).
|
||||
# Check / install pdf_oxide static library (go-ffi tarball from GitHub Release).
|
||||
check_pdf_oxide_deps() {
|
||||
local lib_path="${PDF_OXIDE_PREFIX}/libpdf_oxide.so"
|
||||
|
||||
if [ -f "$lib_path" ]; then
|
||||
echo " pdf_oxide → ${PDF_OXIDE_PREFIX} (shared)"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Also check for static library (user's local installation).
|
||||
local static_path="${PDF_OXIDE_PREFIX}/libpdf_oxide.a"
|
||||
if [ -f "$static_path" ]; then
|
||||
echo " pdf_oxide → ${PDF_OXIDE_PREFIX} (static)"
|
||||
return 0
|
||||
fi
|
||||
|
||||
echo " pdf_oxide not found, installing..."
|
||||
|
||||
# Map platform to the release asset name.
|
||||
local asset_name
|
||||
_seed_from_system "pdf_oxide"
|
||||
# Map platform to tarball-internal subdirectory.
|
||||
local platform_subdir
|
||||
case "$(uname -s)" in
|
||||
Linux)
|
||||
case "$(uname -m)" in
|
||||
x86_64) asset_name="libpdf_oxide-v${PDF_OXIDE_VERSION}-linux-x86_64" ;;
|
||||
aarch64|arm64) asset_name="libpdf_oxide-v${PDF_OXIDE_VERSION}-linux-aarch64" ;;
|
||||
*) echo " pdf_oxide → unsupported arch"; return 1 ;;
|
||||
x86_64) platform_subdir="linux_amd64" ;;
|
||||
aarch64|arm64) platform_subdir="linux_arm64" ;;
|
||||
*) echo " pdf_oxide (static) → unsupported arch"; return 1 ;;
|
||||
esac
|
||||
;;
|
||||
Darwin)
|
||||
case "$(uname -m)" in
|
||||
x86_64) asset_name="libpdf_oxide-v${PDF_OXIDE_VERSION}-darwin-x86_64" ;;
|
||||
arm64) asset_name="libpdf_oxide-v${PDF_OXIDE_VERSION}-darwin-arm64" ;;
|
||||
*) echo " pdf_oxide → unsupported arch"; return 1 ;;
|
||||
x86_64) platform_subdir="darwin_amd64" ;;
|
||||
arm64) platform_subdir="darwin_arm64" ;;
|
||||
*) echo " pdf_oxide (static) → unsupported arch"; return 1 ;;
|
||||
esac
|
||||
;;
|
||||
*) echo " pdf_oxide → unsupported OS"; return 1 ;;
|
||||
*) echo " pdf_oxide (static) → unsupported OS"; return 1 ;;
|
||||
esac
|
||||
|
||||
local release_url="https://github.com/yfedoseev/pdf_oxide/releases/download/v${PDF_OXIDE_VERSION}/${asset_name}.tar.gz"
|
||||
|
||||
mkdir -p "${PDF_OXIDE_PREFIX}"
|
||||
_download_and_extract "$release_url" "${PDF_OXIDE_PREFIX}"
|
||||
local lib_path="${PDF_OXIDE_PREFIX}/lib/${platform_subdir}/libpdf_oxide.a"
|
||||
|
||||
if [ -f "$lib_path" ]; then
|
||||
echo -e "${GREEN}✓ pdf_oxide installed to ${PDF_OXIDE_PREFIX}${NC}"
|
||||
else
|
||||
echo " pdf_oxide → install failed"
|
||||
return 1
|
||||
echo " pdf_oxide (static) → ${PDF_OXIDE_PREFIX}"
|
||||
return 0
|
||||
fi
|
||||
|
||||
echo " pdf_oxide (static) not found"
|
||||
echo " Expected: ${lib_path}"
|
||||
echo " Run: uv run download_deps.py"
|
||||
echo " Or: curl -fsSL https://github.com/yfedoseev/pdf_oxide/releases/download/v${PDF_OXIDE_VERSION}/pdf_oxide-go-ffi-linux-amd64.tar.gz | tar xz -C ${PDF_OXIDE_PREFIX}"
|
||||
return 1
|
||||
}
|
||||
|
||||
# Build C++ static library
|
||||
@@ -405,7 +312,6 @@ build_go() {
|
||||
eval "$install_cmd"
|
||||
fi
|
||||
|
||||
check_office_oxide_deps || true
|
||||
setup_cgo_env
|
||||
|
||||
local strip_flags=()
|
||||
@@ -446,44 +352,70 @@ build_go() {
|
||||
echo -e "${GREEN}✓ Go ingestor built successfully: $INGESTOR_BINARY${NC}"
|
||||
}
|
||||
|
||||
# Configure CGO flags for native libraries.
|
||||
# setup_cgo_env — base: -I and -L paths only, no -l flags (those live in
|
||||
# each package's own #cgo LDFLAGS pragma). Safe to call even when native
|
||||
# libs are absent — just skips the paths that don't exist.
|
||||
# setup_cgo_env_pdf — pdfium / pdf_oxide -L paths. Non-fatal when libs
|
||||
# are missing. Only called by run_go_tests.
|
||||
# Configure CGO flags for native libraries (office_oxide, pdfium, pdf_oxide).
|
||||
# All three are statically linked — no LD_LIBRARY_PATH or -Wl,-rpath needed.
|
||||
setup_cgo_env() {
|
||||
# ── office_oxide (header + search path only, no -loffice_oxide) ───
|
||||
if [ -f "${OFFICE_OXIDE_PREFIX}/include/office_oxide_c/office_oxide.h" ]; then
|
||||
export CGO_CFLAGS="-I${OFFICE_OXIDE_PREFIX}/include/office_oxide_c${CGO_CFLAGS:+ $CGO_CFLAGS}"
|
||||
fi
|
||||
if [ -f "${OFFICE_OXIDE_PREFIX}/lib/liboffice_oxide.so" ] || [ -f "${OFFICE_OXIDE_PREFIX}/lib/liboffice_oxide.dylib" ]; then
|
||||
export CGO_LDFLAGS="-L${OFFICE_OXIDE_PREFIX}/lib${CGO_LDFLAGS:+ $CGO_LDFLAGS}"
|
||||
export LD_LIBRARY_PATH="${OFFICE_OXIDE_PREFIX}/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
|
||||
fi
|
||||
# ── office_oxide ──────────────────────────────────────────────────
|
||||
check_office_oxide_deps
|
||||
export CGO_CFLAGS="-I${OFFICE_OXIDE_PREFIX}/include/office_oxide_c${CGO_CFLAGS:+ $CGO_CFLAGS}"
|
||||
export CGO_LDFLAGS="${OFFICE_OXIDE_PREFIX}/lib/liboffice_oxide.a"
|
||||
|
||||
echo "CGO_CFLAGS: $CGO_CFLAGS"
|
||||
echo "CGO_LDFLAGS: $CGO_LDFLAGS"
|
||||
}
|
||||
|
||||
setup_cgo_env_pdf() {
|
||||
# ── pdfium ────────────────────────────────────────────────────────
|
||||
check_pdfium_deps || true
|
||||
if [ -f "${PDFIUM_PREFIX}/libpdfium.so" ]; then
|
||||
export CGO_LDFLAGS="$CGO_LDFLAGS -L${PDFIUM_PREFIX}"
|
||||
export LD_LIBRARY_PATH="${PDFIUM_PREFIX}:${LD_LIBRARY_PATH}"
|
||||
check_pdfium_deps || return 1
|
||||
export CGO_LDFLAGS="$CGO_LDFLAGS ${PDFIUM_STATIC_PREFIX}/lib/libpdfium.a"
|
||||
# Linux: Chromium-built objects use Clang's .eh_frame format which GNU ld
|
||||
# cannot merge. Use lld (LLVM linker) which handles them correctly.
|
||||
# --allow-multiple-definition: pdf_oxide and office_oxide are both Rust
|
||||
# staticlibs that embed the Rust runtime; linking them together produces
|
||||
# duplicate rust_eh_personality symbols.
|
||||
if [ "$(uname -s)" = "Linux" ]; then
|
||||
if ! command -v ld.lld >/dev/null 2>&1; then
|
||||
echo -e "${RED}Error: ld.lld not found. Install with: sudo apt install lld-20${NC}"
|
||||
echo " lld is required to static-link Chromium-built pdfium (.eh_frame format)"
|
||||
return 1
|
||||
fi
|
||||
export CGO_LDFLAGS="$CGO_LDFLAGS \
|
||||
${PDFIUM_STATIC_PREFIX}/lib/libc++.a \
|
||||
${PDFIUM_STATIC_PREFIX}/lib/libc++abi.a \
|
||||
-fuse-ld=lld -Wl,--allow-multiple-definition"
|
||||
fi
|
||||
|
||||
# ── pdf_oxide ─────────────────────────────────────────────────────
|
||||
check_pdf_oxide_deps || true
|
||||
if [ -f "${PDF_OXIDE_PREFIX}/libpdf_oxide.so" ]; then
|
||||
export CGO_LDFLAGS="$CGO_LDFLAGS -L${PDF_OXIDE_PREFIX}"
|
||||
export LD_LIBRARY_PATH="${PDF_OXIDE_PREFIX}:${LD_LIBRARY_PATH}"
|
||||
elif [ -f "${PDF_OXIDE_PREFIX}/libpdf_oxide.a" ]; then
|
||||
export CGO_LDFLAGS="$CGO_LDFLAGS ${PDF_OXIDE_PREFIX}/libpdf_oxide.a"
|
||||
fi
|
||||
check_pdf_oxide_deps || return 1
|
||||
# The go-ffi tarball places the .a under lib/<platform_subdir>/.
|
||||
local pdf_oxide_subdir
|
||||
case "$(uname -s)" in
|
||||
Linux)
|
||||
case "$(uname -m)" in
|
||||
x86_64) pdf_oxide_subdir="linux_amd64" ;;
|
||||
aarch64|arm64) pdf_oxide_subdir="linux_arm64" ;;
|
||||
*) echo "pdf_oxide: unsupported arch"; return 1 ;;
|
||||
esac
|
||||
;;
|
||||
Darwin)
|
||||
case "$(uname -m)" in
|
||||
x86_64) pdf_oxide_subdir="darwin_amd64" ;;
|
||||
arm64) pdf_oxide_subdir="darwin_arm64" ;;
|
||||
*) echo "pdf_oxide: unsupported arch"; return 1 ;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
export CGO_LDFLAGS="$CGO_LDFLAGS ${PDF_OXIDE_PREFIX}/lib/${pdf_oxide_subdir}/libpdf_oxide.a"
|
||||
|
||||
echo "CGO_LDFLAGS (with PDF): $CGO_LDFLAGS"
|
||||
# ── platform-specific system libraries ────────────────────────────
|
||||
case "$(uname -s)" in
|
||||
Linux)
|
||||
export CGO_LDFLAGS="$CGO_LDFLAGS -lm -lpthread -ldl -lrt -lgcc_s -lutil -lc"
|
||||
;;
|
||||
Darwin)
|
||||
export CGO_LDFLAGS="$CGO_LDFLAGS \
|
||||
-framework CoreFoundation -framework Security \
|
||||
-framework SystemConfiguration -liconv -lresolv"
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "CGO_CFLAGS: $CGO_CFLAGS"
|
||||
echo "CGO_LDFLAGS: $CGO_LDFLAGS"
|
||||
}
|
||||
|
||||
# Run Go unit tests with the same CGO env as `build_go`. Pass any extra args
|
||||
@@ -492,9 +424,7 @@ run_go_tests() {
|
||||
print_section "Running Go tests"
|
||||
|
||||
cd "$PROJECT_ROOT"
|
||||
check_office_oxide_deps || true
|
||||
setup_cgo_env
|
||||
setup_cgo_env_pdf
|
||||
|
||||
if [ "$#" -eq 0 ]; then
|
||||
set -- ./...
|
||||
@@ -534,10 +464,6 @@ run() {
|
||||
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
# Set LD_LIBRARY_PATH for native libraries that were linked at build time.
|
||||
# Libraries are only in the search path when they were present during build.
|
||||
setup_cgo_env
|
||||
|
||||
# admin_server must be running before ragflow_server, otherwise ragflow_server's
|
||||
# heartbeats to admin will error out (see internal/development.md).
|
||||
print_section "Starting admin server (background)"
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
//go:build cgo
|
||||
|
||||
// Package pdfium renders PDF pages using the system's libpdfium.so
|
||||
// (bundled with pypdfium2). It exists solely to replace pdf_oxide's
|
||||
// Package pdfium renders PDF pages using libpdfium (statically linked
|
||||
// at build time via CGO_LDFLAGS). It exists solely to replace pdf_oxide's
|
||||
// RenderPageRaw for use cases where image quality matters for downstream
|
||||
// OCR/DLA — pdf_oxide still handles all text/char/table extraction.
|
||||
package pdfium
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -lpdfium -lm -lpthread -ldl
|
||||
#cgo LDFLAGS: -lm -lpthread -ldl
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
@@ -27,12 +27,24 @@ docker compose -f docker/docker-compose-base.yml up -d
|
||||
./build.sh -s --go
|
||||
```
|
||||
|
||||
> **Note**: If you use IDEs like GoLand to run/debug directly (via Run/Debug buttons), or run `go build` / `go run` from command line, you must set the following two CGO environment variables in your run configuration or shell:
|
||||
> **Note**: If you use IDEs like GoLand to run/debug directly (via Run/Debug buttons), or run `go build` / `go run` from command line, run `./build.sh --go` first to download native dependencies. Then set the following CGO environment variables in your run configuration or shell:
|
||||
>
|
||||
> ```bash
|
||||
> export CGO_CFLAGS="-I${HOME}/.office_oxide/include/office_oxide_c"
|
||||
> export CGO_LDFLAGS="-L${HOME}/.office_oxide/lib -loffice_oxide -Wl,-rpath,${HOME}/.office_oxide/lib"
|
||||
> RAGFLOW_DEPS="${HOME}/ragflow-native-libs"
|
||||
> PLATFORM="linux_amd64" # or darwin_amd64, linux_arm64, darwin_arm64
|
||||
>
|
||||
> export CGO_CFLAGS="-I${RAGFLOW_DEPS}/office_oxide/include/office_oxide_c"
|
||||
> export CGO_LDFLAGS="\
|
||||
> ${RAGFLOW_DEPS}/office_oxide/lib/liboffice_oxide.a \
|
||||
> ${RAGFLOW_DEPS}/pdfium-static/lib/libpdfium.a \
|
||||
> ${RAGFLOW_DEPS}/pdfium-static/lib/libc++.a \
|
||||
> ${RAGFLOW_DEPS}/pdfium-static/lib/libc++abi.a \
|
||||
> ${RAGFLOW_DEPS}/pdf_oxide/lib/${PLATFORM}/libpdf_oxide.a \
|
||||
> -fuse-ld=lld -Wl,--allow-multiple-definition \
|
||||
> -lm -lpthread -ldl -lrt -lgcc_s -lutil -lc"
|
||||
> ```
|
||||
>
|
||||
> All three native libraries are statically linked — no `LD_LIBRARY_PATH` or `-Wl,-rpath` needed.
|
||||
|
||||
## 3. Run Go Version RAGFlow
|
||||
Note: admin_server must be started first; otherwise, ragflow_server will encounter errors when sending heartbeats.
|
||||
@@ -85,7 +97,7 @@ Type \? for help, \q to quit
|
||||
RAGFlow(api/default)> REGISTER USER 'aaa@aaa.com' AS 'aaa' PASSWORD 'aaa';
|
||||
Register successfully
|
||||
RAGFlow(api/default)> login user 'aaa@aaa.com';
|
||||
password for aaa@aaa.com: Password:
|
||||
password for aaa@aaa.com: Password:
|
||||
Login user aaa@aaa.com successfully
|
||||
RAGFlow(api/default)> logout;
|
||||
SUCCESS
|
||||
|
||||
@@ -67,6 +67,15 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
|
||||
# compatibility contract.
|
||||
"https://github.com/browserbase/stagehand/releases/download/stagehand-server-v3/v3.7.2/stagehand-server-v3-linux-x64",
|
||||
"https://github.com/browserbase/stagehand/releases/download/stagehand-server-v3/v3.7.2/stagehand-server-v3-linux-arm64",
|
||||
# Native static libraries for Go build (pdfium, pdf_oxide, office_oxide)
|
||||
# Used by build.sh's check_*_deps functions — pre-downloaded to avoid
|
||||
# network access during CI.
|
||||
["https://github.com/kognitos/pdfium-static/releases/download/chromium%2F7809/pdfium-linux-x64-static.tgz",
|
||||
"pdfium-linux-x64-static.tgz"],
|
||||
["https://github.com/yfedoseev/pdf_oxide/releases/download/v0.3.67/pdf_oxide-go-ffi-linux-amd64.tar.gz",
|
||||
"pdf_oxide-go-ffi-linux-amd64.tar.gz"],
|
||||
["https://github.com/yfedoseev/office_oxide/releases/download/v0.1.2/native-linux-x86_64.tar.gz",
|
||||
"office_oxide-linux-x86_64.tar.gz"],
|
||||
]
|
||||
else:
|
||||
return [
|
||||
@@ -95,6 +104,15 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
|
||||
# compatibility contract.
|
||||
"https://github.com/browserbase/stagehand/releases/download/stagehand-server-v3/v3.7.2/stagehand-server-v3-linux-x64",
|
||||
"https://github.com/browserbase/stagehand/releases/download/stagehand-server-v3/v3.7.2/stagehand-server-v3-linux-arm64",
|
||||
# Native static libraries for Go build (pdfium, pdf_oxide, office_oxide)
|
||||
# Used by build.sh's check_*_deps functions — pre-downloaded to avoid
|
||||
# network access during CI.
|
||||
["https://github.com/kognitos/pdfium-static/releases/download/chromium%2F7809/pdfium-linux-x64-static.tgz",
|
||||
"pdfium-linux-x64-static.tgz"],
|
||||
["https://github.com/yfedoseev/pdf_oxide/releases/download/v0.3.67/pdf_oxide-go-ffi-linux-amd64.tar.gz",
|
||||
"pdf_oxide-go-ffi-linux-amd64.tar.gz"],
|
||||
["https://github.com/yfedoseev/office_oxide/releases/download/v0.1.2/native-linux-x86_64.tar.gz",
|
||||
"office_oxide-linux-x86_64.tar.gz"],
|
||||
]
|
||||
|
||||
|
||||
@@ -136,6 +154,29 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(filename):
|
||||
urllib.request.urlretrieve(download_url, filename)
|
||||
|
||||
# Extract native static libraries to ~/ragflow-native-libs for Go build.
|
||||
# Ensures build.sh can find them without network access.
|
||||
native_deps_dir = os.path.expanduser("~/ragflow-native-libs")
|
||||
extractions = [
|
||||
("pdfium-linux-x64-static.tgz", "pdfium-static"),
|
||||
("pdf_oxide-go-ffi-linux-amd64.tar.gz", "pdf_oxide"),
|
||||
("office_oxide-linux-x86_64.tar.gz", "office_oxide"),
|
||||
]
|
||||
import tarfile
|
||||
for archive, subdir in extractions:
|
||||
archive_path = os.path.join(os.getcwd(), archive)
|
||||
if not os.path.isfile(archive_path):
|
||||
print(f" Skipping extraction: {archive} not found")
|
||||
continue
|
||||
target = os.path.join(native_deps_dir, subdir)
|
||||
if os.path.isdir(target):
|
||||
print(f" ✓ {subdir} already extracted to {target}")
|
||||
continue
|
||||
os.makedirs(target, exist_ok=True)
|
||||
print(f" Extracting {archive} → {target}")
|
||||
with tarfile.open(archive_path) as tf:
|
||||
tf.extractall(target)
|
||||
|
||||
local_dir = os.path.abspath("nltk_data")
|
||||
for data in ["wordnet", "punkt", "punkt_tab"]:
|
||||
print(f"Downloading nltk {data}...")
|
||||
|
||||
Reference in New Issue
Block a user