2025-11-06 09:36:38 +08:00
|
|
|
#
|
|
|
|
|
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
|
|
|
|
#
|
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
|
#
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
#
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
|
|
|
|
#
|
|
|
|
|
import os
|
|
|
|
|
import json
|
|
|
|
|
import secrets
|
|
|
|
|
import logging
|
2026-05-07 10:10:02 +08:00
|
|
|
from datetime import date
|
|
|
|
|
|
2025-11-06 09:36:38 +08:00
|
|
|
from common.constants import RAG_FLOW_SERVICE_NAME
|
|
|
|
|
from common.file_utils import get_project_base_directory
|
|
|
|
|
from common.config_utils import get_base_config, decrypt_database_config
|
|
|
|
|
from common.misc_utils import pip_install_torch
|
|
|
|
|
from common.constants import SVR_QUEUE_NAME, Storage
|
|
|
|
|
|
|
|
|
|
import rag.utils
|
|
|
|
|
import rag.utils.es_conn
|
|
|
|
|
import rag.utils.infinity_conn
|
2025-11-20 10:00:14 +08:00
|
|
|
import rag.utils.ob_conn
|
2025-11-06 09:36:38 +08:00
|
|
|
import rag.utils.opensearch_conn
|
|
|
|
|
from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob
|
|
|
|
|
from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob
|
2025-12-04 09:44:05 +07:00
|
|
|
from rag.utils.gcs_conn import RAGFlowGCS
|
2025-11-06 09:36:38 +08:00
|
|
|
from rag.utils.minio_conn import RAGFlowMinio
|
|
|
|
|
from rag.utils.opendal_conn import OpenDALStorage
|
2026-03-04 13:07:45 +08:00
|
|
|
from rag.utils.redis_conn import REDIS_CONN
|
2025-11-06 09:36:38 +08:00
|
|
|
from rag.utils.s3_conn import RAGFlowS3
|
|
|
|
|
from rag.utils.oss_conn import RAGFlowOSS
|
|
|
|
|
|
|
|
|
|
from rag.nlp import search
|
|
|
|
|
|
2025-12-25 21:18:13 +08:00
|
|
|
import memory.utils.es_conn as memory_es_conn
|
|
|
|
|
import memory.utils.infinity_conn as memory_infinity_conn
|
2026-02-03 16:46:17 +08:00
|
|
|
import memory.utils.ob_conn as memory_ob_conn
|
2025-12-25 21:18:13 +08:00
|
|
|
|
2026-05-06 06:40:35 +00:00
|
|
|
# ---------------------------------------------------------------------------
# Module-level settings.
#
# Most names below are placeholders that init_settings() overwrites; values
# computed from the environment here are evaluated once, at import time.
# ---------------------------------------------------------------------------

# Time zone name, taken from the TZ environment variable.
TIMEZONE = os.getenv("TZ", "Asia/Shanghai")

# Default LLM provider settings (filled in by init_settings()).
LLM = None
LLM_FACTORY = None
LLM_BASE_URL = None
CHAT_MDL = ""
EMBEDDING_MDL = ""
RERANK_MDL = ""
ASR_MDL = ""
IMAGE2TEXT_MDL = ""


# Per-model configuration; init_settings() replaces each of these with the
# dict returned by _resolve_per_model_config().
CHAT_CFG = ""
EMBEDDING_CFG = ""
RERANK_CFG = ""
ASR_CFG = ""
IMAGE2TEXT_CFG = ""
API_KEY = None
PARSERS = None
HOST_IP = None
HOST_PORT = None
SECRET_KEY = None
FACTORY_LLM_INFOS = None
ALLOWED_LLM_FACTORIES = None

# Metadata database; read eagerly at import and again in init_settings().
DATABASE_TYPE = os.getenv("DB_TYPE", "mysql")
DATABASE = decrypt_database_config(name=DATABASE_TYPE)

# authentication
AUTHENTICATION_CONF = None

# client
CLIENT_AUTHENTICATION = None
HTTP_APP_KEY = None
GITHUB_OAUTH = None
FEISHU_OAUTH = None
OAUTH_CONFIG = None

# Document engine name. Stripped so that the import-time flags below agree
# with the ones init_settings() recomputes from the same variable.
DOC_ENGINE = os.getenv('DOC_ENGINE', 'elasticsearch').strip()
DOC_ENGINE_INFINITY = (DOC_ENGINE.lower() == "infinity")
DOC_ENGINE_OCEANBASE = (DOC_ENGINE.lower() == "oceanbase")


# Connection singletons created by init_settings().
docStoreConn = None
msgStoreConn = None

retriever = None
kg_retriever = None

# user registration switch
REGISTER_ENABLED = 1

# SSO-only mode: hide password login form
DISABLE_PASSWORD_LOGIN = False


# sandbox-executor-manager
SANDBOX_HOST = None
STRONG_TEST_COUNT = int(os.environ.get("STRONG_TEST_COUNT", "8"))

# SMTP / outgoing mail (filled in by init_settings() from the "smtp" section).
SMTP_CONF = None
MAIL_SERVER = ""
MAIL_PORT = 0
MAIL_USE_SSL = True
MAIL_USE_TLS = False
MAIL_USERNAME = ""
MAIL_PASSWORD = ""
MAIL_DEFAULT_SENDER = ()
MAIL_FRONTEND_URL = ""

# move from rag.settings
ES = {}
INFINITY = {}
AZURE = {}
S3 = {}
MINIO = {}
OB = {}
OSS = {}
OS = {}
GCS = {}

DOC_MAXIMUM_SIZE: int = 128 * 1024 * 1024
DOC_BULK_SIZE: int = 4
EMBEDDING_BATCH_SIZE: int = 16

# GPU count; updated by check_and_install_torch().
PARALLEL_DEVICES: int = 0

# Storage backend name; must be a member name of ``Storage`` because
# init_settings() looks it up with ``Storage[STORAGE_IMPL_TYPE]``.
STORAGE_IMPL_TYPE = os.getenv('STORAGE_IMPL', 'MINIO')
STORAGE_IMPL = None
|
|
|
|
|
|
2026-05-27 21:54:17 +08:00
|
|
|
def get_svr_queue_name(priority: int, suffix: str = "common") -> str:
    """
    Build the task-queue name for a given priority.

    The name has the form ``<SVR_QUEUE_NAME>.<priority>.<suffix>``.

    Args:
        priority: Task priority (0=low, 1=high).
        suffix: Task type suffix (common/resume/graphrag/raptor/mindmap).
                Accepted for forward compatibility only: the value is
                currently ignored and every queue uses "common".

    Returns:
        Queue name string.

    Examples:
        get_svr_queue_name(0, "common") -> "te.0.common"
        get_svr_queue_name(1, "common") -> "te.1.common"
        get_svr_queue_name(0)           -> "te.0.common"  # default suffix="common"
    """
    # Other suffixes are reserved, so the last component is fixed for now.
    active_suffix = "common"
    return f"{SVR_QUEUE_NAME}.{priority}.{active_suffix}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_svr_queue_names(suffix: str):
    """Return the queue names for ``suffix``, high priority (1) before low (0)."""
    high_priority, low_priority = 1, 0
    return [
        get_svr_queue_name(high_priority, suffix),
        get_svr_queue_name(low_priority, suffix),
    ]
|
2025-11-06 09:36:38 +08:00
|
|
|
|
2026-05-07 10:10:02 +08:00
|
|
|
def init_secret_key():
    """Return an explicitly configured secret key, or ``None`` if there is none.

    Sources are tried in this order:

    1. the ``RAGFLOW_SECRET_KEY`` environment variable;
    2. the ``secret_key`` entry of the ``RAG_FLOW_SERVICE_NAME`` section of
       the base configuration.

    A candidate is accepted only if it is at least 32 characters long. The
    configured value is additionally rejected when it equals today's date
    rendered as a string.

    Returns:
        The accepted key as a string, or ``None`` when no source qualifies.
    """
    min_key_length = 32

    env_key = os.environ.get("RAGFLOW_SECRET_KEY")
    if env_key and len(env_key) >= min_key_length:
        return env_key

    # Check if there's a configured secret key
    configured_key = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("secret_key")
    # A config loader may hand back a non-string scalar (for example a date or
    # an integer); such values have no len() and can never be a valid key, so
    # treat them as "not configured" instead of raising TypeError.
    if (
        isinstance(configured_key, str)
        and configured_key != str(date.today())
        and len(configured_key) >= min_key_length
    ):
        return configured_key
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_secret_key():
    """Return the secret key used by the service.

    When the module-level ``SECRET_KEY`` is set it is returned as is.
    Otherwise the key is obtained from ``_get_or_create_secret_key()`` on
    every call; the result is not stored back into ``SECRET_KEY``.
    """
    if SECRET_KEY is not None:
        return SECRET_KEY
    return _get_or_create_secret_key()
|
|
|
|
|
|
2025-11-06 09:36:38 +08:00
|
|
|
def _get_or_create_secret_key():
    """Fetch the shared secret key through ``REDIS_CONN``, proposing a new one.

    A fresh random 64-hex-character candidate is generated and passed to
    ``REDIS_CONN.get_or_create_secret_key`` under the key
    ``"ragflow:system:secret_key"``. Whatever that call returns is the
    result. If the returned value equals the candidate, a security warning
    is logged because the key in use was auto-generated.

    NOTE(review): presumably the Redis helper returns an already stored key
    when one exists - confirm against its implementation.
    """
    candidate_key = secrets.token_hex(32)
    active_key = REDIS_CONN.get_or_create_secret_key("ragflow:system:secret_key", candidate_key)
    if candidate_key == active_key:
        logging.warning("SECURITY WARNING: Using auto-generated SECRET_KEY.")
    return active_key
|
2025-11-06 09:36:38 +08:00
|
|
|
|
|
|
|
|
class StorageFactory:
    """Factory that instantiates the storage class registered for a ``Storage`` member."""

    # Storage member -> implementing class. Each class is called with no
    # arguments by create().
    storage_mapping = {
        Storage.MINIO: RAGFlowMinio,
        Storage.AZURE_SPN: RAGFlowAzureSpnBlob,
        Storage.AZURE_SAS: RAGFlowAzureSasBlob,
        Storage.AWS_S3: RAGFlowS3,
        Storage.OSS: RAGFlowOSS,
        Storage.OPENDAL: OpenDALStorage,
        Storage.GCS: RAGFlowGCS,
    }

    @classmethod
    def create(cls, storage: Storage):
        """Return a new instance of the class mapped to ``storage``.

        Raises:
            KeyError: If ``storage`` has no entry in ``storage_mapping``.
        """
        implementation = cls.storage_mapping[storage]
        return implementation()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def init_settings():
    """Load runtime configuration into this module's global settings.

    Reads environment variables and the base configuration (through
    ``get_base_config`` / ``decrypt_database_config``), then creates the
    document-store connection, the message-store connection, the storage
    implementation and the retrievers, and stores all of them in module
    globals.

    Raises:
        Exception: If ``DOC_ENGINE`` names an unsupported document engine.
        KeyError: If ``STORAGE_IMPL_TYPE`` is not a member name of ``Storage``.
        ValueError: If ``SANDBOX_ENABLED``, ``MAX_CONTENT_LENGTH``,
            ``DOC_BULK_SIZE`` or ``EMBEDDING_BATCH_SIZE`` is set to a
            non-integer value.
    """
    global DATABASE_TYPE, DATABASE
    global ALLOWED_LLM_FACTORIES, LLM_FACTORY, LLM_BASE_URL
    global REGISTER_ENABLED, DISABLE_PASSWORD_LOGIN
    global FACTORY_LLM_INFOS, API_KEY, PARSERS
    global CHAT_MDL, EMBEDDING_MDL, RERANK_MDL, ASR_MDL, IMAGE2TEXT_MDL
    global CHAT_CFG, EMBEDDING_CFG, RERANK_CFG, ASR_CFG, IMAGE2TEXT_CFG
    global HOST_IP, HOST_PORT, SECRET_KEY
    global CLIENT_AUTHENTICATION, HTTP_APP_KEY, GITHUB_OAUTH, FEISHU_OAUTH, OAUTH_CONFIG
    global DOC_ENGINE, DOC_ENGINE_INFINITY, DOC_ENGINE_OCEANBASE
    global docStoreConn, msgStoreConn, ES, OB, OS, INFINITY
    global AZURE, S3, MINIO, OSS, GCS, STORAGE_IMPL
    global retriever, kg_retriever
    global SANDBOX_HOST, SMTP_CONF
    global MAIL_SERVER, MAIL_PORT, MAIL_USE_SSL, MAIL_USE_TLS, MAIL_USERNAME, MAIL_PASSWORD, MAIL_DEFAULT_SENDER, MAIL_FRONTEND_URL
    global DOC_MAXIMUM_SIZE, DOC_BULK_SIZE, EMBEDDING_BATCH_SIZE

    # --- metadata database -------------------------------------------------
    DATABASE_TYPE = os.getenv("DB_TYPE", "mysql")
    DATABASE = decrypt_database_config(name=DATABASE_TYPE)

    # --- default LLM provider ----------------------------------------------
    llm_settings = get_base_config("user_default_llm", {}) or {}
    llm_default_models = llm_settings.get("default_models", {}) or {}
    LLM_FACTORY = llm_settings.get("factory", "") or ""
    LLM_BASE_URL = llm_settings.get("base_url", "") or ""
    ALLOWED_LLM_FACTORIES = llm_settings.get("allowed_factories", None)

    # --- user registration switch ------------------------------------------
    raw_register_enabled = os.environ.get("REGISTER_ENABLED", "1")
    try:
        REGISTER_ENABLED = int(raw_register_enabled)
    except ValueError:
        # Best effort: keep the previous value, but make the bad input visible.
        logging.warning(
            "Ignoring invalid REGISTER_ENABLED=%r; keeping %r",
            raw_register_enabled,
            REGISTER_ENABLED,
        )

    # --- SSO-only mode ------------------------------------------------------
    # The environment variable can only switch the flag on; otherwise the
    # "authentication" config section decides.
    env_val = os.environ.get("DISABLE_PASSWORD_LOGIN", "").lower()
    if env_val in ("1", "true", "yes"):
        DISABLE_PASSWORD_LOGIN = True
    else:
        try:
            sso_conf = get_base_config("authentication", {}) or {}
            DISABLE_PASSWORD_LOGIN = bool(sso_conf.get("disable_password_login", False))
        except Exception:
            # Best effort: keep the previous value, but do not hide the error.
            logging.warning(
                "Could not read authentication.disable_password_login; keeping %r",
                DISABLE_PASSWORD_LOGIN,
                exc_info=True,
            )

    # --- LLM factory catalogue ---------------------------------------------
    factories_path = os.path.join(get_project_base_directory(), "conf", "llm_factories.json")
    try:
        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(factories_path, "r", encoding="utf-8") as f:
            FACTORY_LLM_INFOS = json.load(f)["factory_llm_infos"]
    except Exception:
        logging.warning("Could not load %s; using an empty factory list", factories_path, exc_info=True)
        FACTORY_LLM_INFOS = []

    API_KEY = llm_settings.get("api_key")

    PARSERS = llm_settings.get(
        "parsers", "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag"
    )

    # --- default models -----------------------------------------------------
    chat_entry = _parse_model_entry(llm_default_models.get("chat_model", CHAT_MDL))
    embedding_entry = _parse_model_entry(llm_default_models.get("embedding_model", EMBEDDING_MDL))
    rerank_entry = _parse_model_entry(llm_default_models.get("rerank_model", RERANK_MDL))
    asr_entry = _parse_model_entry(llm_default_models.get("asr_model", ASR_MDL))
    image2text_entry = _parse_model_entry(llm_default_models.get("image2text_model", IMAGE2TEXT_MDL))

    CHAT_CFG = _resolve_per_model_config(chat_entry, LLM_FACTORY, API_KEY, LLM_BASE_URL)
    EMBEDDING_CFG = _resolve_per_model_config(embedding_entry, LLM_FACTORY, API_KEY, LLM_BASE_URL)
    RERANK_CFG = _resolve_per_model_config(rerank_entry, LLM_FACTORY, API_KEY, LLM_BASE_URL)
    ASR_CFG = _resolve_per_model_config(asr_entry, LLM_FACTORY, API_KEY, LLM_BASE_URL)
    IMAGE2TEXT_CFG = _resolve_per_model_config(image2text_entry, LLM_FACTORY, API_KEY, LLM_BASE_URL)

    CHAT_MDL = CHAT_CFG.get("model", "") or ""
    EMBEDDING_MDL = EMBEDDING_CFG.get("model", "") or ""
    compose_profiles = os.getenv("COMPOSE_PROFILES", "")
    if "tei-" in compose_profiles:
        # TEI_MODEL wins when it has a value. An unset *or empty* variable
        # falls back to the configured model, then to the built-in default,
        # so the embedding model can never end up empty under a tei- profile.
        EMBEDDING_MDL = os.getenv("TEI_MODEL") or EMBEDDING_MDL or "BAAI/bge-small-en-v1.5"
    RERANK_MDL = RERANK_CFG.get("model", "") or ""
    ASR_MDL = ASR_CFG.get("model", "") or ""
    IMAGE2TEXT_MDL = IMAGE2TEXT_CFG.get("model", "") or ""

    # --- service endpoint and secret key -----------------------------------
    service_conf = get_base_config(RAG_FLOW_SERVICE_NAME, {}) or {}
    HOST_IP = service_conf.get("host", "127.0.0.1")
    HOST_PORT = service_conf.get("http_port")

    SECRET_KEY = init_secret_key()

    # --- authentication -----------------------------------------------------
    authentication_conf = get_base_config("authentication", {}) or {}

    # client
    client_conf = authentication_conf.get("client", {}) or {}
    CLIENT_AUTHENTICATION = client_conf.get("switch", False)
    HTTP_APP_KEY = client_conf.get("http_app_key")

    oauth_conf = get_base_config("oauth", {}) or {}
    GITHUB_OAUTH = oauth_conf.get("github")
    FEISHU_OAUTH = oauth_conf.get("feishu")
    OAUTH_CONFIG = oauth_conf

    # --- document store -----------------------------------------------------
    DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch").strip()
    lower_case_doc_engine = DOC_ENGINE.lower()
    DOC_ENGINE_INFINITY = (lower_case_doc_engine == "infinity")
    DOC_ENGINE_OCEANBASE = (lower_case_doc_engine == "oceanbase")
    if lower_case_doc_engine == "elasticsearch":
        ES = get_base_config("es", {})
        docStoreConn = rag.utils.es_conn.ESConnection()
    elif lower_case_doc_engine == "infinity":
        INFINITY = get_base_config("infinity", {
            "uri": "infinity:23817",
            "postgres_port": 5432,
            "db_name": "default_db"
        })
        docStoreConn = rag.utils.infinity_conn.InfinityConnection()
    elif lower_case_doc_engine == "opensearch":
        OS = get_base_config("os", {})
        docStoreConn = rag.utils.opensearch_conn.OSConnection()
    elif lower_case_doc_engine == "oceanbase":
        OB = get_base_config("oceanbase", {})
        docStoreConn = rag.utils.ob_conn.OBConnection()
    elif lower_case_doc_engine == "seekdb":
        OB = get_base_config("seekdb", {})
        docStoreConn = rag.utils.ob_conn.OBConnection()
    else:
        raise Exception(f"Not supported doc engine: {DOC_ENGINE}")

    # --- message store ------------------------------------------------------
    # Use the same engine for the message store. The comparison uses the
    # lower-cased name, like the document-store selection above, so a value
    # such as "Elasticsearch" selects a message store as well. No message
    # store is created here for "opensearch".
    if lower_case_doc_engine == "elasticsearch":
        ES = get_base_config("es", {})
        msgStoreConn = memory_es_conn.ESConnection()
    elif lower_case_doc_engine == "infinity":
        INFINITY = get_base_config("infinity", {
            "uri": "infinity:23817",
            "postgres_port": 5432,
            "db_name": "default_db"
        })
        msgStoreConn = memory_infinity_conn.InfinityConnection()
    elif lower_case_doc_engine in ["oceanbase", "seekdb"]:
        msgStoreConn = memory_ob_conn.OBConnection()

    # --- object storage -----------------------------------------------------
    if STORAGE_IMPL_TYPE in ['AZURE_SPN', 'AZURE_SAS']:
        AZURE = get_base_config("azure", {})
    elif STORAGE_IMPL_TYPE == 'AWS_S3':
        S3 = get_base_config("s3", {})
    elif STORAGE_IMPL_TYPE == 'MINIO':
        MINIO = decrypt_database_config(name="minio")
    elif STORAGE_IMPL_TYPE == 'OSS':
        OSS = get_base_config("oss", {})
    elif STORAGE_IMPL_TYPE == 'GCS':
        GCS = get_base_config("gcs", {})

    storage_impl = StorageFactory.create(Storage[STORAGE_IMPL_TYPE])

    # Define crypto settings
    crypto_enabled = os.environ.get("RAGFLOW_CRYPTO_ENABLED", "false").lower() == "true"

    # Check if encryption is enabled
    if crypto_enabled:
        try:
            from rag.utils.encrypted_storage import create_encrypted_storage
            algorithm = os.environ.get("RAGFLOW_CRYPTO_ALGORITHM", "aes-256-cbc")
            crypto_key = os.environ.get("RAGFLOW_CRYPTO_KEY")

            STORAGE_IMPL = create_encrypted_storage(storage_impl,
                                                    algorithm=algorithm,
                                                    key=crypto_key,
                                                    encryption_enabled=crypto_enabled)
        except Exception as e:
            # NOTE(review): on failure the service continues with the plain,
            # unencrypted storage backend; only this error log signals it.
            logging.error(f"Failed to initialize encrypted storage: {e}")
            STORAGE_IMPL = storage_impl
    else:
        STORAGE_IMPL = storage_impl

    # --- retrievers ---------------------------------------------------------
    retriever = search.Dealer(docStoreConn)
    from rag.graphrag import search as kg_search

    kg_retriever = kg_search.KGSearch(docStoreConn)

    # --- sandbox ------------------------------------------------------------
    if int(os.environ.get("SANDBOX_ENABLED", "0")):
        SANDBOX_HOST = os.environ.get("SANDBOX_HOST", "sandbox-executor-manager")

    # --- outgoing mail ------------------------------------------------------
    SMTP_CONF = get_base_config("smtp", {}) or {}

    MAIL_SERVER = SMTP_CONF.get("mail_server", "")
    MAIL_PORT = SMTP_CONF.get("mail_port", 0)
    MAIL_USE_SSL = SMTP_CONF.get("mail_use_ssl", True)
    MAIL_USE_TLS = SMTP_CONF.get("mail_use_tls", False)
    MAIL_USERNAME = SMTP_CONF.get("mail_username", "")
    MAIL_PASSWORD = SMTP_CONF.get("mail_password", "")
    mail_default_sender = SMTP_CONF.get("mail_default_sender", [])
    # The sender is only updated when at least two elements are configured.
    if mail_default_sender and len(mail_default_sender) >= 2:
        MAIL_DEFAULT_SENDER = (mail_default_sender[0], mail_default_sender[1])
    MAIL_FRONTEND_URL = SMTP_CONF.get("mail_frontend_url", "")

    # --- document processing limits ----------------------------------------
    DOC_MAXIMUM_SIZE = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024))
    DOC_BULK_SIZE = int(os.environ.get("DOC_BULK_SIZE", 4))
    EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE", 16))

    os.environ["DOTNET_SYSTEM_GLOBALIZATION_INVARIANT"] = "1"
|
|
|
|
|
|
|
|
|
|
|
2025-11-06 09:36:38 +08:00
|
|
|
def check_and_install_torch():
    """Run ``pip_install_torch()`` and record the CUDA device count.

    On success ``PARALLEL_DEVICES`` is set to ``torch.cuda.device_count()``
    and the count is logged. Any exception raised along the way is caught
    and reported with a single info-level message; ``PARALLEL_DEVICES`` then
    keeps its previous value.
    """
    global PARALLEL_DEVICES
    try:
        pip_install_torch()
        import torch.cuda

        gpu_count = torch.cuda.device_count()
        PARALLEL_DEVICES = gpu_count
        logging.info(f"found {gpu_count} gpus")
    except Exception:
        logging.info("can't import package 'torch'")
|
|
|
|
|
|
|
|
|
|
def _parse_model_entry(entry):
|
|
|
|
|
if isinstance(entry, str):
|
|
|
|
|
return {"name": entry, "factory": None, "api_key": None, "base_url": None}
|
|
|
|
|
if isinstance(entry, dict):
|
|
|
|
|
name = entry.get("name") or entry.get("model") or ""
|
|
|
|
|
return {
|
|
|
|
|
"name": name,
|
|
|
|
|
"factory": entry.get("factory"),
|
|
|
|
|
"api_key": entry.get("api_key"),
|
|
|
|
|
"base_url": entry.get("base_url"),
|
|
|
|
|
}
|
|
|
|
|
return {"name": "", "factory": None, "api_key": None, "base_url": None}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_per_model_config(entry_dict, backup_factory, backup_api_key, backup_base_url):
|
|
|
|
|
name = (entry_dict.get("name") or "").strip()
|
|
|
|
|
m_factory = entry_dict.get("factory") or backup_factory or ""
|
|
|
|
|
m_api_key = entry_dict.get("api_key") or backup_api_key or ""
|
|
|
|
|
m_base_url = entry_dict.get("base_url") or backup_base_url or ""
|
|
|
|
|
|
|
|
|
|
if name and "@" not in name and m_factory:
|
|
|
|
|
name = f"{name}@{m_factory}"
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"model": name,
|
|
|
|
|
"factory": m_factory,
|
|
|
|
|
"api_key": m_api_key,
|
|
|
|
|
"base_url": m_base_url,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def print_rag_settings():
    """Log the document size limit and the per-user file-count limit.

    The file-count limit is read from the ``MAX_FILE_NUM_PER_USER``
    environment variable and defaults to 0 when the variable is unset.
    """
    logging.info(f"MAX_CONTENT_LENGTH: {DOC_MAXIMUM_SIZE}")
    max_files_per_user = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
    logging.info(f"MAX_FILE_COUNT_PER_USER: {max_files_per_user}")
|
|
|
|
|
|