diff --git a/.gitignore b/.gitignore index bc2bb8abe3..0aa8576b99 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,7 @@ hudet/ cv/ layout_app.py api/flask_session - +venv/ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html Cargo.lock @@ -211,3 +211,9 @@ backup # Added by cargo /target + +# Do not include in PR (local dev / build artifacts) +ragflow.egg-info/ +uv-aarch64*.tar.gz +uv-aarch64-unknown-linux-gnu.tar.gz +docker/launch_backend_service_windows.sh diff --git a/Dockerfile b/Dockerfile index d3af16ff05..4be231ba91 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ARG NEED_MIRROR=0 WORKDIR /ragflow -# Copy models downloaded via download_deps.py +# copy models downloaded via download_deps.py RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \ tar --exclude='.*' -cf - \ diff --git a/agent/canvas.py b/agent/canvas.py index 7a1d3bd234..c9d672e6cc 100644 --- a/agent/canvas.py +++ b/agent/canvas.py @@ -547,18 +547,10 @@ class Canvas(Graph): yield decorate("message", {"content": "", "audio_binary": self.tts(tts_mdl, buff_m)}) buff_m = "" cpn_obj.set_output("content", _m) - cite = re.search(r"\[ID:[ 0-9]+\]", _m) else: yield decorate("message", {"content": cpn_obj.output("content")}) - cite = re.search(r"\[ID:[ 0-9]+\]", cpn_obj.output("content")) - message_end = {} - if cpn_obj.get_param("status"): - message_end["status"] = cpn_obj.get_param("status") - if isinstance(cpn_obj.output("attachment"), dict): - message_end["attachment"] = cpn_obj.output("attachment") - if cite: - message_end["reference"] = self.get_reference() + message_end = self._build_message_end(cpn_obj) yield decorate("message_end", message_end) while partials: @@ -820,6 +812,22 @@ class Canvas(Graph): return {"chunks": {}, "doc_aggs": {}} return self.retrieval[-1] + def _has_reference(self) -> bool: + ref = self.get_reference() + if not isinstance(ref, dict): + return False + return bool(ref.get("chunks") or ref.get("doc_aggs")) + + def _build_message_end(self, cpn_obj) -> dict: + message_end = {} + if cpn_obj.get_param("status"): + message_end["status"] = cpn_obj.get_param("status") + if isinstance(cpn_obj.output("attachment"), dict): + message_end["attachment"] = cpn_obj.output("attachment") + if self._has_reference(): + message_end["reference"] = self.get_reference() + return message_end + def add_memory(self, user:str, assist:str, summ: str): self.memory.append((user, assist, summ)) diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index 0ed5d830b3..22d38da2f6 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -35,6 +35,7 @@ from api.db.services.llm_service import LLMBundle from common.metadata_utils import apply_meta_data_filter from api.db.services.tenant_llm_service import TenantLLMService from common.time_utils import current_timestamp, datetime_format +from common.text_utils import normalize_arabic_digits from rag.graphrag.general.mind_map_extractor import MindMapExtractor from rag.advanced_rag import DeepResearcher from rag.app.tag import label_question @@ -377,10 +378,12 @@ BAD_CITATION_PATTERNS = [ re.compile(r"【\s*ID\s*[: ]*\s*(\d+)\s*】"), # 【ID: 12】 re.compile(r"ref\s*(\d+)", flags=re.IGNORECASE), # ref12、REF 12 ] +CITATION_MARKER_PATTERN = re.compile(r"\[(?:ID:)?([0-9\u0660-\u0669\u06F0-\u06F9]+)\]") def repair_bad_citation_formats(answer: str, kbinfos: dict, idx: set): max_index = len(kbinfos["chunks"]) + normalized_answer = normalize_arabic_digits(answer) or "" def safe_add(i): if 0 <= i < max_index: @@ -388,19 +391,36 @@ def repair_bad_citation_formats(answer: str, kbinfos: dict, idx: set): return True return False - def find_and_replace(pattern, group_index=1, repl=lambda i: f"ID:{i}", flags=0): + def find_and_replace(pattern, group_index=1, repl=lambda digits: f"ID:{digits}"): nonlocal answer + nonlocal normalized_answer - def replacement(match): + matches = list(pattern.finditer(normalized_answer)) + if not matches: + return + + parts = [] + last_idx = 0 + for match in matches: + parts.append(answer[last_idx:match.start()]) try: i = int(match.group(group_index)) - if safe_add(i): - return f"[{repl(i)}]" except Exception: - pass - return match.group(0) + parts.append(answer[match.start():match.end()]) + last_idx = match.end() + continue - answer = re.sub(pattern, replacement, answer, flags=flags) + if safe_add(i): + digit_start, digit_end = match.span(group_index) + digits_original = answer[digit_start:digit_end] + parts.append(f"[{repl(digits_original)}]") + else: + parts.append(answer[match.start():match.end()]) + last_idx = match.end() + + parts.append(answer[last_idx:]) + answer = "".join(parts) + normalized_answer = normalize_arabic_digits(answer) or "" for pattern in BAD_CITATION_PATTERNS: find_and_replace(pattern) @@ -627,7 +647,8 @@ async def async_chat(dialog, messages, stream=True, **kwargs): if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)): idx = set([]) - if embd_mdl and not re.search(r"\[ID:([0-9]+)\]", answer): + normalized_answer = normalize_arabic_digits(answer) or "" + if embd_mdl and not CITATION_MARKER_PATTERN.search(normalized_answer): answer, idx = retriever.insert_citations( answer, [ck["content_ltks"] for ck in kbinfos["chunks"]], @@ -637,7 +658,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs): vtweight=dialog.vector_similarity_weight, ) else: - for match in re.finditer(r"\[ID:([0-9]+)\]", answer): + for match in CITATION_MARKER_PATTERN.finditer(normalized_answer): i = int(match.group(1)) if i < len(kbinfos["chunks"]): idx.add(i) diff --git a/common/settings.py b/common/settings.py index 97be3c5215..de26353637 100644 --- a/common/settings.py +++ b/common/settings.py @@ -244,7 +244,7 @@ def init_settings(): OAUTH_CONFIG = get_base_config("oauth", {}) global DOC_ENGINE, DOC_ENGINE_INFINITY, DOC_ENGINE_OCEANBASE, docStoreConn, ES, OB, OS, INFINITY - DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch") + DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch").strip() DOC_ENGINE_INFINITY = (DOC_ENGINE.lower() == "infinity") DOC_ENGINE_OCEANBASE = (DOC_ENGINE.lower() == "oceanbase") lower_case_doc_engine = DOC_ENGINE.lower() diff --git a/common/text_utils.py b/common/text_utils.py new file mode 100644 index 0000000000..e19c5bbcbb --- /dev/null +++ b/common/text_utils.py @@ -0,0 +1,48 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import annotations + +import re +import unicodedata + + +ARABIC_PRESENTATION_FORMS_RE = re.compile(r"[\uFB50-\uFDFF\uFE70-\uFEFF]") + + +def normalize_arabic_digits(text: str | None) -> str | None: + if text is None or not isinstance(text, str): + return text + + out = [] + for ch in text: + code = ord(ch) + if 0x0660 <= code <= 0x0669: + out.append(chr(code - 0x0660 + 0x30)) + elif 0x06F0 <= code <= 0x06F9: + out.append(chr(code - 0x06F0 + 0x30)) + else: + out.append(ch) + return "".join(out) + + +def normalize_arabic_presentation_forms(text: str | None) -> str | None: + """Normalize Arabic presentation forms to canonical text when present.""" + if text is None or not isinstance(text, str): + return text + if not ARABIC_PRESENTATION_FORMS_RE.search(text): + return text + return unicodedata.normalize("NFKC", text) diff --git a/rag/app/naive.py b/rag/app/naive.py index fcece22aba..ef84fa69cb 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -41,6 +41,7 @@ from deepdoc.parser.docling_parser import DoclingParser from deepdoc.parser.tcadp_parser import TCADPParser from common.float_utils import normalize_overlapped_percent from common.parser_config_utils import normalize_layout_recognizer +from common.text_utils import normalize_arabic_presentation_forms from rag.nlp import ( concat_img, find_codec, @@ -56,6 +57,33 @@ from rag.nlp import ( ) # noqa: F401 +def _normalize_section_text_for_rtl_presentation_forms(sections): + if not sections: + return sections + + normalized_sections = [] + for section in sections: + if isinstance(section, tuple): + if not section: + normalized_sections.append(section) + continue + text = section[0] + normalized_text = normalize_arabic_presentation_forms(text) + normalized_sections.append((normalized_text, *section[1:])) + continue + if isinstance(section, list): + if not section: + normalized_sections.append(section) + continue + text = section[0] + normalized_text = normalize_arabic_presentation_forms(text) + normalized_sections.append([normalized_text, *section[1:]]) + continue + normalized_sections.append(normalize_arabic_presentation_forms(section)) + + return normalized_sections + + def by_deepdoc(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, pdf_cls=None, **kwargs): callback = callback binary = binary @@ -802,6 +830,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca # sections = (text, image, tables) sections = Docx()(filename, binary) + sections = _normalize_section_text_for_rtl_presentation_forms(sections) # chunks list[dict] # images list - index of image chunk in chunks @@ -843,6 +872,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca paddleocr_llm_name=parser_model_name, **kwargs, ) + sections = _normalize_section_text_for_rtl_presentation_forms(sections) if not sections and not tables: return [] @@ -873,6 +903,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca file_type = "XLSX" if re.search(r"\.xlsx?$", filename, re.IGNORECASE) else "CSV" sections, tables = tcadp_parser.parse_pdf(filepath=filename, binary=binary, callback=callback, output_dir=os.environ.get("TCADP_OUTPUT_DIR", ""), file_type=file_type) + sections = _normalize_section_text_for_rtl_presentation_forms(sections) parser_config["chunk_token_num"] = 0 res = tokenize_table(tables, doc, is_english) callback(0.8, "Finish parsing.") @@ -884,10 +915,12 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca parser_config["chunk_token_num"] = 0 else: sections = [(_, "") for _ in excel_parser(binary) if _] + sections = _normalize_section_text_for_rtl_presentation_forms(sections) elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") sections = TxtParser()(filename, binary, parser_config.get("chunk_token_num", 128), parser_config.get("delimiter", "\n!?;。;!?")) + sections = _normalize_section_text_for_rtl_presentation_forms(sections) callback(0.8, "Finish parsing.") elif re.search(r"\.(md|markdown|mdx)$", filename, re.IGNORECASE): @@ -900,6 +933,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca delimiter=parser_config.get("delimiter", "\n!?;。;!?"), return_section_images=True, ) + sections = _normalize_section_text_for_rtl_presentation_forms(sections) is_markdown = True @@ -945,6 +979,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca chunk_token_num = int(parser_config.get("chunk_token_num", 128)) sections = HtmlParser()(filename, binary, chunk_token_num) sections = [(_, "") for _ in sections if _] + sections = _normalize_section_text_for_rtl_presentation_forms(sections) callback(0.8, "Finish parsing.") elif re.search(r"\.(json|jsonl|ldjson)$", filename, re.IGNORECASE): @@ -952,6 +987,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca chunk_token_num = int(parser_config.get("chunk_token_num", 128)) sections = JsonParser(chunk_token_num)(binary) sections = [(_, "") for _ in sections if _] + sections = _normalize_section_text_for_rtl_presentation_forms(sections) callback(0.8, "Finish parsing.") elif re.search(r"\.doc$", filename, re.IGNORECASE): @@ -969,6 +1005,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca if doc_parsed.get("content", None) is not None: sections = doc_parsed["content"].split("\n") sections = [(_, "") for _ in sections if _] + sections = _normalize_section_text_for_rtl_presentation_forms(sections) callback(0.8, "Finish parsing.") else: error_msg = f"tika.parser got empty content from {filename}." diff --git a/rag/nlp/search.py b/rag/nlp/search.py index 0d9bd096e6..19f215bc85 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -193,16 +193,18 @@ class Dealer: i += 1 pieces_.append("".join(pieces[st: i]) + "\n") else: + # Sentence boundary regex includes Arabic punctuation (، ؛ ؟ ۔) pieces_.extend( re.split( - r"([^\|][;。?!!\n]|[a-z][.?;!][ \n])", + r"([^\|][;。?!!،؛؟۔\n]|[a-z\u0600-\u06FF][.?;!،؛؟][ \n])", pieces[i])) i += 1 pieces = pieces_ else: - pieces = re.split(r"([^\|][;。?!!\n]|[a-z][.?;!][ \n])", answer) + # Sentence boundary regex includes Arabic punctuation (، ؛ ؟ ۔) + pieces = re.split(r"([^\|][;。?!!،؛؟۔\n]|[a-z\u0600-\u06FF][.?;!،؛؟][ \n])", answer) for i in range(1, len(pieces)): - if re.match(r"([^\|][;。?!!\n]|[a-z][.?;!][ \n])", pieces[i]): + if re.match(r"([^\|][;。?!!،؛؟۔\n]|[a-z\u0600-\u06FF][.?;!،؛؟][ \n])", pieces[i]): pieces[i - 1] += pieces[i][0] pieces[i] = pieces[i][1:] idx = [] diff --git a/rag/prompts/citation_prompt.md b/rag/prompts/citation_prompt.md index ff41ea2199..9c50c8e01f 100644 --- a/rag/prompts/citation_prompt.md +++ b/rag/prompts/citation_prompt.md @@ -9,6 +9,7 @@ Based on the provided document or chat history, add citations to the input text - DO NOT cite content not from - DO NOT modify whitespace or original text - STRICTLY prohibit non-standard formatting (~~, etc.) +- For RTL languages (Arabic, Hebrew, Persian): Place citations at the logical end of sentences (same position as LTR). The frontend handles bidirectional rendering automatically. ## What MUST Be Cited: 1. **Quantitative data**: Numbers, percentages, statistics, measurements @@ -99,6 +100,18 @@ ASSISTANT: Paris is the capital of France. It's known for its rich history, culture, and architecture. The Eiffel Tower was completed in 1889 [ID:301]. The city attracts millions of tourists annually. Paris remains one of the world's most visited destinations. (Note: Only the specific date needs citation, not common knowledge about Paris) +## Example 6: RTL Language (Arabic) + +ID: 401 +└── Content: في أول أيام شهر رمضان، أثار وضع رأس خنزير على مدخل مسجد بمدينة سانت أومير شمالي فرنسا تفاعلات واسعة. + + +USER: ماذا حدث في رمضان؟ + +ASSISTANT: +في أول أيام شهر رمضان، أثار وضع رأس خنزير على مدخل مسجد بمدينة سانت أومير شمالي فرنسا تفاعلات واسعة [ID:401]. +(Note: Citation is placed at the logical end of the sentence, same as LTR languages. The frontend handles RTL display automatically.) + --- Examples END --- REMEMBER: diff --git a/web/src/app.tsx b/web/src/app.tsx index deb5a0af6d..8a0fc46f6b 100644 --- a/web/src/app.tsx +++ b/web/src/app.tsx @@ -1,8 +1,16 @@ import { Toaster as Sonner } from '@/components/ui/sonner'; import { Toaster } from '@/components/ui/toaster'; -import i18n, { changeLanguageAsync } from '@/locales/config'; +import i18n from '@/locales/config'; import { QueryClient, QueryClientProvider } from '@tanstack/react-query'; import { configResponsive } from 'ahooks'; +import { App, ConfigProvider, ConfigProviderProps, theme } from 'antd'; +import pt_BR from 'antd/lib/locale/pt_BR'; +import deDE from 'antd/locale/de_DE'; +import enUS from 'antd/locale/en_US'; +import ru_RU from 'antd/locale/ru_RU'; +import vi_VN from 'antd/locale/vi_VN'; +import zhCN from 'antd/locale/zh_CN'; +import zh_HK from 'antd/locale/zh_HK'; import dayjs from 'dayjs'; import advancedFormat from 'dayjs/plugin/advancedFormat'; import customParseFormat from 'dayjs/plugin/customParseFormat'; @@ -10,9 +18,9 @@ import localeData from 'dayjs/plugin/localeData'; import weekOfYear from 'dayjs/plugin/weekOfYear'; import weekYear from 'dayjs/plugin/weekYear'; import weekday from 'dayjs/plugin/weekday'; -import React, { useEffect } from 'react'; +import React, { useEffect, useState } from 'react'; import { RouterProvider } from 'react-router'; -import { ThemeProvider } from './components/theme-provider'; +import { ThemeProvider, useTheme } from './components/theme-provider'; import { SidebarProvider } from './components/ui/sidebar'; import { TooltipProvider } from './components/ui/tooltip'; import { ThemeEnum } from './constants/common'; @@ -38,6 +46,16 @@ dayjs.extend(localeData); dayjs.extend(weekOfYear); dayjs.extend(weekYear); +const AntLanguageMap = { + en: enUS, + zh: zhCN, + 'zh-TRADITIONAL': zh_HK, + ru: ru_RU, + vi: vi_VN, + 'pt-BR': pt_BR, + de: deDE, +}; + if (process.env.NODE_ENV === 'development') { import('@welldone-software/why-did-you-render').then( (whyDidYouRenderModule) => { @@ -61,17 +79,19 @@ const queryClient = new QueryClient({ }, }); +type Locale = ConfigProviderProps['locale']; + function Root({ children }: React.PropsWithChildren) { - useEffect(() => { - const lng = storage.getLanguage(); - if (lng) { - document.documentElement.lang = lng; - } - }, []); + const { theme: themeragflow } = useTheme(); + const getLocale = (lng: string) => + AntLanguageMap[lng as keyof typeof AntLanguageMap] ?? enUS; + + const [locale, setLocal] = useState(getLocale(storage.getLanguage())); useEffect(() => { const handleLanguageChanged = (lng: string) => { storage.setLanguage(lng); + setLocal(getLocale(lng)); document.documentElement.lang = lng; }; @@ -81,11 +101,28 @@ function Root({ children }: React.PropsWithChildren) { i18n.off('languageChanged', handleLanguageChanged); }; }, []); - return ( - -
{children}
-
+ <> + + + {children} + + + + + ); } @@ -93,7 +130,7 @@ const RootProvider = ({ children }: React.PropsWithChildren) => { useEffect(() => { const lng = storage.getLanguage(); if (lng) { - changeLanguageAsync(lng); + i18n.changeLanguage(lng); } }, []); @@ -105,8 +142,6 @@ const RootProvider = ({ children }: React.PropsWithChildren) => { storageKey="ragflow-ui-theme" > {children} - - diff --git a/web/src/components/floating-chat-widget-markdown.tsx b/web/src/components/floating-chat-widget-markdown.tsx index 89aa51663f..8e098404c8 100644 --- a/web/src/components/floating-chat-widget-markdown.tsx +++ b/web/src/components/floating-chat-widget-markdown.tsx @@ -8,12 +8,15 @@ import { import { IReference, IReferenceChunk } from '@/interfaces/database/chat'; import { currentReg, + parseCitationIndex, preprocessLaTeX, replaceTextByOldReg, replaceThinkToSection, showImage, } from '@/utils/chat'; +import { citationMarkerReg } from '@/utils/citation-utils'; import { getExtension } from '@/utils/document-util'; +import { getDirAttribute } from '@/utils/text-direction'; import { InfoCircleOutlined } from '@ant-design/icons'; import classNames from 'classnames'; import DOMPurify from 'dompurify'; @@ -41,7 +44,8 @@ import { Button } from './ui/button'; import { Popover, PopoverContent, PopoverTrigger } from './ui/popover'; import { Tooltip, TooltipContent, TooltipTrigger } from './ui/tooltip'; -const getChunkIndex = (match: string) => Number(match.replace(/\[|\]/g, '')); +const getChunkIndex = (match: string) => + parseCitationIndex(match.replace(/\[|\]/g, '')); const FloatingChatWidgetMarkdown = ({ reference, @@ -281,14 +285,19 @@ const FloatingChatWidgetMarkdown = ({ [getPopoverContent, getReferenceInfo, handleDocumentButtonClick], ); + const dir = getDirAttribute(content.replace(citationMarkerReg, '')); + return ( -
+
( +

{children}

+ ), 'custom-typography': ({ children }: { children: string }) => renderReference(children), code(props: any) { diff --git a/web/src/components/highlight-markdown/index.tsx b/web/src/components/highlight-markdown/index.tsx index f31a16f695..b19a318088 100644 --- a/web/src/components/highlight-markdown/index.tsx +++ b/web/src/components/highlight-markdown/index.tsx @@ -13,6 +13,8 @@ import remarkMath from 'remark-math'; import 'katex/dist/katex.min.css'; // `rehype-katex` does not import the CSS for you import { preprocessLaTeX } from '@/utils/chat'; +import { citationMarkerReg } from '@/utils/citation-utils'; +import { getDirAttribute } from '@/utils/text-direction'; import { useIsDarkTheme } from '../theme-provider'; import styles from './index.module.less'; @@ -22,37 +24,44 @@ const HighLightMarkdown = ({ children: string | null | undefined; }) => { const isDarkTheme = useIsDarkTheme(); + const dir = children + ? getDirAttribute(children.replace(citationMarkerReg, '')) + : undefined; return ( - - {String(children).replace(/\n$/, '')} - - ) : ( - - {children} - - ); - }, - } as any - } - > - {children ? preprocessLaTeX(children) : children} - +
+ ( +

{children}

+ ), + code(props: any) { + const { children, className, ...rest } = props; + const match = /language-(\w+)/.exec(className || ''); + return match ? ( + + {String(children).replace(/\n$/, '')} + + ) : ( + + {children} + + ); + }, + } as any + } + > + {children ? preprocessLaTeX(children) : children} +
+
); }; diff --git a/web/src/components/home-card.tsx b/web/src/components/home-card.tsx index 06edff0454..d8ec97d749 100644 --- a/web/src/components/home-card.tsx +++ b/web/src/components/home-card.tsx @@ -44,7 +44,10 @@ export function HomeCard({
-
+
{data.name}
{icon} diff --git a/web/src/components/markdown-content/index.module.less b/web/src/components/markdown-content/index.module.less index 2fa7f92f1f..59a8b4771a 100644 --- a/web/src/components/markdown-content/index.module.less +++ b/web/src/components/markdown-content/index.module.less @@ -1,14 +1,19 @@ .markdownContentWrapper { :global(section.think) { - padding-left: 10px; + padding-inline-start: 10px; color: #8b8b8b; - border-left: 2px solid #d5d3d3; + border-inline-start: 2px solid #d5d3d3; margin-bottom: 10px; font-size: 12px; } :global(blockquote) { - padding-left: 10px; - border-left: 4px solid #ccc; + padding-inline-start: 10px; + border-inline-start: 4px solid #ccc; + } + + // RTL Support + &[dir='rtl'] { + text-align: start; } } diff --git a/web/src/components/markdown-content/index.tsx b/web/src/components/markdown-content/index.tsx index b434322cf5..603e0552e8 100644 --- a/web/src/components/markdown-content/index.tsx +++ b/web/src/components/markdown-content/index.tsx @@ -1,7 +1,9 @@ import Image from '@/components/image'; import SvgIcon from '@/components/svg-icon'; import { IReference, IReferenceChunk } from '@/interfaces/database/chat'; +import { citationMarkerReg } from '@/utils/citation-utils'; import { getExtension } from '@/utils/document-util'; +import { getDirAttribute } from '@/utils/text-direction'; import DOMPurify from 'dompurify'; import { useCallback, useEffect, useMemo } from 'react'; import Markdown from 'react-markdown'; @@ -19,6 +21,7 @@ import 'katex/dist/katex.min.css'; // `rehype-katex` does not import the CSS for import { useFetchDocumentThumbnailsByIds } from '@/hooks/use-document-request'; import { currentReg, + parseCitationIndex, preprocessLaTeX, replaceTextByOldReg, replaceThinkToSection, @@ -35,7 +38,7 @@ import { } from '../ui/hover-card'; import styles from './index.module.less'; -const getChunkIndex = (match: string) => Number(match); +const getChunkIndex = (match: string) => parseCitationIndex(match); // TODO: The display of the table is inconsistent with the display previously placed in the MessageItem. const MarkdownContent = ({ @@ -169,6 +172,7 @@ const MarkdownContent = ({ __html: DOMPurify.sanitize(chunkItem?.content ?? ''), }} className={classNames(styles.chunkContentText)} + dir="auto" >
{documentId && (
@@ -213,9 +217,9 @@ const MarkdownContent = ({ return ( - + Fig. {chunkIndex + 1} - + {getPopoverContent(chunkIndex)} @@ -229,42 +233,48 @@ const MarkdownContent = ({ [getPopoverContent], ); + const dir = getDirAttribute(content.replace(citationMarkerReg, '')); + return ( - - renderReference(children), - code(props: any) { - const { children, className, ...rest } = props; - const restProps = omit(rest, 'node'); - const match = /language-(\w+)/.exec(className || ''); - return match ? ( - - {String(children).replace(/\n$/, '')} - - ) : ( - - {children} - - ); - }, - } as any - } - > - {contentWithCursor} - +
+ ( +

{children}

+ ), + 'custom-typography': ({ children }: { children: string }) => + renderReference(children), + code(props: any) { + const { children, className, ...rest } = props; + const restProps = omit(rest, 'node'); + const match = /language-(\w+)/.exec(className || ''); + return match ? ( + + {String(children).replace(/\n$/, '')} + + ) : ( + + {children} + + ); + }, + } as any + } + > + {contentWithCursor} +
+
); }; diff --git a/web/src/components/markdown-content/reference-utils.ts b/web/src/components/markdown-content/reference-utils.ts index ffc80fbf4f..384e5e7341 100644 --- a/web/src/components/markdown-content/reference-utils.ts +++ b/web/src/components/markdown-content/reference-utils.ts @@ -1,5 +1,5 @@ import { IReference } from '@/interfaces/database/chat'; -import { currentReg, showImage } from '@/utils/chat'; +import { currentReg, normalizeCitationDigits, showImage } from '@/utils/chat'; export interface ReferenceMatch { id: string; @@ -15,7 +15,7 @@ export const findAllReferenceMatches = (text: string): ReferenceMatch[] => { let match; while ((match = currentReg.exec(text)) !== null) { matches.push({ - id: match[1], + id: normalizeCitationDigits(match[1]), fullMatch: match[0], start: match.index, end: match.index + match[0].length, diff --git a/web/src/components/message-input/next.tsx b/web/src/components/message-input/next.tsx index 186bf34bab..636c6595ad 100644 --- a/web/src/components/message-input/next.tsx +++ b/web/src/components/message-input/next.tsx @@ -268,7 +268,11 @@ export function NextMessageInput({
{sendLoading ? ( - ) : ( diff --git a/web/src/components/next-markdown-content/index.module.less b/web/src/components/next-markdown-content/index.module.less index 3a26fa4bf7..3d544b1125 100644 --- a/web/src/components/next-markdown-content/index.module.less +++ b/web/src/components/next-markdown-content/index.module.less @@ -1,14 +1,19 @@ .markdownContentWrapper { :global(section.think) { - padding-left: 10px; + padding-inline-start: 10px; color: #8b8b8b; - border-left: 2px solid #d5d3d3; + border-inline-start: 2px solid #d5d3d3; margin-bottom: 10px; font-size: 12px; } :global(blockquote) { - padding-left: 10px; - border-left: 4px solid #ccc; + padding-inline-start: 10px; + border-inline-start: 4px solid #ccc; + } + + // RTL Support + &[dir='rtl'] { + text-align: start; } } @@ -36,6 +41,11 @@ .chunkText; max-height: 45vh; overflow-y: auto; + + // RTL Support + &[dir='rtl'] { + text-align: start; + } } .documentLink { padding: 0; diff --git a/web/src/components/next-markdown-content/index.tsx b/web/src/components/next-markdown-content/index.tsx index 6bb609b046..903a526c3e 100644 --- a/web/src/components/next-markdown-content/index.tsx +++ b/web/src/components/next-markdown-content/index.tsx @@ -18,10 +18,13 @@ import 'katex/dist/katex.min.css'; // `rehype-katex` does not import the CSS for import { currentReg, + parseCitationIndex, preprocessLaTeX, replaceTextByOldReg, replaceThinkToSection, } from '@/utils/chat'; +import { citationMarkerReg } from '@/utils/citation-utils'; +import { getDirAttribute } from '@/utils/text-direction'; import { useFetchDocumentThumbnailsByIds } from '@/hooks/use-document-request'; import { cn } from '@/lib/utils'; @@ -37,7 +40,7 @@ import { } from '../ui/hover-card'; import styles from './index.module.less'; -const getChunkIndex = (match: string) => Number(match); +const getChunkIndex = (match: string) => parseCitationIndex(match); // TODO: The display of the table is inconsistent with the display previously placed in the MessageItem. function MarkdownContent({ reference, @@ -171,6 +174,7 @@ function MarkdownContent({ __html: DOMPurify.sanitize(chunkItem?.content ?? ''), }} className={classNames(styles.chunkContentText, 'w-full')} + dir="auto" >
{documentId && (
@@ -215,9 +219,9 @@ function MarkdownContent({ return ( - + Fig. {chunkIndex + 1} - + {renderPopoverContent(chunkIndex)} @@ -231,42 +235,48 @@ function MarkdownContent({ [renderPopoverContent], ); + const dir = getDirAttribute(content.replace(citationMarkerReg, '')); + return ( - - renderReference(children), - code(props: any) { - const { children, className, ...rest } = props; - const restProps = omit(rest, 'node'); - const match = /language-(\w+)/.exec(className || ''); - return match ? ( - - {String(children).replace(/\n$/, '')} - - ) : ( - - {children} - - ); - }, - } as any - } - > - {contentWithCursor} - +
+ ( +

{children}

+ ), + 'custom-typography': ({ children }: { children: string }) => + renderReference(children), + code(props: any) { + const { children, className, ...rest } = props; + const restProps = omit(rest, 'node'); + const match = /language-(\w+)/.exec(className || ''); + return match ? ( + + {String(children).replace(/\n$/, '')} + + ) : ( + + {children} + + ); + }, + } as any + } + > + {contentWithCursor} +
+
); } diff --git a/web/src/components/next-message-item/index.module.less b/web/src/components/next-message-item/index.module.less index 1ba3f84811..794d5fed39 100644 --- a/web/src/components/next-message-item/index.module.less +++ b/web/src/components/next-message-item/index.module.less @@ -27,6 +27,11 @@ .chunkText(); .messageTextBase(); word-break: break-word; + + // RTL Support + &[dir='rtl'] { + text-align: right; + } } .messageTextDark { .chunkText(); @@ -36,6 +41,21 @@ color: rgb(166, 166, 166); border-left-color: rgb(78, 78, 86); } + + // RTL Support + &[dir='rtl'] { + text-align: right; + + :global(section.think) { + border-left-color: transparent; + border-right-color: rgb(78, 78, 86); + border-right-width: 2px; + border-right-style: solid; + border-left: none; + padding-left: 0; + padding-right: 10px; + } + } } .messageUserText { @@ -43,6 +63,11 @@ .messageTextBase(); word-break: break-word; text-align: justify; + + // RTL Support + &[dir='rtl'] { + text-align: right; + } } .messageEmpty { width: 300px; diff --git a/web/src/components/next-message-item/index.tsx b/web/src/components/next-message-item/index.tsx index 77a41716fb..853d87adbe 100644 --- a/web/src/components/next-message-item/index.tsx +++ b/web/src/components/next-message-item/index.tsx @@ -21,6 +21,8 @@ import { INodeEvent, MessageEventType } from '@/hooks/use-send-message'; import { cn } from '@/lib/utils'; import { AgentChatContext } from '@/pages/agent/context'; import { WorkFlowTimeline } from '@/pages/agent/log-sheet/workflow-timeline'; +import { citationMarkerReg } from '@/utils/citation-utils'; +import { getDirAttribute } from '@/utils/text-direction'; import { isEmpty } from 'lodash'; import { Atom, ChevronDown, ChevronUp } from 'lucide-react'; import MarkdownContent from '../next-markdown-content'; @@ -149,6 +151,7 @@ function MessageItem({ [styles.messageUserText]: !isAssistant, 'bg-bg-card': !isAssistant, })} + dir={getDirAttribute(messageContent.replace(citationMarkerReg, ''))} > {item.data ? ( children diff --git a/web/src/components/next-message-item/utils.ts b/web/src/components/next-message-item/utils.ts index c18988925a..615d7a192d 100644 --- a/web/src/components/next-message-item/utils.ts +++ b/web/src/components/next-message-item/utils.ts @@ -1,12 +1,12 @@ -import { currentReg } from '@/utils/chat'; +import { currentReg, parseCitationIndex } from '@/utils/chat'; export const extractNumbersFromMessageContent = (content: string) => { const matches = content.match(currentReg); if (matches) { const list = matches .map((match) => { - const numMatch = match.match(/\[ID:(\d+)\]/); - return numMatch ? parseInt(numMatch[1], 10) : null; + const parsed = parseCitationIndex(match); + return Number.isNaN(parsed) ? null : parsed; }) .filter((num) => num !== null) as number[]; diff --git a/web/src/components/originui/select-with-search.tsx b/web/src/components/originui/select-with-search.tsx index 806234f964..4fc935c3e0 100644 --- a/web/src/components/originui/select-with-search.tsx +++ b/web/src/components/originui/select-with-search.tsx @@ -164,7 +164,9 @@ export const SelectWithSearch = forwardRef< > {selectLabel || value ? ( - {selectLabel || value} + + {selectLabel || value} + ) : ( {placeholder} diff --git a/web/src/components/rename-dialog/index.tsx b/web/src/components/rename-dialog/index.tsx index f778fe6e2a..9507c877fc 100644 --- a/web/src/components/rename-dialog/index.tsx +++ b/web/src/components/rename-dialog/index.tsx @@ -33,7 +33,12 @@ export function RenameDialog({ onOk={onOk} > - + {t('common.save')} diff --git a/web/src/components/ui/multi-select.tsx b/web/src/components/ui/multi-select.tsx index d2079b234b..b4464aff87 100644 --- a/web/src/components/ui/multi-select.tsx +++ b/web/src/components/ui/multi-select.tsx @@ -471,7 +471,11 @@ export const MultiSelect = React.forwardRef< key={option.value} isSelected={isSelected} toggleOption={toggleOption} - optionTestId={optionTestIdPrefix ? `${optionTestIdPrefix}-option-${idx}` : undefined} + optionTestId={ + optionTestIdPrefix + ? `${optionTestIdPrefix}-option-${idx}` + : undefined + } > ); }, @@ -489,7 +493,11 @@ export const MultiSelect = React.forwardRef< key={option.value} isSelected={isSelected} toggleOption={toggleOption} - optionTestId={optionTestIdPrefix ? `${optionTestIdPrefix}-option-${optIdx}` : undefined} + optionTestId={ + optionTestIdPrefix + ? `${optionTestIdPrefix}-option-${optIdx}` + : undefined + } > ); })} diff --git a/web/src/global.less b/web/src/global.less index 0aa074927c..0fb973cc53 100644 --- a/web/src/global.less +++ b/web/src/global.less @@ -6,7 +6,12 @@ html { } body { - font-family: Inter; + font-family: + 'Inter', + system-ui, + -apple-system, + 'Segoe UI', + sans-serif; margin: 0; height: 100%; } diff --git a/web/src/layouts/next-header.tsx b/web/src/layouts/next-header.tsx index 83618a2f00..5c0cfbac69 100644 --- a/web/src/layouts/next-header.tsx +++ b/web/src/layouts/next-header.tsx @@ -140,7 +140,10 @@ export function Header() { }, [pathname]); return ( -
+
-
+ )} -
+
{(!!data?.length || searchString) && ( <>
diff --git a/web/src/pages/agents/name-form-field.tsx b/web/src/pages/agents/name-form-field.tsx index 9789746a86..5b3260e327 100644 --- a/web/src/pages/agents/name-form-field.tsx +++ b/web/src/pages/agents/name-form-field.tsx @@ -17,7 +17,11 @@ export function NameFormField() { const { t } = useTranslation(); return ( - + ); } diff --git a/web/src/pages/agents/upload-agent-dialog/index.tsx b/web/src/pages/agents/upload-agent-dialog/index.tsx index 0144ef4da0..61123bb019 100644 --- a/web/src/pages/agents/upload-agent-dialog/index.tsx +++ b/web/src/pages/agents/upload-agent-dialog/index.tsx @@ -26,7 +26,12 @@ export function UploadAgentDialog({ - + {t('common.save')} diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/chunk-card/index.module.less b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/chunk-card/index.module.less index 622e3b9e5e..fac4d99b16 100644 --- a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/chunk-card/index.module.less +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/chunk-card/index.module.less @@ -21,7 +21,8 @@ } .contentText { - word-break: break-all !important; + word-break: break-word; + overflow-wrap: break-word; } .chunkCard { diff --git a/web/src/pages/dataflow-result/components/chunk-card/index.module.less b/web/src/pages/dataflow-result/components/chunk-card/index.module.less index aac7724af4..d6ca2ad497 100644 --- a/web/src/pages/dataflow-result/components/chunk-card/index.module.less +++ b/web/src/pages/dataflow-result/components/chunk-card/index.module.less @@ -19,7 +19,8 @@ } .contentText { - word-break: break-all !important; + word-break: break-word; + overflow-wrap: break-word; } .chunkCard { diff --git a/web/src/pages/next-chats/chat/app-settings/chat-basic-settings.tsx b/web/src/pages/next-chats/chat/app-settings/chat-basic-settings.tsx index 3f06e1818e..76212fcc72 100644 --- a/web/src/pages/next-chats/chat/app-settings/chat-basic-settings.tsx +++ b/web/src/pages/next-chats/chat/app-settings/chat-basic-settings.tsx @@ -16,11 +16,16 @@ import { import { Input } from '@/components/ui/input'; import { Textarea } from '@/components/ui/textarea'; import { useTranslate } from '@/hooks/common-hooks'; +import { getDirAttribute } from '@/utils/text-direction'; import { useFormContext } from 'react-hook-form'; export default function ChatBasicSetting() { const { t } = useTranslate('chat'); const form = useFormContext(); + const nameValue = form.watch('name'); + const descriptionValue = form.watch('description'); + const emptyResponseValue = form.watch('prompt_config.empty_response'); + const prologueValue = form.watch('prompt_config.prologue'); return (
@@ -46,7 +51,7 @@ export default function ChatBasicSetting() { {t('assistantName')} - + @@ -59,7 +64,10 @@ export default function ChatBasicSetting() { {t('description')} - + @@ -74,7 +82,10 @@ export default function ChatBasicSetting() { {t('emptyResponse')} - + @@ -89,7 +100,10 @@ export default function ChatBasicSetting() { {t('setAnOpener')} - + diff --git a/web/src/pages/next-chats/chat/app-settings/chat-prompt-engine.tsx b/web/src/pages/next-chats/chat/app-settings/chat-prompt-engine.tsx index a7c05f7b4a..8f84f09097 100644 --- a/web/src/pages/next-chats/chat/app-settings/chat-prompt-engine.tsx +++ b/web/src/pages/next-chats/chat/app-settings/chat-prompt-engine.tsx @@ -15,12 +15,14 @@ import { import { Textarea } from '@/components/ui/textarea'; import { UseKnowledgeGraphFormField } from '@/components/use-knowledge-graph-item'; import { useTranslate } from '@/hooks/common-hooks'; +import { getDirAttribute } from '@/utils/text-direction'; import { useFormContext } from 'react-hook-form'; import { DynamicVariableForm } from './dynamic-variable'; export function ChatPromptEngine() { const { t } = useTranslate('chat'); const form = useFormContext(); + const systemPromptValue = form.watch('prompt_config.system'); return (
@@ -36,6 +38,7 @@ export function ChatPromptEngine() { rows={8} placeholder={t('messagePlaceholder')} className="overflow-y-auto" + dir={getDirAttribute(systemPromptValue || '')} /> diff --git a/web/src/pages/next-chats/chat/app-settings/dynamic-variable.tsx b/web/src/pages/next-chats/chat/app-settings/dynamic-variable.tsx index 53a593c8a7..8dd4372c03 100644 --- a/web/src/pages/next-chats/chat/app-settings/dynamic-variable.tsx +++ b/web/src/pages/next-chats/chat/app-settings/dynamic-variable.tsx @@ -9,6 +9,7 @@ import { import { BlurInput } from '@/components/ui/input'; import { Separator } from '@/components/ui/separator'; import { Switch } from '@/components/ui/switch'; +import { getDirAttribute } from '@/utils/text-direction'; import { Plus, X } from 'lucide-react'; import { useCallback } from 'react'; import { useFieldArray, useFormContext } from 'react-hook-form'; @@ -58,53 +59,58 @@ export function DynamicVariableForm() {
- {fields.map((field, index) => ( -
- ( - - - - - - - )} - /> + {fields.map((field, index) => { + const typeField = `${name}.${index}.key`; + const keyValue = form.watch(typeField); + return ( +
+ ( + + + + + + + )} + /> - + - ( - - - - - - - )} - /> + ( + + + + + + + )} + /> - -
- ))} + +
+ ); + })}
diff --git a/web/src/pages/next-chats/chat/app-settings/saving-button.tsx b/web/src/pages/next-chats/chat/app-settings/saving-button.tsx index 83cd7a6e8d..bc880f18d1 100644 --- a/web/src/pages/next-chats/chat/app-settings/saving-button.tsx +++ b/web/src/pages/next-chats/chat/app-settings/saving-button.tsx @@ -9,7 +9,11 @@ export function SavingButton({ loading }: SaveButtonProps) { const { t } = useTranslation(); return ( - + {t('common.save')} ); diff --git a/web/src/pages/next-search/markdown-content/index.tsx b/web/src/pages/next-search/markdown-content/index.tsx index 118f2c2aad..fd0895a106 100644 --- a/web/src/pages/next-search/markdown-content/index.tsx +++ b/web/src/pages/next-search/markdown-content/index.tsx @@ -18,10 +18,13 @@ import 'katex/dist/katex.min.css'; // `rehype-katex` does not import the CSS for import { currentReg, + parseCitationIndex, preprocessLaTeX, replaceTextByOldReg, replaceThinkToSection, } from '@/utils/chat'; +import { citationMarkerReg } from '@/utils/citation-utils'; +import { getDirAttribute } from '@/utils/text-direction'; import { Button } from '@/components/ui/button'; import { @@ -46,7 +49,7 @@ const styles = { fileThumbnail: 'inline-block max-w-[40px]', }; -const getChunkIndex = (match: string) => Number(match); +const getChunkIndex = (match: string) => parseCitationIndex(match); // TODO: The display of the table is inconsistent with the display previously placed in the MessageItem. const MarkdownContent = ({ @@ -234,42 +237,51 @@ const MarkdownContent = ({ [getPopoverContent], ); + const dir = getDirAttribute(content.replace(citationMarkerReg, '')); + return ( - - renderReference(children), - code(props: any) { - const { children, className, ...rest } = props; - const restProps = omit(rest, 'node'); - const match = /language-(\w+)/.exec(className || ''); - return match ? ( - - {String(children).replace(/\n$/, '')} - - ) : ( - - {children} - - ); - }, - } as any - } > - {contentWithCursor} - + ( +

{children}

+ ), + 'custom-typography': ({ children }: { children: string }) => + renderReference(children), + code(props: any) { + const { children, className, ...rest } = props; + const restProps = omit(rest, 'node'); + const match = /language-(\w+)/.exec(className || ''); + return match ? ( + + {String(children).replace(/\n$/, '')} + + ) : ( + + {children} + + ); + }, + } as any + } + > + {contentWithCursor} +
+
); }; diff --git a/web/src/pages/next-search/search-setting.tsx b/web/src/pages/next-search/search-setting.tsx index 1608bec6f4..eb5acb6d24 100644 --- a/web/src/pages/next-search/search-setting.tsx +++ b/web/src/pages/next-search/search-setting.tsx @@ -586,7 +586,11 @@ const SearchSetting: React.FC = ({ > {t('search.cancelText')} -