mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-02 00:35:46 +08:00
### What problem does this PR solve? This PR adds comprehensive **Right-to-Left (RTL) language support**, primarily targeting Arabic and other RTL scripts (Hebrew, Persian, Urdu, etc.). Previously, RTL content had multiple rendering issues: - Incorrect sentence splitting for Arabic punctuation in citation logic - Misaligned text in chat messages and markdown components - Improper positioning of blockquotes and “think” sections - Incorrect table alignment - Citation placement ambiguity in RTL prompts - UI layout inconsistencies when mixing LTR and RTL text This PR introduces backend and frontend improvements to properly detect, render, and style RTL content while preserving existing LTR behavior. #### Backend - Updated sentence boundary regex in `rag/nlp/search.py` to include Arabic punctuation: - `،` (comma) - `؛` (semicolon) - `؟` (question mark) - `۔` (Arabic full stop) - Ensures citation insertion works correctly in RTL sentences. - Updated citation prompt instructions to clarify citation placement rules for RTL languages. #### Frontend - Introduced a new utility: `text-direction.ts` - Detects text direction based on Unicode ranges. - Supports Arabic, Hebrew, Syriac, Thaana, and related scripts. - Provides `getDirAttribute()` for automatic `dir` assignment. - Applied dynamic `dir` attributes across: - Markdown rendering - Chat messages - Search results - Tables - Hover cards and reference popovers - Added proper RTL styling in LESS: - Text alignment adjustments - Blockquote border flipping - Section indentation correction - Table direction switching - Use of `<bdi>` for figure labels to prevent bidirectional conflicts #### DevOps / Environment - Added Windows backend launch script with retry handling. - Updated dependency metadata. - Adjusted development-only React debugging behavior. 
--- ### Type of change - [x] Bug Fix (non-breaking change which fixes RTL rendering and citation issues) - [x] New Feature (non-breaking change which adds RTL detection and dynamic direction handling) --------- Co-authored-by: 6ba3i <isbaaoui09@gmail.com> Co-authored-by: Ahmad Intisar <ahmadintisar@Ahmads-MacBook-M4-Pro.local> Co-authored-by: Ahmad Intisar <168020872+ahmadintisar@users.noreply.github.com> Co-authored-by: Liu An <asiro@qq.com>
111 lines
3.3 KiB
TypeScript
111 lines
3.3 KiB
TypeScript
import {
|
|
ChatVariableEnabledField,
|
|
EmptyConversationId,
|
|
} from '@/constants/chat';
|
|
import { IMessage, Message } from '@/interfaces/database/chat';
|
|
import { omit } from 'lodash';
|
|
import { v4 as uuid } from 'uuid';
|
|
import {
|
|
citationMarkerReg,
|
|
normalizeCitationDigits,
|
|
parseCitationIndex,
|
|
} from './citation-utils';
|
|
|
|
/**
 * Tells whether a conversation id refers to an actual conversation.
 *
 * @param conversationId - The id to inspect.
 * @returns `false` when the id is the `EmptyConversationId` sentinel or an
 *   empty string, `true` for anything else.
 */
export const isConversationIdExist = (conversationId: string) => {
  const isPlaceholder =
    conversationId === EmptyConversationId || conversationId === '';
  return !isPlaceholder;
};
|
|
|
|
/**
 * Picks the id to use for a message.
 *
 * @param message - A (possibly partial) message.
 * @returns The message's own `id` when it has a truthy one; otherwise a
 *   freshly generated uuid.
 */
export const buildMessageUuid = (message: Partial<Message | IMessage>) =>
  ('id' in message && message.id) || uuid();
|
|
|
|
/**
 * Produces a copy of a message list in which every entry has an `id` and the
 * `reference` property has been removed.
 *
 * @param messages - The messages to convert; may be omitted.
 * @returns The converted list, or an empty array when `messages` is nullish.
 */
export const buildMessageListWithUuid = (messages?: Message[]) => {
  if (!messages) {
    return [];
  }

  return messages.map((message: Message | IMessage) => {
    const withoutReference = omit(message, 'reference');
    // `id` is written last so it overrides whatever the spread carried over.
    return { ...withoutReference, id: buildMessageUuid(message) };
  });
};
|
|
|
|
/**
 * Generates a new conversation id.
 *
 * @returns A v4 uuid with every `-` separator removed.
 */
export const generateConversationId = () => {
  const segments = uuid().split('-');
  return segments.join('');
};
|
|
|
|
/**
 * Builds the key used when rendering a message: the role is prepended to the
 * id so that the resulting key is unique per rendered message.
 *
 * @param message - A (possibly partial) message.
 * @returns The string `<role>_<id>`.
 */
export const buildMessageUuidWithRole = (
  message: Partial<Message | IMessage>,
) => {
  const { role, id } = message;
  return `${role}_${id}`;
};
|
|
|
|
// Preprocess LaTeX equations to be rendered by KaTeX
// ref: https://github.com/remarkjs/react-markdown/issues/785
//
// Delimiter matching:
// - The body is matched lazily, so each opening \[ or \( is paired with the
//   nearest valid closing delimiter. A greedy body would swallow everything
//   between the first opening and the last closing delimiter of the whole
//   message, merging separate equations (and the prose between them) into one.
// - A closing \] or \) is skipped only when it directly follows a delimiter
//   sizing command (\left, \right, \middle, \big, \Big, \bigg, \Bigg and their
//   l/r/m variants), e.g. `\right\]`. A looser "not preceded by a letter" rule
//   would reject ordinary equations such as \(x\) or \(\alpha\).

const BLOCK_MATH_RE =
  /\\\[([\s\S]*?)(?<!\\(?:left|right|middle|[bB]igg?[lrm]?))\\\]/g;
const INLINE_MATH_RE =
  /\\\(([\s\S]*?)(?<!\\(?:left|right|middle|[bB]igg?[lrm]?))\\\)/g;

/**
 * Rewrites LaTeX delimiters into the dollar-sign form the markdown math
 * pipeline understands: `\[ ... \]` becomes `$$ ... $$` and `\( ... \)`
 * becomes `$ ... $`.
 *
 * @param content - Markdown text that may contain LaTeX equations.
 * @returns The text with every delimited equation rewritten; text without
 *   such delimiters is returned unchanged.
 */
export const preprocessLaTeX = (content: string) => {
  // Replacer functions are used (rather than replacement strings) so that the
  // `$` characters are emitted literally instead of being read as patterns.
  const blockProcessedContent = content.replace(
    BLOCK_MATH_RE,
    (_match, equation: string) => `$$${equation}$$`,
  );
  const inlineProcessedContent = blockProcessedContent.replace(
    INLINE_MATH_RE,
    (_match, equation: string) => `$${equation}$`,
  );
  return inlineProcessedContent;
};
|
|
|
|
/**
 * Converts every `<think>...</think>` pair in the text into
 * `<section class="think">...</section>`, keeping the inner content as is.
 *
 * @param text - The text to transform; defaults to an empty string.
 * @returns The transformed text. A `<think>` without a matching `</think>`
 *   is left untouched.
 */
export function replaceThinkToSection(text: string = '') {
  // Lazy body so that each opening tag pairs with the nearest closing tag.
  const thinkBlock = /<think>([\s\S]*?)<\/think>/g;
  const sectionTemplate = '<section class="think">$1</section>';
  return text.replace(thinkBlock, sectionTemplate);
}
|
|
|
|
/**
 * Gives the initial on/off value for a chat-variable toggle.
 *
 * @param field - The toggle field in question.
 * @returns `false` for `ChatVariableEnabledField.MaxTokensEnabled`, `true`
 *   for every other field.
 */
export function setInitialChatVariableEnabledFieldValue(
  field: ChatVariableEnabledField,
) {
  const isMaxTokensToggle = field === ChatVariableEnabledField.MaxTokensEnabled;
  return !isMaxTokensToggle;
}
|
|
|
|
// Field values for which `showImage` answers true.
const ShowImageFields = ['image', 'table'];

/**
 * Checks whether the given field value is one of `ShowImageFields`.
 *
 * @param filed - The field value to test; may be omitted.
 * @returns `true` on an exact (case-sensitive) match, `false` otherwise,
 *   including when no value is passed.
 */
export function showImage(filed?: string) {
  for (const candidate of ShowImageFields) {
    if (candidate === filed) {
      return true;
    }
  }
  return false;
}
|
|
|
|
/**
 * Builds the initial on/off map for all chat-variable toggles.
 *
 * @returns An object keyed by every value of `ChatVariableEnabledField`;
 *   each entry is `true` except the one for `MaxTokensEnabled`, which is
 *   `false`.
 */
export function setChatVariableEnabledFieldValuePage() {
  const enabledByField: Record<string, boolean> = {};

  for (const field of Object.values(ChatVariableEnabledField)) {
    enabledByField[field] = field !== ChatVariableEnabledField.MaxTokensEnabled;
  }

  return enabledByField;
}
|
|
|
|
// Legacy citation marker: two `#`, one or more digits, then two `$`
// (e.g. `##12$$`). Digits may be ASCII 0-9, Arabic-Indic (U+0660-U+0669) or
// Extended Arabic-Indic (U+06F0-U+06F9). Global, so every marker is matched.
const oldReg = /(#{2}[0-9\u0660-\u0669\u06F0-\u06F9]+\${2})/g;
|
|
// Alias of `citationMarkerReg` from './citation-utils', exported under the
// name `currentReg`.
export const currentReg = citationMarkerReg;
|
|
// Re-export the citation helpers imported from './citation-utils' so they are
// also available from this module.
export { normalizeCitationDigits, parseCitationIndex };
|
|
|
|
/**
 * Compatibility shim for the old index matching mode: rewrites every marker
 * matched by `oldReg` into the `[ID:<digits>]` form.
 *
 * @param text - The text that may contain legacy markers.
 * @returns The rewritten text (the optional call yields `undefined` if `text`
 *   is nullish at runtime).
 */
export const replaceTextByOldReg = (text: string) => {
  const toBracketedId = (marker: string) => {
    // Drop the two leading and the two trailing marker characters.
    const digits = marker.slice(2, -2);
    return `[ID:${digits}]`;
  };

  return text?.replace(oldReg, toBracketedId);
};
|