Files
ragflow/web/src/utils/chat.ts
Attili-sys 21bc1ab7ec Feature rtl support (#13118)
### What problem does this PR solve?

This PR adds comprehensive **Right-to-Left (RTL) language support**,
primarily targeting Arabic and other RTL scripts (Hebrew, Persian, Urdu,
etc.).

Previously, RTL content had multiple rendering issues:

- Incorrect sentence splitting for Arabic punctuation in citation logic
- Misaligned text in chat messages and markdown components  
- Improper positioning of blockquotes and “think” sections  
- Incorrect table alignment  
- Citation placement ambiguity in RTL prompts  
- UI layout inconsistencies when mixing LTR and RTL text  

This PR introduces backend and frontend improvements to properly detect,
render, and style RTL content while preserving existing LTR behavior.

#### Backend
- Updated sentence boundary regex in `rag/nlp/search.py` to include
Arabic punctuation:
  - `،` (comma)
  - `؛` (semicolon)
  - `؟` (question mark)
  - `۔` (Arabic full stop)
- Ensures citation insertion works correctly in RTL sentences.
- Updated citation prompt instructions to clarify citation placement
rules for RTL languages.

#### Frontend
- Introduced a new utility: `text-direction.ts`
  - Detects text direction based on Unicode ranges.
  - Supports Arabic, Hebrew, Syriac, Thaana, and related scripts.
  - Provides `getDirAttribute()` for automatic `dir` assignment.

- Applied dynamic `dir` attributes across:
  - Markdown rendering
  - Chat messages
  - Search results
  - Tables
  - Hover cards and reference popovers

- Added proper RTL styling in LESS:
  - Text alignment adjustments
  - Blockquote border flipping
  - Section indentation correction
  - Table direction switching
  - Use of `<bdi>` for figure labels to prevent bidirectional conflicts

#### DevOps / Environment
- Added Windows backend launch script with retry handling.
- Updated dependency metadata.
- Adjusted development-only React debugging behavior.

---

### Type of change

- [x] Bug Fix (non-breaking change which fixes RTL rendering and
citation issues)
- [x] New Feature (non-breaking change which adds RTL detection and
dynamic direction handling)

---------

Co-authored-by: 6ba3i <isbaaoui09@gmail.com>
Co-authored-by: Ahmad Intisar <ahmadintisar@Ahmads-MacBook-M4-Pro.local>
Co-authored-by: Ahmad Intisar <168020872+ahmadintisar@users.noreply.github.com>
Co-authored-by: Liu An <asiro@qq.com>
2026-03-02 13:03:44 +08:00

111 lines
3.3 KiB
TypeScript

import {
ChatVariableEnabledField,
EmptyConversationId,
} from '@/constants/chat';
import { IMessage, Message } from '@/interfaces/database/chat';
import { omit } from 'lodash';
import { v4 as uuid } from 'uuid';
import {
citationMarkerReg,
normalizeCitationDigits,
parseCitationIndex,
} from './citation-utils';
// Reports whether `conversationId` identifies a real conversation: it must be
// neither the empty string nor the `EmptyConversationId` placeholder.
export const isConversationIdExist = (conversationId: string) => {
  if (conversationId === '') {
    return false;
  }
  return conversationId !== EmptyConversationId;
};
// Returns the message's own `id` when it carries a truthy one; otherwise
// generates a fresh UUID for it.
export const buildMessageUuid = (message: Partial<Message | IMessage>) => {
  return 'id' in message && message.id ? message.id : uuid();
};
// Produces a copy of `messages` in which every entry has its `reference`
// field removed and is guaranteed to carry an `id` (see buildMessageUuid).
// A missing list yields an empty array.
export const buildMessageListWithUuid = (messages?: Message[]) => {
  if (messages == null) {
    return [];
  }
  return messages.map((message: Message | IMessage) => {
    const withoutReference = omit(message, 'reference');
    return { ...withoutReference, id: buildMessageUuid(message) };
  });
};
// Creates a new conversation id: a UUID with every hyphen stripped out.
export const generateConversationId = () => {
  const rawUuid = uuid();
  const withoutHyphens = rawUuid.replace(/-/g, '');
  return withoutHyphens;
};
// Builds the render-time identifier for a message by prefixing its id with its
// role ("<role>_<id>"), which keeps ids unique across roles when rendering.
// No fallback is applied: a missing role or id is interpolated as-is.
export const buildMessageUuidWithRole = (
  message: Partial<Message | IMessage>,
) => {
  const { role, id } = message;
  return `${role}_${id}`;
};
// Preprocess LaTeX equations to be rendered by KaTeX
// ref: https://github.com/remarkjs/react-markdown/issues/785
//
// Rewrites \[ ... \] to $$ ... $$ and \( ... \) to $ ... $.
//
// Delimiter matching:
// - Matching is lazy, so each opening delimiter pairs with the nearest valid
//   closing delimiter. Several equations in one message are converted
//   independently instead of being merged into a single span.
// - A delimiter is ignored when its backslash is itself preceded by a
//   backslash: that is a LaTeX line break followed by a bracket (e.g. `\\)`
//   or `\\[2pt]`), not a math delimiter.
// - Bracket commands such as \right], \left[ or \big) contain no `\]` / `\)`
//   sequence, so they never terminate an equation and need no special casing.
//   In particular, an equation may end with a letter (e.g. \(x\)).
const BLOCK_MATH_RE = /(?<!\\)\\\[([\s\S]*?)(?<!\\)\\\]/g;
const INLINE_MATH_RE = /(?<!\\)\\\(([\s\S]*?)(?<!\\)\\\)/g;

// @param content  Markdown text that may contain \[..\] or \(..\) math.
// @returns        The text with those delimiters replaced by $$..$$ / $..$;
//                 text without such delimiters is returned unchanged.
export const preprocessLaTeX = (content: string): string => {
  // Replacer functions are used (rather than replacement strings) so that "$"
  // in the output is taken literally and not as a replacement pattern.
  const blockProcessedContent = content.replace(
    BLOCK_MATH_RE,
    (_, equation: string) => `$$${equation}$$`,
  );
  const inlineProcessedContent = blockProcessedContent.replace(
    INLINE_MATH_RE,
    (_, equation: string) => `$${equation}$`,
  );
  return inlineProcessedContent;
};
// Converts every <think>...</think> span (possibly multi-line, matched
// non-greedily) into <section class="think">...</section>, keeping the inner
// text untouched. `text` defaults to the empty string.
export function replaceThinkToSection(text: string = '') {
  return text.replace(
    /<think>([\s\S]*?)<\/think>/g,
    '<section class="think">$1</section>',
  );
}
// Initial checkbox state for a chat-variable "enabled" field: every field
// starts as true except `MaxTokensEnabled`, which starts as false.
export function setInitialChatVariableEnabledFieldValue(
  field: ChatVariableEnabledField,
) {
  const isMaxTokensField = field === ChatVariableEnabledField.MaxTokensEnabled;
  return !isMaxTokensField;
}
// Field values for which showImage() answers true.
const ShowImageFields = ['image', 'table'];

// Returns true when `filed` is exactly one of the entries in ShowImageFields;
// an omitted value yields false.
export function showImage(filed?: string) {
  return filed !== undefined && ShowImageFields.includes(filed);
}
// Builds a map from every ChatVariableEnabledField value to its initial
// checkbox state: true for all fields except `MaxTokensEnabled`, which is
// false.
export function setChatVariableEnabledFieldValuePage() {
  const enabledByField: Record<string, boolean> = {};
  for (const field of Object.values(ChatVariableEnabledField)) {
    enabledByField[field] = field !== ChatVariableEnabledField.MaxTokensEnabled;
  }
  return enabledByField;
}
// Legacy citation marker: `##<digits>$$`, where the digits may be ASCII (0-9),
// Arabic-Indic (U+0660-U+0669) or Extended Arabic-Indic (U+06F0-U+06F9).
// The whole marker is captured as group 1.
const oldReg = /(#{2}[0-9\u0660-\u0669\u06F0-\u06F9]+\${2})/g;
// Alias of `citationMarkerReg` from ./citation-utils, exported as `currentReg`.
export const currentReg = citationMarkerReg;
// Re-export the citation helpers imported from ./citation-utils.
export { normalizeCitationDigits, parseCitationIndex };
// Compatibility shim for the old index matching mode: each marker matched by
// `oldReg` has its two leading and two trailing characters stripped, and the
// remainder is wrapped as `[ID:<remainder>]`.
export const replaceTextByOldReg = (text: string) =>
  text?.replace(oldReg, (marker: string) => {
    const index = marker.slice(2, -2);
    return `[ID:${index}]`;
  });