mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Harden closed-advisory fixes (#16409)
## Summary - harden reopened advisory fixes across REST connector, invoke, document downloads, and markdown rendering - add targeted regression coverage for redirect-safe SSRF handling, invoke SSRF checks, document access control, and markdown sanitization - verify each referenced GHSA against the original GitHub advisory text and align the closed-advisory plan with the implemented remediation ## What changed - add tenant access checks to document download endpoints to avoid cross-tenant document disclosure - add per-hop SSRF validation, DNS pinning, redirect handling, and redirect limits to the REST API connector - ensure invoke requests validate and pin the resolved host and never follow redirects implicitly - keep the generic rate-limited request path wrapped, not just GET and POST helpers - sanitize markdown HTML before rendering in the highlight markdown component ## Validation - `cd web && npm test -- --runInBand src/components/highlight-markdown/__tests__/index.test.tsx` - `.venv/bin/python -m pytest -q test/unit_test/data_source/test_rest_api_connector.py` - targeted `test/testcases/test_web_api/...` unit additions were reviewed, but the suite cannot be executed end-to-end in this environment because parent `test/testcases/conftest.py` requires a local service on `127.0.0.1:9380` ## Notes - all GHSA entries referenced by the plan were checked against the original GitHub advisory text, not sampled - the closed-advisory plan document was updated locally during review, but is intentionally not included in this PR
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
import { render } from '@testing-library/react';
|
||||
import React from 'react';
|
||||
|
||||
import HighLightMarkdown from '..';
|
||||
|
||||
jest.mock('@/constants/markdown-remark-plugins', () => ({
|
||||
MarkdownRemarkPlugins: [],
|
||||
}));
|
||||
|
||||
// Coderabbit MAJOR #3486038797: the previous mock rendered react-markdown's
|
||||
// output as a plain <div> containing `children` as text, so the spec never
|
||||
// exercised rehypeRaw or the post-preprocessLaTeX sanitization path. With
|
||||
// that mock, `<b>safe</b>` was just text inside a div, and an entity-encoded
|
||||
// `<img onerror=...>` payload would never reach the DOM no matter what the
|
||||
// component did — masking the exact bypass DOMPurify is meant to catch.
|
||||
//
|
||||
// We mock react-markdown to render `children` as raw HTML (via
|
||||
// dangerouslySetInnerHTML). This mimics the real pipeline: if the component
|
||||
// fails to sanitize (e.g. sanitizes BEFORE preprocessLaTeX), the unsafe HTML
|
||||
// will reach this mock and be inserted into the DOM, failing the assertions.
|
||||
jest.mock('react-markdown', () => ({
|
||||
__esModule: true,
|
||||
default: ({ children }: any) => {
|
||||
const ReactLib = jest.requireActual('react');
|
||||
return ReactLib.createElement('div', {
|
||||
dangerouslySetInnerHTML: { __html: children },
|
||||
});
|
||||
},
|
||||
}));
|
||||
|
||||
jest.mock('react-syntax-highlighter', () => ({
|
||||
Prism: ({ children }: any) => {
|
||||
const react = jest.requireActual('react');
|
||||
return react.createElement('pre', null, children);
|
||||
},
|
||||
}));
|
||||
|
||||
jest.mock('react-syntax-highlighter/dist/esm/styles/prism', () => ({
|
||||
oneDark: {},
|
||||
oneLight: {},
|
||||
}));
|
||||
|
||||
jest.mock('rehype-katex', () => jest.fn());
|
||||
jest.mock('rehype-raw', () => jest.fn());
|
||||
|
||||
jest.mock('../../theme-provider', () => ({
|
||||
useIsDarkTheme: () => false,
|
||||
}));
|
||||
|
||||
describe('HighLightMarkdown', () => {
|
||||
it('sanitizes unsafe html before rendering', () => {
|
||||
const { container } = render(
|
||||
React.createElement(
|
||||
HighLightMarkdown,
|
||||
null,
|
||||
'hello <img src=x onerror="alert(1)" /><script>alert(1)</script><b>safe</b>',
|
||||
),
|
||||
);
|
||||
|
||||
// <b>safe</b> is allowed by DOMPurify default profile, so it should
|
||||
// render as a real <b> element with text "safe".
|
||||
expect(container.querySelector('b')?.textContent).toBe('safe');
|
||||
|
||||
// <script> is removed entirely by DOMPurify.
|
||||
expect(container.querySelector('script')).toBeNull();
|
||||
|
||||
// <img> is kept but its dangerous handler attribute must be stripped.
|
||||
// (DOMPurify default profile removes on* event attributes.)
|
||||
const imgs = container.querySelectorAll('img');
|
||||
imgs.forEach((img) => {
|
||||
expect(img.getAttribute('onerror')).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
it('strips html encoded as entities (preprocessLaTeX bypass)', () => {
|
||||
// preprocessLaTeX() decodes </>/& back to raw HTML before
|
||||
// rehypeRaw runs. Sanitization must occur AFTER preprocessLaTeX, so
|
||||
// a payload delivered as <img onerror=...> cannot survive.
|
||||
const { container } = render(
|
||||
React.createElement(
|
||||
HighLightMarkdown,
|
||||
null,
|
||||
'<img src=x onerror="alert(1)" /><script>alert(1)</script>safe',
|
||||
),
|
||||
);
|
||||
|
||||
// <script> entirely removed.
|
||||
expect(container.querySelector('script')).toBeNull();
|
||||
// <img> kept (allowed by default profile) but onerror must be stripped.
|
||||
container.querySelectorAll('img').forEach((img) => {
|
||||
expect(img.getAttribute('onerror')).toBeNull();
|
||||
});
|
||||
// The literal word "safe" should still be visible.
|
||||
expect(container.textContent).toContain('safe');
|
||||
});
|
||||
});
|
||||
@@ -1,5 +1,6 @@
|
||||
import { MarkdownRemarkPlugins } from '@/constants/markdown-remark-plugins';
|
||||
import classNames from 'classnames';
|
||||
import DOMPurify from 'dompurify';
|
||||
import Markdown from 'react-markdown';
|
||||
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
|
||||
import {
|
||||
@@ -25,6 +26,12 @@ const HighLightMarkdown = ({
|
||||
children: string | null | undefined;
|
||||
}) => {
|
||||
const isDarkTheme = useIsDarkTheme();
|
||||
// IMPORTANT: preprocessLaTeX() decodes </>/& back to raw HTML before
|
||||
// rehypeRaw parses the markdown. Sanitizing children *before* preprocessLaTeX
|
||||
// would let entity-encoded payloads bypass DOMPurify and inject HTML.
|
||||
// Sanitize the *post*-processed string instead. (Coderabbit CRITICAL #3486038798)
|
||||
const processed = children ? preprocessLaTeX(children) : children;
|
||||
const safeChildren = processed ? DOMPurify.sanitize(processed) : processed;
|
||||
const dir = children
|
||||
? getDirAttribute(children.replace(citationMarkerReg, ''))
|
||||
: undefined;
|
||||
@@ -60,7 +67,7 @@ const HighLightMarkdown = ({
|
||||
} as any
|
||||
}
|
||||
>
|
||||
{children ? preprocessLaTeX(children) : children}
|
||||
{safeChildren}
|
||||
</Markdown>
|
||||
</div>
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user