Harden closed-advisory fixes (#16409)

## Summary
- harden reopened advisory fixes across REST connector, invoke, document
downloads, and markdown rendering
- add targeted regression coverage for redirect-safe SSRF handling,
invoke SSRF checks, document access control, and markdown sanitization
- verify each referenced GHSA against the original GitHub advisory text
and align the closed-advisory plan with the implemented remediation

## What changed
- add tenant access checks to document download endpoints to avoid
cross-tenant document disclosure
- add per-hop SSRF validation, DNS pinning, redirect handling, and
redirect limits to the REST API connector
- ensure invoke requests validate and pin the resolved host and never
follow redirects implicitly
- keep the generic rate-limited request path wrapped, not just GET and
POST helpers
- sanitize markdown HTML before rendering in the highlight markdown
component

## Validation
- `cd web && npm test -- --runInBand
src/components/highlight-markdown/__tests__/index.test.tsx`
- `.venv/bin/python -m pytest -q
test/unit_test/data_source/test_rest_api_connector.py`
- targeted `test/testcases/test_web_api/...` unit additions were
reviewed, but the suite cannot be executed end-to-end in this
environment because parent `test/testcases/conftest.py` requires a local
service on `127.0.0.1:9380`

## Notes
- all GHSA entries referenced by the plan were checked against the
original GitHub advisory text, not sampled
- the closed-advisory plan document was updated locally during review,
but is intentionally not included in this PR
This commit is contained in:
Zhichang Yu
2026-06-28 11:17:54 +08:00
committed by yzc
parent ee165c5dd7
commit faef22c18a
11 changed files with 398 additions and 12 deletions

View File

@@ -0,0 +1,96 @@
import { render } from '@testing-library/react';
import React from 'react';
import HighLightMarkdown from '..';
jest.mock('@/constants/markdown-remark-plugins', () => ({
MarkdownRemarkPlugins: [],
}));
// Coderabbit MAJOR #3486038797: the previous mock rendered react-markdown's
// output as a plain <div> containing `children` as text, so the spec never
// exercised rehypeRaw or the post-preprocessLaTeX sanitization path. With
// that mock, `<b>safe</b>` was just text inside a div, and an entity-encoded
// `<img onerror=...>` payload would never reach the DOM no matter what the
// component did — masking the exact bypass DOMPurify is meant to catch.
//
// We mock react-markdown to render `children` as raw HTML (via
// dangerouslySetInnerHTML). This mimics the real pipeline: if the component
// fails to sanitize (e.g. sanitizes BEFORE preprocessLaTeX), the unsafe HTML
// will reach this mock and be inserted into the DOM, failing the assertions.
jest.mock('react-markdown', () => ({
__esModule: true,
default: ({ children }: any) => {
const ReactLib = jest.requireActual('react');
return ReactLib.createElement('div', {
dangerouslySetInnerHTML: { __html: children },
});
},
}));
jest.mock('react-syntax-highlighter', () => ({
Prism: ({ children }: any) => {
const react = jest.requireActual('react');
return react.createElement('pre', null, children);
},
}));
jest.mock('react-syntax-highlighter/dist/esm/styles/prism', () => ({
oneDark: {},
oneLight: {},
}));
jest.mock('rehype-katex', () => jest.fn());
jest.mock('rehype-raw', () => jest.fn());
jest.mock('../../theme-provider', () => ({
useIsDarkTheme: () => false,
}));
describe('HighLightMarkdown', () => {
it('sanitizes unsafe html before rendering', () => {
const { container } = render(
React.createElement(
HighLightMarkdown,
null,
'hello <img src=x onerror="alert(1)" /><script>alert(1)</script><b>safe</b>',
),
);
// <b>safe</b> is allowed by DOMPurify default profile, so it should
// render as a real <b> element with text "safe".
expect(container.querySelector('b')?.textContent).toBe('safe');
// <script> is removed entirely by DOMPurify.
expect(container.querySelector('script')).toBeNull();
// <img> is kept but its dangerous handler attribute must be stripped.
// (DOMPurify default profile removes on* event attributes.)
const imgs = container.querySelectorAll('img');
imgs.forEach((img) => {
expect(img.getAttribute('onerror')).toBeNull();
});
});
it('strips html encoded as entities (preprocessLaTeX bypass)', () => {
// preprocessLaTeX() decodes &lt;/&gt;/&amp; back to raw HTML before
// rehypeRaw runs. Sanitization must occur AFTER preprocessLaTeX, so
// a payload delivered as &lt;img onerror=...&gt; cannot survive.
const { container } = render(
React.createElement(
HighLightMarkdown,
null,
'&lt;img src=x onerror="alert(1)" /&gt;&lt;script&gt;alert(1)&lt;/script&gt;safe',
),
);
// <script> entirely removed.
expect(container.querySelector('script')).toBeNull();
// <img> kept (allowed by default profile) but onerror must be stripped.
container.querySelectorAll('img').forEach((img) => {
expect(img.getAttribute('onerror')).toBeNull();
});
// The literal word "safe" should still be visible.
expect(container.textContent).toContain('safe');
});
});

View File

@@ -1,5 +1,6 @@
import { MarkdownRemarkPlugins } from '@/constants/markdown-remark-plugins';
import classNames from 'classnames';
import DOMPurify from 'dompurify';
import Markdown from 'react-markdown';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
import {
@@ -25,6 +26,12 @@ const HighLightMarkdown = ({
children: string | null | undefined;
}) => {
const isDarkTheme = useIsDarkTheme();
// IMPORTANT: preprocessLaTeX() decodes &lt;/&gt;/&amp; back to raw HTML before
// rehypeRaw parses the markdown. Sanitizing children *before* preprocessLaTeX
// would let entity-encoded payloads bypass DOMPurify and inject HTML.
// Sanitize the *post*-processed string instead. (Coderabbit CRITICAL #3486038798)
const processed = children ? preprocessLaTeX(children) : children;
const safeChildren = processed ? DOMPurify.sanitize(processed) : processed;
const dir = children
? getDirAttribute(children.replace(citationMarkerReg, ''))
: undefined;
@@ -60,7 +67,7 @@ const HighLightMarkdown = ({
} as any
}
>
{children ? preprocessLaTeX(children) : children}
{safeChildren}
</Markdown>
</div>
);