mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
fix: the opencc c library uses fgets() to read dicti... in text.c (#13970)
## Summary Fix critical severity security issue in `internal/cpp/opencc/dictionary/text.c`. ## Vulnerability | Field | Value | |-------|-------| | **ID** | V-001 | | **Severity** | CRITICAL | | **Scanner** | multi_agent_ai | | **Rule** | `V-001` | | **File** | `internal/cpp/opencc/dictionary/text.c:107` | **Description**: The OpenCC C library uses fgets() to read dictionary and configuration files without proper bounds validation on subsequent buffer operations. While fgets() itself is bounds-checked, the sprintf() call at config_reader.c:174 constructs file paths by concatenating home_path and filename without verifying the result fits in pkg_filename buffer. An attacker providing malformed OpenCC configuration files with excessively long path components can overflow the fixed-size buffer, overwriting adjacent memory including return addresses and function pointers. ## Changes - `internal/cpp/opencc/config_reader.c` - `internal/cpp/opencc/dictionary/text.c` - `internal/cpp/opencc/utils.c` ## Verification - [x] Build passes - [x] Scanner re-scan confirms fix - [x] LLM code review passed --- *Automated security fix by [OrbisAI Security](https://orbisappsec.com)* <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Bug Fixes** * Improved error detection and handling for malformed configuration and dictionary entries during file parsing. * Enhanced memory cleanup in error recovery paths to prevent potential issues. * Strengthened robustness of string operations and buffer handling throughout the library. <!-- end of auto-generated comment: release notes by coderabbit.ai --> Co-authored-by: Ubuntu <ubuntu@ip-172-31-32-15.us-west-2.compute.internal>
This commit is contained in:
@@ -170,8 +170,9 @@ static char *parse_trim(char *str) {
|
||||
static int parse(config_desc *config, const char *filename, const char *home_path) {
|
||||
FILE *fp = fopen(filename, "rb");
|
||||
if (!fp) {
|
||||
char *pkg_filename = (char *)malloc(sizeof(char) * (strlen(filename) + strlen(home_path) + 2));
|
||||
sprintf(pkg_filename, "%s/%s", home_path, filename);
|
||||
size_t pkg_filename_len = strlen(filename) + strlen(home_path) + 2;
|
||||
char *pkg_filename = (char *)malloc(sizeof(char) * pkg_filename_len);
|
||||
snprintf(pkg_filename, pkg_filename_len, "%s/%s", home_path, filename);
|
||||
printf("pkg_filename %s\n", pkg_filename);
|
||||
fp = fopen(pkg_filename, "rb");
|
||||
if (!fp) {
|
||||
@@ -182,12 +183,26 @@ static int parse(config_desc *config, const char *filename, const char *home_pat
|
||||
free(pkg_filename);
|
||||
}
|
||||
|
||||
config->home_dir = (char *)malloc(sizeof(char) * (strlen(home_path) + 1));
|
||||
sprintf(config->home_dir, "%s", home_path);
|
||||
size_t home_dir_len = strlen(home_path) + 1;
|
||||
config->home_dir = (char *)malloc(sizeof(char) * home_dir_len);
|
||||
snprintf(config->home_dir, home_dir_len, "%s", home_path);
|
||||
|
||||
static char buff[BUFFER_SIZE];
|
||||
char buff[BUFFER_SIZE];
|
||||
|
||||
while (fgets(buff, BUFFER_SIZE, fp) != NULL) {
|
||||
/* Detect line truncation: if buffer is full and last char is not newline,
|
||||
* the line was longer than BUFFER_SIZE-1 bytes. Drain the remainder and
|
||||
* treat this as a parse error to avoid processing partial config lines. */
|
||||
size_t buff_len = strlen(buff);
|
||||
if (buff_len == BUFFER_SIZE - 1 && buff[buff_len - 1] != '\n') {
|
||||
int c;
|
||||
while ((c = fgetc(fp)) != '\n' && c != EOF)
|
||||
;
|
||||
fclose(fp);
|
||||
errnum = CONFIG_ERROR_PARSE;
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *trimed_buff = parse_trim(buff);
|
||||
if (*trimed_buff == ';' || *trimed_buff == '#' || *trimed_buff == '\0') {
|
||||
/* Comment Line or empty line */
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
#include "../encoding.h"
|
||||
|
||||
#define INITIAL_DICTIONARY_SIZE 1024
|
||||
#define ENTRY_BUFF_SIZE 128
|
||||
#define ENTRY_BUFF_SIZE 4096
|
||||
#define ENTRY_WBUFF_SIZE ENTRY_BUFF_SIZE / sizeof(size_t)
|
||||
|
||||
struct _text_dictionary {
|
||||
@@ -69,10 +69,14 @@ int parse_entry(const char *buff, entry *entry_i) {
|
||||
if (ucs4_buff == (ucs4_t *)-1) {
|
||||
/* 發生錯誤 回退內存申請 */
|
||||
ssize_t i;
|
||||
for (i = value_i - 1; i >= 0; --i)
|
||||
for (i = value_i - 1; i >= 0; --i) {
|
||||
free(entry_i->value[i]);
|
||||
entry_i->value[i] = NULL;
|
||||
}
|
||||
free(entry_i->value);
|
||||
entry_i->value = NULL;
|
||||
free(entry_i->key);
|
||||
entry_i->key = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -95,7 +99,7 @@ dictionary_t dictionary_text_open(const char *filename) {
|
||||
text_dictionary->lexicon = (entry *)malloc(sizeof(entry) * text_dictionary->entry_count);
|
||||
text_dictionary->word_buff = NULL;
|
||||
|
||||
static char buff[ENTRY_BUFF_SIZE];
|
||||
char buff[ENTRY_BUFF_SIZE];
|
||||
|
||||
FILE *fp = fopen(filename, "rb");
|
||||
if (fp == NULL) {
|
||||
@@ -105,6 +109,17 @@ dictionary_t dictionary_text_open(const char *filename) {
|
||||
|
||||
size_t i = 0;
|
||||
while (fgets(buff, ENTRY_BUFF_SIZE, fp)) {
|
||||
/* Detect line truncation: if buffer is full and last char is not newline,
|
||||
* the line was longer than ENTRY_BUFF_SIZE-1 bytes. Drain the remainder
|
||||
* and skip this malformed entry to prevent parsing partial data. */
|
||||
size_t buff_len = strlen(buff);
|
||||
if (buff_len == ENTRY_BUFF_SIZE - 1 && buff[buff_len - 1] != '\n') {
|
||||
int c;
|
||||
while ((c = fgetc(fp)) != '\n' && c != EOF)
|
||||
;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i >= text_dictionary->entry_count) {
|
||||
text_dictionary->entry_count += text_dictionary->entry_count;
|
||||
text_dictionary->lexicon = (entry *)realloc(text_dictionary->lexicon, sizeof(entry) * text_dictionary->entry_count);
|
||||
|
||||
@@ -23,8 +23,10 @@ void perr(const char *str) { fputs(str, stderr); }
|
||||
int qsort_int_cmp(const void *a, const void *b) { return *((int *)a) - *((int *)b); }
|
||||
|
||||
char *mstrcpy(const char *str) {
|
||||
char *strbuf = (char *)malloc(sizeof(char) * (strlen(str) + 1));
|
||||
strcpy(strbuf, str);
|
||||
size_t len = strlen(str);
|
||||
char *strbuf = (char *)malloc(sizeof(char) * (len + 1));
|
||||
strncpy(strbuf, str, len);
|
||||
strbuf[len] = '\0';
|
||||
return strbuf;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user