diff --git a/common/data_source/rdbms_connector.py b/common/data_source/rdbms_connector.py index 2902041bd5..05628501c6 100644 --- a/common/data_source/rdbms_connector.py +++ b/common/data_source/rdbms_connector.py @@ -204,11 +204,11 @@ class RDBMSConnector(LoadConnector, PollConnector): value = row_dict[col] if isinstance(value, (dict, list)): value = json.dumps(value, ensure_ascii=False) - # Use brackets around field name to ensure it's distinguishable - # after chunking (TxtParser strips \n delimiters during merge) - content_parts.append(f"【{col}】: {value}") + # Use brackets around field name and put value on a new line + # so that TxtParser preserves field boundaries after chunking. + content_parts.append(f"【{col}】:\n{value}") - content = "\n".join(content_parts) + content = "\n\n".join(content_parts) if self.id_column and self.id_column in row_dict: doc_id = f"{self.db_type}:{self.database}:{row_dict[self.id_column]}" diff --git a/deepdoc/parser/txt_parser.py b/deepdoc/parser/txt_parser.py index 64e200cbc6..6abf8591da 100644 --- a/deepdoc/parser/txt_parser.py +++ b/deepdoc/parser/txt_parser.py @@ -40,7 +40,10 @@ class RAGFlowTxtParser: cks.append(t) tk_nums.append(tnum) else: - cks[-1] += t + if cks[-1]: + cks[-1] += "\n" + t + else: + cks[-1] += t tk_nums[-1] += tnum dels = []