#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Table-driven tests for ``_stream_with_think_delta``.

Each entry in ``CASES`` feeds a list of text chunks through the parser and
compares the collected "think" text, "answer" text and (optionally) the
sequence of marker values against the expected result.
"""
import asyncio
import io
import sys
import unittest
from contextlib import redirect_stdout
from pathlib import Path

# Put the repository root on sys.path so the ``api`` package below resolves
# when this file is run directly.
REPO_ROOT = Path(__file__).resolve().parents[3]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from api.db.services.dialog_service import _stream_with_think_delta

# Each case is ``(name, spec)`` where ``spec`` holds:
#   "min_tokens": forwarded to the parser as ``min_tokens``
#   "chunks":     the text pieces streamed into the parser, in order
#   "expected":   joined "think" text, joined "answer" text and, when
#                 present, the exact list of marker values
#
# NOTE(review): every marker literal below is the empty string, and
# ``_collect_case`` compares marker values against "". If tag markup was
# stripped from this file somewhere along the way, restore the fixtures and
# the comparison from the upstream copy -- TODO confirm.
CASES = [
    (
        "minimax",
        {
            "min_tokens": 16,
            "chunks": [
                'The user has sent a simple greeting "hello". I should respond in a friendly and helpful manner.Hello!',
                "\n\n How can I help",
                " you today?",
            ],
            "expected": {
                "think": 'The user has sent a simple greeting "hello". I should respond in a friendly and helpful manner.Hello!',
                "answer": "\n\n How can I help you today?",
            },
        },
    ),
    (
        "deepseek",
        {
            "min_tokens": 16,
            "chunks": [
                "We",
                " need",
                " to",
                " respond",
                " to",
                " the",
                " user",
                "'s",
                " greeting",
                ' "',
                "hello",
                '".',
                " The",
                " assistant",
                " should",
                " be",
                " friendly",
                " and",
                " helpful",
                ".",
                " A",
                " simple",
                " greeting",
                " back",
                " is",
                " appropriate",
                ",",
                " perhaps",
                " with",
                " an",
                " offer",
                " of",
                " assistance",
                ".",
                "Hello",
                "!",
                " How",
                " can",
                " I",
                " assist",
                " you",
                " today",
                "?",
            ],
            "expected": {
                "think": 'We need to respond to the user\'s greeting "hello". The assistant should be friendly and helpful. A simple greeting back is appropriate, perhaps with an offer of assistance.',
                # Kept on one physical line: a plain string literal cannot
                # span lines, and this equals the joined answer chunks above.
                "answer": "Hello! How can I assist you today?",
            },
        },
    ),
    (
        "deepseek_repeat",
        {
            "min_tokens": 16,
            "chunks": [
                "We",
                " need",
                " to",
                " respond",
                " to",
                " the",
                " user",
                "'s",
                ' "',
                "hello",
                '"',
                " again",
                ".",
                " The",
                " user",
                " just",
                " said",
                ' "',
                "hello",
                '"',
                " after",
                " I",
                " already",
                " responded",
                ".",
                " Possibly",
                " they",
                "'re",
                " testing",
                " or",
                " just",
                " greeting",
                " again",
                ".",
                " I",
                "'ll",
                " respond",
                " in",
                " a",
                " friendly",
                " manner",
                ",",
                " perhaps",
                " acknowledging",
                " the",
                " repeated",
                " greeting",
                " and",
                " inviting",
                " them",
                " to",
                " ask",
                " something",
                ".",
                "Hello",
                " again",
                "!",
                " How",
                " can",
                " I",
                " help",
                " you",
                " today",
                "?",
            ],
            "expected": {
                "think": 'We need to respond to the user\'s "hello" again. The user just said "hello" after I already responded. Possibly they\'re testing or just greeting again. I\'ll respond in a friendly manner, perhaps acknowledging the repeated greeting and inviting them to ask something.',
                "answer": "Hello again! How can I help you today?",
            },
        },
    ),
    (
        "answer_then_think",
        {
            "min_tokens": 16,
            "chunks": [
                "前言",
                " ",
                "内部推理一",
                "最终回答",
                "。",
            ],
            "expected": {
                "think": "内部推理一",
                "answer": "前言 最终回答。",
                "markers": ["", ""],
            },
        },
    ),
    (
        "close_pending_eof",
        {
            "min_tokens": 16,
            "chunks": [
                "先思考完毕答案在这里",
            ],
            "expected": {
                "think": "先思考完毕",
                "answer": "答案在这里",
                "markers": ["", ""],
            },
        },
    ),
    (
        "mixed_boundary",
        {
            "min_tokens": 16,
            "chunks": [
                "前缀",
                "理由A答案A",
                " 后缀",
            ],
            "expected": {
                "think": "理由A",
                "answer": "前缀答案A 后缀",
                "markers": ["", ""],
            },
        },
    ),
    (
        "think_only_eof",
        {
            "min_tokens": 16,
            "chunks": [
                "只输出思考,不输出最终答案",
                ",并且流在这里结束",
            ],
            "expected": {
                "think": "只输出思考,不输出最终答案,并且流在这里结束",
                "answer": "",
                "markers": [""],
            },
        },
    ),
    (
        "double_think_blocks",
        {
            "min_tokens": 16,
            "chunks": [
                "第一段推理答案A",
                " 第二段推理答案B",
            ],
            "expected": {
                "think": "第一段推理第二段推理",
                "answer": "答案A 答案B",
                "markers": ["", "", "", ""],
            },
        },
    ),
    (
        "nested_or_malformed_tags",
        {
            "min_tokens": 16,
            "chunks": [
                "重复开始",
                "答案",
                "",
                "尾巴",
            ],
            "expected": {
                "think": "重复开始",
                "answer": "答案尾巴",
                "markers": ["", "", ""],
            },
        },
    ),
    (
        "tiny_think_chunks",
        {
            "min_tokens": 16,
            "chunks": [
                "",
                "A",
                "B",
                "C",
                "D",
                "E",
                "",
                "答",
                "案",
                "输",
                "出",
            ],
            "expected": {
                "think": "ABCDE",
                "answer": "答案输出",
                "markers": ["", ""],
            },
        },
    ),
    (
        "think_then_answer_then_think",
        {
            "min_tokens": 16,
            "chunks": [
                "第一轮推理第一轮答案",
                " 第二轮推理第二轮答案",
            ],
            "expected": {
                "think": "第一轮推理第二轮推理",
                "answer": "第一轮答案 第二轮答案",
                "markers": ["", "", "", ""],
            },
        },
    ),
]


async def _iter_chunks(chunks):
    """Yield each item of *chunks* in order as an async generator."""
    for chunk in chunks:
        yield chunk


async def _collect_case(chunks, min_tokens):
    """Run *chunks* through ``_stream_with_think_delta`` and bucket the output.

    The parser is iterated as a stream of ``(kind, value, state)`` tuples.
    A ``"marker"`` event is recorded and selects which bucket later values
    go to; any other event appends its value to the current bucket. The
    stream starts in the "answer" bucket.

    Returns:
        A ``(think_text, answer_text, markers)`` tuple: the concatenated
        think values, the concatenated answer values, and the marker values
        in the order they were seen.
    """
    think_parts = []
    answer_parts = []
    markers = []
    section = "answer"
    stream = _stream_with_think_delta(_iter_chunks(chunks), min_tokens=min_tokens)
    async for kind, value, _state in stream:
        if kind == "marker":
            markers.append(value)
            # NOTE(review): compares against the empty string exactly as the
            # source reads; see the note above CASES.
            section = "think" if value == "" else "answer"
            continue
        if section == "think":
            think_parts.append(value)
        else:
            answer_parts.append(value)
    return "".join(think_parts), "".join(answer_parts), markers


class TestThinkStreamParser(unittest.TestCase):
    """Checks ``_stream_with_think_delta`` against every entry in ``CASES``."""

    def test_think_stream_parser_cases(self):
        """Run each case as a sub-test and compare think/answer/markers."""
        for case_name, case in CASES:
            with self.subTest(case=case_name):
                # Capture anything written to stdout while the parser runs
                # so it does not end up in the test output.
                buf = io.StringIO()
                with redirect_stdout(buf):
                    think_text, answer_text, markers = asyncio.run(
                        _collect_case(case["chunks"], case["min_tokens"])
                    )
                expected = case["expected"]
                self.assertEqual(think_text, expected["think"], case_name)
                self.assertEqual(answer_text, expected["answer"], case_name)
                # Marker order is only checked for cases that declare it.
                if "markers" in expected:
                    self.assertEqual(markers, expected["markers"], case_name)