#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import io
import sys
import unittest
from contextlib import redirect_stdout
from pathlib import Path
# Resolve this file's absolute path and take its fourth ancestor directory
# (parents[0] is the containing directory, parents[3] is three levels above it).
REPO_ROOT = Path(__file__).resolve().parents[3]
# Prepend that directory to the import search path once; presumably this lets
# the `api...` import that follows resolve when the test is run directly
# (verify against the repository layout).
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))
from api.db.services.dialog_service import _stream_with_think_delta
# Table of parser scenarios consumed by TestThinkStreamParser.
#
# Each entry is a ``(case_name, spec)`` tuple where ``spec`` holds:
#   "min_tokens": value forwarded as the ``min_tokens`` keyword argument,
#   "chunks":     the string pieces fed to the parser, in order,
#   "expected":   the joined "think" text and "answer" text the test asserts,
#                 plus an optional "markers" list that is only compared when
#                 the key is present.
#
# NOTE(review): every expected marker value below is the empty string and a
# few chunks are empty strings as well; confirm these literals match what the
# parser actually emits and consumes.
CASES = [
    (
        "minimax",
        {
            "min_tokens": 16,
            "chunks": [
                'The user has sent a simple greeting "hello". I should respond in a friendly and helpful manner.Hello!',
                "\n\n How can I help",
                " you today?",
            ],
            "expected": {
                "think": 'The user has sent a simple greeting "hello". I should respond in a friendly and helpful manner.Hello!',
                "answer": "\n\n How can I help you today?",
            },
        },
    ),
    (
        "deepseek",
        {
            "min_tokens": 16,
            "chunks": [
                "We",
                " need",
                " to",
                " respond",
                " to",
                " the",
                " user",
                "'s",
                " greeting",
                ' "',
                "hello",
                '".',
                " The",
                " assistant",
                " should",
                " be",
                " friendly",
                " and",
                " helpful",
                ".",
                " A",
                " simple",
                " greeting",
                " back",
                " is",
                " appropriate",
                ",",
                " perhaps",
                " with",
                " an",
                " offer",
                " of",
                " assistance",
                ".",
                "Hello",
                "!",
                " How",
                " can",
                " I",
                " assist",
                " you",
                " today",
                "?",
            ],
            "expected": {
                "think": 'We need to respond to the user\'s greeting "hello". The assistant should be friendly and helpful. A simple greeting back is appropriate, perhaps with an offer of assistance.',
                "answer": "Hello! How can I assist you today?",
            },
        },
    ),
    (
        "deepseek_repeat",
        {
            "min_tokens": 16,
            "chunks": [
                "We",
                " need",
                " to",
                " respond",
                " to",
                " the",
                " user",
                "'s",
                ' "',
                "hello",
                '"',
                " again",
                ".",
                " The",
                " user",
                " just",
                " said",
                ' "',
                "hello",
                '"',
                " after",
                " I",
                " already",
                " responded",
                ".",
                " Possibly",
                " they",
                "'re",
                " testing",
                " or",
                " just",
                " greeting",
                " again",
                ".",
                " I",
                "'ll",
                " respond",
                " in",
                " a",
                " friendly",
                " manner",
                ",",
                " perhaps",
                " acknowledging",
                " the",
                " repeated",
                " greeting",
                " and",
                " inviting",
                " them",
                " to",
                " ask",
                " something",
                ".",
                "Hello",
                " again",
                "!",
                " How",
                " can",
                " I",
                " help",
                " you",
                " today",
                "?",
            ],
            "expected": {
                "think": 'We need to respond to the user\'s "hello" again. The user just said "hello" after I already responded. Possibly they\'re testing or just greeting again. I\'ll respond in a friendly manner, perhaps acknowledging the repeated greeting and inviting them to ask something.',
                "answer": "Hello again! How can I help you today?",
            },
        },
    ),
    (
        "answer_then_think",
        {
            "min_tokens": 16,
            "chunks": [
                "前言",
                " ",
                "内部推理一",
                "最终回答",
                "。",
            ],
            "expected": {
                "think": "内部推理一",
                "answer": "前言 最终回答。",
                "markers": ["", ""],
            },
        },
    ),
    (
        "close_pending_eof",
        {
            "min_tokens": 16,
            "chunks": [
                "先思考完毕答案在这里",
            ],
            "expected": {
                "think": "先思考完毕",
                "answer": "答案在这里",
                "markers": ["", ""],
            },
        },
    ),
    (
        "mixed_boundary",
        {
            "min_tokens": 16,
            "chunks": [
                "前缀",
                "理由A答案A",
                " 后缀",
            ],
            "expected": {
                "think": "理由A",
                "answer": "前缀答案A 后缀",
                "markers": ["", ""],
            },
        },
    ),
    (
        "think_only_eof",
        {
            "min_tokens": 16,
            "chunks": [
                "只输出思考,不输出最终答案",
                ",并且流在这里结束",
            ],
            "expected": {
                "think": "只输出思考,不输出最终答案,并且流在这里结束",
                "answer": "",
                "markers": [""],
            },
        },
    ),
    (
        "double_think_blocks",
        {
            "min_tokens": 16,
            "chunks": [
                "第一段推理答案A",
                " 第二段推理答案B",
            ],
            "expected": {
                "think": "第一段推理第二段推理",
                "answer": "答案A 答案B",
                "markers": ["", "", "", ""],
            },
        },
    ),
    (
        "nested_or_malformed_tags",
        {
            "min_tokens": 16,
            "chunks": [
                "重复开始",
                "答案",
                "",
                "尾巴",
            ],
            "expected": {
                "think": "重复开始",
                "answer": "答案尾巴",
                "markers": ["", "", ""],
            },
        },
    ),
    (
        "tiny_think_chunks",
        {
            "min_tokens": 16,
            "chunks": [
                "",
                "A",
                "B",
                "C",
                "D",
                "E",
                "",
                "答",
                "案",
                "输",
                "出",
            ],
            "expected": {
                "think": "ABCDE",
                "answer": "答案输出",
                "markers": ["", ""],
            },
        },
    ),
    (
        "think_then_answer_then_think",
        {
            "min_tokens": 16,
            "chunks": [
                "第一轮推理第一轮答案",
                " 第二轮推理第二轮答案",
            ],
            "expected": {
                "think": "第一轮推理第二轮推理",
                "answer": "第一轮答案 第二轮答案",
                "markers": ["", "", "", ""],
            },
        },
    ),
]
async def _iter_chunks(chunks):
    """Expose an ordinary iterable as an async generator.

    Yields every element of ``chunks`` unchanged and in order, so a plain
    list of strings can be handed to code that consumes an async stream.
    """
    for piece in chunks:
        yield piece
async def _collect_case(chunks, min_tokens):
    """Run the think-delta stream over ``chunks`` and bucket its output.

    ``chunks`` is wrapped with ``_iter_chunks`` and passed to
    ``_stream_with_think_delta`` together with ``min_tokens``. Each item the
    stream yields is unpacked as ``(kind, value, state)``; the state is
    ignored here.

    Items whose kind is ``"marker"`` are recorded and select the section that
    later text is filed under; every other item's value is appended to the
    currently selected section, which starts out as ``"answer"``.

    NOTE(review): the section switches to ``"think"`` only when the marker
    value equals the empty string; confirm that literal against the marker
    values the parser emits.

    Returns:
        A ``(think_text, answer_text, markers)`` tuple: the two joined text
        buckets and the list of marker values in the order they were seen.
    """
    collected = {"think": [], "answer": []}
    seen_markers = []
    active = "answer"
    stream = _stream_with_think_delta(_iter_chunks(chunks), min_tokens=min_tokens)
    async for kind, value, _state in stream:
        if kind != "marker":
            collected[active].append(value)
        else:
            seen_markers.append(value)
            if value == "":
                active = "think"
            else:
                active = "answer"
    return "".join(collected["think"]), "".join(collected["answer"]), seen_markers
class TestThinkStreamParser(unittest.TestCase):
    """Table-driven checks of the think/answer stream parser using ``CASES``."""

    def test_think_stream_parser_cases(self):
        """Run every entry of ``CASES`` as its own sub-test.

        For each case the chunks are pushed through ``_collect_case`` while
        stdout is redirected into a throwaway buffer, then the joined think
        text and answer text are compared with the expected values. The
        marker list is compared only when the case declares one.
        """
        for label, spec in CASES:
            with self.subTest(case=label):
                # Anything printed during the run is captured and discarded.
                with redirect_stdout(io.StringIO()):
                    outcome = asyncio.run(
                        _collect_case(spec["chunks"], spec["min_tokens"])
                    )
                got_think, got_answer, got_markers = outcome
                want = spec["expected"]
                self.assertEqual(got_think, want["think"], label)
                self.assertEqual(got_answer, want["answer"], label)
                if "markers" not in want:
                    continue
                self.assertEqual(got_markers, want["markers"], label)