From 85d0b46d8e654e28405dca46de08ce37206f6389 Mon Sep 17 00:00:00 2001
From: tmimmanuel <14046872+tmimmanuel@users.noreply.github.com>
Date: Wed, 20 May 2026 21:19:04 -1000
Subject: [PATCH] fix(mistral): handle structured content from magistral
 reasoning models (#14805)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What problem does this PR solve?

`MistralModel.ChatWithMessages` (in the driver merged via #14807)
assumes that `choices[0].message.content` from `/v1/chat/completions` is
always a string and falls through to `return nil, fmt.Errorf("invalid
content format")` on anything else.

That assumption breaks for the **magistral reasoning family**
(`magistral-small-*`, `magistral-medium-*`). When the model needs a
chain-of-thought to answer, Mistral returns `content` as a **structured
array of typed parts**:

```json
"content": [
  {"type": "thinking",
   "thinking": [{"type": "text", "text": "Combined speed is 150 mph. 300 / 150 = 2 hours."}],
   "closed": true},
  {"type": "text", "text": "They will meet after **2 hours**."}
]
```

Concretely, this is what the live API returns today (probed against
`api.mistral.ai/v1`):

```
$ curl -H "Authorization: Bearer <key>" -H "Content-Type: application/json" \
       -X POST https://api.mistral.ai/v1/chat/completions \
       -d '{"model":"magistral-medium-latest",
            "messages":[{"role":"user","content":"two trains 60mph and 90mph, 300mi apart, when do they meet? step by step."}],
            "max_tokens":1024}'
HTTP 200
{ "choices":[{"message":{
    "role":"assistant",
    "content":[
      {"type":"thinking","thinking":[{"type":"text","text":"Okay, let's see..."}],"closed":true},
      {"type":"text","text":"To determine when the two trains meet..."}
    ]}}] }
```

With the current driver, every call like that returns the generic
`"invalid content format"` error. Trivial prompts that happen to fit in
a string answer still succeed, so the breakage is **non-deterministic
from the tenant's POV**: same model, same provider, sometimes works,
sometimes 500s with no useful error.

A secondary issue: `conf/models/mistral.json` does not include any
magistral model. The picker hid the broken path, which is why this
wasn't caught during #14807's review.

### What this PR includes

- New helper `extractMistralContent(raw interface{}) (answer,
reasonContent string, err error)` in
`internal/entity/models/mistral.go`, which normalizes both shapes
Mistral can return:
- `string` → historical path. `Answer = content`, `ReasonContent = ""`.
Preserves behavior for every non-reasoning model (`mistral-large-*`,
`mistral-small-*`, `ministral-*`, `codestral-*`, `pixtral-*`,
`open-mistral-nemo`).
- `[]interface{}` → walk the parts. Concatenate every `{"type":"text",
"text":...}` part into `Answer`; concatenate the inner text inside every
`{"type":"thinking", "thinking":[...]}` part into `ReasonContent`.
- `ChatWithMessages` now calls the helper instead of doing the raw
`.(string)` cast.
- Unknown part types are **skipped, not failed**. Mistral has been
adding new content variants quickly (audio chunks, citations, etc.);
this driver should not 500 every call when a new part type appears.
- `conf/models/mistral.json`: add `magistral-medium-latest` and
`magistral-small-latest`. Both are visible in `/v1/models` today.

No interface change. No factory change. No new dependencies.

### How was this tested?

**Unit tests** — 5 new tests in `internal/entity/models/mistral_test.go`
on top of the 27 already shipped via #14807:

- `TestMistralChatHandlesStringContent` — regression net for the
historical path
- `TestMistralChatExtractsReasoningFromStructuredContent` — the fixture
body is a trimmed copy of the actual `magistral-medium-latest` response
captured above; asserts both `Answer` and `ReasonContent` are populated
correctly
- `TestMistralChatHandlesStructuredContentWithoutThinking` —
`magistral-*` with a trivial answer returns a structured shape that has
only a `text` part; `ReasonContent` must stay empty
- `TestMistralChatIgnoresUnknownContentPartTypes` — `audio_url` and
`future_part_type` parts are skipped, `text` parts still flow through
- `TestExtractMistralContent` — table-driven unit coverage of the helper
for string, empty string, nil, empty array, text-only, thinking+text,
unsupported root type

```
$ go test -vet=off -run "TestMistral|TestExtractMistralContent" -count=1 -v ./internal/entity/models/...
=== RUN   TestMistralChatHandlesStringContent
--- PASS: TestMistralChatHandlesStringContent (0.00s)
=== RUN   TestMistralChatExtractsReasoningFromStructuredContent
--- PASS: TestMistralChatExtractsReasoningFromStructuredContent (0.00s)
=== RUN   TestMistralChatHandlesStructuredContentWithoutThinking
--- PASS: TestMistralChatHandlesStructuredContentWithoutThinking (0.00s)
=== RUN   TestMistralChatIgnoresUnknownContentPartTypes
--- PASS: TestMistralChatIgnoresUnknownContentPartTypes (0.00s)
=== RUN   TestExtractMistralContent
=== RUN   TestExtractMistralContent/plain_string
=== RUN   TestExtractMistralContent/empty_string
=== RUN   TestExtractMistralContent/nil
=== RUN   TestExtractMistralContent/empty_array
=== RUN   TestExtractMistralContent/text_only
=== RUN   TestExtractMistralContent/thinking_then_text
=== RUN   TestExtractMistralContent/unknown_root_type
--- PASS: TestExtractMistralContent (0.00s)
PASS
ok      ragflow/internal/entity/models  0.046s
```

All 32 Mistral tests pass on go 1.25. `go build
./internal/entity/models/...` exits 0.

**Live integration test** — driver exercised against `api.mistral.ai/v1`
with the patched code:

```
=== RUN   TestMistralMagistralSmoke
    [OK] "magistral-small-latest" present upstream
    [OK] "magistral-medium-latest" present upstream
    [OK trivial]    Answer="7"  ReasonContent=""
    [OK reasoning]  Answer len=797   head="To determine when the two trains meet, we can follow these steps:\n\n1. **Identify..."
                    ReasonContent len=1069 head="Okay, let's see. There are two trains, one going 60 mph and the other going 90 mph. They're moving towards each other, s..."

MAGISTRAL SMOKE PASSED
--- PASS: TestMistralMagistralSmoke (18.09s)
PASS
ok      ragflow/internal/entity/models  18.112s
```

What the live run proves on the wire:

- `magistral-small-latest` with a trivial prompt still uses the
string-content shape; the regression-net path is exercised against the
real server, not just the mock.
- `magistral-medium-latest` with a reasoning prompt uses the
structured-array shape; the new code path extracts a 1069-character
reasoning trace into `ChatResponse.ReasonContent` and a 797-character
visible answer into `ChatResponse.Answer`. Before this fix, the same
call returned `"invalid content format"` and the caller saw nothing.

The smoke-test file itself is not committed (live tests live outside the
PR diff, same convention used for prior provider PRs).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 conf/models/mistral.json               |  14 ++
 internal/entity/models/mistral.go      |  72 ++++++++-
 internal/entity/models/mistral_test.go | 199 +++++++++++++++++++++++++
 3 files changed, 280 insertions(+), 5 deletions(-)
diff --git a/conf/models/mistral.json b/conf/models/mistral.json
index be9cbb1861..1454922e80 100644
--- a/conf/models/mistral.json
+++ b/conf/models/mistral.json
@@ -89,6 +89,20 @@
         "chat"
       ]
     },
+    {
+      "name": "magistral-medium-latest",
+      "max_tokens": 40000,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "magistral-small-latest",
+      "max_tokens": 40000,
+      "model_types": [
+        "chat"
+      ]
+    },
     {
       "name": "mistral-embed",
       "max_tokens": 8192,
diff --git a/internal/entity/models/mistral.go b/internal/entity/models/mistral.go
index 1b526a8776..e37a3998f4 100644
--- a/internal/entity/models/mistral.go
+++ b/internal/entity/models/mistral.go
@@ -195,18 +195,80 @@ func (m *MistralModel) ChatWithMessages(modelName string, messages []Message, ap
 		return nil, fmt.Errorf("invalid message format")
 	}
 
-	content, ok := messageMap["content"].(string)
-	if !ok {
-		return nil, fmt.Errorf("invalid content format")
+	content, reasonContent, err := extractMistralContent(messageMap["content"])
+	if err != nil {
+		return nil, err
 	}
 
-	emptyReason := ""
 	return &ChatResponse{
 		Answer:        &content,
-		ReasonContent: &emptyReason,
+		ReasonContent: &reasonContent,
 	}, nil
 }
 
+// extractMistralContent normalizes the two shapes Mistral can return in
+// choices[0].message.content.
+//
+//  1. Plain string. The historical shape, used by every non-reasoning
+//     Mistral model (mistral-large, mistral-medium, ministral-*, etc.):
+//
+//     "content": "Pong."
+//
+//  2. Structured array of typed parts. Used by the magistral reasoning
+//     family (magistral-small-*, magistral-medium-*) when the model
+//     actually produces a chain-of-thought:
+//
+//     "content": [
+//       {"type": "thinking", "thinking": [{"type": "text", "text": "..."}]},
+//       {"type": "text", "text": "The final answer is ..."}
+//     ]
+//
+// The function concatenates the visible text parts into the assistant
+// answer and the inner thinking text into the reasoning trace. Unknown
+// part types are skipped rather than failing, so a new part shape from
+// Mistral does not break the driver for tenants that don't use it.
+func extractMistralContent(raw interface{}) (string, string, error) {
+	switch v := raw.(type) {
+	case string:
+		return v, "", nil
+	case []interface{}:
+		var answer, reasoning strings.Builder
+		for _, part := range v {
+			pm, ok := part.(map[string]interface{})
+			if !ok {
+				continue
+			}
+			switch pm["type"] {
+			case "text":
+				if t, ok := pm["text"].(string); ok {
+					answer.WriteString(t)
+				}
+			case "thinking":
+				// thinking is an array of inner text parts; concatenate
+				// any inner element with a non-empty text field.
+				inner, ok := pm["thinking"].([]interface{})
+				if !ok {
+					continue
+				}
+				for _, sub := range inner {
+					sm, ok := sub.(map[string]interface{})
+					if !ok {
+						continue
+					}
+					if t, ok := sm["text"].(string); ok {
+						reasoning.WriteString(t)
+					}
+				}
+			}
+		}
+		return answer.String(), reasoning.String(), nil
+	case nil:
+		return "", "", nil
+	default:
+		return "", "", fmt.Errorf("mistral: unsupported content type %T", raw)
+	}
+}
+
 // ChatStreamlyWithSender sends messages and streams the response via the
 // sender function. The Mistral SSE stream uses the same shape as OpenAI:
 // "data:" lines carrying JSON events, with a final "[DONE]" line.
diff --git a/internal/entity/models/mistral_test.go b/internal/entity/models/mistral_test.go
index dc7f318e14..7f288faff0 100644
--- a/internal/entity/models/mistral_test.go
+++ b/internal/entity/models/mistral_test.go
@@ -572,3 +572,202 @@ func TestMistralEmbedRejectsHTTPError(t *testing.T) {
 		t.Errorf("expected Mistral embeddings API error, got %v", err)
 	}
 }
+
+// --- structured-content (magistral reasoning) tests ---
+
+// Regression net: the existing string-content path stays green for every
+// non-reasoning Mistral model.
+func TestMistralChatHandlesStringContent(t *testing.T) {
+	srv := newMistralServer(t, "/chat/completions", func(t *testing.T, _ map[string]interface{}, w http.ResponseWriter) {
+		_ = json.NewEncoder(w).Encode(map[string]interface{}{
+			"choices": []map[string]interface{}{{"message": map[string]interface{}{
+				"role":    "assistant",
+				"content": "Pong.",
+			}}},
+		})
+	})
+	defer srv.Close()
+
+	m := newMistralForTest(srv.URL)
+	apiKey := "test-key"
+	resp, err := m.ChatWithMessages("ministral-3b-latest",
+		[]Message{{Role: "user", Content: "ping"}},
+		&APIConfig{ApiKey: &apiKey}, nil)
+	if err != nil {
+		t.Fatalf("Chat: %v", err)
+	}
+	if *resp.Answer != "Pong." {
+		t.Errorf("Answer=%q want %q", *resp.Answer, "Pong.")
+	}
+	if *resp.ReasonContent != "" {
+		t.Errorf("ReasonContent=%q want empty", *resp.ReasonContent)
+	}
+}
+
+// New path: magistral with a non-trivial answer returns a structured
+// content array. Two part types — `thinking` and `text` — must be
+// concatenated into ReasonContent and Answer respectively.
+//
+// The fixture body is a trimmed copy of the actual response captured
+// from api.mistral.ai/v1/chat/completions against magistral-medium-latest
+// with the prompt "When do two trains meet?".
+func TestMistralChatExtractsReasoningFromStructuredContent(t *testing.T) {
+	srv := newMistralServer(t, "/chat/completions", func(t *testing.T, _ map[string]interface{}, w http.ResponseWriter) {
+		_ = json.NewEncoder(w).Encode(map[string]interface{}{
+			"choices": []map[string]interface{}{{"message": map[string]interface{}{
+				"role": "assistant",
+				"content": []map[string]interface{}{
+					{
+						"type": "thinking",
+						"thinking": []map[string]interface{}{
+							{"type": "text", "text": "Combined speed is 150 mph. "},
+							{"type": "text", "text": "300 / 150 = 2 hours."},
+						},
+						"closed": true,
+					},
+					{"type": "text", "text": "They will meet after **2 hours**."},
+				},
+			}}},
+		})
+	})
+	defer srv.Close()
+
+	m := newMistralForTest(srv.URL)
+	apiKey := "test-key"
+	resp, err := m.ChatWithMessages("magistral-medium-latest",
+		[]Message{{Role: "user", Content: "When do they meet?"}},
+		&APIConfig{ApiKey: &apiKey}, nil)
+	if err != nil {
+		t.Fatalf("Chat: %v", err)
+	}
+	wantAnswer := "They will meet after **2 hours**."
+	wantReason := "Combined speed is 150 mph. 300 / 150 = 2 hours."
+	if *resp.Answer != wantAnswer {
+		t.Errorf("Answer=%q want %q", *resp.Answer, wantAnswer)
+	}
+	if *resp.ReasonContent != wantReason {
+		t.Errorf("ReasonContent=%q want %q", *resp.ReasonContent, wantReason)
+	}
+}
+
+// magistral with a trivial answer that needed no reasoning returns the
+// structured shape with only a `text` part. ReasonContent must be empty.
+func TestMistralChatHandlesStructuredContentWithoutThinking(t *testing.T) {
+	srv := newMistralServer(t, "/chat/completions", func(t *testing.T, _ map[string]interface{}, w http.ResponseWriter) {
+		_ = json.NewEncoder(w).Encode(map[string]interface{}{
+			"choices": []map[string]interface{}{{"message": map[string]interface{}{
+				"role": "assistant",
+				"content": []map[string]interface{}{
+					{"type": "text", "text": "12"},
+				},
+			}}},
+		})
+	})
+	defer srv.Close()
+
+	m := newMistralForTest(srv.URL)
+	apiKey := "test-key"
+	resp, err := m.ChatWithMessages("magistral-small-latest",
+		[]Message{{Role: "user", Content: "15% of 80?"}},
+		&APIConfig{ApiKey: &apiKey}, nil)
+	if err != nil {
+		t.Fatalf("Chat: %v", err)
+	}
+	if *resp.Answer != "12" {
+		t.Errorf("Answer=%q want %q", *resp.Answer, "12")
+	}
+	if *resp.ReasonContent != "" {
+		t.Errorf("ReasonContent=%q want empty (no thinking part)", *resp.ReasonContent)
+	}
+}
+
+// Unknown part types must be skipped, not crash the driver. This makes
+// the parser forward-compatible with new Mistral content variants
+// (audio chunks, citations, etc.) that ragflow doesn't surface yet.
+func TestMistralChatIgnoresUnknownContentPartTypes(t *testing.T) {
+	srv := newMistralServer(t, "/chat/completions", func(t *testing.T, _ map[string]interface{}, w http.ResponseWriter) {
+		_ = json.NewEncoder(w).Encode(map[string]interface{}{
+			"choices": []map[string]interface{}{{"message": map[string]interface{}{
+				"role": "assistant",
+				"content": []map[string]interface{}{
+					{"type": "audio_url", "audio_url": "ignored://x"},
+					{"type": "text", "text": "Hello"},
+					{"type": "future_part_type", "blob": "?"},
+				},
+			}}},
+		})
+	})
+	defer srv.Close()
+
+	m := newMistralForTest(srv.URL)
+	apiKey := "test-key"
+	resp, err := m.ChatWithMessages("magistral-small-latest",
+		[]Message{{Role: "user", Content: "x"}},
+		&APIConfig{ApiKey: &apiKey}, nil)
+	if err != nil {
+		t.Fatalf("Chat: %v", err)
+	}
+	if *resp.Answer != "Hello" {
+		t.Errorf("Answer=%q want %q", *resp.Answer, "Hello")
+	}
+}
+
+// Direct unit coverage of the helper, including the nil and bad-type
+// edge cases that won't surface in the integration tests above.
+func TestExtractMistralContent(t *testing.T) {
+	tests := []struct {
+		name       string
+		input      interface{}
+		wantAns    string
+		wantReason string
+		wantErr    bool
+	}{
+		{"plain string", "hi", "hi", "", false},
+		{"empty string", "", "", "", false},
+		{"nil", nil, "", "", false},
+		{"empty array", []interface{}{}, "", "", false},
+		{
+			"text only",
+			[]interface{}{
+				map[string]interface{}{"type": "text", "text": "a"},
+				map[string]interface{}{"type": "text", "text": "b"},
+			},
+			"ab", "", false,
+		},
+		{
+			"thinking then text",
+			[]interface{}{
+				map[string]interface{}{
+					"type": "thinking",
+					"thinking": []interface{}{
+						map[string]interface{}{"type": "text", "text": "why "},
+						map[string]interface{}{"type": "text", "text": "this"},
+					},
+				},
+				map[string]interface{}{"type": "text", "text": "answer"},
+			},
+			"answer", "why this", false,
+		},
+		{"unknown root type", 42, "", "", true},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			ans, reason, err := extractMistralContent(tc.input)
+			if tc.wantErr {
+				if err == nil {
+					t.Errorf("want error, got nil")
+				}
+				return
+			}
+			if err != nil {
+				t.Errorf("unexpected err: %v", err)
+			}
+			if ans != tc.wantAns {
+				t.Errorf("answer=%q want %q", ans, tc.wantAns)
+			}
+			if reason != tc.wantReason {
+				t.Errorf("reason=%q want %q", reason, tc.wantReason)
+			}
+		})
+	}
+}