Files
ragflow/internal/agent/canvas/run_tracker.go
Zhichang Yu 3fa15c0e2f feat(agent): Go port — canvas engine, 22 components, DSL v2, 13 endpoints (#15952)
Ports the agent canvas subsystem from Python to Go.

## What's included

### Canvas Engine (Phase 0/1)
- State engine, scheduler, variable resolver, Redis checkpoint store,
cancel protocol
- **209 tests** across canvas / component / io packages

### 22 Components (P0–P4)
| Tier | Components |
|---|---|
| P0 T1+T2+T3 | LLM, Agent, ExitLoop, Switch, Categorize, Begin,
Message, Invoke |
| P1 T3 | VariableAggregator, VariableAssigner, StringTransform,
ListOperations, DataOperations |
| P2 T3 | Iteration, IterationItem, Loop, LoopItem |
| P3 T3 | UserFillUp, Fillup |
| P4 T5 | Browser, ExcelProcessor, DocsGenerator |

### DSL v2 Schema (Phase 2.5)
- Typed v2 in-memory model with v1-to-v2 auto-detect converter
- v1 legacy field stripping per plan §2.11.7

### HTTP Endpoints & Bug Fixes (Plans PR1–PR3)
- **DELETE SQL bug fix**: gorm v2 `Where("id = ?", id).Delete(...)`
pattern
- **CreateAgent validation**: title/DSL required, duplicate check, 103
envelope
- **13 new endpoints**: templates, prompts, tags, sessions CRUD,
chat/completions (SSE + non-stream stubs), rerun, test_db_connection,
logs, webhook/logs
- **756 Go unit tests** (745 → 756, +18)
- **17 → 0 Python integration test failures** (test_agents.py +
test_session_management/)

### Tools
21 eino tools: HTTPHelper, search tools, financial/data tools, mandatory
stubs

### Infrastructure
OTel observability, NATS message queue, DeepDoc gRPC client, SSRF
guards, IDOR mitigation
2026-06-12 22:58:28 +08:00

152 lines
5.3 KiB
Go

//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// run_tracker.go persists canvas-run business metadata to a Redis Hash.
// See plan §2.6 (Key 2: "agent:run:{run_id}"). This is the *business*
// channel — checkpoint payload (eino bytes) lives in checkpoint_store.go.
//
// Status code mapping (stored as int under the "status" field):
//
// 0 = running, 1 = succeeded, 2 = failed, 3 = cancelled.
package canvas
import (
"context"
"errors"
"time"
"github.com/redis/go-redis/v9"
"ragflow/internal/cache"
)
// runKeyPrefix is the Redis Hash key namespace for run metadata.
// runKey prepends it to a run id, so the full key is "agent:run:{run_id}".
const runKeyPrefix = "agent:run:"
// runStatus values for the "status" hash field. Each constant is the
// decimal digit of the corresponding status code, held as a string.
const (
runStatusRunning = "0" // written by Start
runStatusSucceeded = "1" // written by MarkSucceeded
runStatusFailed = "2" // written by MarkFailed
runStatusCancelled = "3" // written by MarkCancelled
)
// runKey returns the Redis Hash key under which the metadata for runID is
// stored: runKeyPrefix followed by the run id.
func runKey(runID string) string {
	key := runKeyPrefix + runID
	return key
}
// RunTracker manages canvas-run metadata (canvas_id, status, checkpoint
// link, resume chain, ...) on a Redis Hash. Operations are explicit — the
// eino CheckPointStore does NOT write these fields, so callers (HTTP
// handler, cancel watcher) must invoke Start/Mark* at the right points.
//
// Every method first checks for a nil receiver or a nil client and returns
// an error in that case.
type RunTracker struct {
// client is the Redis connection used for every hash operation. It is nil
// when NewRunTracker found no initialized cache.
client *redis.Client
// ttl is the expiry that Start applies to a run's hash key.
ttl time.Duration
}
// NewRunTracker builds a tracker backed by the global Redis client exposed
// by the cache package, remembering ttl as the expiry for run hashes.
//
// If the cache has not been initialized the tracker is still returned, but
// its client stays nil; every method then reports an error instead of
// panicking. Tests that need their own client can skip this constructor and
// build a RunTracker with a struct literal.
func NewRunTracker(ttl time.Duration) *RunTracker {
	tracker := &RunTracker{ttl: ttl}
	if shared := cache.Get(); shared != nil {
		tracker.client = shared.GetClient()
	}
	return tracker
}
// Start records a new run as in-progress. canvasID and tenantID identify
// the source DSL and tenant; parentRunID may be empty for fresh runs and
// carries the source run-id for resume chains (R1 in plan §2.6).
//
// The hash is written with status=running, cancel_requested=0 and
// started_at set to the current Unix time in milliseconds.
//
// The HSet + Expire are sent through a pipeline so a TTL is set on the
// first write — without that, the key would have no expiry and a crashed
// run would leak the hash. The Expire is only queued when the tracker's ttl
// is positive: Redis deletes a key when EXPIRE receives a non-positive
// timeout, which would erase the hash that was just written. A non-positive
// ttl therefore means "no expiry".
//
// It returns an error when the tracker or its Redis client is nil, or when
// executing the pipeline fails.
func (t *RunTracker) Start(ctx context.Context, runID, canvasID, tenantID, parentRunID string) error {
	if t == nil || t.client == nil {
		return errors.New("run tracker: redis client not initialized")
	}
	now := time.Now().UnixMilli()
	key := runKey(runID)
	pipe := t.client.Pipeline()
	pipe.HSet(ctx, key, map[string]any{
		"canvas_id":        canvasID,
		"tenant_id":        tenantID,
		"parent_run_id":    parentRunID,
		"status":           runStatusRunning,
		"cancel_requested": 0,
		"started_at":       now,
	})
	// Guard against ttl <= 0: EXPIRE with a non-positive timeout deletes
	// the key instead of leaving it without an expiry.
	if t.ttl > 0 {
		pipe.Expire(ctx, key, t.ttl)
	}
	_, err := pipe.Exec(ctx)
	return err
}
// AttachCheckpoint writes the latest checkpoint id for this run. It is the
// ONLY writer of the "checkpoint_id" field; every W1/W2/W3/W4 path (plan
// §2.6) must call this once before the run goroutine returns.
//
// HSET creates the hash when it is missing (never started, or already
// expired), and such a recreated key would carry no TTL. To avoid leaking
// it, the write is pipelined with an Expire whenever the tracker's ttl is
// positive; this also refreshes the expiry of an existing run hash. A
// non-positive ttl skips the Expire because Redis would delete the key.
//
// It returns an error when the tracker or its Redis client is nil, or when
// executing the pipeline fails.
func (t *RunTracker) AttachCheckpoint(ctx context.Context, runID, checkpointID string) error {
	if t == nil || t.client == nil {
		return errors.New("run tracker: redis client not initialized")
	}
	key := runKey(runID)
	pipe := t.client.Pipeline()
	pipe.HSet(ctx, key, "checkpoint_id", checkpointID)
	if t.ttl > 0 {
		pipe.Expire(ctx, key, t.ttl)
	}
	_, err := pipe.Exec(ctx)
	return err
}
// MarkSucceeded transitions the run to status=1 and stamps finished_at
// with the current Unix time in milliseconds.
//
// HSET creates the hash when it is missing (for example because the run
// outlived its TTL), and such a recreated key would carry no expiry. To
// avoid leaking it, the write is pipelined with an Expire whenever the
// tracker's ttl is positive; this also refreshes the expiry of an existing
// run hash. A non-positive ttl skips the Expire because Redis would delete
// the key.
//
// It returns an error when the tracker or its Redis client is nil, or when
// executing the pipeline fails.
func (t *RunTracker) MarkSucceeded(ctx context.Context, runID string) error {
	if t == nil || t.client == nil {
		return errors.New("run tracker: redis client not initialized")
	}
	key := runKey(runID)
	pipe := t.client.Pipeline()
	pipe.HSet(ctx, key,
		"status", runStatusSucceeded,
		"finished_at", time.Now().UnixMilli(),
	)
	if t.ttl > 0 {
		pipe.Expire(ctx, key, t.ttl)
	}
	_, err := pipe.Exec(ctx)
	return err
}
// MarkFailed transitions the run to status=2, stamps finished_at with the
// current Unix time in milliseconds and records reason under
// "failure_reason".
//
// HSET creates the hash when it is missing (for example because the run
// outlived its TTL), and such a recreated key would carry no expiry. To
// avoid leaking it, the write is pipelined with an Expire whenever the
// tracker's ttl is positive; this also refreshes the expiry of an existing
// run hash. A non-positive ttl skips the Expire because Redis would delete
// the key.
//
// It returns an error when the tracker or its Redis client is nil, or when
// executing the pipeline fails.
func (t *RunTracker) MarkFailed(ctx context.Context, runID, reason string) error {
	if t == nil || t.client == nil {
		return errors.New("run tracker: redis client not initialized")
	}
	key := runKey(runID)
	pipe := t.client.Pipeline()
	pipe.HSet(ctx, key,
		"status", runStatusFailed,
		"finished_at", time.Now().UnixMilli(),
		"failure_reason", reason,
	)
	if t.ttl > 0 {
		pipe.Expire(ctx, key, t.ttl)
	}
	_, err := pipe.Exec(ctx)
	return err
}
// MarkCancelled transitions the run to status=3, stamps finished_at with
// the current Unix time in milliseconds and sets the cancel flag
// ("cancel_requested" = 1).
//
// HSET creates the hash when it is missing (for example because the run
// outlived its TTL), and such a recreated key would carry no expiry. To
// avoid leaking it, the write is pipelined with an Expire whenever the
// tracker's ttl is positive; this also refreshes the expiry of an existing
// run hash. A non-positive ttl skips the Expire because Redis would delete
// the key.
//
// It returns an error when the tracker or its Redis client is nil, or when
// executing the pipeline fails.
func (t *RunTracker) MarkCancelled(ctx context.Context, runID string) error {
	if t == nil || t.client == nil {
		return errors.New("run tracker: redis client not initialized")
	}
	key := runKey(runID)
	pipe := t.client.Pipeline()
	pipe.HSet(ctx, key,
		"status", runStatusCancelled,
		"finished_at", time.Now().UnixMilli(),
		"cancel_requested", 1,
	)
	if t.ttl > 0 {
		pipe.Expire(ctx, key, t.ttl)
	}
	_, err := pipe.Exec(ctx)
	return err
}
// Get fetches every field of the run's hash as a field-name -> value map.
//
// A run id with no stored hash yields an empty (non-nil) map together with
// a nil error, so callers detect "no such run" with len(map)==0. When the
// tracker or its Redis client is nil, Get returns a nil map and an error.
func (t *RunTracker) Get(ctx context.Context, runID string) (map[string]string, error) {
	if t == nil || t.client == nil {
		return nil, errors.New("run tracker: redis client not initialized")
	}
	fields, err := t.client.HGetAll(ctx, runKey(runID)).Result()
	return fields, err
}