Files
ragflow/internal/engine/nats/nats.go
Jin Hai e96bc37d06 Go: use NATS as the message queue (#15327)
### What problem does this PR solve?

```
RAGFlow(admin)> mq publish 'msg2';
SUCCESS
RAGFlow(admin)> mq publish 'msg3';
SUCCESS
RAGFlow(admin)> mq list;
+---------+---------------+
| message | subject       |
+---------+---------------+
| msg1    | tasks.RAGFLOW |
| msg2    | tasks.RAGFLOW |
| msg3    | tasks.RAGFLOW |
+---------+---------------+
RAGFlow(admin)> mq pull 2;
+---------+---------------+
| message | subject       |
+---------+---------------+
| msg1    | tasks.RAGFLOW |
| msg2    | tasks.RAGFLOW |
+---------+---------------+
RAGFlow(admin)> mq pull noack;
+---------+---------------+
| message | subject       |
+---------+---------------+
| abc     | tasks.RAGFLOW |
+---------+---------------+
RAGFlow(admin)> mq show
+-------------------+----------------+--------+---------------+---------------+-------------------+---------------+
| ack_pending_count | consumer_count | memory | message_count | pending_count | redelivered_count | waiting_count |
+-------------------+----------------+--------+---------------+---------------+-------------------+---------------+
| 2                 | 1              | 0      | 2             | 0             | 1                 | 0             |
+-------------------+----------------+--------+---------------+---------------+-------------------+---------------+

RAGFlow(admin)> list ingestors;
+--------------+-------------------------------------------+--------+
| host         | name                                      | status |
+--------------+-------------------------------------------+--------+
| 192.168.1.38 | ingestor-8f0e4bd5650a4ac58b0151969fbf6935 | alive  |
+--------------+-------------------------------------------+--------+

RAGFlow(admin)> list ingestion tasks;
+----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+
| document_id                      | id                               | status    | step | user        | user_id                          |
+----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+
| ffe64fae423411f1a2d938a74640adcc | 90d3d0f6528941c1ac8eb0360effccc4 | COMPLETED | 5    | aaa@aaa.com | 2ba4881420fa11f19e9c38a74640adcc |
+----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+

RAGFlow(admin)> remove ingestion tasks '90d3d0f6528941c1ac8eb0360effccc4';
+---------+----------------------------------+
| delete  | task_id                          |
+---------+----------------------------------+
| success | 90d3d0f6528941c1ac8eb0360effccc4 |
+---------+----------------------------------+

RAGFlow(admin)> stop ingestion tasks 'e89e20d9a25848a1b79bd9345ddbfe1d';
+----------+----------------------------------+
| status   | task_id                          |
+----------+----------------------------------+
| STOPPING | e89e20d9a25848a1b79bd9345ddbfe1d |
+----------+----------------------------------+

# Publish a message
RAGFlow(admin)> mq publish 'cdd';
SUCCESS

# List current tasks in the message queue
RAGFlow(admin)> mq list
+----------------------------------+---------------+
| message                          | subject       |
+----------------------------------+---------------+
| 7ce392a3c1624cd2be4b5276e8825059 | tasks.RAGFLOW |
+----------------------------------+---------------+

# Consume a task from the message queue
RAGFlow(admin)> mq pull
+------+-----+----------------+
| ack  | id  | type           |
+------+-----+----------------+
| true | cdd | ingestion_test |
+------+-----+----------------+

# User mode
# List the ingestion tasks of a dataset; the argument is the dataset id
RAGFlow(user)> list ingestion tasks from '0abe79f9423311f1ad8d38a74640adcc';
+---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+
| create_date               | create_time   | dataset_id                       | document_id                      | id                               | schema | status    | update_date               | update_time   | user_id                          |
+---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+
| 2026-05-30T20:21:06+08:00 | 1780143666289 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | 8d758cd14a8b4ba8ab505003fb52017d |        | COMPLETED | 2026-05-30T20:21:26+08:00 | 1780143686431 | 2ba4881420fa11f19e9c38a74640adcc |
+---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+

RAGFlow(user)> list ingestion tasks;
+---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+
| create_date               | create_time   | dataset_id                       | document_id                      | id                               | schema | status    | update_date               | update_time   | user_id                          |
+---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+
| 2026-06-02T19:02:31+08:00 | 1780398151417 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | e89e20d9a25848a1b79bd9345ddbfe1d |        | COMPLETED | 2026-06-02T19:02:52+08:00 | 1780398172208 | 2ba4881420fa11f19e9c38a74640adcc |
+---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+

# Create an ingestion task
# First argument is document id, second argument is dataset id
RAGFlow(user)> start ingestion 'ffe64fae423411f1a2d938a74640adcc' from '0abe79f9423311f1ad8d38a74640adcc';
+----------------------------------+-------------------------------------------+
| document_id                      | result                                    |
+----------------------------------+-------------------------------------------+
| ffe64fae423411f1a2d938a74640adcc | task_id: 8d758cd14a8b4ba8ab505003fb52017d |
+----------------------------------+-------------------------------------------+

# Stop (pause) an ingestion task; the argument is the ingestion task id
RAGFlow(user)> stop ingestion '8d758cd14a8b4ba8ab505003fb52017d';
+---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+
| create_date               | create_time   | dataset_id                       | document_id                      | id                               | schema | status    | update_date               | update_time   | user_id                          |
+---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+
| 2026-05-30T20:21:06+08:00 | 1780143666289 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | 8d758cd14a8b4ba8ab505003fb52017d |        | COMPLETED | 2026-05-30T20:21:26+08:00 | 1780143686431 | 2ba4881420fa11f19e9c38a74640adcc |
+---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+

# Delete an ingestion task
RAGFlow(api/default)> remove ingestion tasks 'f366450a27d54677aec1c7090add30f0';
+---------+----------------------------------+
| remove  | task_id                          |
+---------+----------------------------------+
| success | f366450a27d54677aec1c7090add30f0 |
+---------+----------------------------------+

```

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-06-12 14:56:44 +08:00

247 lines
6.8 KiB
Go

//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package nats
import (
"context"
"encoding/json"
"errors"
"fmt"
"ragflow/internal/common"
"strconv"
"strings"
"time"
"github.com/nats-io/nats.go"
"github.com/nats-io/nats.go/jetstream"
)
// NatsEngine bundles the NATS connection and the JetStream handles used as
// the task message queue.
type NatsEngine struct {
	// Endpoint as passed to NewNatsEngine.
	host string
	port int

	// nc is the NATS connection; it is set by Init.
	nc *nats.Conn
	// jetStream is the JetStream context created on top of nc by Init.
	jetStream jetstream.JetStream
	// stream is the RAGFLOW_TASKS stream handle obtained by Init.
	stream jetstream.Stream
	// consumer is the pull consumer handle; it is set by InitConsumer.
	consumer jetstream.Consumer
}
// NewNatsEngine returns an engine that remembers the given host and port.
// It only records the endpoint; no connection is opened until Init is called.
func NewNatsEngine(host string, port int) *NatsEngine {
	engine := new(NatsEngine)
	engine.host = host
	engine.port = port
	return engine
}
// Init connects to the NATS server, creates a JetStream context on top of the
// connection and makes sure the RAGFLOW_TASKS work-queue stream is available.
//
// The server URL is built from the host and port given to NewNatsEngine. When
// no endpoint was configured (empty host or non-positive port) the library
// default, nats.DefaultURL, is used instead.
//
// If a stream with the same name already exists with a different
// configuration, the existing stream is reused rather than treated as a
// failure.
//
// The engine's fields are only assigned once every step has succeeded; on any
// error the freshly opened connection is closed and the engine is left
// untouched. The returned error wraps the underlying cause.
func (n *NatsEngine) Init() error {
	// Honour the configured endpoint instead of always dialing the default.
	url := nats.DefaultURL
	if n.host != "" && n.port > 0 {
		host := n.host
		// A bare IPv6 literal must be bracketed before a port is appended.
		if strings.Contains(host, ":") && !strings.HasPrefix(host, "[") {
			host = "[" + host + "]"
		}
		url = "nats://" + host + ":" + strconv.Itoa(n.port)
	}

	nc, err := nats.Connect(url)
	if err != nil {
		return fmt.Errorf("failed to connect to NATS: %w", err)
	}

	js, err := jetstream.New(nc)
	if err != nil {
		nc.Close()
		return fmt.Errorf("failed to create JetStream context: %w", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	streamCfg := jetstream.StreamConfig{
		Name:      "RAGFLOW_TASKS",
		Subjects:  []string{"tasks.>"},
		Retention: jetstream.WorkQueuePolicy,
		Storage:   jetstream.FileStorage,
		MaxMsgs:   1024 * 128,
		MaxBytes:  1024 * 1024,
	}

	stream, err := js.CreateStream(ctx, streamCfg)
	switch {
	case err == nil:
		common.Info("NATS stream create successfully")
	case errors.Is(err, jetstream.ErrStreamNameAlreadyInUse):
		// Compare against the sentinel error: the error text carries an
		// "API error" prefix and codes, so matching on a fixed string never
		// succeeds.
		common.Info("NATS stream already exists, use existing stream")
		stream, err = js.Stream(ctx, streamCfg.Name)
		if err != nil {
			nc.Close()
			return fmt.Errorf("fail to get existing stream: %w", err)
		}
	default:
		nc.Close()
		return fmt.Errorf("fail to create stream: %w", err)
	}

	n.nc = nc
	n.jetStream = js
	n.stream = stream
	return nil
}
// PublishTask publishes payload on subject through JetStream and waits up to
// five seconds for the server acknowledgement. The stream sequence assigned
// to the message is logged on success.
//
// It returns an error when the engine has not been initialized (Init was not
// called or failed) or when the publish itself fails; the publish error is
// wrapped so callers can still inspect it with errors.Is / errors.As.
func (n *NatsEngine) PublishTask(subject string, payload []byte) error {
	// Guard against use before Init: calling through a nil interface panics.
	if n.jetStream == nil {
		return fmt.Errorf("NATS JetStream not initialized")
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	ack, err := n.jetStream.Publish(ctx, subject, payload)
	if err != nil {
		return fmt.Errorf("failed to publish task to subject %q: %w", subject, err)
	}

	common.Info(fmt.Sprintf("Task published, stream seq: %d", ack.Sequence))
	return nil
}
// ShowMessageQueue collects a snapshot of queue statistics and returns them
// as string values keyed by name:
//
//   - consumer_count, memory: taken from the JetStream account info
//   - message_count: messages in the stream matching "tasks.>"
//   - pending_count, waiting_count, ack_pending_count, redelivered_count:
//     taken from the RAGFLOW_CONSUMER consumer info
//
// All requests share a single ten second timeout. An error is returned when
// the engine has not been initialized or when any of the lookups fails
// (including when the RAGFLOW_CONSUMER consumer cannot be fetched).
func (n *NatsEngine) ShowMessageQueue() (map[string]string, error) {
	// Guard against use before Init: calling through a nil interface panics.
	if n.jetStream == nil || n.stream == nil {
		return nil, fmt.Errorf("NATS stream not initialized")
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	accountInfo, err := n.jetStream.AccountInfo(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get account info: %w", err)
	}

	subjectFilter := "tasks.>"
	info, err := n.stream.Info(ctx, jetstream.WithSubjectFilter(subjectFilter))
	if err != nil {
		return nil, fmt.Errorf("failed to get stream info: %w", err)
	}

	consumer, err := n.stream.Consumer(ctx, "RAGFLOW_CONSUMER")
	if err != nil {
		return nil, fmt.Errorf("failed to get existing consumer: %w", err)
	}
	consumerInfo, err := consumer.Info(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get consumer info: %w", err)
	}

	// Build the result only after every lookup succeeded.
	result := map[string]string{
		"consumer_count":    strconv.Itoa(accountInfo.Consumers),
		"memory":            strconv.FormatUint(accountInfo.Memory, 10),
		"message_count":     strconv.FormatUint(info.State.Msgs, 10),
		"pending_count":     strconv.FormatUint(consumerInfo.NumPending, 10),
		"waiting_count":     strconv.Itoa(consumerInfo.NumWaiting),
		"ack_pending_count": strconv.Itoa(consumerInfo.NumAckPending),
		"redelivered_count": strconv.Itoa(consumerInfo.NumRedelivered),
	}
	return result, nil
}
// ListMessages reads, without consuming them, the messages currently stored
// in the stream under "tasks.>" and returns one map per message with the keys
// "subject" and "message" (the raw payload as a string).
//
// The messageType and pending arguments are accepted but not used by this
// implementation. The whole scan shares one ten second timeout. A nil slice
// and nil error are returned when the stream holds no matching messages.
func (n *NatsEngine) ListMessages(messageType string, pending bool) ([]map[string]string, error) {
	if n.stream == nil {
		return nil, fmt.Errorf("NATS stream not initialized")
	}

	const filter = "tasks.>"

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	streamInfo, err := n.stream.Info(ctx, jetstream.WithSubjectFilter(filter))
	if err != nil {
		return nil, fmt.Errorf("failed to get stream info: %w", err)
	}
	if streamInfo.State.Msgs == 0 {
		return nil, nil
	}

	var listed []map[string]string
	// Walk the stream by sequence number. Each lookup asks for the next
	// message matching the filter at or after the requested sequence, so the
	// cursor jumps to just past whatever sequence was actually returned.
	for next, last := streamInfo.State.FirstSeq, streamInfo.State.LastSeq; next <= last; {
		raw, getErr := n.stream.GetMsg(ctx, next, jetstream.WithGetMsgSubject(filter))
		if errors.Is(getErr, jetstream.ErrMsgNotFound) {
			// Nothing left at or after this sequence.
			break
		}
		if getErr != nil {
			return nil, fmt.Errorf("failed to get message at seq %d: %w", next, getErr)
		}

		listed = append(listed, map[string]string{
			"subject": raw.Subject,
			"message": string(raw.Data),
		})
		next = raw.Sequence + 1
	}

	common.Info(fmt.Sprintf("Listed %d messages for subject: %s", len(listed), filter))
	return listed, nil
}
// InitConsumer creates (or updates) the durable RAGFLOW_CONSUMER consumer on
// the task stream and stores its handle on the engine for GetMessages.
//
// The consumer uses explicit acknowledgements, allows up to 16 delivery
// attempts per message and filters on "tasks.>".
// NOTE(review): the subject argument is currently not used; the filter
// subject is fixed. Confirm with callers whether that is intended.
//
// It returns an error when the engine has not been initialized or when the
// consumer can neither be created nor fetched. The stored consumer handle is
// only replaced on success.
func (n *NatsEngine) InitConsumer(subject string) error {
	// Guard against use before Init: calling through a nil interface panics.
	if n.stream == nil {
		return fmt.Errorf("NATS stream not initialized")
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	consumer, err := n.stream.CreateOrUpdateConsumer(ctx, jetstream.ConsumerConfig{
		Name:          "RAGFLOW_CONSUMER",
		AckPolicy:     jetstream.AckExplicitPolicy,
		MaxDeliver:    16,
		MaxAckPending: 1024 * 128,
		FilterSubject: "tasks.>",
	})
	if err != nil {
		// The update is rejected with "max waiting can not be updated" when
		// the consumer already exists with a different max-waiting setting.
		// In that case reuse the existing consumer instead of failing.
		if !strings.Contains(err.Error(), "max waiting can not be updated") {
			return fmt.Errorf("failed to create Consumer: %w", err)
		}
		consumer, err = n.stream.Consumer(ctx, "RAGFLOW_CONSUMER")
		if err != nil {
			return fmt.Errorf("failed to get existing consumer: %w", err)
		}
	}

	n.consumer = consumer
	return nil
}
// GetMessages pulls up to messageCount messages from the consumer, waiting at
// most one second for them, and wraps each one in a task handle that the
// caller must Ack or Nack.
//
// It returns an error when InitConsumer has not been called, when the fetch
// request cannot be issued, or when the batch ended with an error before any
// message was received. If the batch failed after some messages had already
// been delivered, those messages are still returned (and the error is logged)
// so that they can be processed and acknowledged instead of waiting for
// redelivery.
func (n *NatsEngine) GetMessages(messageCount int) ([]common.TaskHandle, error) {
	// Guard against use before InitConsumer: calling through a nil interface panics.
	if n.consumer == nil {
		return nil, fmt.Errorf("NATS consumer not initialized")
	}

	messages, err := n.consumer.Fetch(messageCount, jetstream.FetchMaxWait(1*time.Second))
	if err != nil {
		return nil, fmt.Errorf("failed to fetch messages: %w", err)
	}

	resultMessages := make([]common.TaskHandle, 0)
	for msg := range messages.Messages() {
		resultMessages = append(resultMessages, NewNatsMessageHandle(msg))
	}

	// Errors that happen while the batch is being delivered are only
	// reported through Error() once the channel has been drained.
	if batchErr := messages.Error(); batchErr != nil {
		if len(resultMessages) == 0 {
			return nil, fmt.Errorf("failed to fetch messages: %w", batchErr)
		}
		common.Error("message batch ended with error", batchErr)
	}
	return resultMessages, nil
}
// NatsMessageHandle wraps a single JetStream message so that it can be
// decoded and acknowledged through the methods defined on it.
type NatsMessageHandle struct {
	// message is the underlying JetStream message.
	message jetstream.Msg
}
// NewNatsMessageHandle returns a handle wrapping the given JetStream message.
func NewNatsMessageHandle(message jetstream.Msg) *NatsMessageHandle {
	handle := new(NatsMessageHandle)
	handle.message = message
	return handle
}
// GetMessage decodes the message payload as JSON into a common.TaskMessage.
// A payload that fails to decode is logged, and whatever was decoded up to
// that point (the zero value if nothing was) is returned.
func (m *NatsMessageHandle) GetMessage() common.TaskMessage {
	var decoded common.TaskMessage
	err := json.Unmarshal(m.message.Data(), &decoded)
	if err != nil {
		common.Error("failed to unmarshal message", err)
	}
	return decoded
}
// Ack acknowledges the wrapped message and returns the error reported by the
// underlying Ack call, if any.
func (m *NatsMessageHandle) Ack() error {
	err := m.message.Ack()
	return err
}
// Nack negatively acknowledges the wrapped message by calling Nak on it and
// returns the error reported by that call, if any.
func (m *NatsMessageHandle) Nack() error {
	err := m.message.Nak()
	return err
}