mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-04 18:45:38 +08:00
### What problem does this PR solve? ``` RAGFlow(admin)> mq publish 'msg2'; SUCCESS RAGFlow(admin)> mq publish 'msg3'; SUCCESS RAGFlow(admin)> mq list; +---------+---------------+ | message | subject | +---------+---------------+ | msg1 | tasks.RAGFLOW | | msg2 | tasks.RAGFLOW | | msg3 | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq pull 2; +---------+---------------+ | message | subject | +---------+---------------+ | msg1 | tasks.RAGFLOW | | msg2 | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq pull noack; +---------+---------------+ | message | subject | +---------+---------------+ | abc | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq show +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ | ack_pending_count | consumer_count | memory | message_count | pending_count | redelivered_count | waiting_count | +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ | 2 | 1 | 0 | 2 | 0 | 1 | 0 | +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ RAGFlow(admin)> list ingestors; +--------------+-------------------------------------------+--------+ | host | name | status | +--------------+-------------------------------------------+--------+ | 192.168.1.38 | ingestor-8f0e4bd5650a4ac58b0151969fbf6935 | alive | +--------------+-------------------------------------------+--------+ RAGFlow(admin)> list ingestion tasks; +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ | document_id | id | status | step | user | user_id | +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ | ffe64fae423411f1a2d938a74640adcc | 90d3d0f6528941c1ac8eb0360effccc4 | COMPLETED | 5 | aaa@aaa.com | 2ba4881420fa11f19e9c38a74640adcc | +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ RAGFlow(admin)> remove ingestion tasks '90d3d0f6528941c1ac8eb0360effccc4'; +---------+----------------------------------+ | delete | task_id | +---------+----------------------------------+ | success | 90d3d0f6528941c1ac8eb0360effccc4 | +---------+----------------------------------+ RAGFlow(admin)> stop ingestion tasks 'e89e20d9a25848a1b79bd9345ddbfe1d'; +----------+----------------------------------+ | status | task_id | +----------+----------------------------------+ | STOPPING | e89e20d9a25848a1b79bd9345ddbfe1d | +----------+----------------------------------+ # Publish a message RAGFlow(admin)> mq publish 'cdd'; SUCCESS # List current tasks in the message queue RAGFlow(admin)> mq list +----------------------------------+---------------+ | message | subject | +----------------------------------+---------------+ | 7ce392a3c1624cd2be4b5276e8825059 | tasks.RAGFLOW | +----------------------------------+---------------+ # Consume a task from the message queue RAGFlow(admin)> mq pull +------+-----+----------------+ | ack | id | type | +------+-----+----------------+ | true | cdd | ingestion_test | +------+-----+----------------+ # User mode # List ingestion tasks, followed by dataset id RAGFlow(user)> list ingestion tasks from '0abe79f9423311f1ad8d38a74640adcc'; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-05-30T20:21:06+08:00 | 1780143666289 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | 8d758cd14a8b4ba8ab505003fb52017d | | COMPLETED | 2026-05-30T20:21:26+08:00 | 1780143686431 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ RAGFlow(user)> list ingestion tasks; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-06-02T19:02:31+08:00 | 1780398151417 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | e89e20d9a25848a1b79bd9345ddbfe1d | | COMPLETED | 2026-06-02T19:02:52+08:00 | 1780398172208 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ # Create an ingestion task # First argument is document id, second argument is dataset id RAGFlow(user)> start ingestion 'ffe64fae423411f1a2d938a74640adcc' from '0abe79f9423311f1ad8d38a74640adcc'; +----------------------------------+-------------------------------------------+ | document_id | result | +----------------------------------+-------------------------------------------+ | ffe64fae423411f1a2d938a74640adcc | task_id: 8d758cd14a8b4ba8ab505003fb52017d | +----------------------------------+-------------------------------------------+ # Pause an ingestion task, first argument is ingestion id RAGFlow(user)> stop ingestion '8d758cd14a8b4ba8ab505003fb52017d'; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-05-30T20:21:06+08:00 | 1780143666289 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | 8d758cd14a8b4ba8ab505003fb52017d | | COMPLETED | 2026-05-30T20:21:26+08:00 | 1780143686431 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ # Delete an ingestion task RAGFlow(api/default)> remove ingestion tasks 'f366450a27d54677aec1c7090add30f0'; +---------+----------------------------------+ | remove | task_id | +---------+----------------------------------+ | success | f366450a27d54677aec1c7090add30f0 | +---------+----------------------------------+ ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
247 lines
6.8 KiB
Go
247 lines
6.8 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
package nats
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"ragflow/internal/common"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/nats-io/nats.go"
|
|
"github.com/nats-io/nats.go/jetstream"
|
|
)
|
|
|
|
type NatsEngine struct {
|
|
host string
|
|
port int
|
|
nc *nats.Conn
|
|
jetStream jetstream.JetStream
|
|
stream jetstream.Stream
|
|
consumer jetstream.Consumer
|
|
}
|
|
|
|
func NewNatsEngine(host string, port int) *NatsEngine {
|
|
return &NatsEngine{
|
|
host: host,
|
|
port: port,
|
|
}
|
|
}
|
|
|
|
func (n *NatsEngine) Init() error {
|
|
var err error
|
|
n.nc, err = nats.Connect(nats.DefaultURL)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to connect to NATS: %w", err)
|
|
}
|
|
|
|
n.jetStream, err = jetstream.New(n.nc)
|
|
if err != nil {
|
|
n.nc.Close()
|
|
return fmt.Errorf("failed to create JetStream context: %w", err)
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
|
|
streamCfg := jetstream.StreamConfig{
|
|
Name: "RAGFLOW_TASKS",
|
|
Subjects: []string{"tasks.>"},
|
|
Retention: jetstream.WorkQueuePolicy,
|
|
Storage: jetstream.FileStorage,
|
|
MaxMsgs: 1024 * 128,
|
|
MaxBytes: 1024 * 1024,
|
|
}
|
|
|
|
n.stream, err = n.jetStream.CreateStream(ctx, streamCfg)
|
|
if err != nil {
|
|
if err.Error() != "stream already exists" {
|
|
n.nc.Close()
|
|
return fmt.Errorf("fail to create stream: %w", err)
|
|
}
|
|
|
|
common.Info("NATS stream already exists, use existing stream")
|
|
n.stream, err = n.jetStream.Stream(ctx, "RAGFLOW_TASKS")
|
|
if err != nil {
|
|
n.nc.Close()
|
|
return fmt.Errorf("fail to get existing stream: %w", err)
|
|
}
|
|
} else {
|
|
common.Info("NATS stream create successfully")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (n *NatsEngine) PublishTask(subject string, payload []byte) error {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
ack, err := n.jetStream.Publish(ctx, subject, payload)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
common.Info(fmt.Sprintf("Task published, stream seq: %d", ack.Sequence))
|
|
return nil
|
|
}
|
|
|
|
func (n *NatsEngine) ShowMessageQueue() (map[string]string, error) {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
accountInfo, err := n.jetStream.AccountInfo(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get account info: %w", err)
|
|
}
|
|
result := make(map[string]string)
|
|
result["consumer_count"] = strconv.Itoa(accountInfo.Consumers)
|
|
result["memory"] = strconv.FormatUint(accountInfo.Memory, 10)
|
|
|
|
subjectFilter := "tasks.>"
|
|
info, err := n.stream.Info(ctx, jetstream.WithSubjectFilter(subjectFilter))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get stream info: %w", err)
|
|
}
|
|
result["message_count"] = strconv.FormatUint(info.State.Msgs, 10)
|
|
|
|
consumer, err := n.stream.Consumer(ctx, "RAGFLOW_CONSUMER")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get existing consumer: %w", err)
|
|
}
|
|
|
|
consumerInfo, err := consumer.Info(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get consumer info: %w", err)
|
|
}
|
|
result["pending_count"] = strconv.FormatUint(consumerInfo.NumPending, 10)
|
|
result["waiting_count"] = strconv.Itoa(consumerInfo.NumWaiting)
|
|
result["ack_pending_count"] = strconv.Itoa(consumerInfo.NumAckPending)
|
|
result["redelivered_count"] = strconv.Itoa(consumerInfo.NumRedelivered)
|
|
return result, nil
|
|
}
|
|
|
|
func (n *NatsEngine) ListMessages(messageType string, pending bool) ([]map[string]string, error) {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
|
|
if n.stream == nil {
|
|
return nil, fmt.Errorf("NATS stream not initialized")
|
|
}
|
|
|
|
subjectFilter := "tasks.>"
|
|
|
|
info, err := n.stream.Info(ctx, jetstream.WithSubjectFilter(subjectFilter))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get stream info: %w", err)
|
|
}
|
|
|
|
if info.State.Msgs == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
var messages []map[string]string
|
|
seq := info.State.FirstSeq
|
|
lastSeq := info.State.LastSeq
|
|
|
|
for seq <= lastSeq {
|
|
var msg *jetstream.RawStreamMsg
|
|
msg, err = n.stream.GetMsg(ctx, seq, jetstream.WithGetMsgSubject(subjectFilter))
|
|
if err != nil {
|
|
if errors.Is(err, jetstream.ErrMsgNotFound) {
|
|
break
|
|
}
|
|
return nil, fmt.Errorf("failed to get message at seq %d: %w", seq, err)
|
|
}
|
|
messageMap := make(map[string]string)
|
|
messageMap["subject"] = msg.Subject
|
|
messageMap["message"] = string(msg.Data)
|
|
messages = append(messages, messageMap)
|
|
seq = msg.Sequence + 1
|
|
}
|
|
|
|
common.Info(fmt.Sprintf("Listed %d messages for subject: %s", len(messages), subjectFilter))
|
|
return messages, nil
|
|
}
|
|
|
|
func (n *NatsEngine) InitConsumer(subject string) error {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
|
|
var err error
|
|
n.consumer, err = n.stream.CreateOrUpdateConsumer(ctx, jetstream.ConsumerConfig{
|
|
Name: "RAGFLOW_CONSUMER",
|
|
AckPolicy: jetstream.AckExplicitPolicy,
|
|
MaxDeliver: 16,
|
|
MaxAckPending: 1024 * 128,
|
|
FilterSubject: "tasks.>",
|
|
})
|
|
if err != nil {
|
|
// MaxAckPending is immutable after consumer creation.
|
|
// If the consumer already exists, fall back to fetching it.
|
|
if strings.Contains(err.Error(), "max waiting can not be updated") {
|
|
n.consumer, err = n.stream.Consumer(ctx, "RAGFLOW_CONSUMER")
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get existing consumer: %w", err)
|
|
}
|
|
} else {
|
|
return fmt.Errorf("failed to create Consumer: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
func (n *NatsEngine) GetMessages(messageCount int) ([]common.TaskHandle, error) {
|
|
resultMessages := make([]common.TaskHandle, 0)
|
|
messages, err := n.consumer.Fetch(messageCount, jetstream.FetchMaxWait(1*time.Second))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to fetch messages: %w", err)
|
|
}
|
|
for msg := range messages.Messages() {
|
|
resultMessages = append(resultMessages, NewNatsMessageHandle(msg))
|
|
}
|
|
return resultMessages, nil
|
|
}
|
|
|
|
type NatsMessageHandle struct {
|
|
message jetstream.Msg
|
|
}
|
|
|
|
func NewNatsMessageHandle(message jetstream.Msg) *NatsMessageHandle {
|
|
return &NatsMessageHandle{
|
|
message: message,
|
|
}
|
|
}
|
|
|
|
func (m *NatsMessageHandle) GetMessage() common.TaskMessage {
|
|
// convert to task message
|
|
var taskMessage common.TaskMessage
|
|
if err := json.Unmarshal(m.message.Data(), &taskMessage); err != nil {
|
|
common.Error("failed to unmarshal message", err)
|
|
}
|
|
return taskMessage
|
|
}
|
|
|
|
func (m *NatsMessageHandle) Ack() error {
|
|
return m.message.Ack()
|
|
}
|
|
|
|
func (m *NatsMessageHandle) Nack() error {
|
|
return m.message.Nak()
|
|
}
|