mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
### What problem does this PR solve? ``` RAGFlow(admin)> mq publish 'msg2'; SUCCESS RAGFlow(admin)> mq publish 'msg3'; SUCCESS RAGFlow(admin)> mq list; +---------+---------------+ | message | subject | +---------+---------------+ | msg1 | tasks.RAGFLOW | | msg2 | tasks.RAGFLOW | | msg3 | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq pull 2; +---------+---------------+ | message | subject | +---------+---------------+ | msg1 | tasks.RAGFLOW | | msg2 | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq pull noack; +---------+---------------+ | message | subject | +---------+---------------+ | abc | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq show +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ | ack_pending_count | consumer_count | memory | message_count | pending_count | redelivered_count | waiting_count | +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ | 2 | 1 | 0 | 2 | 0 | 1 | 0 | +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ RAGFlow(admin)> list ingestors; +--------------+-------------------------------------------+--------+ | host | name | status | +--------------+-------------------------------------------+--------+ | 192.168.1.38 | ingestor-8f0e4bd5650a4ac58b0151969fbf6935 | alive | +--------------+-------------------------------------------+--------+ RAGFlow(admin)> list ingestion tasks; +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ | document_id | id | status | step | user | user_id | +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ | ffe64fae423411f1a2d938a74640adcc | 90d3d0f6528941c1ac8eb0360effccc4 | COMPLETED | 5 | 
aaa@aaa.com | 2ba4881420fa11f19e9c38a74640adcc | +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ RAGFlow(admin)> remove ingestion tasks '90d3d0f6528941c1ac8eb0360effccc4'; +---------+----------------------------------+ | delete | task_id | +---------+----------------------------------+ | success | 90d3d0f6528941c1ac8eb0360effccc4 | +---------+----------------------------------+ RAGFlow(admin)> stop ingestion tasks 'e89e20d9a25848a1b79bd9345ddbfe1d'; +----------+----------------------------------+ | status | task_id | +----------+----------------------------------+ | STOPPING | e89e20d9a25848a1b79bd9345ddbfe1d | +----------+----------------------------------+ # Publish a message RAGFlow(admin)> mq publish 'cdd'; SUCCESS # List current tasks in the message queue RAGFlow(admin)> mq list +----------------------------------+---------------+ | message | subject | +----------------------------------+---------------+ | 7ce392a3c1624cd2be4b5276e8825059 | tasks.RAGFLOW | +----------------------------------+---------------+ # Consume a task from the message queue RAGFlow(admin)> mq pull +------+-----+----------------+ | ack | id | type | +------+-----+----------------+ | true | cdd | ingestion_test | +------+-----+----------------+ # User mode # List ingestion tasks, followed by dataset id RAGFlow(user)> list ingestion tasks from '0abe79f9423311f1ad8d38a74640adcc'; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | 
+---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-05-30T20:21:06+08:00 | 1780143666289 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | 8d758cd14a8b4ba8ab505003fb52017d | | COMPLETED | 2026-05-30T20:21:26+08:00 | 1780143686431 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ RAGFlow(user)> list ingestion tasks; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-06-02T19:02:31+08:00 | 1780398151417 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | e89e20d9a25848a1b79bd9345ddbfe1d | | COMPLETED | 2026-06-02T19:02:52+08:00 | 1780398172208 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ # Create an ingestion task # First argument is document id, second argument is dataset id 
RAGFlow(user)> start ingestion 'ffe64fae423411f1a2d938a74640adcc' from '0abe79f9423311f1ad8d38a74640adcc'; +----------------------------------+-------------------------------------------+ | document_id | result | +----------------------------------+-------------------------------------------+ | ffe64fae423411f1a2d938a74640adcc | task_id: 8d758cd14a8b4ba8ab505003fb52017d | +----------------------------------+-------------------------------------------+ # Pause an ingestion task, first argument is ingestion id RAGFlow(user)> stop ingestion '8d758cd14a8b4ba8ab505003fb52017d'; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-05-30T20:21:06+08:00 | 1780143666289 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | 8d758cd14a8b4ba8ab505003fb52017d | | COMPLETED | 2026-05-30T20:21:26+08:00 | 1780143686431 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ # Delete an ingestion task RAGFlow(api/default)> remove ingestion tasks 'f366450a27d54677aec1c7090add30f0'; +---------+----------------------------------+ | remove | task_id | +---------+----------------------------------+ | success | f366450a27d54677aec1c7090add30f0 | 
+---------+----------------------------------+ ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
99 lines
4.7 KiB
Go
99 lines
4.7 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
package engine
|
|
|
|
import (
|
|
"context"
|
|
"ragflow/internal/common"
|
|
"ragflow/internal/engine/types"
|
|
)
|
|
|
|
// EngineType is the string identifier of a document engine backend.
type EngineType string

// Known document engine identifiers.
const (
	// EngineElasticsearch is the identifier for the Elasticsearch engine.
	EngineElasticsearch = EngineType("elasticsearch")
	// EngineInfinity is the identifier for the Infinity engine.
	EngineInfinity = EngineType("infinity")
)
|
|
|
|
// DocEngine document storage engine interface
|
|
type DocEngine interface {
|
|
// Chunk operations
|
|
CreateChunkStore(ctx context.Context, baseName, datasetID string, vectorSize int, parserID string) error
|
|
InsertChunks(ctx context.Context, chunks []map[string]interface{}, baseName string, datasetID string) ([]string, error)
|
|
UpdateChunks(ctx context.Context, condition map[string]interface{}, newValue map[string]interface{}, baseName string, datasetID string) error
|
|
DeleteChunks(ctx context.Context, condition map[string]interface{}, baseName string, datasetID string) (int64, error)
|
|
Search(ctx context.Context, req *types.SearchRequest) (*types.SearchResult, error)
|
|
GetChunk(ctx context.Context, baseName, chunkID string, datasetIDs []string) (interface{}, error)
|
|
DropChunkStore(ctx context.Context, baseName, datasetID string) error
|
|
ChunkStoreExists(ctx context.Context, baseName, datasetID string) (bool, error)
|
|
|
|
// Document metadata operations
|
|
CreateMetadataStore(ctx context.Context, tenantID string) error
|
|
InsertMetadata(ctx context.Context, metadata []map[string]interface{}, tenantID string) ([]string, error)
|
|
UpdateMetadata(ctx context.Context, docID string, datasetID string, metaFields map[string]interface{}, tenantID string) error
|
|
DeleteMetadata(ctx context.Context, condition map[string]interface{}, tenantID string) (int64, error)
|
|
DeleteMetadataKeys(ctx context.Context, docID string, datasetID string, keys []string, tenantID string) error
|
|
DropMetadataStore(ctx context.Context, tenantID string) error
|
|
MetadataStoreExists(ctx context.Context, tenantID string) (bool, error)
|
|
SearchMetadata(ctx context.Context, req *types.SearchMetadataRequest) (*types.SearchMetadataResult, error)
|
|
|
|
// Document operations (used by skill indexing)
|
|
IndexDocument(ctx context.Context, indexName, docID string, doc interface{}) error
|
|
DeleteDocument(ctx context.Context, indexName, docID string) error
|
|
BulkIndex(ctx context.Context, indexName string, docs []interface{}) (interface{}, error)
|
|
|
|
// Utility functions for search result processing
|
|
GetFields(chunks []map[string]interface{}, fields []string) map[string]map[string]interface{}
|
|
GetAggregation(chunks []map[string]interface{}, fieldName string) []map[string]interface{}
|
|
GetHighlight(chunks []map[string]interface{}, keywords []string, fieldName string) map[string]string
|
|
GetChunkIDs(chunks []map[string]interface{}) []string
|
|
KNNScores(ctx context.Context, chunks []map[string]interface{}, queryVector []float64, topK int) (map[string]interface{}, error)
|
|
GetScores(searchResult map[string]interface{}) map[string]float64
|
|
|
|
// Health check
|
|
Ping(ctx context.Context) error
|
|
Close() error
|
|
|
|
// GetType returns the engine type
|
|
GetType() string
|
|
|
|
// FilterDocIdsByMetaPushdown runs a metadata filter directly against
|
|
// the doc metadata index, returning matching doc IDs or nil if push-down
|
|
// is not supported (caller should fall back to in-memory filtering).
|
|
// conditions is a list of filter objects with keys: key, op, value
|
|
FilterDocIdsByMetaPushdown(ctx context.Context, kbIDs []string, conditions []map[string]interface{}, logic string) []string
|
|
}
|
|
|
|
// Type returns the engine type (helper method for runtime type checking)
|
|
// This is a workaround since we can't import elasticsearch or infinity packages directly
|
|
func Type(docEngine DocEngine) EngineType {
|
|
// Type checking through interface methods is not straightforward
|
|
// This is a placeholder that should be implemented differently
|
|
// or rely on configuration to know the type
|
|
return EngineType("unknown")
|
|
}
|
|
|
|
type MessageQueue interface {
|
|
Init() error
|
|
InitConsumer(subject string) error
|
|
PublishTask(subject string, payload []byte) error
|
|
GetMessages(messageCount int) ([]common.TaskHandle, error)
|
|
ListMessages(messageType string, pending bool) ([]map[string]string, error)
|
|
ShowMessageQueue() (map[string]string, error)
|
|
}
|