mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-03 01:01:56 +08:00
### What problem does this PR solve? ``` RAGFlow(admin)> mq publish 'msg2'; SUCCESS RAGFlow(admin)> mq publish 'msg3'; SUCCESS RAGFlow(admin)> mq list; +---------+---------------+ | message | subject | +---------+---------------+ | msg1 | tasks.RAGFLOW | | msg2 | tasks.RAGFLOW | | msg3 | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq pull 2; +---------+---------------+ | message | subject | +---------+---------------+ | msg1 | tasks.RAGFLOW | | msg2 | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq pull noack; +---------+---------------+ | message | subject | +---------+---------------+ | abc | tasks.RAGFLOW | +---------+---------------+ RAGFlow(admin)> mq show +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ | ack_pending_count | consumer_count | memory | message_count | pending_count | redelivered_count | waiting_count | +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ | 2 | 1 | 0 | 2 | 0 | 1 | 0 | +-------------------+----------------+--------+---------------+---------------+-------------------+---------------+ RAGFlow(admin)> list ingestors; +--------------+-------------------------------------------+--------+ | host | name | status | +--------------+-------------------------------------------+--------+ | 192.168.1.38 | ingestor-8f0e4bd5650a4ac58b0151969fbf6935 | alive | +--------------+-------------------------------------------+--------+ RAGFlow(admin)> list ingestion tasks; +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ | document_id | id | status | step | user | user_id | +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ | ffe64fae423411f1a2d938a74640adcc | 90d3d0f6528941c1ac8eb0360effccc4 | COMPLETED | 5 | 
aaa@aaa.com | 2ba4881420fa11f19e9c38a74640adcc | +----------------------------------+----------------------------------+-----------+------+-------------+----------------------------------+ RAGFlow(admin)> remove ingestion tasks '90d3d0f6528941c1ac8eb0360effccc4'; +---------+----------------------------------+ | delete | task_id | +---------+----------------------------------+ | success | 90d3d0f6528941c1ac8eb0360effccc4 | +---------+----------------------------------+ RAGFlow(admin)> stop ingestion tasks 'e89e20d9a25848a1b79bd9345ddbfe1d'; +----------+----------------------------------+ | status | task_id | +----------+----------------------------------+ | STOPPING | e89e20d9a25848a1b79bd9345ddbfe1d | +----------+----------------------------------+ # Publish a message RAGFlow(admin)> mq publish 'cdd'; SUCCESS # List current tasks in the message queue RAGFlow(admin)> mq list +----------------------------------+---------------+ | message | subject | +----------------------------------+---------------+ | 7ce392a3c1624cd2be4b5276e8825059 | tasks.RAGFLOW | +----------------------------------+---------------+ # Consume a task from the message queue RAGFlow(admin)> mq pull +------+-----+----------------+ | ack | id | type | +------+-----+----------------+ | true | cdd | ingestion_test | +------+-----+----------------+ # User mode # List ingestion tasks, followed by dataset id RAGFlow(user)> list ingestion tasks from '0abe79f9423311f1ad8d38a74640adcc'; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | 
+---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-05-30T20:21:06+08:00 | 1780143666289 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | 8d758cd14a8b4ba8ab505003fb52017d | | COMPLETED | 2026-05-30T20:21:26+08:00 | 1780143686431 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ RAGFlow(user)> list ingestion tasks; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-06-02T19:02:31+08:00 | 1780398151417 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | e89e20d9a25848a1b79bd9345ddbfe1d | | COMPLETED | 2026-06-02T19:02:52+08:00 | 1780398172208 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ # Create an ingestion task # First argument is document id, second argument is dataset id 
RAGFlow(user)> start ingestion 'ffe64fae423411f1a2d938a74640adcc' from '0abe79f9423311f1ad8d38a74640adcc'; +----------------------------------+-------------------------------------------+ | document_id | result | +----------------------------------+-------------------------------------------+ | ffe64fae423411f1a2d938a74640adcc | task_id: 8d758cd14a8b4ba8ab505003fb52017d | +----------------------------------+-------------------------------------------+ # Pause an ingestion task, first argument is ingestion id RAGFlow(user)> stop ingestion '8d758cd14a8b4ba8ab505003fb52017d'; +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | create_date | create_time | dataset_id | document_id | id | schema | status | update_date | update_time | user_id | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ | 2026-05-30T20:21:06+08:00 | 1780143666289 | 0abe79f9423311f1ad8d38a74640adcc | ffe64fae423411f1a2d938a74640adcc | 8d758cd14a8b4ba8ab505003fb52017d | | COMPLETED | 2026-05-30T20:21:26+08:00 | 1780143686431 | 2ba4881420fa11f19e9c38a74640adcc | +---------------------------+---------------+----------------------------------+----------------------------------+----------------------------------+--------+-----------+---------------------------+---------------+----------------------------------+ # Delete an ingestion task RAGFlow(api/default)> remove ingestion tasks 'f366450a27d54677aec1c7090add30f0'; +---------+----------------------------------+ | remove | task_id | +---------+----------------------------------+ | success | f366450a27d54677aec1c7090add30f0 | 
+---------+----------------------------------+ ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
229 lines
6.0 KiB
Go
229 lines
6.0 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
package dao
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"ragflow/internal/common"
|
|
"ragflow/internal/entity"
|
|
"ragflow/internal/entity/models"
|
|
"strings"
|
|
"time"
|
|
|
|
"ragflow/internal/server"
|
|
|
|
"go.uber.org/zap"
|
|
gormLogger "gorm.io/gorm/logger"
|
|
|
|
"gorm.io/driver/mysql"
|
|
"gorm.io/gorm"
|
|
)
|
|
|
|
var DB *gorm.DB
|
|
var modelProviderManager *models.ProviderManager
|
|
|
|
// LLMFactoryConfig represents a single LLM factory configuration
|
|
type LLMFactoryConfig struct {
|
|
Name string `json:"name"`
|
|
Logo string `json:"logo"`
|
|
Tags string `json:"tags"`
|
|
Status string `json:"status"`
|
|
Rank string `json:"rank"`
|
|
LLM []LLMConfig `json:"llm"`
|
|
}
|
|
|
|
// LLMConfig is the JSON shape of one model entry inside an LLM factory.
type LLMConfig struct {
	// LLMName maps to the "llm_name" JSON key.
	LLMName string `json:"llm_name"`

	// Tags maps to the "tags" JSON key and is carried as a single string.
	Tags string `json:"tags"`

	// MaxTokens maps to the "max_tokens" JSON key.
	MaxTokens int64 `json:"max_tokens"`

	// ModelType maps to the "model_type" JSON key.
	ModelType string `json:"model_type"`

	// IsTools maps to the "is_tools" JSON key.
	IsTools bool `json:"is_tools"`
}
|
|
|
|
// LLMFactoriesFile represents the structure of llm_factories.json
|
|
type LLMFactoriesFile struct {
|
|
FactoryLLMInfos []LLMFactoryConfig `json:"factory_llm_infos"`
|
|
}
|
|
|
|
// InitDB initialize database connection
|
|
func InitDB() error {
|
|
cfg := server.GetConfig()
|
|
dbCfg := cfg.Database
|
|
|
|
dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=%s&parseTime=True&loc=Local",
|
|
dbCfg.Username,
|
|
dbCfg.Password,
|
|
dbCfg.Host,
|
|
dbCfg.Port,
|
|
dbCfg.Database,
|
|
dbCfg.Charset,
|
|
)
|
|
|
|
// Set log level
|
|
var gormLogLevel gormLogger.LogLevel
|
|
if cfg.Server.Mode == "debug" {
|
|
gormLogLevel = gormLogger.Info
|
|
} else {
|
|
gormLogLevel = gormLogger.Silent
|
|
}
|
|
|
|
// Connect to database
|
|
var err error
|
|
DB, err = gorm.Open(mysql.Open(dsn), &gorm.Config{
|
|
Logger: gormLogger.Default.LogMode(gormLogLevel),
|
|
NowFunc: func() time.Time {
|
|
return time.Now().Local()
|
|
},
|
|
TranslateError: true,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to connect database: %w", err)
|
|
}
|
|
|
|
// Get general database object sql.DB
|
|
sqlDB, err := DB.DB()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get database instance: %w", err)
|
|
}
|
|
|
|
// Set connection pool
|
|
sqlDB.SetMaxIdleConns(10)
|
|
sqlDB.SetMaxOpenConns(100)
|
|
sqlDB.SetConnMaxLifetime(time.Hour)
|
|
|
|
// Auto migrate all dataModels
|
|
dataModels := []interface{}{
|
|
&entity.User{},
|
|
&entity.Tenant{},
|
|
&entity.UserTenant{},
|
|
&entity.File{},
|
|
&entity.File2Document{},
|
|
&entity.TenantLLM{},
|
|
&entity.Chat{},
|
|
&entity.ChatSession{},
|
|
&entity.Task{},
|
|
&entity.APIToken{},
|
|
&entity.API4Conversation{},
|
|
&entity.Knowledgebase{},
|
|
&entity.InvitationCode{},
|
|
&entity.Document{},
|
|
&entity.UserCanvas{},
|
|
&entity.CanvasTemplate{},
|
|
&entity.UserCanvasVersion{},
|
|
&entity.LLMFactories{},
|
|
&entity.LLM{},
|
|
&entity.TenantLangfuse{},
|
|
&entity.SystemSettings{},
|
|
&entity.Connector{},
|
|
&entity.Connector2Kb{},
|
|
&entity.SyncLogs{},
|
|
&entity.MCPServer{},
|
|
&entity.Memory{},
|
|
&entity.Search{},
|
|
&entity.PipelineOperationLog{},
|
|
&entity.EvaluationDataset{},
|
|
&entity.EvaluationCase{},
|
|
&entity.EvaluationRun{},
|
|
&entity.EvaluationResult{},
|
|
&entity.TimeRecord{},
|
|
&entity.License{},
|
|
&entity.SkillSearchConfig{},
|
|
&entity.TenantModelInstance{},
|
|
&entity.TenantModel{},
|
|
&entity.TenantModelGroupMapping{},
|
|
&entity.TenantModelProvider{},
|
|
&entity.TenantModelGroup{},
|
|
&entity.IngestionTask{},
|
|
&entity.IngestionTaskLog{},
|
|
&entity.IngestionTasklet{},
|
|
&entity.IngestionTaskletLog{},
|
|
}
|
|
|
|
for _, m := range dataModels {
|
|
if err = autoMigrateSafely(DB, m); err != nil {
|
|
return fmt.Errorf("failed to migrate model %T: %w", m, err)
|
|
}
|
|
}
|
|
|
|
// Run manual migrations for complex schema changes
|
|
if err = RunMigrations(DB); err != nil {
|
|
return fmt.Errorf("failed to run manual migrations: %w", err)
|
|
}
|
|
|
|
common.Info("Database connected and migrated successfully")
|
|
|
|
err = models.InitProviderManager("conf/models")
|
|
if err != nil {
|
|
log.Fatal("Failed to load model providers:", err)
|
|
}
|
|
|
|
modelProviderManager = models.GetProviderManager()
|
|
common.Info("Model providers loaded successfully")
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetDB get database instance
|
|
func GetDB() *gorm.DB {
|
|
return DB
|
|
}
|
|
|
|
// GetModelProviderManager get database instance
|
|
func GetModelProviderManager() *models.ProviderManager {
|
|
return modelProviderManager
|
|
}
|
|
|
|
// autoMigrateSafely runs AutoMigrate and ignores duplicate index errors
|
|
// This handles cases where indexes already exist (e.g., created by Python backend)
|
|
func autoMigrateSafely(db *gorm.DB, model interface{}) error {
|
|
err := db.AutoMigrate(model)
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
|
|
// Check if error is MySQL duplicate index error (Error 1061)
|
|
errStr := err.Error()
|
|
if strings.Contains(errStr, "Error 1061") && strings.Contains(errStr, "Duplicate key name") {
|
|
common.Info("Index already exists, skipping", zap.String("error", errStr))
|
|
return nil
|
|
}
|
|
|
|
if strings.Contains(errStr, "Error 1060") && strings.Contains(errStr, "Duplicate column name") {
|
|
common.Info("Column already exists, skipping", zap.String("error", errStr))
|
|
return nil
|
|
}
|
|
|
|
if strings.Contains(errStr, "Error 1050") && strings.Contains(errStr, "Table") {
|
|
common.Info("Table already exists, skipping", zap.String("error", errStr))
|
|
return nil
|
|
}
|
|
|
|
if strings.Contains(errStr, "Error 1091") && strings.Contains(errStr, "Can't DROP") {
|
|
common.Info("Index/column already dropped, skipping", zap.String("error", errStr))
|
|
return nil
|
|
}
|
|
|
|
if strings.Contains(errStr, "Error 1138") && strings.Contains(errStr, "Invalid use of NULL") {
|
|
common.Info("NULL value in existing rows, skipping migration change", zap.String("error", errStr))
|
|
return nil
|
|
}
|
|
|
|
return err
|
|
}
|