Files
ragflow/cmd/server_main.go

322 lines
11 KiB
Go
Raw Normal View History

Go: add ingestion server (#15094) ### What problem does this PR solve? 1. Go ingestion server will connected with admin server with gRPC stream 2. Go ingestion server will be responsible for ingestion tasks ``` RAGFlow(admin)> list ingestors; +-----------------+-----------+----------------------------------+---------------------------+----------+------------+--------------+--------+------------+---------------+ | address | cpu_usage | id | last_heartbeat | name | process_id | rss_usage | status | task_count | vms_usage | +-----------------+-----------+----------------------------------+---------------------------+----------+------------+--------------+--------+------------+---------------+ | 127.0.0.1:58564 | 0 | bdd1870eea2646e0aacb8a2cd3307aa2 | 2026-05-24T18:16:17+08:00 | ingestor | 680152 | 212.72265625 | active | 0 | 2589.12109375 | +-----------------+-----------+----------------------------------+---------------------------+----------+------------+--------------+--------+------------+---------------+ RAGFlow(admin)> start ingestion 'abc'; +----------------------------------+ | task_id | +----------------------------------+ | e714777639ca4760ab427b5f211e81ad | +----------------------------------+ RAGFlow(admin)> stop ingestion 'f7bd39d0a724457eb5fdce6d81699776'; +----------------------------------+ | task_id | +----------------------------------+ | f7bd39d0a724457eb5fdce6d81699776 | +----------------------------------+ RAGFlow(admin)> list tasks; +-----+----------------------------------+-------+------+----------------------------------+---------------------------+------------+------------+ | ETA | assign_to | error | from | id | last_update | start_time | status | +-----+----------------------------------+-------+------+----------------------------------+---------------------------+------------+------------+ | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | eae6431da72a40e796cff3a03008091b | 2026-05-24T19:46:03+08:00 | | COMPLETED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | 6cccdd174bd049ecb05a774bbb47593f | 2026-05-24T19:46:03+08:00 | | COMPLETED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | ef360d777e57485799adb96b30f2b4b8 | 2026-05-24T19:46:03+08:00 | | CANCELED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | bcc5c5448cb64de48b6b6171c36fb790 | 2026-05-24T19:46:03+08:00 | | CANCELED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | bfc25384c43a443294fe2da979a38ac2 | 2026-05-24T19:46:03+08:00 | | DISPATCHED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | 84960537b85d413b8990a9efd5952d67 | 2026-05-24T19:46:04+08:00 | | DISPATCHED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | 3d223c1b51e24b36861a3bfb2f1d58d4 | 2026-05-24T19:46:03+08:00 | | CANCELED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | e433b0e356b846c89c301621a3c54494 | 2026-05-24T19:46:03+08:00 | | COMPLETED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | 7c93a3880f074ebd8eca14e6b51bb7ef | 2026-05-24T19:46:03+08:00 | | COMPLETED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | df2e4ef51aaf4390bff9a23f2692486e | 2026-05-24T19:46:04+08:00 | | DISPATCHED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | 7377c53010194ef7a83aa206698d66ff | 2026-05-24T19:46:05+08:00 | | DISPATCHED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | df64d1a1f9d348e3a2f174c4d7d69e73 | 2026-05-24T19:46:05+08:00 | | DISPATCHED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | b59834512e2847e1bdf13ace04b8a456 | 2026-05-24T19:46:06+08:00 | | DISPATCHED | | 0 | 17937da188b84f23a5c10bb87588944b | | CLI | 0064bb0ab69344028d1ecfda053826f4 | 2026-05-24T19:46:03+08:00 | | QUEUED | +-----+----------------------------------+-------+------+----------------------------------+---------------------------+------------+------------+ ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-05-25 14:00:08 +08:00
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package main
import (
"context"
"errors"
"flag"
"fmt"
"net/http"
"os"
"os/signal"
"ragflow/internal/common"
"ragflow/internal/server"
"ragflow/internal/server/local"
"ragflow/internal/storage"
"ragflow/internal/utility"
"strings"
"syscall"
"time"
"github.com/gin-gonic/gin"
"go.uber.org/zap"
"ragflow/internal/cache"
"ragflow/internal/dao"
"ragflow/internal/engine"
"ragflow/internal/handler"
"ragflow/internal/router"
"ragflow/internal/service"
"ragflow/internal/service/nlp"
"ragflow/internal/tokenizer"
)
func printHelp() {
fmt.Fprintf(os.Stderr, "Usage: %s [OPTIONS]\n\n", os.Args[0])
fmt.Fprintf(os.Stderr, "RAGFlow Server - Open-source RAG engine based on deep document understanding\n\n")
fmt.Fprintf(os.Stderr, "Options:\n")
fmt.Fprintf(os.Stderr, " -p, --port int\tServer port (overrides config file)\n")
fmt.Fprintf(os.Stderr, " -h, --help \tShow this help message and exit\n")
fmt.Fprintf(os.Stderr, "\nExamples:\n")
fmt.Fprintf(os.Stderr, " %s # Start server with config file port\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s -p 8080 # Start server on port 8080\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s --port 8080 # Start server on port 8080\n", os.Args[0])
}
func main() {
// Parse command line flags
var portFlag int
flag.IntVar(&portFlag, "port", 0, "Server port (overrides config file)")
flag.IntVar(&portFlag, "p", 0, "Server port (shorthand, overrides config file)")
// Custom help message
flag.Usage = printHelp
flag.Parse()
// Initialize logger with default level
// logger.Init("info"); // set debug log level
if err := common.Init("info"); err != nil {
panic(fmt.Sprintf("Failed to initialize logger: %v", err))
}
// Initialize configuration
if err := server.Init(""); err != nil {
common.Fatal("Failed to initialize config", zap.Error(err))
}
// Override port with command line argument if provided
config := server.GetConfig()
if portFlag > 0 {
config.Server.Port = portFlag
common.Info("Port overridden by command line argument", zap.Int("port", portFlag))
}
if config.Server.Port == 0 {
common.Fatal("Server port is not configured. Please specify via --port flag or config file.")
}
// Reinitialize logger with configured level if different
if config.Log.Level != "" && config.Log.Level != "info" {
if err := common.Init(config.Log.Level); err != nil {
common.Error("Failed to reinitialize logger with configured level", err)
}
}
server.SetLogger(common.Logger)
if config.Log.Level == "" {
config.Log.Level = common.GetLevel()
}
common.Info("Server mode", zap.String("mode", config.Server.Mode))
// Print all configuration settings
server.PrintAll()
// Initialize database
if err := dao.InitDB(); err != nil {
common.Fatal("Failed to initialize database", zap.Error(err))
}
// Initialize doc engine
if err := engine.Init(&config.DocEngine); err != nil {
common.Fatal("Failed to initialize doc engine", zap.Error(err))
}
defer engine.Close()
// Initialize Redis cache
if err := cache.Init(&config.Redis); err != nil {
common.Fatal("Failed to initialize Redis", zap.Error(err))
}
defer cache.Close()
if err := storage.InitStorageFactory(); err != nil {
common.Fatal("Failed to initialize storage factory", zap.Error(err))
}
// Initialize server variables (runtime variables that can change during operation)
// This must be done after Cache is initialized
if err := server.InitVariables(cache.Get()); err != nil {
common.Warn("Failed to initialize server variables from Redis, using defaults", zap.String("error", err.Error()))
}
// Initialize admin status (default: unavailable=1)
local.InitAdminStatus(1, "admin server not connected")
// Initialize tokenizer (rag_analyzer)
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
dictPath := os.Getenv("RAGFLOW_DICT_PATH")
if dictPath == "" {
dictPath = "/usr/share/infinity/resource"
}
tokenizerCfg := &tokenizer.PoolConfig{
feat: migrate DELETE /api/v1/datasets/:dataset_id/documents to Go (#15577) ## Summary Migrate the batch document deletion endpoint from Python to Go. Two modes supported: explicit `ids` list and `delete_all`. ## Changes | File | Change | |------|--------| | `internal/dao/file2document.go` | Add `GetByDocumentID`, `DeleteByDocumentID` | | `internal/dao/file2document_test.go` | 5 new tests | | `internal/dao/kb_test.go` | 2 new tests (`DecreaseDocumentNum`) | | `internal/service/document.go` | Add `deleteDocumentFull` + `DeleteDocuments`, refactor `DeleteDocument` | | `internal/service/document_test.go` | 10 new tests | | `internal/handler/document.go` | Add `documentServiceIface` + `DeleteDocuments` handler | | `internal/handler/document_test.go` | 7 new tests | | `internal/router/router.go` | Register `DELETE /:dataset_id/documents` | | `cmd/server_main.go` | Support `RAGFLOW_DICT_PATH` env var | | `internal/binding/rag_analyzer.go` | Use `-lpcre2-8` dynamic linking | | `internal/dao/database.go` | Skip Error 1091/1138 during migration | | `internal/service/llm.go` | Fix vet warning | ## Per-document cleanup - Delete tasks from DB - Hard-delete document + decrement KB counters - Delete chunks from document engine (nil-guarded) - Delete metadata from document engine (nil-guarded) - Remove file2document mapping + file record + storage blob ## Test Results **24 unit tests all passing** (7 DAO + 10 service + 7 handler) using SQLite :memory: + gin.TestMode. See [test report](docs/test_report_delete_documents.md) for manual integration test results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:55:53 +08:00
DictPath: dictPath,
}
if err := tokenizer.Init(tokenizerCfg); err != nil {
common.Fatal("Failed to initialize tokenizer", zap.Error(err))
}
defer tokenizer.Close()
// Initialize global QueryBuilder using tokenizer's DictPath
// This ensures the Synonym uses the same wordnet directory as tokenizer
if err := nlp.InitQueryBuilderFromTokenizer(tokenizerCfg.DictPath); err != nil {
common.Fatal("Failed to initialize query builder", zap.Error(err))
}
startServer(config)
common.Info("Server exited")
}
func startServer(config *server.Config) {
// Set Gin mode
if config.Server.Mode == "release" {
gin.SetMode(gin.ReleaseMode)
} else {
gin.SetMode(gin.DebugMode)
}
// Initialize service layer
userService := service.NewUserService()
documentService := service.NewDocumentService()
datasetsService := service.NewDatasetService()
knowledgebaseService := service.NewKnowledgebaseService()
metadataService := service.NewMetadataService()
chunkService := service.NewChunkService()
llmService := service.NewLLMService()
tenantService := service.NewTenantService()
chatService := service.NewChatService()
chatSessionService := service.NewChatSessionService()
systemService := service.NewSystemService()
connectorService := service.NewConnectorService()
searchService := service.NewSearchService()
fileService := service.NewFileService()
memoryService := service.NewMemoryService()
mcpService := service.NewMCPService()
modelProviderService := service.NewModelProviderService()
// Initialize doc engine for skill search
docEngine := engine.Get()
// Initialize handler layer
authHandler := handler.NewAuthHandler()
userHandler := handler.NewUserHandler(userService)
tenantHandler := handler.NewTenantHandler(tenantService, userService, knowledgebaseService)
documentHandler := handler.NewDocumentHandler(documentService, datasetsService)
datasetsHandler := handler.NewDatasetsHandler(datasetsService, metadataService)
systemHandler := handler.NewSystemHandler(systemService)
knowledgebaseHandler := handler.NewKnowledgebaseHandler(knowledgebaseService, userService, documentService)
chunkHandler := handler.NewChunkHandler(chunkService, userService)
llmHandler := handler.NewLLMHandler(llmService, userService)
chatHandler := handler.NewChatHandler(chatService, userService)
chatSessionHandler := handler.NewChatSessionHandler(chatSessionService, userService)
connectorHandler := handler.NewConnectorHandler(connectorService, userService)
searchHandler := handler.NewSearchHandler(searchService, userService)
fileHandler := handler.NewFileHandler(fileService, userService)
memoryHandler := handler.NewMemoryHandler(memoryService)
mcpHandler := handler.NewMCPHandler(mcpService)
skillSearchHandler := handler.NewSkillSearchHandler(docEngine)
providerHandler := handler.NewProviderHandler(userService, modelProviderService)
feat: implement POST /api/v1/agents/<agent_id>/upload API (#15633) ## Summary Implement the `POST /api/v1/agents/<agent_id>/upload` endpoint in Go, allowing file uploads associated with agent canvases. ### Changes - **Modified**: `internal/service/agent.go` — Added `CheckCanvasAccess` method (owner + team-level permission semantics) - **Modified**: `internal/handler/agent.go` — Added `UploadAgentFile` handler with auth check, multipart file parsing, and delegation to `FileService`. Added `fileUploader` interface for testability. - **Modified**: `internal/router/router.go` — Registered `POST /:agent_id/upload` route - **Modified**: `cmd/server_main.go` — Wired `fileService` into `AgentHandler` - **New**: `internal/service/agent_test.go` — 4 service-level tests for `CheckCanvasAccess` (owner, team member, private denial, not found) - **New**: `internal/handler/agent_upload_test.go` — 3 handler-level tests (success with fake file service, cross-user denial, empty file rejection) ### Testing All 7 tests pass with zero mocking of the DB layer (in-memory SQLite): ``` === RUN TestCheckCanvasAccess_Owner --- PASS === RUN TestCheckCanvasAccess_NotOwner --- PASS === RUN TestCheckCanvasAccess_PrivateCanvas_Denied --- PASS === RUN TestCheckCanvasAccess_NotFound --- PASS === RUN TestUploadAgentFileHandler_Success --- PASS === RUN TestUploadAgentFileHandler_NoPermission --- PASS === RUN TestUploadAgentFileHandler_NoFiles --- PASS ``` Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-04 17:21:47 +08:00
agentHandler := handler.NewAgentHandler(service.NewAgentService(), fileService)
relatedQuestionsHandler := handler.NewSearchbotHandler(
searchService,
tenantService,
&handler.SearchbotRealLLM{Svc: modelProviderService},
)
// Dify retrieval handler
docDAO := dao.NewDocumentDAO()
retrievalService := nlp.NewRetrievalService(docEngine, docDAO)
difyRetrievalHandler := handler.NewDifyRetrievalHandler(
knowledgebaseService,
modelProviderService,
metadataService,
retrievalService,
docDAO,
docEngine,
)
// Initialize router
r := router.NewRouter(authHandler, userHandler, tenantHandler, documentHandler, datasetsHandler, systemHandler, knowledgebaseHandler, chunkHandler, llmHandler, chatHandler, chatSessionHandler, connectorHandler, searchHandler, fileHandler, memoryHandler, mcpHandler, skillSearchHandler, providerHandler, agentHandler, relatedQuestionsHandler, difyRetrievalHandler)
// Create Gin engine
ginEngine := gin.New()
// Middleware
if config.Server.Mode == "debug" {
ginEngine.Use(gin.Logger())
}
ginEngine.Use(gin.Recovery())
// Setup routes
r.Setup(ginEngine)
// Create HTTP server with timeouts to prevent slow clients from blocking shutdown
addr := fmt.Sprintf(":%d", config.Server.Port)
srv := &http.Server{
Addr: addr,
Handler: ginEngine,
ReadHeaderTimeout: 10 * time.Second,
ReadTimeout: 60 * time.Second,
WriteTimeout: 120 * time.Second,
IdleTimeout: 120 * time.Second,
}
// Start server in a goroutine
go func() {
common.Info(
"\n ____ ___ ______ ______ __\n" +
" / __ \\ / | / ____// ____// /____ _ __\n" +
" / /_/ // /| | / / __ / /_ / // __ \\| | /| / /\n" +
" / _, _// ___ |/ /_/ // __/ / // /_/ /| |/ |/ /\n" +
" /_/ |_|/_/ |_|\\____//_/ /_/ \\____/ |__/|__/\n",
)
common.Info(fmt.Sprintf("RAGFlow Go Version: %s", utility.GetRAGFlowVersion()))
common.Info(fmt.Sprintf("Server starting on port: %d", config.Server.Port))
if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
common.Fatal("Failed to start server", zap.Error(err))
}
}()
// Get local IP address for heartbeat reporting
localIP, err := utility.GetLocalIP()
if err != nil {
common.Fatal("fail to get local ip address")
}
// Initialize and start heartbeat reporter to admin server
heartbeatService := service.NewHeartbeatSender(
common.Logger,
common.ServerTypeAPI,
fmt.Sprintf("ragflow-server-%d", config.Server.Port),
localIP,
config.Server.Port,
)
if err = heartbeatService.InitHTTPClient(); err != nil {
common.Warn("Failed to initialize heartbeat service", zap.Error(err))
} else {
// Start heartbeat reporter with 30 seconds interval
heartbeatReporter := utility.NewScheduledTask("Heartbeat reporter", 3*time.Second, func() {
if err = heartbeatService.SendHeartbeat(); err == nil {
local.SetAdminStatus(0, "")
} else {
local.SetAdminStatus(1, err.Error())
//logger.Warn(fmt.Sprintf(err.Error()))
}
})
heartbeatReporter.Start()
defer heartbeatReporter.Stop()
}
// Wait for interrupt signal to gracefully shutdown
quit := make(chan os.Signal, 1)
signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGUSR2)
sig := <-quit
common.Info(fmt.Sprintf("Receives %s signal to shutdown server", strings.ToUpper(sig.String())))
common.Info("Shutting down server...")
// Create context with timeout for graceful shutdown
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
// Shutdown server
if err = srv.Shutdown(ctx); err != nil {
common.Fatal("Server forced to shutdown", zap.Error(err))
}
}