mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-02 16:55:42 +08:00
### What problem does this PR solve?
1. Support the following commands:
```
RAGFlow(user)> create provider 'vllm' instance 'test' key 'test-key' url 'base-url' region 'abc';
SUCCESS
RAGFlow(user)> list instances from 'vllm';
+----------+----------------------------------------+----------------------------------+--------------+----------------------------------+--------+
| apiKey | extra | id | instanceName | providerID | status |
+----------+----------------------------------------+----------------------------------+--------------+----------------------------------+--------+
| test-key | {"base_url":"base-url","region":"abc"} | 40213c89430311f1a7cf38a74640adcc | test | b4d40e6142d311f1a4f938a74640adcc | enable |
+----------+----------------------------------------+----------------------------------+--------------+----------------------------------+--------+
```
2. Support adding a vLLM model:
```
RAGFlow(user)> add model 'Qwen/Qwen2-0.5B' to provider 'vllm' instance 'test' with tokens 131072 chat;
SUCCESS
```
3. Add vLLM chat support.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
---------
Signed-off-by: Jin Hai <haijin.chn@gmail.com>
183 lines
5.1 KiB
Go
183 lines
5.1 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
package models
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"ragflow/internal/logger"
|
|
|
|
"google.golang.org/genai"
|
|
)
|
|
|
|
// GoogleModel implements ModelDriver for Dummy AI
|
|
type GoogleModel struct {
|
|
BaseURL map[string]string
|
|
URLSuffix URLSuffix
|
|
}
|
|
|
|
// NewGoogleModel creates a new Google AI model instance
|
|
func NewGoogleModel(baseURL map[string]string, urlSuffix URLSuffix) *GoogleModel {
|
|
return &GoogleModel{
|
|
BaseURL: baseURL,
|
|
URLSuffix: urlSuffix,
|
|
}
|
|
}
|
|
|
|
func (z *GoogleModel) NewInstance(baseURL map[string]string) ModelDriver {
|
|
return nil
|
|
}
|
|
|
|
func (z *GoogleModel) Name() string {
|
|
return "google"
|
|
}
|
|
|
|
// Chat sends a message and returns response
|
|
func (z *GoogleModel) Chat(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
|
|
ctx := context.Background()
|
|
client, err := genai.NewClient(ctx, &genai.ClientConfig{
|
|
APIKey: *apiConfig.ApiKey,
|
|
Backend: genai.BackendGeminiAPI,
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
contents := []*genai.Content{
|
|
genai.NewContentFromText(*message, genai.RoleUser),
|
|
}
|
|
|
|
generateContentConfig := &genai.GenerateContentConfig{}
|
|
generateContentConfig.ThinkingConfig = &genai.ThinkingConfig{}
|
|
if chatModelConfig.Thinking != nil && *chatModelConfig.Thinking {
|
|
generateContentConfig.ThinkingConfig.IncludeThoughts = true
|
|
} else {
|
|
generateContentConfig.ThinkingConfig.IncludeThoughts = false
|
|
}
|
|
|
|
response, err := client.Models.GenerateContent(ctx, *modelName, contents, generateContentConfig)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
content := response.Text()
|
|
|
|
var responseContent string
|
|
if chatModelConfig.Thinking != nil && *chatModelConfig.Thinking {
|
|
responseContent = response.Candidates[0].Content.Parts[0].Text
|
|
}
|
|
|
|
chatResponse := &ChatResponse{
|
|
Answer: &content,
|
|
ReasonContent: &responseContent,
|
|
}
|
|
return chatResponse, nil
|
|
}
|
|
|
|
// ChatWithMessages sends multiple messages with roles and returns response
|
|
func (z *GoogleModel) ChatWithMessages(modelName string, apiKey *string, messages []Message, modelConfig *ChatConfig) (string, error) {
|
|
return "", fmt.Errorf("not implemented")
|
|
}
|
|
|
|
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
|
|
func (z *GoogleModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
|
|
ctx := context.Background()
|
|
client, err := genai.NewClient(ctx, &genai.ClientConfig{
|
|
APIKey: *apiConfig.ApiKey,
|
|
Backend: genai.BackendGeminiAPI,
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
contents := []*genai.Content{
|
|
genai.NewContentFromText(*message, genai.RoleUser),
|
|
}
|
|
for response, err := range client.Models.GenerateContentStream(
|
|
ctx,
|
|
*modelName,
|
|
contents,
|
|
nil,
|
|
) {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
content := response.Text()
|
|
|
|
var responseContent string
|
|
if chatModelConfig.Thinking != nil && *chatModelConfig.Thinking {
|
|
responseContent = response.Candidates[0].Content.Parts[0].Text
|
|
}
|
|
|
|
if responseContent != "" {
|
|
logger.Info(fmt.Sprintf("Thinking: %s", responseContent))
|
|
if err = sender(nil, &responseContent); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if content != "" {
|
|
logger.Info(fmt.Sprintf("Answer: %s", responseContent))
|
|
if err = sender(&content, nil); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
// Encode encodes a list of texts into embeddings
|
|
func (z *GoogleModel) Encode(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([][]float64, error) {
|
|
return nil, fmt.Errorf("not implemented")
|
|
}
|
|
|
|
func (z *GoogleModel) ListModels(apiConfig *APIConfig) ([]string, error) {
|
|
ctx := context.Background()
|
|
client, err := genai.NewClient(ctx, &genai.ClientConfig{
|
|
APIKey: *apiConfig.ApiKey,
|
|
Backend: genai.BackendGeminiAPI,
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Retrieve the list of models.
|
|
models, err := client.Models.List(ctx, &genai.ListModelsConfig{})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var modelNames []string
|
|
for _, m := range models.Items {
|
|
modelNames = append(modelNames, m.Name)
|
|
}
|
|
return modelNames, nil
|
|
}
|
|
|
|
func (z *GoogleModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) {
|
|
return nil, fmt.Errorf("no such method")
|
|
}
|
|
|
|
func (z *GoogleModel) CheckConnection(apiConfig *APIConfig) error {
|
|
return fmt.Errorf("no such method")
|
|
}
|
|
|
|
// Rerank calculates similarity scores between query and texts
|
|
func (z *GoogleModel) Rerank(modelName *string, query string, texts []string, apiConfig *APIConfig) ([]float64, error) {
|
|
return nil, fmt.Errorf("%s, Rerank not implemented", z.Name())
|
|
}
|