mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-03 01:01:56 +08:00
Replaces the Python agent canvas runtime with a Go implementation that runs inside `cmd/server_main`. The canvas compiles into an eino Workflow that pauses on wait-for-user via native Interrupt/Resume (no sentinel flag) and resumes from a Redis-backed CheckPointStore. All 21 Python agent components and ~35 tools are ported with functional parity. Sandbox providers now read their JSON config from the admin-panel system_settings table with env fallback. 234 files / +35,413 / -6,111. All Go files are gofmt-clean (CI gate added); drops the v2 DSL E2E step and the gap-analysis plan (both redundant after the port ships). ## Type of change - [x] Refactoring - [x] New feature - [x] Bug fix 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude <noreply@anthropic.com>
317 lines
9.8 KiB
Go
317 lines
9.8 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
package tool
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/cloudwego/eino/components/tool"
|
|
"github.com/cloudwego/eino/schema"
|
|
)
|
|
|
|
const pubmedToolName = "pubmed"
|
|
|
|
const pubmedToolDescription = "Search PubMed via NCBI E-utilities. Returns {pmid, title, authors, journal, year}."
|
|
|
|
// pubmedParams is the JSON shape the model sends into InvokableRun.
|
|
type pubmedParams struct {
|
|
Query string `json:"query"`
|
|
MaxResults int `json:"max_results"`
|
|
}
|
|
|
|
// pubmedResult is one row in the returned record list.
|
|
type pubmedResult struct {
|
|
PMID string `json:"pmid"`
|
|
Title string `json:"title"`
|
|
Authors string `json:"authors"`
|
|
Journal string `json:"journal"`
|
|
Year string `json:"year"`
|
|
}
|
|
|
|
// pubmedEnvelope is what the model sees.
|
|
type pubmedEnvelope struct {
|
|
Results []pubmedResult `json:"results"`
|
|
Error string `json:"_ERROR,omitempty"`
|
|
}
|
|
|
|
// pubmedUserAgent is the User-Agent that NCBI requires for all
|
|
// E-utilities requests. Without it, requests are silently dropped
|
|
// or rate-limited to a single IP.
|
|
const pubmedUserAgent = "ragflow/1.0"
|
|
|
|
// pubmedESearchEndpoint is the E-utilities esearch URL. Exposed as a
|
|
// package var so tests can substitute a httptest.Server URL.
|
|
var pubmedESearchEndpoint = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
|
|
|
// pubmedESummaryEndpoint is the E-utilities esummary URL. Exposed
|
|
// as a package var so tests can substitute a httptest.Server URL.
|
|
var pubmedESummaryEndpoint = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
|
|
|
|
// PubMedTool is the PubMed
|
|
// search tool. It uses NCBI
|
|
// E-utilities: esearch returns a list of PMIDs, then esummary fetches
|
|
// the full records for those PMIDs.
|
|
type PubMedTool struct {
|
|
helper *HTTPHelper
|
|
}
|
|
|
|
// NewPubMedTool returns a PubMedTool using the default HTTPHelper.
|
|
func NewPubMedTool() *PubMedTool {
|
|
return NewPubMedToolWith(NewHTTPHelper())
|
|
}
|
|
|
|
// NewPubMedToolWith returns a PubMedTool that uses the provided
|
|
// HTTPHelper. Useful for tests.
|
|
func NewPubMedToolWith(h *HTTPHelper) *PubMedTool {
|
|
if h == nil {
|
|
h = NewHTTPHelper()
|
|
}
|
|
return &PubMedTool{helper: h}
|
|
}
|
|
|
|
// Info returns the tool's metadata for the chat model.
|
|
func (p *PubMedTool) Info(_ context.Context) (*schema.ToolInfo, error) {
|
|
return &schema.ToolInfo{
|
|
Name: pubmedToolName,
|
|
Desc: pubmedToolDescription,
|
|
ParamsOneOf: schema.NewParamsOneOfByParams(map[string]*schema.ParameterInfo{
|
|
"query": {
|
|
Type: schema.String,
|
|
Desc: "PubMed search query (full PubMed query syntax supported).",
|
|
Required: true,
|
|
},
|
|
"max_results": {
|
|
Type: schema.Integer,
|
|
Desc: "Maximum number of records to return. Defaults to 5 (max 100 per request).",
|
|
Required: false,
|
|
},
|
|
}),
|
|
}, nil
|
|
}
|
|
|
|
// buildPubMedESearchURL composes the esearch URL. Centralized for
|
|
// testability.
|
|
func buildPubMedESearchURL(query string, maxResults int) string {
|
|
if maxResults <= 0 {
|
|
maxResults = 5
|
|
}
|
|
if maxResults > 100 {
|
|
maxResults = 100
|
|
}
|
|
q := url.Values{}
|
|
q.Set("db", "pubmed")
|
|
q.Set("term", query)
|
|
q.Set("retmax", strconv.Itoa(maxResults))
|
|
q.Set("retmode", "json")
|
|
return pubmedESearchEndpoint + "?" + q.Encode()
|
|
}
|
|
|
|
// buildPubMedESummaryURL composes the esummary URL for a list of
|
|
// PMIDs. Centralized for testability.
|
|
func buildPubMedESummaryURL(pmids []string) string {
|
|
q := url.Values{}
|
|
q.Set("db", "pubmed")
|
|
q.Set("id", strings.Join(pmids, ","))
|
|
q.Set("retmode", "json")
|
|
return pubmedESummaryEndpoint + "?" + q.Encode()
|
|
}
|
|
|
|
// pubmedESearchResponse is the upstream esearch envelope.
|
|
type pubmedESearchResponse struct {
|
|
ESearchResult struct {
|
|
IDList []string `json:"idlist"`
|
|
} `json:"esearchresult"`
|
|
}
|
|
|
|
// pubmedESummaryAuthor is one author in the esummary author list.
|
|
type pubmedESummaryAuthor struct {
|
|
Name string `json:"name"`
|
|
}
|
|
|
|
// pubmedESummaryArticle is one article in the esummary result map.
|
|
type pubmedESummaryArticle struct {
|
|
Title string `json:"title"`
|
|
Authors []pubmedESummaryAuthor `json:"authors"`
|
|
FullJournalName string `json:"fulljournalname"`
|
|
PubDate string `json:"pubdate"`
|
|
}
|
|
|
|
// pubmedESummaryResponse is the upstream esummary envelope. The
|
|
// `result` value mixes per-PMID article objects with a string-list
|
|
// `uids` key, so we decode it as a map of RawMessage and then walk
|
|
// the entries to extract the article objects (skipping the `uids`
|
|
// array). See decodePubMedESummary.
|
|
type pubmedESummaryResponse struct {
|
|
Result map[string]json.RawMessage `json:"result"`
|
|
}
|
|
|
|
// mustReadAll is a tiny helper to read the entire response body. We
|
|
// keep it private because pubmed.go owns the only two-step HTTP dance.
|
|
func mustReadAll(r io.Reader) []byte {
|
|
b, err := io.ReadAll(r)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
return b
|
|
}
|
|
|
|
// decodePubMedESummary parses the raw esummary response and returns
|
|
// a PMID → article map. The upstream response is a flat object whose
|
|
// keys are PMIDs (article values) plus a `uids` key (string list).
|
|
// A single JSON decode into map[string]Article would fail on `uids`
|
|
// because Go's encoding/json cannot store arrays in a struct-typed
|
|
// map; the RawMessage indirection sidesteps that.
|
|
func decodePubMedESummary(body []byte) (map[string]pubmedESummaryArticle, error) {
|
|
var raw pubmedESummaryResponse
|
|
if err := json.Unmarshal(body, &raw); err != nil {
|
|
return nil, err
|
|
}
|
|
out := make(map[string]pubmedESummaryArticle, len(raw.Result))
|
|
for k, v := range raw.Result {
|
|
// The `uids` key is a JSON array of strings; skip it.
|
|
if len(v) > 0 && v[0] == '[' {
|
|
continue
|
|
}
|
|
var article pubmedESummaryArticle
|
|
if err := json.Unmarshal(v, &article); err != nil {
|
|
continue
|
|
}
|
|
out[k] = article
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// InvokableRun performs the two-step PubMed lookup.
|
|
func (p *PubMedTool) InvokableRun(ctx context.Context, argsJSON string, _ ...tool.Option) (string, error) {
|
|
var params pubmedParams
|
|
if err := json.Unmarshal([]byte(argsJSON), ¶ms); err != nil {
|
|
return pubmedErrJSON(fmt.Errorf("pubmed: parse arguments: %w", err)),
|
|
fmt.Errorf("pubmed: parse arguments: %w", err)
|
|
}
|
|
if strings.TrimSpace(params.Query) == "" {
|
|
return pubmedErrJSON(fmt.Errorf("query is required")),
|
|
fmt.Errorf("pubmed: query is required")
|
|
}
|
|
if params.MaxResults <= 0 {
|
|
params.MaxResults = 5
|
|
}
|
|
|
|
headers := map[string]string{
|
|
"User-Agent": pubmedUserAgent,
|
|
"Accept": "application/json",
|
|
}
|
|
|
|
// Step 1: esearch → list of PMIDs
|
|
searchURL := buildPubMedESearchURL(params.Query, params.MaxResults)
|
|
searchResp, err := p.helper.Do(ctx, http.MethodGet, searchURL, "", "", headers)
|
|
if err != nil {
|
|
return pubmedErrJSON(err), err
|
|
}
|
|
var searchBody pubmedESearchResponse
|
|
if decErr := json.NewDecoder(searchResp.Body).Decode(&searchBody); decErr != nil {
|
|
_ = searchResp.Body.Close()
|
|
return pubmedErrJSON(fmt.Errorf("pubmed: decode esearch: %w", decErr)),
|
|
fmt.Errorf("pubmed: decode esearch: %w", decErr)
|
|
}
|
|
_ = searchResp.Body.Close()
|
|
if searchResp.StatusCode < 200 || searchResp.StatusCode >= 300 {
|
|
return pubmedErrJSON(fmt.Errorf("pubmed: esearch returned %d", searchResp.StatusCode)),
|
|
fmt.Errorf("pubmed: esearch returned %d", searchResp.StatusCode)
|
|
}
|
|
|
|
pmids := searchBody.ESearchResult.IDList
|
|
if len(pmids) == 0 {
|
|
return pubmedJSON(pubmedEnvelope{Results: []pubmedResult{}}), nil
|
|
}
|
|
|
|
// Step 2: esummary → full records
|
|
summaryURL := buildPubMedESummaryURL(pmids)
|
|
summaryResp, err := p.helper.Do(ctx, http.MethodGet, summaryURL, "", "", headers)
|
|
if err != nil {
|
|
return pubmedErrJSON(err), err
|
|
}
|
|
defer summaryResp.Body.Close()
|
|
if summaryResp.StatusCode < 200 || summaryResp.StatusCode >= 300 {
|
|
return pubmedErrJSON(fmt.Errorf("pubmed: esummary returned %d", summaryResp.StatusCode)),
|
|
fmt.Errorf("pubmed: esummary returned %d", summaryResp.StatusCode)
|
|
}
|
|
|
|
articles, err := decodePubMedESummary(mustReadAll(summaryResp.Body))
|
|
if err != nil {
|
|
return pubmedErrJSON(fmt.Errorf("pubmed: parse esummary: %w", err)),
|
|
fmt.Errorf("pubmed: parse esummary: %w", err)
|
|
}
|
|
|
|
results := make([]pubmedResult, 0, len(pmids))
|
|
for _, pmid := range pmids {
|
|
article, ok := articles[pmid]
|
|
if !ok {
|
|
continue
|
|
}
|
|
results = append(results, pubmedResult{
|
|
PMID: pmid,
|
|
Title: strings.TrimSpace(article.Title),
|
|
Authors: joinAuthorNames(article.Authors),
|
|
Journal: article.FullJournalName,
|
|
Year: firstFourDigitYear(article.PubDate),
|
|
})
|
|
}
|
|
return pubmedJSON(pubmedEnvelope{Results: results}), nil
|
|
}
|
|
|
|
// joinAuthorNames joins the first N authors with ", " and adds
|
|
// "et al." for any beyond N. We use N=3 to mirror the convention
|
|
// common in academic citation styles.
|
|
func joinAuthorNames(authors []pubmedESummaryAuthor) string {
|
|
const cap = 3
|
|
if len(authors) == 0 {
|
|
return ""
|
|
}
|
|
if len(authors) <= cap {
|
|
names := make([]string, len(authors))
|
|
for i, a := range authors {
|
|
names[i] = a.Name
|
|
}
|
|
return strings.Join(names, ", ")
|
|
}
|
|
names := make([]string, 0, cap+1)
|
|
for i := range cap {
|
|
names = append(names, authors[i].Name)
|
|
}
|
|
names = append(names, "et al.")
|
|
return strings.Join(names, ", ")
|
|
}
|
|
|
|
func pubmedJSON(env pubmedEnvelope) string {
|
|
b, err := json.Marshal(env)
|
|
if err != nil {
|
|
return fmt.Sprintf(`{"_ERROR":"pubmed: marshal result: %s"}`, err)
|
|
}
|
|
return string(b)
|
|
}
|
|
|
|
func pubmedErrJSON(err error) string {
|
|
return pubmedJSON(pubmedEnvelope{Error: err.Error()})
|
|
}
|