mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
### What problem does this PR solve?

Closes #15673 — ports the Python `file2document_api.py` `convert()` endpoint to Go.

| Method | Path | Handler |
|--------|------|---------|
| POST | `/api/v1/files/link-to-datasets` | `FileHandler.LinkToDatasets` |

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---

#### Implementation notes

**Files changed:**

```
internal/service/file2document.go – new service (File2DocumentService)
internal/dao/file2document.go – added Create method
internal/handler/file.go – FileHandler gains file2DocumentService; LinkToDatasets HTTP handler
internal/router/router.go – route registered
```

**Functional parity table:**

| Concern | Go behaviour |
|---------|-------------|
| Required fields | `file_ids` and `kb_ids` both required; missing either → `CodeDataError` mirroring Python `@validate_request` |
| File existence | `fileDAO.GetByIDs(fileIDs)` builds a set; any missing ID → `"File not found!"` |
| KB existence | `kbDAO.GetByID(kbID)` per KB; missing → `"Can't find this dataset!"` |
| Folder expansion | `getAllInnermostFileIDs` recursively calls `fileDAO.ListByParentID` — mirrors `FileService.get_all_innermost_file_ids` |
| File permissions | `checkFileTeamPermission`: `file.TenantID == userID` OR user in tenant's team — mirrors `check_file_team_permission` |
| KB permissions | `checkKBTeamPermission`: `kb.TenantID == userID` OR user in tenant's team — mirrors `check_kb_team_permission` |
| Fire-and-forget | `go convertFiles(...)` goroutine after all validation passes — mirrors `loop.run_in_executor(None, _convert_files, …)` |
| Conversion | `convertFiles`: for each file → delete existing mappings + hard-delete old documents → create new `Document` in each target KB → create `File2Document` mapping — mirrors Python `_convert_files` |
| `getParser` | Extension-based lookup with fallback to `kb.ParserID` — mirrors `FileService.get_parser` |
| Immediate return | `true` returned to caller as soon as goroutine is scheduled |

---------

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
90 lines
2.9 KiB
Go
90 lines
2.9 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
package dao
|
|
|
|
import (
|
|
"ragflow/internal/entity"
|
|
)
|
|
|
|
// File2DocumentDAO is the data access object for file-to-document mapping
// records. It carries no fields; its methods operate on the package-level DB
// handle.
type File2DocumentDAO struct{}

// NewFile2DocumentDAO returns a pointer to a newly allocated File2DocumentDAO.
func NewFile2DocumentDAO() *File2DocumentDAO {
	return new(File2DocumentDAO)
}
|
|
|
|
// GetKBInfoByFileID gets knowledge base info by file ID
|
|
func (dao *File2DocumentDAO) GetKBInfoByFileID(fileID string) ([]map[string]interface{}, error) {
|
|
var results []map[string]interface{}
|
|
|
|
rows, err := DB.Model(&entity.File{}).
|
|
Select("knowledgebase.id, knowledgebase.name, file2document.document_id").
|
|
Joins("JOIN file2document ON file2document.file_id = ?", fileID).
|
|
Joins("JOIN document ON document.id = file2document.document_id").
|
|
Joins("JOIN knowledgebase ON knowledgebase.id = document.kb_id").
|
|
Where("file.id = ?", fileID).
|
|
Rows()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
for rows.Next() {
|
|
var kbID, kbName, docID string
|
|
if err := rows.Scan(&kbID, &kbName, &docID); err != nil {
|
|
continue
|
|
}
|
|
results = append(results, map[string]interface{}{
|
|
"kb_id": kbID,
|
|
"kb_name": kbName,
|
|
"document_id": docID,
|
|
})
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// GetByFileID gets file2document mappings by file ID
|
|
func (dao *File2DocumentDAO) GetByFileID(fileID string) ([]*entity.File2Document, error) {
|
|
var mappings []*entity.File2Document
|
|
err := DB.Where("file_id = ?", fileID).Find(&mappings).Error
|
|
return mappings, err
|
|
}
|
|
|
|
// DeleteByFileID deletes file2document mappings by file ID
|
|
func (dao *File2DocumentDAO) DeleteByFileID(fileID string) error {
|
|
return DB.Unscoped().Where("file_id = ?", fileID).Delete(&entity.File2Document{}).Error
|
|
}
|
|
|
|
// GetByDocumentID gets file2document mappings by document ID
|
|
func (dao *File2DocumentDAO) GetByDocumentID(docID string) ([]*entity.File2Document, error) {
|
|
var mappings []*entity.File2Document
|
|
err := DB.Where("document_id = ?", docID).Find(&mappings).Error
|
|
return mappings, err
|
|
}
|
|
|
|
// DeleteByDocumentID deletes file2document mappings by document ID
|
|
func (dao *File2DocumentDAO) DeleteByDocumentID(docID string) error {
|
|
return DB.Unscoped().Where("document_id = ?", docID).Delete(&entity.File2Document{}).Error
|
|
}
|
|
|
|
// Create inserts a new file2document mapping record.
|
|
func (dao *File2DocumentDAO) Create(mapping *entity.File2Document) error {
|
|
return DB.Create(mapping).Error
|
|
}
|