mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
## Summary

Migrated the dataset document upload API (`POST /api/v1/datasets/:dataset_id/documents`) from Python to the Go backend. It supports local file uploads (`type=local`), web page ingestion (`type=web`), and empty document creation (`type=empty`).

## Changes

- **Router**: Registered `POST /api/v1/datasets/:dataset_id/documents` route.
- **Handler**: Implemented `UploadDocuments` handler and its routing functions (`uploadLocalDocuments`, `uploadWebDocument`, `uploadEmptyDocument`).
- **Service**: Implemented `UploadLocalDocuments`, `UploadWebDocument`, and `UploadEmptyDocument` in `DocumentService`.
- **Refactoring**: Moved permission checking logic to a shared helper for reuse in file and document services.
- **Tests**: Added comprehensive unit tests for the new handler and service upload paths.

## Verification

Ran and passed the test suite for service and handler packages:

- `go test ./internal/service`
- `go test ./internal/handler`
43 lines
975 B
Go
43 lines
975 B
Go
package service
|
|
|
|
import (
|
|
"encoding/hex"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"ragflow/internal/utility"
|
|
|
|
"github.com/zeebo/xxh3"
|
|
)
|
|
|
|
// presentationUploadPattern matches names ending in ".ppt", ".pptx", or
// ".pages", ignoring case.
var presentationUploadPattern = regexp.MustCompile(`(?i)\.(ppt|pptx|pages)$`)

// emailUploadPattern matches names ending in ".msg" or ".eml", ignoring case.
var emailUploadPattern = regexp.MustCompile(`(?i)\.(msg|eml)$`)
|
|
|
|
// selectUploadParser mirrors Python FileService.get_parser.
|
|
func selectUploadParser(docType utility.FileType, filename, defaultParser string) string {
|
|
switch docType {
|
|
case utility.FileTypeVISUAL:
|
|
return "picture"
|
|
case utility.FileTypeAURAL:
|
|
return "audio"
|
|
}
|
|
base := filepath.Base(strings.TrimSpace(filename))
|
|
switch {
|
|
case presentationUploadPattern.MatchString(base):
|
|
return "presentation"
|
|
case emailUploadPattern.MatchString(base):
|
|
return "email"
|
|
default:
|
|
return defaultParser
|
|
}
|
|
}
|
|
|
|
// contentHashHex mirrors Python xxhash.xxh128(blob).hexdigest().
|
|
func contentHashHex(blob []byte) string {
|
|
sum := xxh3.Hash128(blob).Bytes()
|
|
return hex.EncodeToString(sum[:])
|
|
}
|