Files
ragflow/internal/service/document_upload_helpers.go
Hz_ a6cc3023c5 feat(go-api): implement dataset document upload API (#16295)
## Summary
Migrated the dataset document upload API (`POST
/api/v1/datasets/:dataset_id/documents`) from Python to the Go backend.
It supports local file uploads (`type=local`), web page ingestion
(`type=web`), and empty document creation (`type=empty`).

## Changes
- **Router**: Registered `POST /api/v1/datasets/:dataset_id/documents`
route.
- **Handler**: Implemented `UploadDocuments` handler and its routing
functions (`uploadLocalDocuments`, `uploadWebDocument`,
`uploadEmptyDocument`).
- **Service**: Implemented `UploadLocalDocuments`, `UploadWebDocument`,
and `UploadEmptyDocument` in `DocumentService`.
- **Refactoring**: Moved permission checking logic to a shared helper
for reuse in file and document services.
- **Tests**: Added comprehensive unit tests for the new handler and
service upload paths.

## Verification
Ran and passed the test suite for service and handler packages:
- `go test ./internal/service`
- `go test ./internal/handler`
2026-06-25 13:36:49 +08:00

43 lines
975 B
Go

package service
import (
"encoding/hex"
"path/filepath"
"regexp"
"strings"
"ragflow/internal/utility"
"github.com/zeebo/xxh3"
)
// File-name patterns consulted by selectUploadParser. Both are
// case-insensitive ((?i)) and anchored at the end of the name, so only the
// trailing extension is considered.
var (
	// emailUploadPattern matches names ending in ".msg" or ".eml".
	emailUploadPattern = regexp.MustCompile(`(?i)\.(msg|eml)$`)
	// presentationUploadPattern matches names ending in ".ppt", ".pptx" or
	// ".pages".
	presentationUploadPattern = regexp.MustCompile(`(?i)\.(ppt|pptx|pages)$`)
)
// selectUploadParser picks the parser name for an uploaded document. It
// mirrors Python FileService.get_parser.
//
// The detected document type wins first: visual files map to "picture" and
// aural files to "audio". Otherwise the trimmed base name of filename is
// tested against the presentation and email extension patterns, yielding
// "presentation" or "email". When nothing matches, defaultParser is returned
// unchanged.
func selectUploadParser(docType utility.FileType, filename, defaultParser string) string {
	// The type-based decision takes precedence over anything in the name.
	if docType == utility.FileTypeVISUAL {
		return "picture"
	}
	if docType == utility.FileTypeAURAL {
		return "audio"
	}

	// Strip surrounding whitespace and any directory part before matching,
	// so only the final path element's extension is inspected.
	name := filepath.Base(strings.TrimSpace(filename))
	if presentationUploadPattern.MatchString(name) {
		return "presentation"
	}
	if emailUploadPattern.MatchString(name) {
		return "email"
	}
	return defaultParser
}
// contentHashHex returns the hex-encoded 128-bit XXH3 digest of blob. It
// mirrors Python xxhash.xxh128(blob).hexdigest().
func contentHashHex(blob []byte) string {
	// Bytes() yields a fixed-size array; keep it in a local so it can be
	// sliced for the encoder.
	digest := xxh3.Hash128(blob).Bytes()

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest[:])
	return string(encoded)
}