From 4538910b5283b64c07ccdc2c9246e69ebb46b51c Mon Sep 17 00:00:00 2001 From: chanx <1243304602@qq.com> Date: Fri, 10 Apr 2026 12:15:27 +0800 Subject: [PATCH] feat: Implement file-related functionality (#14011) ### What problem does this PR solve? feat: Implement file-related functionality - Implement file deletion API and business logic - Add context support for file deletion operations and prevent root folder deletion - Implement file move functionality - Add File Download API Endpoints and Utility Functions ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Co-authored-by: Yingfeng --- internal/dao/document.go | 5 + internal/dao/file.go | 91 +++-- internal/dao/file2document.go | 12 + internal/handler/file.go | 183 ++++++++++ internal/router/router.go | 3 + internal/service/file.go | 549 ++++++++++++++++++++++++++++- internal/{util => utility}/file.go | 116 +++++- 7 files changed, 922 insertions(+), 37 deletions(-) rename internal/{util => utility}/file.go (50%) diff --git a/internal/dao/document.go b/internal/dao/document.go index 8c73f1074a..ddd13e35ad 100644 --- a/internal/dao/document.go +++ b/internal/dao/document.go @@ -62,6 +62,11 @@ func (dao *DocumentDAO) Update(document *entity.Document) error { return DB.Save(document).Error } +// UpdateByID updates document by ID with the given fields +func (dao *DocumentDAO) UpdateByID(id string, updates map[string]interface{}) error { + return DB.Model(&entity.Document{}).Where("id = ?", id).Updates(updates).Error +} + // Delete delete document func (dao *DocumentDAO) Delete(id string) error { return DB.Delete(&entity.Document{}, "id = ?", id).Error diff --git a/internal/dao/file.go b/internal/dao/file.go index de4fffb76a..347c04f6ea 100644 --- a/internal/dao/file.go +++ b/internal/dao/file.go @@ -221,6 +221,23 @@ func (dao *FileDAO) GetAllIDsByTenantID(tenantID string) ([]string, error) { return ids, err } +// GetByIDs gets files by multiple IDs +func (dao *FileDAO) 
GetByIDs(ids []string) ([]*entity.File, error) { + var files []*entity.File + if len(ids) == 0 { + return files, nil + } + err := DB.Where("id IN ?", ids).Find(&files).Error + return files, err +} + +// ListAllFilesByParentID lists all files by parent folder ID +func (dao *FileDAO) ListAllFilesByParentID(parentID string) ([]*entity.File, error) { + var files []*entity.File + err := DB.Where("parent_id = ? AND id != ?", parentID, parentID).Find(&files).Error + return files, err +} + // GetByParentIDAndName gets file by parent folder ID and name func (dao *FileDAO) GetByParentIDAndName(parentID, name string) (*entity.File, error) { var file entity.File @@ -291,10 +308,40 @@ func (dao *FileDAO) Query(name string, parentID string) []*entity.File { return files } -// UpdateByID updates file by ID -func (dao *FileDAO) UpdateByID(id string, updates map[string]interface{}) bool { - result := DB.Model(&entity.File{}).Where("id = ?", id).Updates(updates) - return result.RowsAffected > 0 +// UpdateByID updates file by ID with the given fields +func (dao *FileDAO) UpdateByID(id string, updates map[string]interface{}) error { + return DB.Model(&entity.File{}).Where("id = ?", id).Updates(updates).Error +} + +// Delete deletes a file by ID (hard delete) +func (dao *FileDAO) Delete(id string) error { + return DB.Unscoped().Where("id = ?", id).Delete(&entity.File{}).Error +} + +// GetDatasetIDByFileID gets dataset ID by file ID +func (dao *FileDAO) GetDatasetIDByFileID(fileID string) ([]string, error) { + var datasetIDs []string + rows, err := DB.Model(&entity.File{}). + Select("knowledgebase.id"). + Joins("JOIN file2document ON file2document.file_id = ?", fileID). + Joins("JOIN document ON document.id = file2document.document_id"). + Joins("JOIN knowledgebase ON knowledgebase.id = document.kb_id"). + Where("file.id = ?", fileID). 
+ Rows() + if err != nil { + return nil, err + } + defer rows.Close() + + for rows.Next() { + var kbID string + if err := rows.Scan(&kbID); err != nil { + continue + } + datasetIDs = append(datasetIDs, kbID) + } + + return datasetIDs, nil } // generateUUID generates a UUID @@ -303,15 +350,15 @@ func generateUUID() string { return strings.ReplaceAll(id, "-", "") } -// KnowledgebaseFolderName is the folder name for knowledgebase -const KnowledgebaseFolderName = ".knowledgebase" +// DatasetFolderName is the folder name for dataset +const DatasetFolderName = ".knowledgebase" -// InitKnowledgebaseDocs initializes knowledgebase documents for tenant -// This matches Python's FileService.init_knowledgebase_docs method -func (dao *FileDAO) InitKnowledgebaseDocs(rootID, tenantID string, file2DocumentDAO *File2DocumentDAO) error { +// InitDatasetDocs initializes dataset documents for tenant +// This matches Python's FileService.init_dataset_docs method +func (dao *FileDAO) InitDatasetDocs(rootID, tenantID string, file2DocumentDAO *File2DocumentDAO) error { var count int64 err := DB.Model(&entity.File{}). - Where("name = ? AND parent_id = ?", KnowledgebaseFolderName, rootID). + Where("name = ? AND parent_id = ?", DatasetFolderName, rootID). Count(&count).Error if err != nil { return err @@ -321,41 +368,43 @@ func (dao *FileDAO) InitKnowledgebaseDocs(rootID, tenantID string, file2Document return nil } - kbFolder, err := dao.newAFileFromKB(tenantID, KnowledgebaseFolderName, rootID) + datasetFolder, err := dao.newAFileFromDataset(tenantID, DatasetFolderName, rootID) if err != nil { return err } - var knowledgebases []entity.Knowledgebase + var datasets []entity.Knowledgebase err = DB.Select("id", "name"). Where("tenant_id = ?", tenantID). 
- Find(&knowledgebases).Error + Find(&datasets).Error if err != nil { return err } - for _, kb := range knowledgebases { - kbFolderForKB, err := dao.newAFileFromKB(tenantID, kb.Name, kbFolder.ID) + for _, ds := range datasets { + datasetFolderForDataset, err := dao.newAFileFromDataset(tenantID, ds.Name, datasetFolder.ID) if err != nil { continue } var documents []entity.Document - err = DB.Where("kb_id = ?", kb.ID).Find(&documents).Error + err = DB.Where("kb_id = ?", ds.ID).Find(&documents).Error if err != nil { continue } for _, doc := range documents { - dao.addFileFromKB(&doc, kbFolderForKB.ID, tenantID, file2DocumentDAO) + if err := dao.addFileFromKB(&doc, datasetFolderForDataset.ID, tenantID, file2DocumentDAO); err != nil { + return err + } } } return nil } -// newAFileFromKB creates a new file from knowledgebase -func (dao *FileDAO) newAFileFromKB(tenantID, name, parentID string) (*entity.File, error) { +// newAFileFromDataset creates a new file from knowledgebase +func (dao *FileDAO) newAFileFromDataset(tenantID, name, parentID string) (*entity.File, error) { var existingFiles []*entity.File err := DB.Where("tenant_id = ? AND parent_id = ? AND name = ?", tenantID, parentID, name).Find(&existingFiles).Error if err != nil { @@ -385,7 +434,7 @@ func (dao *FileDAO) newAFileFromKB(tenantID, name, parentID string) (*entity.Fil } // addFileFromKB adds a file record from knowledgebase document -func (dao *FileDAO) addFileFromKB(doc *entity.Document, kbFolderID, tenantID string, file2DocumentDAO *File2DocumentDAO) error { +func (dao *FileDAO) addFileFromKB(doc *entity.Document, datasetFolderID, tenantID string, file2DocumentDAO *File2DocumentDAO) error { var f2dCount int64 err := DB.Model(&entity.File2Document{}). Where("document_id = ?", doc.ID). 
@@ -411,7 +460,7 @@ func (dao *FileDAO) addFileFromKB(doc *entity.Document, kbFolderID, tenantID str fileID := generateUUID() file := &entity.File{ ID: fileID, - ParentID: kbFolderID, + ParentID: datasetFolderID, TenantID: tenantID, CreatedBy: tenantID, Name: docName, diff --git a/internal/dao/file2document.go b/internal/dao/file2document.go index 5252e7826d..762165f620 100644 --- a/internal/dao/file2document.go +++ b/internal/dao/file2document.go @@ -58,3 +58,15 @@ func (dao *File2DocumentDAO) GetKBInfoByFileID(fileID string) ([]map[string]inte return results, nil } + +// GetByFileID gets file2document mappings by file ID +func (dao *File2DocumentDAO) GetByFileID(fileID string) ([]*entity.File2Document, error) { + var mappings []*entity.File2Document + err := DB.Where("file_id = ?", fileID).Find(&mappings).Error + return mappings, err +} + +// DeleteByFileID deletes file2document mappings by file ID +func (dao *File2DocumentDAO) DeleteByFileID(fileID string) error { + return DB.Unscoped().Where("file_id = ?", fileID).Delete(&entity.File2Document{}).Error +} diff --git a/internal/handler/file.go b/internal/handler/file.go index cc31a1db39..37beaa2055 100644 --- a/internal/handler/file.go +++ b/internal/handler/file.go @@ -18,7 +18,10 @@ package handler import ( "net/http" + "net/url" "ragflow/internal/common" + "ragflow/internal/storage" + "ragflow/internal/utility" "strconv" "strings" @@ -329,3 +332,183 @@ func (h *FileHandler) UploadFile(c *gin.Context) { jsonError(c, common.CodeBadRequest, "Unsupported content type") return } + +type DeleteFileRequest struct { + IDs []string `json:"ids" binding:"required,min=1"` +} + +// DeleteFiles deletes files +// @Summary Delete Files +// @Description Delete files by IDs +// @Tags file +// @Accept json +// @Produce json +// @Param ids body DeleteFileRequest true "file IDs to delete" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/files [delete] +func (h *FileHandler) DeleteFiles(c *gin.Context) { + user, 
errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + var req DeleteFileRequest + if err := c.ShouldBindJSON(&req); err != nil { + jsonError(c, common.CodeBadRequest, err.Error()) + return + } + + success, message := h.fileService.DeleteFiles(c.Request.Context(), user.ID, req.IDs) + if !success { + jsonError(c, common.CodeBadRequest, message) + return + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": true, + "message": common.CodeSuccess.Message(), + }) +} + +// MoveFileRequest represents the request body for move files operation +type MoveFileRequest struct { + SrcFileIDs []string `json:"src_file_ids" binding:"required,min=1"` + DestFileID string `json:"dest_file_id"` + NewName string `json:"new_name" binding:"max=255"` +} + +// MoveFiles moves and/or renames files +// @Summary Move Files +// @Description Move and/or rename files. Follows Linux mv semantics: +// - dest_file_id only: move files to a new folder (names unchanged) +// - new_name only: rename a single file in place (no storage operation) +// - both: move and rename simultaneously +// @Tags file +// @Accept json +// @Produce json +// @Param body body MoveFileRequest true "Move file request" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/files/move [post] +func (h *FileHandler) MoveFiles(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + var req MoveFileRequest + if err := c.ShouldBindJSON(&req); err != nil { + jsonError(c, common.CodeBadRequest, err.Error()) + return + } + + // Validate: at least one of dest_file_id or new_name must be provided + if req.DestFileID == "" && req.NewName == "" { + jsonError(c, common.CodeParamError, "At least one of dest_file_id or new_name must be provided") + return + } + + // Validate: new_name can only be used with a single file + if 
req.NewName != "" && len(req.SrcFileIDs) > 1 { + jsonError(c, common.CodeParamError, "new_name can only be used with a single file") + return + } + + success, message := h.fileService.MoveFiles(user.ID, req.SrcFileIDs, req.DestFileID, req.NewName) + if !success { + jsonError(c, common.CodeBadRequest, message) + return + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": true, + "message": common.CodeSuccess.Message(), + }) +} + +// Download handles file download +// @Summary Download File +// @Description Download a file by ID +// @Tags file +// @Accept json +// @Produce octet-stream +// @Param file_id path string true "file ID" +// @Success 200 {file} binary "File stream" +// @Router /api/v1/files/{file_id} [get] +func (h *FileHandler) Download(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + userID := user.ID + + fileID := c.Param("id") + if fileID == "" { + jsonError(c, common.CodeParamError, "id is required") + return + } + + // Get file metadata and check permission + file, err := h.fileService.GetFileContent(userID, fileID) + if err != nil { + jsonError(c, common.CodeUnauthorized, err.Error()) + return + } + + // Get storage + storageImpl := storage.GetStorageFactory().GetStorage() + if storageImpl == nil { + jsonError(c, common.CodeServerError, "storage not initialized") + return + } + + // Try to get file blob from primary location (parent_id, location) + var blob []byte + var getErr error + if file.Location != nil && *file.Location != "" { + blob, getErr = storageImpl.Get(file.ParentID, *file.Location) + } + + // If blob is empty, try fallback via file2document + if len(blob) == 0 { + storageAddr, err := h.fileService.GetStorageAddress(fileID) + if err != nil { + jsonError(c, common.CodeServerError, "Failed to get file storage address: "+err.Error()) + return + } + blob, getErr = storageImpl.Get(storageAddr.Bucket, 
storageAddr.Name) + } + + // Check if we got valid data + if len(blob) == 0 { + errMsg := "Failed to retrieve file blob" + if getErr != nil { + errMsg += ": " + getErr.Error() + } + jsonError(c, common.CodeServerError, errMsg) + return + } + + // Extract file extension + ext := utility.GetFileExtension(file.Name) + + // Determine content type based on extension and file type + contentType := utility.GetContentType(ext, file.Type) + + // Set response headers + if contentType != "" { + c.Header("Content-Type", contentType) + } + if utility.ShouldForceAttachment(ext, contentType) { + c.Header("X-Content-Type-Options", "nosniff") + encodedName := url.QueryEscape(file.Name) + c.Header("Content-Disposition", "attachment; filename*=UTF-8''"+encodedName) + } + + // Send file data + c.Data(http.StatusOK, contentType, blob) +} diff --git a/internal/router/router.go b/internal/router/router.go index b80acf88cc..972bdd83e4 100644 --- a/internal/router/router.go +++ b/internal/router/router.go @@ -204,6 +204,9 @@ func (r *Router) Setup(engine *gin.Engine) { { file.POST("", r.fileHandler.UploadFile) file.GET("", r.fileHandler.ListFiles) + file.DELETE("", r.fileHandler.DeleteFiles) + file.POST("/move", r.fileHandler.MoveFiles) + file.GET("/:id", r.fileHandler.Download) } // provider pool route group diff --git a/internal/service/file.go b/internal/service/file.go index f47af98137..be8ee95039 100644 --- a/internal/service/file.go +++ b/internal/service/file.go @@ -17,15 +17,19 @@ package service import ( + "context" "fmt" "mime/multipart" "os" "path/filepath" "ragflow/internal/dao" + "ragflow/internal/engine" "ragflow/internal/entity" + "ragflow/internal/logger" "ragflow/internal/storage" - "ragflow/internal/util" + "ragflow/internal/utility" "strings" + "time" "github.com/google/uuid" ) @@ -69,9 +73,9 @@ func (s *FileService) GetRootFolder(tenantID string) (map[string]interface{}, er } // ListFiles lists files by parent folder ID (matching Python /files endpoint) -// This method 
includes init_knowledgebase_docs initialization when parent_id is empty +// This method includes init_dataset_docs initialization when parent_id is empty func (s *FileService) ListFiles(tenantID, pfID string, page, pageSize int, orderby string, desc bool, keywords string) (*ListFilesResponse, error) { - // If pfID is empty, get root folder and initialize knowledgebase docs + // If pfID is empty, get root folder and initialize dataset docs if pfID == "" { rootFolder, err := s.fileDAO.GetRootFolder(tenantID) if err != nil { @@ -79,9 +83,9 @@ func (s *FileService) ListFiles(tenantID, pfID string, page, pageSize int, order } pfID = rootFolder.ID - // Initialize knowledgebase docs (matching Python init_knowledgebase_docs logic) - if err := s.initKnowledgebaseDocs(pfID, tenantID); err != nil { - return nil, fmt.Errorf("failed to initialize knowledgebase docs: %w", err) + // Initialize dataset docs (matching Python init_knowledgebase_docs logic) + if err := s.initDatasetDocs(pfID, tenantID); err != nil { + return nil, fmt.Errorf("failed to initialize dataset docs: %w", err) } } @@ -137,17 +141,17 @@ func (s *FileService) ListFiles(tenantID, pfID string, page, pageSize int, order }, nil } -// initKnowledgebaseDocs initializes knowledgebase documents for tenant -// This matches Python's FileService.init_knowledgebase_docs method -func (s *FileService) initKnowledgebaseDocs(rootID, tenantID string) error { - return s.fileDAO.InitKnowledgebaseDocs(rootID, tenantID, s.file2DocumentDAO) +// initDatasetDocs initializes dataset documents for tenant +// This matches Python's FileService.init_dataset_docs method +func (s *FileService) initDatasetDocs(rootID, tenantID string) error { + return s.fileDAO.InitDatasetDocs(rootID, tenantID, s.file2DocumentDAO) } -// KnowledgebaseFolderName is the folder name for knowledgebase -const KnowledgebaseFolderName = ".knowledgebase" +// DatasetFolderName is the folder name for dataset +const DatasetFolderName = ".knowledgebase" -// 
FileSourceKnowledgebase represents knowledgebase as file source -const FileSourceKnowledgebase = "knowledgebase" +// FileSourceDataset represents dataset as file source +const FileSourceDataset = "knowledgebase" // toFileResponse converts file model to response format func (s *FileService) toFileResponse(file *entity.File) map[string]interface{} { @@ -299,7 +303,7 @@ func (s *FileService) UploadFile(tenantID, parentID string, files []*multipart.F return nil, fmt.Errorf("No file selected!") } - fileType := util.FilenameType(filename) + fileType := utility.FilenameType(filename) fileObjNames := s.parseFilePath(filename) @@ -459,3 +463,518 @@ func (s *FileService) CreateFolder(tenantID, name, parentID, fileType string) (m return s.toFileResponse(folder), nil } + +// DeleteFiles deletes files by IDs +// Returns (success, message) where success is true if all files were deleted +func (s *FileService) DeleteFiles(ctx context.Context, uid string, fileIDs []string) (bool, string) { + for _, fileID := range fileIDs { + // 1. Get file + file, err := s.fileDAO.GetByID(fileID) + if err != nil || file == nil { + return false, "File or Folder not found!" + } + + // 2. Check tenant_id + if file.TenantID == "" { + return false, "Tenant not found!" + } + + // Block root-folder deletion (root folders have parent_id == id) + if file.ParentID == file.ID { + return false, "Root folder cannot be deleted." + } + + // 3. Permission check + if !s.checkFileTeamPermission(file, uid) { + return false, "No authorization." + } + + // 4. Skip dataset source files + if file.SourceType == FileSourceDataset { + continue + } + + // 5. 
Delete based on type + if file.Type == FileTypeFolder { + if err := s.deleteFolderRecursive(ctx, file, uid); err != nil { + return false, fmt.Sprintf("Failed to delete folder: %v", err) + } + } else { + if err := s.deleteSingleFile(ctx, file); err != nil { + return false, fmt.Sprintf("Failed to delete file: %v", err) + } + } + } + + return true, "" +} + +// checkFileTeamPermission checks if user has permission to access the file +// Matches Python's check_file_team_permission function +func (s *FileService) checkFileTeamPermission(file *entity.File, uid string) bool { + // File's tenant directly authorized + if file.TenantID == uid { + return true + } + + // Check KB permissions + datasetIDs, err := s.fileDAO.GetDatasetIDByFileID(file.ID) + if err != nil || len(datasetIDs) == 0 { + return false + } + + kbDAO := dao.NewKnowledgebaseDAO() + userTenantDAO := dao.NewUserTenantDAO() + + for _, datasetID := range datasetIDs { + ds, err := kbDAO.GetByID(datasetID) + if err != nil || ds == nil { + continue + } + + // Check KB tenant permission + if s.checkDatasetTeamPermission(ds, uid, userTenantDAO) { + return true + } + } + + return false +} + +// checkDatasetTeamPermission checks if user has permission to access the dataset +// Matches Python's check_kb_team_permission function +func (s *FileService) checkDatasetTeamPermission(ds *entity.Knowledgebase, uid string, userTenantDAO *dao.UserTenantDAO) bool { + // KB's tenant directly authorized + if ds.TenantID == uid { + return true + } + + // Check permission type + permission := ds.Permission + if permission != string(entity.TenantPermissionTeam) { + return false + } + + // Check if user joined the tenant + joinedTenantIDs, err := userTenantDAO.GetTenantIDsByUserID(uid) + if err != nil || len(joinedTenantIDs) == 0 { + return false + } + + for _, tenantID := range joinedTenantIDs { + if tenantID == ds.TenantID { + return true + } + } + + return false +} + +// deleteSingleFile deletes a single file (not folder) +// Matches 
Python's _delete_single_file function +func (s *FileService) deleteSingleFile(ctx context.Context, file *entity.File) error { + // 1. Delete storage object + if file.Location != nil && *file.Location != "" { + storageImpl := storage.GetStorageFactory().GetStorage() + if storageImpl != nil { + if err := storageImpl.Remove(file.ParentID, *file.Location); err != nil { + logger.Logger.Error(fmt.Sprintf("Fail to remove object: %s/%s, error: %v", file.ParentID, *file.Location, err)) + } + } + } + + // 2. Handle associated documents + informs, err := s.file2DocumentDAO.GetByFileID(file.ID) + if err != nil { + return fmt.Errorf("failed to get file2document mappings: %w", err) + } + if len(informs) > 0 { + documentDAO := dao.NewDocumentDAO() + datasetDAO := dao.NewKnowledgebaseDAO() + + for _, inform := range informs { + if inform.DocumentID == nil { + continue + } + docID := *inform.DocumentID + + doc, err := documentDAO.GetByID(docID) + if err == nil && doc != nil { + // Get tenant ID from KB + ds, err := datasetDAO.GetByID(doc.KbID) + if err == nil && ds != nil { + tenantID := ds.TenantID + if tenantID != "" { + // Delete from document engine + if err := s.deleteDocumentFromEngine(ctx, doc, tenantID); err != nil { + logger.Logger.Error(fmt.Sprintf("Fail to delete document from engine: %s, error: %v", doc.ID, err)) + } + } + } + + // Delete document record + if err := documentDAO.Delete(docID); err != nil { + logger.Logger.Error(fmt.Sprintf("Fail to delete document: %s, error: %v", docID, err)) + } + } + + } + + // Delete file2document mapping (outside the loop, called once - matching Python behavior) + if err := s.file2DocumentDAO.DeleteByFileID(file.ID); err != nil { + return fmt.Errorf("failed to delete file2document mapping: %w", err) + } + } + + // 3. 
Delete file record + if err := s.fileDAO.Delete(file.ID); err != nil { + return err + } + + return nil +} + +// deleteDocumentFromEngine deletes a document from the document engine +func (s *FileService) deleteDocumentFromEngine(ctx context.Context, doc *entity.Document, tenantID string) error { + // Get document engine + docEngine := engine.Get() + if docEngine == nil { + return nil + } + + // Build index name: ragflow_<tenant_id>_<kb_id> + indexName := fmt.Sprintf("ragflow_%s_%s", tenantID, doc.KbID) + + // Delete document from engine with timeout + reqCtx, cancel := context.WithTimeout(ctx, 300*time.Second) + defer cancel() + condition := map[string]interface{}{"doc_id": doc.ID} + if _, err := docEngine.Delete(reqCtx, condition, indexName, doc.KbID); err != nil { + return fmt.Errorf("delete document from engine: %w", err) + } + return nil +} + +// deleteFolderRecursive recursively deletes a folder and its contents +// Matches Python's _delete_folder_recursive function +func (s *FileService) deleteFolderRecursive(ctx context.Context, folder *entity.File, uid string) error { + // Get all sub-files + subFiles, err := s.fileDAO.ListByParentID(folder.ID) + if err != nil { + return err + } + + for _, subFile := range subFiles { + if subFile.Type == FileTypeFolder { + // Recursively delete subfolder + if err := s.deleteFolderRecursive(ctx, subFile, uid); err != nil { + return err + } + } else { + // Delete single file + if err := s.deleteSingleFile(ctx, subFile); err != nil { + return err + } + } + } + + // Delete the folder itself + if err := s.fileDAO.Delete(folder.ID); err != nil { + return err + } + + return nil +} + +// MoveFileReq represents the request body for move files operation +type MoveFileReq struct { + SrcFileIDs []string `json:"src_file_ids" binding:"required,min=1"` + DestFileID string `json:"dest_file_id"` + NewName string `json:"new_name"` +} + +// MoveFiles moves and/or renames files +// Follows Linux mv semantics: +// - new_name only: rename in place (no storage 
operation) +// - dest_file_id only: move to new folder (keep names) +// - both: move and rename simultaneously +func (s *FileService) MoveFiles(uid string, srcFileIDs []string, destFileID string, newName string) (bool, string) { + // 1. Get all source files + files, err := s.fileDAO.GetByIDs(srcFileIDs) + if err != nil || len(files) == 0 { + return false, "Source files not found!" + } + + // Create a map for quick lookup + filesMap := make(map[string]*entity.File) + for _, f := range files { + filesMap[f.ID] = f + } + + // 2. Validate all source files + for _, fileID := range srcFileIDs { + file, ok := filesMap[fileID] + if !ok { + return false, "File or folder not found!" + } + if file.TenantID == "" { + return false, "Tenant not found!" + } + // 3. Permission check + if !s.checkFileTeamPermission(file, uid) { + return false, "No authorization." + } + } + + // 4. Validate destination folder if provided + var destFolder *entity.File + if destFileID != "" { + destFolder, err = s.fileDAO.GetByID(destFileID) + if err != nil || destFolder == nil { + return false, "Parent folder not found!" + } + // Check destination folder permission + if !s.checkFileTeamPermission(destFolder, uid) { + return false, "No authorization to write to destination folder." + } + } + + // 5. 
Validate new_name if provided + if newName != "" { + if len(srcFileIDs) > 1 { + return false, "new_name can only be used with a single file" + } + + file := filesMap[srcFileIDs[0]] + // Check extension for non-folder files + if file.Type != FileTypeFolder { + oldExt := utility.GetFileExtension(file.Name) + newExt := utility.GetFileExtension(newName) + if oldExt != newExt { + return false, "The extension of file can't be changed" + } + } + + // Check for duplicate names in target folder + targetParentID := file.ParentID + if destFolder != nil { + targetParentID = destFolder.ID + } + existingFiles := s.fileDAO.Query(newName, targetParentID) + for _, f := range existingFiles { + if f.Name == newName { + return false, "Duplicated file name in the same folder." + } + } + } else if destFolder != nil { + // Plain move (no rename): check for duplicate names in destination folder + for _, file := range files { + existingFiles := s.fileDAO.Query(file.Name, destFolder.ID) + for _, f := range existingFiles { + // Ignore the source file itself + if f.ID != file.ID { + return false, "Duplicated file name in the same folder." + } + } + } + } + + // 6. Perform the move operation + if destFolder != nil { + // Move to destination folder + for _, file := range files { + if err := s.moveEntryRecursive(file, destFolder, newName); err != nil { + return false, err.Error() + } + } + } else { + // Pure rename: no storage operation needed + if newName == "" { + return false, "new_name is required for rename" + } + if len(srcFileIDs) == 0 { + return false, "Source files not found!" + } + file := filesMap[srcFileIDs[0]] + if err := s.fileDAO.UpdateByID(file.ID, map[string]interface{}{"name": newName}); err != nil { + return false, "Database error (File rename)!" 
+ } + + // Update associated document name if exists + informs, err := s.file2DocumentDAO.GetByFileID(file.ID) + if err == nil && len(informs) > 0 && informs[0].DocumentID != nil { + docID := *informs[0].DocumentID + documentDAO := dao.NewDocumentDAO() + if err := documentDAO.UpdateByID(docID, map[string]interface{}{"name": newName}); err != nil { + return false, "Database error (Document rename)!" + } + } + } + + return true, "" +} + +// moveEntryRecursive recursively moves a file or folder entry +func (s *FileService) moveEntryRecursive(sourceFile *entity.File, destFolder *entity.File, overrideName string) error { + effectiveName := overrideName + if effectiveName == "" { + effectiveName = sourceFile.Name + } + + if sourceFile.Type == FileTypeFolder { + // Handle folder move + existingFolders := s.fileDAO.Query(effectiveName, destFolder.ID) + var newFolder *entity.File + if len(existingFolders) > 0 { + // Prevent moving a folder into itself (self-target merge) + if existingFolders[0].ID == sourceFile.ID { + return fmt.Errorf("cannot move folder into itself") + } + newFolder = existingFolders[0] + } else { + // Create new folder + var err error + newFolder, err = s.fileDAO.CreateFolder(destFolder.ID, sourceFile.TenantID, effectiveName, FileTypeFolder) + if err != nil { + return fmt.Errorf("failed to create destination folder: %w", err) + } + } + + // Recursively move sub-files + subFiles, err := s.fileDAO.ListAllFilesByParentID(sourceFile.ID) + if err != nil { + return err + } + for _, subFile := range subFiles { + if err := s.moveEntryRecursive(subFile, newFolder, ""); err != nil { + return err + } + } + + // Delete the source folder + return s.fileDAO.Delete(sourceFile.ID) + } + + // Handle non-folder file move + needStorageMove := destFolder.ID != sourceFile.ParentID + updates := map[string]interface{}{} + + if needStorageMove { + // Get storage + storageImpl := storage.GetStorageFactory().GetStorage() + if storageImpl == nil { + return fmt.Errorf("storage not 
initialized") + } + + // Calculate new location + newLocation := effectiveName + for storageImpl.ObjExist(destFolder.ID, newLocation) { + newLocation += "_" + } + + // Perform storage move (copy + delete) + if sourceFile.Location == nil || *sourceFile.Location == "" { + return fmt.Errorf("file location is empty") + } + + if !storageImpl.Move(sourceFile.ParentID, *sourceFile.Location, destFolder.ID, newLocation) { + return fmt.Errorf("move file failed at storage layer") + } + + updates["parent_id"] = destFolder.ID + updates["location"] = newLocation + } + + if overrideName != "" { + updates["name"] = overrideName + } + + if len(updates) > 0 { + if err := s.fileDAO.UpdateByID(sourceFile.ID, updates); err != nil { + return fmt.Errorf("database error (File update): %w", err) + } + } + + // Update associated document name if renamed + if overrideName != "" { + informs, err := s.file2DocumentDAO.GetByFileID(sourceFile.ID) + if err == nil && len(informs) > 0 && informs[0].DocumentID != nil { + docID := *informs[0].DocumentID + documentDAO := dao.NewDocumentDAO() + if err := documentDAO.UpdateByID(docID, map[string]interface{}{"name": overrideName}); err != nil { + return fmt.Errorf("database error (Document rename): %w", err) + } + } + } + + return nil +} + +// GetFileContent gets file metadata and checks permission for download +// Matches Python's file_api_service.get_file_content function +func (s *FileService) GetFileContent(uid, fileID string) (*entity.File, error) { + file, err := s.fileDAO.GetByID(fileID) + if err != nil || file == nil { + return nil, fmt.Errorf("Document not found!") + } + if !s.checkFileTeamPermission(file, uid) { + return nil, fmt.Errorf("No authorization.") + } + return file, nil +} + +// StorageAddress represents bucket and object name for storage +type StorageAddress struct { + Bucket string + Name string +} + +// GetStorageAddress gets storage address for a file (fallback for when direct blob is empty) +// Matches Python's 
File2DocumentService.get_storage_address function +func (s *FileService) GetStorageAddress(fileID string) (*StorageAddress, error) { + // Get file2document mapping + f2d, err := s.file2DocumentDAO.GetByFileID(fileID) + if err != nil || len(f2d) == 0 { + return nil, fmt.Errorf("file2document mapping not found") + } + + // Get the file + if f2d[0].FileID == nil { + return nil, fmt.Errorf("file_id is nil in file2document mapping") + } + file, err := s.fileDAO.GetByID(*f2d[0].FileID) + if err != nil || file == nil { + return nil, fmt.Errorf("file not found") + } + + // If source_type is empty or local, return file's parent_id and location + if file.SourceType == "" || entity.FileSource(file.SourceType) == entity.FileSourceLocal { + if file.Location == nil || *file.Location == "" { + return nil, fmt.Errorf("file location is empty") + } + return &StorageAddress{ + Bucket: file.ParentID, + Name: *file.Location, + }, nil + } + + // Otherwise, use document's kb_id and location + if f2d[0].DocumentID == nil { + return nil, fmt.Errorf("document_id is required") + } + + documentDAO := dao.NewDocumentDAO() + doc, err := documentDAO.GetByID(*f2d[0].DocumentID) + if err != nil || doc == nil { + return nil, fmt.Errorf("document not found") + } + + if doc.Location == nil || *doc.Location == "" { + return nil, fmt.Errorf("document location is empty") + } + + return &StorageAddress{ + Bucket: doc.KbID, + Name: *doc.Location, + }, nil +} diff --git a/internal/util/file.go b/internal/utility/file.go similarity index 50% rename from internal/util/file.go rename to internal/utility/file.go index 2e1628b54b..898ebae435 100644 --- a/internal/util/file.go +++ b/internal/utility/file.go @@ -14,7 +14,7 @@ // limitations under the License. 
// CONTENT_TYPE_MAP is the lookup table from a file extension to the MIME
// content type reported for it. Keys are lowercase and carry no leading dot.
var CONTENT_TYPE_MAP = map[string]string{
	// Office documents, spreadsheets and presentations
	"csv":  "text/csv",
	"doc":  "application/msword",
	"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	"pdf":  "application/pdf",
	"ppt":  "application/vnd.ms-powerpoint",
	"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
	"xls":  "application/vnd.ms-excel",
	"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	// Plain text and source code (all reported as text/plain)
	"c":    "text/plain",
	"cpp":  "text/plain",
	"cs":   "text/plain",
	"go":   "text/plain",
	"h":    "text/plain",
	"java": "text/plain",
	"js":   "text/plain",
	"kt":   "text/plain",
	"php":  "text/plain",
	"py":   "text/plain",
	"sh":   "text/plain",
	"sql":  "text/plain",
	"ts":   "text/plain",
	"txt":  "text/plain",
	// Markup and structured text
	"htm":      "text/html",
	"html":     "text/html",
	"json":     "application/json",
	"markdown": "text/markdown",
	"md":       "text/markdown",
	"mdx":      "text/markdown",
	// Images
	"avif": "image/avif",
	"bmp":  "image/bmp",
	"gif":  "image/gif",
	"heic": "image/heic",
	"ico":  "image/x-icon",
	"jpeg": "image/jpeg",
	"jpg":  "image/jpeg",
	"png":  "image/png",
	"svg":  "image/svg+xml",
	"tif":  "image/tiff",
	"tiff": "image/tiff",
	"webp": "image/webp",
	// Video and other media containers
	"asx":  "video/x-ms-asf",
	"avi":  "video/x-msvideo",
	"dat":  "video/mpeg",
	"mkv":  "video/x-matroska",
	"mov":  "video/quicktime",
	"mp4":  "video/mp4",
	"mpa":  "video/mpeg",
	"mpe":  "video/mpeg",
	"mpeg": "video/mpeg",
	"mpg":  "video/mpeg",
	"rm":   "application/vnd.rn-realmedia",
	"rmvb": "application/vnd.rn-realmedia",
	"webm": "video/webm",
	"wmv":  "video/x-ms-wmv",
	"wvx":  "video/x-ms-wvx",
}

// FORCE_ATTACHMENT_EXTENSIONS are extensions that should always be
downloaded as attachments +var FORCE_ATTACHMENT_EXTENSIONS = map[string]bool{ + "htm": true, + "html": true, + "shtml": true, + "xht": true, + "xhtml": true, + "xml": true, + "mhtml": true, + "svg": true, +} + +// FORCE_ATTACHMENT_CONTENT_TYPES are content types that should always be downloaded as attachments +var FORCE_ATTACHMENT_CONTENT_TYPES = map[string]bool{ + "text/html": true, + "image/svg+xml": true, + "application/xhtml+xml": true, + "text/xml": true, + "application/xml": true, + "multipart/related": true, +} + +// ShouldForceAttachment determines if the file should be forced as attachment +func ShouldForceAttachment(ext string, contentType string) bool { + normalizedExt := strings.ToLower(strings.TrimPrefix(ext, ".")) + if normalizedExt != "" && FORCE_ATTACHMENT_EXTENSIONS[normalizedExt] { + return true + } + normalizedType := strings.ToLower(contentType) + return FORCE_ATTACHMENT_CONTENT_TYPES[normalizedType] +} + +// GetContentType determines the content type based on extension and file type +// fallbackPrefix is "image" for visual files, "application" for others +func GetContentType(ext string, fileType string) string { + if ext == "" { + return "" + } + normalizedExt := strings.ToLower(strings.TrimPrefix(ext, ".")) + if contentType, ok := CONTENT_TYPE_MAP[normalizedExt]; ok { + return contentType + } + fallbackPrefix := "application" + if fileType == FileTypeVISUAL { + fallbackPrefix = "image" + } + return fallbackPrefix + "/" + normalizedExt +}