diff --git a/build.sh b/build.sh index f4ed0bdf8c..8303daffcf 100755 --- a/build.sh +++ b/build.sh @@ -18,6 +18,10 @@ RAGFLOW_SERVER_BINARY="$PROJECT_ROOT/bin/server_main" ADMIN_SERVER_BINARY="$PROJECT_ROOT/bin/admin_server" RAGFLOW_CLI_BINARY="$PROJECT_ROOT/bin/ragflow_cli" +# office_oxide native library settings +OFFICE_OXIDE_PREFIX="${HOME}/.office_oxide" +OFFICE_OXIDE_VERSION="0.1.2" + echo -e "${GREEN}=== RAGFlow Go Server Build Script ===${NC}" # Function to print section headers @@ -54,6 +58,79 @@ check_go_deps() { echo "✓ Required tools are available" } +# Download and extract a tar.gz from a URL to a target directory +_download_and_extract() { + local url="$1" target_dir="$2" + echo "Downloading ${url} ..." + local tmpfile + tmpfile="$(mktemp)" + if command -v curl >/dev/null 2>&1; then + curl -fsSL "$url" -o "$tmpfile" + elif command -v wget >/dev/null 2>&1; then + wget -q "$url" -O "$tmpfile" + else + echo -e "${RED}Error: need curl or wget to download office_oxide${NC}" + exit 1 + fi + tar xzf "$tmpfile" -C "$target_dir" + rm -f "$tmpfile" +} + +# Check / install office_oxide native library (Rust → C FFI library) +check_office_oxide_deps() { + print_section "Checking office_oxide native library" + + local lib_file header_path + case "$(uname -s)" in + Linux) lib_file="liboffice_oxide.so" ;; + Darwin) lib_file="liboffice_oxide.dylib" ;; + *) echo -e "${RED}Unsupported OS for office_oxide${NC}"; exit 1 ;; + esac + + local lib_path="${OFFICE_OXIDE_PREFIX}/lib/${lib_file}" + local header_path="${OFFICE_OXIDE_PREFIX}/include/office_oxide_c/office_oxide.h" + + if [ -f "$lib_path" ] && [ -f "$header_path" ]; then + echo "✓ office_oxide native library found at ${OFFICE_OXIDE_PREFIX}" + return 0 + fi + + echo "office_oxide native library not found. Installing..." + + # Map platform to the release asset name. Note: the GitHub release archives + # omit the version number from the native-* asset filenames. + local asset_name + case "$(uname -s)" in + Linux) + case "$(uname -m)" in + x86_64) asset_name="native-linux-x86_64" ;; + aarch64|arm64) asset_name="native-linux-aarch64" ;; + *) echo -e "${RED}Unsupported arch: $(uname -m)${NC}"; exit 1 ;; + esac + ;; + Darwin) + case "$(uname -m)" in + x86_64) asset_name="native-macos-x86_64" ;; + aarch64|arm64) asset_name="native-macos-aarch64" ;; + *) echo -e "${RED}Unsupported arch: $(uname -m)${NC}"; exit 1 ;; + esac + ;; + esac + + local release_url="https://github.com/yfedoseev/office_oxide/releases/download/v${OFFICE_OXIDE_VERSION}/${asset_name}.tar.gz" + + mkdir -p "${OFFICE_OXIDE_PREFIX}" + _download_and_extract "$release_url" "${OFFICE_OXIDE_PREFIX}" + + if [ ! -f "$lib_path" ]; then + echo -e "${RED}Error: Failed to install office_oxide native library (missing ${lib_path})${NC}" + echo " Try: curl -fsSL ${release_url} | tar xzf - -C ${OFFICE_OXIDE_PREFIX}" + exit 1 + fi + + echo -e "${GREEN}✓ office_oxide native library installed${NC}" +} + # Build C++ static library build_cpp() { print_section "Building C++ static library" @@ -103,11 +180,26 @@ build_go() { echo -e "${YELLOW}Warning: libpcre2-8 not found. You may need to install libpcre2-dev:${NC}" sudo apt -y install libpcre2-dev fi - + + # Check / install office_oxide native library + check_office_oxide_deps + + # Export CGO flags so go build can find office_oxide headers and library + export CGO_CFLAGS="-I${OFFICE_OXIDE_PREFIX}/include/office_oxide_c${CGO_CFLAGS:+ $CGO_CFLAGS}" + echo "Exporting CGO_CFLAGS: $CGO_CFLAGS" + export CGO_LDFLAGS="-L${OFFICE_OXIDE_PREFIX}/lib -loffice_oxide -Wl,-rpath,${OFFICE_OXIDE_PREFIX}/lib${CGO_LDFLAGS:+ $CGO_LDFLAGS}" + echo "Exporting CGO_LDFLAGS: $CGO_LDFLAGS" + echo "Building RAGFlow binary: $RAGFLOW_SERVER_BINARY, $ADMIN_SERVER_BINARY, and $RAGFLOW_CLI_BINARY" - GOPROXY=${GOPROXY:-https://goproxy.cn,https://proxy.golang.org,direct} CGO_ENABLED=1 go build -o "$RAGFLOW_SERVER_BINARY" cmd/server_main.go - GOPROXY=${GOPROXY:-https://goproxy.cn,https://proxy.golang.org,direct} CGO_ENABLED=1 go build -o "$ADMIN_SERVER_BINARY" cmd/admin_server.go - GOPROXY=${GOPROXY:-https://goproxy.cn,https://proxy.golang.org,direct} CGO_ENABLED=1 go build -o "$RAGFLOW_CLI_BINARY" cmd/ragflow_cli.go + GOPROXY=${GOPROXY:-https://goproxy.cn,https://proxy.golang.org,direct} CGO_ENABLED=1 \ + CGO_CFLAGS="$CGO_CFLAGS" CGO_LDFLAGS="$CGO_LDFLAGS" \ + go build -o "$RAGFLOW_SERVER_BINARY" cmd/server_main.go + GOPROXY=${GOPROXY:-https://goproxy.cn,https://proxy.golang.org,direct} CGO_ENABLED=1 \ + CGO_CFLAGS="$CGO_CFLAGS" CGO_LDFLAGS="$CGO_LDFLAGS" \ + go build -o "$ADMIN_SERVER_BINARY" cmd/admin_server.go + GOPROXY=${GOPROXY:-https://goproxy.cn,https://proxy.golang.org,direct} CGO_ENABLED=1 \ + CGO_CFLAGS="$CGO_CFLAGS" CGO_LDFLAGS="$CGO_LDFLAGS" \ + go build -o "$RAGFLOW_CLI_BINARY" cmd/ragflow_cli.go if [ ! -f "$RAGFLOW_SERVER_BINARY" ]; then echo -e "${RED}Error: Failed to build RAGFlow server binary${NC}" @@ -183,6 +275,7 @@ DEPENDENCIES: - go >= 1.24 - g++ with C++17/23 support - libpcre2-dev + - office_oxide native library (auto-downloaded on first build) EOF } diff --git a/go.mod b/go.mod index 82949ed981..ded1dbdf02 100644 --- a/go.mod +++ b/go.mod @@ -26,6 +26,8 @@ require ( github.com/redis/go-redis/v9 v9.18.0 github.com/siongui/gojianfan v0.0.0-20210926212422-2f175ac615de github.com/spf13/viper v1.18.2 + github.com/yfedoseev/office_oxide/go v0.1.2 + github.com/yfedoseev/pdf_oxide/go v0.3.63 go.uber.org/zap v1.27.1 golang.org/x/crypto v0.49.0 golang.org/x/net v0.51.0 @@ -58,6 +60,7 @@ require ( github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/dustin/go-humanize v1.0.1 // indirect + github.com/ebitengine/purego v0.10.1 // indirect github.com/elastic/elastic-transport-go/v8 v8.8.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/gabriel-vasile/mimetype v1.4.2 // indirect diff --git a/go.sum b/go.sum index 7eb3d719dc..0218d0cb65 100644 --- a/go.sum +++ b/go.sum @@ -69,6 +69,8 @@ github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/r github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/ebitengine/purego v0.10.1 h1:dewVBCBT2GaMu1SrNTYxQhgQBethzfhiwvZiLGP/qyY= +github.com/ebitengine/purego v0.10.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/elastic/elastic-transport-go/v8 v8.8.0 h1:7k1Ua+qluFr6p1jfJjGDl97ssJS/P7cHNInzfxgBQAo= github.com/elastic/elastic-transport-go/v8 v8.8.0/go.mod h1:YLHer5cj0csTzNFXoNQ8qhtGY1GTvSqPnKWKaqQE3Hk= github.com/elastic/go-elasticsearch/v8 v8.19.1 h1:0iEGt5/Ds9MNVxEp3hqLsXdbe6SjleaVHONg/FuR09Q= @@ -252,6 +254,10 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/yfedoseev/office_oxide/go v0.1.2 h1:LnyVGXgJJF4tanuRUYVHZNn8e+IwGvOqtIFmQGDjPE4= +github.com/yfedoseev/office_oxide/go v0.1.2/go.mod h1:YLtMlKUkRCp/Q96wsy7D6yoBKDeJnP66UH+c9Bb+E+M= +github.com/yfedoseev/pdf_oxide/go v0.3.63 h1:6qlNQdaiGBGlo70je1fApQcCjeKg6AVUSUo+URCLl/s= +github.com/yfedoseev/pdf_oxide/go v0.3.63/go.mod h1:QbJ/nLbez0al2EnqEdEPIlGflFprWmiuUM4mo9rNNOI= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= diff --git a/internal/cli/user_command.go b/internal/cli/user_command.go index e6d84d902a..c96357e238 100644 --- a/internal/cli/user_command.go +++ b/internal/cli/user_command.go @@ -29,6 +29,8 @@ import ( "os/exec" "path/filepath" "ragflow/internal/ingestion" + "ragflow/internal/ingestion/parser" + "ragflow/internal/utility" "strings" "time" ) @@ -3249,6 +3251,22 @@ func (c *CLI) UserParseLocalFile(cmd *Command) (ResponseIf, error) { docParseModel = "" } + fileType := utility.GetFileType(filename) + + fileParser, err := parser.GetParser(fileType) + if err != nil { + return nil, err + } + + fileContent, err := os.ReadFile(filename) + if err != nil { + return nil, fmt.Errorf("failed to read dsl file: %w", err) + } + + if err = fileParser.Parse(filename, fileContent); err != nil { + return nil, formatRequestError("parse local file", err) + } + var result SimpleResponse result.Code = 0 result.Message = fmt.Sprintf("Success to parse local file %q, vision: %v, chat: %v, asr: %v, ocr: %v, embedding: %v, doc_parse: %v", filename, visionModel, chatModel, asrModel, ocrModel, embeddingModel, docParseModel) diff --git a/internal/development.md b/internal/development.md index f702461560..c5f7bcf642 100644 --- a/internal/development.md +++ b/internal/development.md @@ -7,7 +7,7 @@ docker compose -f docker/docker-compose-base.yml up -d ``` ## 2. Build Go Version RAGFlow -- First build (includes C++ dependencies): +- First build (includes C++ dependencies and office_oxide native library): ```bash ./build.sh --cpp @@ -19,6 +19,13 @@ docker compose -f docker/docker-compose-base.yml up -d ./build.sh --go ``` +> **Note**: If you use IDEs like GoLand to run/debug directly (via Run/Debug buttons), or run `go build` / `go run` from command line, you must set the following two CGO environment variables in your run configuration or shell: +> +> ```bash +> export CGO_CFLAGS="-I${HOME}/.office_oxide/include/office_oxide_c" +> export CGO_LDFLAGS="-L${HOME}/.office_oxide/lib -loffice_oxide -Wl,-rpath,${HOME}/.office_oxide/lib" +> ``` + ## 3. Run Go Version RAGFlow Note: admin_server must be started first; otherwise, ragflow_server will encounter errors when sending heartbeats. diff --git a/internal/ingestion/parser/doc_parser.go b/internal/ingestion/parser/doc_parser.go new file mode 100644 index 0000000000..75b7c3fc4c --- /dev/null +++ b/internal/ingestion/parser/doc_parser.go @@ -0,0 +1,35 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package parser + +import "fmt" + +type DOCParser struct { +} + +func NewDOCParser() *DOCParser { + return &DOCParser{} +} + +func (p *DOCParser) Parse(filename string, data []byte) error { + fmt.Printf("Parsing DOC file: %s\n", filename) + return nil +} + +func (p *DOCParser) String() string { + return "DOCParser" +} diff --git a/internal/ingestion/parser/docx_parser.go b/internal/ingestion/parser/docx_parser.go new file mode 100644 index 0000000000..22a0b0ae16 --- /dev/null +++ b/internal/ingestion/parser/docx_parser.go @@ -0,0 +1,64 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package parser + +import ( + "fmt" + + officeOxide "github.com/yfedoseev/office_oxide/go" +) + +type DOCXParser struct { +} + +func NewDOCXParser() *DOCXParser { + return &DOCXParser{} +} + +func (p *DOCXParser) Parse(filename string, data []byte) error { + + fmt.Printf("Parsing DOCX file: %s\n", filename) + doc, err := officeOxide.OpenFromBytes(data, "docx") + if err != nil { + return err + } + defer doc.Close() + + docFormat, err := doc.Format() + if err != nil { + return err + } + + fmt.Println("Document format:", docFormat) + + docContext, err := doc.PlainText() + if err != nil { + return err + } + fmt.Println("Document context:", docContext) + + md, err := doc.ToMarkdown() + if err != nil { + return err + } + fmt.Println("Document Markdown:", md) + return nil +} + +func (p *DOCXParser) String() string { + return "DOCXParser" +} diff --git a/internal/ingestion/parser/pdf_parser.go b/internal/ingestion/parser/pdf_parser.go new file mode 100644 index 0000000000..3061d6a2e5 --- /dev/null +++ b/internal/ingestion/parser/pdf_parser.go @@ -0,0 +1,35 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package parser + +import "fmt" + +type PDFParser struct { +} + +func NewPDFParser() *PDFParser { + return &PDFParser{} +} + +func (p *PDFParser) Parse(filename string, data []byte) error { + fmt.Printf("Parsing PDF file: %s\n", filename) + return nil +} + +func (p *PDFParser) String() string { + return "PDFParser" +} diff --git a/internal/ingestion/parser/ppt_parser.go b/internal/ingestion/parser/ppt_parser.go new file mode 100644 index 0000000000..bb6398c77d --- /dev/null +++ b/internal/ingestion/parser/ppt_parser.go @@ -0,0 +1,35 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package parser + +import "fmt" + +type PPTParser struct { +} + +func NewPPTParser() *PPTParser { + return &PPTParser{} +} + +func (p *PPTParser) Parse(filename string, data []byte) error { + fmt.Printf("Parsing PPT file: %s\n", filename) + return nil +} + +func (p *PPTParser) String() string { + return "PPTParser" +} diff --git a/internal/ingestion/parser/pptx_parser.go b/internal/ingestion/parser/pptx_parser.go new file mode 100644 index 0000000000..eb657d2422 --- /dev/null +++ b/internal/ingestion/parser/pptx_parser.go @@ -0,0 +1,35 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package parser + +import "fmt" + +type PPTXParser struct { +} + +func NewPPTXParser() *PPTXParser { + return &PPTXParser{} +} + +func (p *PPTXParser) Parse(filename string, data []byte) error { + fmt.Printf("Parsing PPTX file: %s\n", filename) + return nil +} + +func (p *PPTXParser) String() string { + return "PPTXParser" +} diff --git a/internal/ingestion/parser/type.go b/internal/ingestion/parser/type.go new file mode 100644 index 0000000000..9770e44bf8 --- /dev/null +++ b/internal/ingestion/parser/type.go @@ -0,0 +1,51 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package parser + +import ( + "fmt" + "ragflow/internal/utility" +) + +func GetParser(fileType utility.FileType) (FileParser, error) { + switch fileType { + case utility.FileTypePPTX: + return NewPPTXParser(), nil + case utility.FileTypePPT: + return NewPPTParser(), nil + case utility.FileTypeXLSX: + return NewXLSXParser(), nil + case utility.FileTypeXLS: + return NewXLSParser(), nil + case utility.FileTypeDOCX: + return NewDOCXParser(), nil + case utility.FileTypeDOC: + return NewDOCParser(), nil + case utility.FileTypePDF: + return NewPDFParser(), nil + default: + return nil, fmt.Errorf("unsupported file type: %s", fileType) + } +} + +// FileParser defines the interface for all file parsers. +type FileParser interface { + // Parse parses the input text. + Parse(filename string, data []byte) error + + String() string +} diff --git a/internal/ingestion/parser/xls_parser.go b/internal/ingestion/parser/xls_parser.go new file mode 100644 index 0000000000..5aa0f402a2 --- /dev/null +++ b/internal/ingestion/parser/xls_parser.go @@ -0,0 +1,35 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package parser + +import "fmt" + +type XLSParser struct { +} + +func NewXLSParser() *XLSParser { + return &XLSParser{} +} + +func (p *XLSParser) Parse(filename string, data []byte) error { + fmt.Printf("Parsing XLS file: %s\n", filename) + return nil +} + +func (p *XLSParser) String() string { + return "XLSParser" +} diff --git a/internal/ingestion/parser/xlsx_parser.go b/internal/ingestion/parser/xlsx_parser.go new file mode 100644 index 0000000000..4a8548346e --- /dev/null +++ b/internal/ingestion/parser/xlsx_parser.go @@ -0,0 +1,35 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package parser + +import "fmt" + +type XLSXParser struct { +} + +func NewXLSXParser() *XLSXParser { + return &XLSXParser{} +} + +func (p *XLSXParser) Parse(filename string, data []byte) error { + fmt.Printf("Parsing XLSX file: %s\n", filename) + return nil +} + +func (p *XLSXParser) String() string { + return "XLSXParser" +} diff --git a/internal/service/file.go b/internal/service/file.go index e4be10feb0..7783343788 100644 --- a/internal/service/file.go +++ b/internal/service/file.go @@ -375,7 +375,7 @@ func (s *FileService) UploadFile(tenantID, parentID string, files []*multipart.F Name: uniqueName, Location: &location, Size: int64(len(data)), - Type: fileType, + Type: string(fileType), SourceType: "", } diff --git a/internal/utility/file.go b/internal/utility/file.go index 898ebae435..1b372a3af4 100644 --- a/internal/utility/file.go +++ b/internal/utility/file.go @@ -22,13 +22,20 @@ import ( "strings" ) +type FileType string + const ( - FileTypePDF = "pdf" - FileTypeDOC = "doc" - FileTypeVISUAL = "visual" - FileTypeAURAL = "aural" - FileTypeFOLDER = "folder" - FileTypeOTHER = "other" + FileTypePDF FileType = "pdf" + FileTypeDOC FileType = "doc" + FileTypeDOCX FileType = "docx" + FileTypePPT FileType = "ppt" + FileTypePPTX FileType = "pptx" + FileTypeXLS FileType = "xls" + FileTypeXLSX FileType = "xlsx" + FileTypeVISUAL FileType = "visual" + FileTypeAURAL FileType = "aural" + FileTypeFOLDER FileType = "folder" + FileTypeOTHER FileType = "other" ) var ( @@ -50,7 +57,37 @@ func normalizeFilename(filename string) (string, bool) { return strings.ToLower(base), true } -func FilenameType(filename string) string { +func GetFileType(filename string) FileType { + + ext := filepath.Ext(filename) + var suffix string + if len(ext) > 0 && ext[0] == '.' { + suffix = strings.ToLower(ext[1:]) + } else { + suffix = strings.ToLower(ext) + } + + switch suffix { + case "pdf": + return FileTypePDF + case "xls": + return FileTypeXLS + case "xlsx": + return FileTypeXLSX + case "doc": + return FileTypeDOC + case "docx": + return FileTypeDOCX + case "ppt": + return FileTypePPT + case "pptx": + return FileTypePPTX + default: + return FileTypeOTHER + } +} + +func FilenameType(filename string) FileType { normalized, ok := normalizeFilename(filename) if !ok { return FileTypeOTHER @@ -216,7 +253,7 @@ var FORCE_ATTACHMENT_CONTENT_TYPES = map[string]bool{ "image/svg+xml": true, "application/xhtml+xml": true, "text/xml": true, - "application/xml": true, + "application/xml": true, "multipart/related": true, } @@ -241,7 +278,7 @@ func GetContentType(ext string, fileType string) string { return contentType } fallbackPrefix := "application" - if fileType == FileTypeVISUAL { + if fileType == string(FileTypeVISUAL) { fallbackPrefix = "image" } return fallbackPrefix + "/" + normalizedExt