diff --git a/internal/ingestion/parser/html_parser.go b/internal/ingestion/parser/html_parser.go
new file mode 100644
index 0000000000..4b6af46bc7
--- /dev/null
+++ b/internal/ingestion/parser/html_parser.go
@@ -0,0 +1,118 @@
+//
+// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package parser
+
+import (
+ "fmt"
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+const (
+ // Official is the library type accepted by NewHTMLParser; it selects the
+ // parsing path built on golang.org/x/net/html.
+ Official string = "official"
+)
+
+// HTMLParser parses HTML documents using the backend named by libType.
+type HTMLParser struct {
+ // libType names the parsing backend; NewHTMLParser only accepts Official.
+ libType string
+}
+
+// NewHTMLParser builds an HTMLParser for the requested library type.
+//
+// Official is the only supported value; any other libType yields a nil parser
+// and an error naming the rejected value.
+func NewHTMLParser(libType string) (*HTMLParser, error) {
+	// Reject unknown backends up front so the happy path stays unindented.
+	if libType != Official {
+		return nil, fmt.Errorf("unsupported HTML library type: %s", libType)
+	}
+
+	parser := &HTMLParser{libType: Official}
+	return parser, nil
+}
+
+// Parse announces filename on standard output and then parses data with the
+// backend this parser was configured with.
+//
+// filename is used only for the announcement line. The returned error is the
+// backend's error, or an "unsupported HTML library type" error when libType
+// does not name a known backend.
+func (p *HTMLParser) Parse(filename string, data []byte) error {
+	fmt.Printf("Parsing HTML file: %s\n", filename)
+
+	if p.libType == Official {
+		return p.OfficialHTMLParse(data)
+	}
+	return fmt.Errorf("unsupported HTML library type: %s", p.libType)
+}
+
+// OfficialHTMLParse parses data as an HTML document with golang.org/x/net/html
+// and walks the resulting node tree with WalkIterative.
+//
+// It returns a wrapped error if html.Parse fails; in that case the tree is not
+// walked. On success it returns nil.
+func (p *HTMLParser) OfficialHTMLParse(data []byte) error {
+	doc, err := html.Parse(strings.NewReader(string(data)))
+	if err != nil {
+		// Previously this error was discarded, so a failed parse looked like
+		// success to the caller.
+		return fmt.Errorf("parsing HTML: %w", err)
+	}
+
+	p.WalkIterative(doc)
+	return nil
+}
+
+// WalkIterative prints an outline of the tree rooted at root to standard
+// output, visiting nodes depth-first in document order (a node before its
+// children, siblings first to last).
+//
+// Each printed line is indented according to the node's depth below root.
+// Element nodes are printed as an opening tag with their attributes, text nodes
+// as quoted text (skipped when blank after trimming), and comment and doctype
+// nodes with a label. Nodes of any other type print nothing, but their children
+// are still visited. A nil root is a no-op.
+func (p *HTMLParser) WalkIterative(root *html.Node) {
+	if root == nil {
+		return
+	}
+
+	// Explicit stack of pending nodes, each paired with its depth, used instead
+	// of recursion.
+	type item struct {
+		node  *html.Node
+		depth int
+	}
+	stack := []item{{root, 0}}
+
+	for len(stack) > 0 {
+		// Pop the most recently pushed node.
+		current := stack[len(stack)-1]
+		stack = stack[:len(stack)-1]
+
+		indent := strings.Repeat(" ", current.depth)
+
+		switch current.node.Type {
+		case html.ElementNode:
+			// Opening tag followed by its attributes, all on one line.
+			fmt.Printf("%s<%s", indent, current.node.Data)
+			for _, attr := range current.node.Attr {
+				fmt.Printf(" %s=%q", attr.Key, attr.Val)
+			}
+			fmt.Println(">")
+
+		case html.TextNode:
+			// Whitespace-only text nodes are not printed.
+			text := strings.TrimSpace(current.node.Data)
+			if text != "" {
+				fmt.Printf("%stext: %q\n", indent, text)
+			}
+
+		case html.CommentNode:
+			fmt.Printf("%scomment: %s\n", indent, current.node.Data)
+
+		case html.DoctypeNode:
+			fmt.Printf("%sDOCTYPE: %s\n", indent, current.node.Data)
+		}
+
+		// Push children last-to-first so the first child is popped next, which
+		// keeps output in document order. Walking LastChild/PrevSibling avoids
+		// building a reversed temporary slice by repeated prepending, which was
+		// quadratic in the number of children.
+		for child := current.node.LastChild; child != nil; child = child.PrevSibling {
+			stack = append(stack, item{child, current.depth + 1})
+		}
+	}
+}
+
+func (p *HTMLParser) String() string {
+ return "HTMLParser"
+}
diff --git a/internal/ingestion/parser/type.go b/internal/ingestion/parser/type.go
index 45f61de111..44a3b10288 100644
--- a/internal/ingestion/parser/type.go
+++ b/internal/ingestion/parser/type.go
@@ -41,6 +41,8 @@ func GetParser(fileType utility.FileType, config map[string]string) (FileParser,
return NewDOCParser(libType)
case utility.FileTypePDF:
return NewPDFParser(), nil
+ case utility.FileTypeHTML:
+ return NewHTMLParser(Official)
case utility.FileTypeMarkdown:
return NewMarkdownParser(GoMarkdown)
default:
diff --git a/internal/utility/file.go b/internal/utility/file.go
index ed21f030bf..2c12e59480 100644
--- a/internal/utility/file.go
+++ b/internal/utility/file.go
@@ -32,6 +32,7 @@ const (
FileTypePPTX FileType = "pptx"
FileTypeXLS FileType = "xls"
FileTypeXLSX FileType = "xlsx"
+ FileTypeHTML FileType = "html"
FileTypeMarkdown FileType = "md"
FileTypeVISUAL FileType = "visual"
FileTypeAURAL FileType = "aural"
@@ -83,6 +84,8 @@ func GetFileType(filename string) FileType {
return FileTypePPT
case "pptx":
return FileTypePPTX
+ case "html", "htm":
+ return FileTypeHTML
case "md":
return FileTypeMarkdown
default: