Feat: Add knowledge compilation workflows (#16515)

## Summary
- Add knowledge compilation template APIs, services, and builtin
template seed data
- Add advanced knowledge compile structure/artifact/RAPTOR workflow
support
- Update parsing, dataset/document APIs, and supporting services for
compilation workflows
This commit is contained in:
Kevin Hu
2026-07-02 23:22:07 +08:00
committed by GitHub
parent 7d64a78f83
commit 62f94cd59b
57 changed files with 14587 additions and 3094 deletions

View File

@@ -423,6 +423,7 @@ class ParserConfig(Base):
filename_embd_weight: Annotated[float | None, Field(default=0.1, ge=0.0, le=1.0)]
task_page_size: Annotated[int | None, Field(default=None, ge=1)]
pages: Annotated[list[list[int]] | None, Field(default=None)]
compilation_template_group_id: Annotated[list[str], Field(default_factory=list)]
ext: Annotated[dict, Field(default={})]
# Table parser: column name -> "indexing" | "metadata" | "both". Absence => all columns "both".
# Table parser: "auto" = all columns both (default), "manual" = use table_column_roles. None → treated as "auto".
@@ -444,6 +445,25 @@ class ParserConfig(Base):
out[k] = "indexing" if val == "vectorize" else val
return out
@field_validator("compilation_template_group_id", mode="before")
@classmethod
def normalize_compilation_template_group_ids(cls, v: Any) -> Any:
    """Normalize the raw ``compilation_template_group_id`` input to a list of IDs.

    Registered as a ``mode="before"`` validator, so it receives the value as
    supplied by the caller.

    Args:
        v: The incoming value. A single string is treated as a one-element
            list; a list is processed element by element; ``None`` and every
            other type yield an empty list.

    Returns:
        A list of whitespace-stripped, non-empty string IDs with duplicates
        removed, in order of first appearance. Non-string list entries are
        skipped silently.
    """
    if isinstance(v, str):
        candidates = [v]
    elif isinstance(v, list):
        candidates = v
    else:
        # None and any unsupported container type both mean "no groups".
        return []

    stripped_ids = (item.strip() for item in candidates if isinstance(item, str))
    # dict keys keep insertion order, so this drops repeats while preserving
    # the position of each ID's first occurrence.
    return list(dict.fromkeys(gid for gid in stripped_ids if gid))
class UpdateDocumentReq(Base):
"""
@@ -563,7 +583,7 @@ class CreateDatasetReq(Base):
CreateDatasetReq(avatar="data:video/mp4;base64,...") # Unsupported MIME type
```
"""
if not v: # cover both None and empty string
if not v: # cover both None and empty string
return v
if "," in v: