fix: return full record in get_ingestion_log (#16120)

### What problem does this PR solve?

The `get_ingestion_log` endpoint (both Python `dataset_api_service.get_ingestion_log` and Go `DatasetService.GetIngestionLog`) was returning only the **dataset-level** field set, which omits critical fields such as `dsl`, `document_id`, `parser_id`, `document_name`, and `pipeline_id`. This prevented the front-end **dataflow-result page** from rendering the pipeline timeline and chunks when viewing a single ingestion log, regardless of whether the log was a dataset-level operation (graph/raptor/mindmap) or a per-file parse.

### Background

`PipelineOperationLogService` provides two field sets:

| Method | Fields |
|---|---|
| `get_dataset_logs_fields` | Minimal set (progress, status, timestamps, etc.) |
| `get_file_logs_fields` | Superset — includes `document_id`, `dsl`, `parser_id`, `document_name`, `pipeline_id`, … |

When listing logs, the API correctly distinguishes dataset-level vs file-level logs and uses the appropriate converter. However, when **fetching a single log by ID**, both the Python and Go implementations were hardcoded to the dataset-level set, dropping the extra fields that the front-end needs.
2026-06-29 23:41:12 +08:00 · 2026-06-16 22:03:51 -07:00
parent fd196f694e
commit 9bd53ce675
2 changed files with 12 additions and 4 deletions
--- a/api/apps/services/dataset_api_service.py
+++ b/api/apps/services/dataset_api_service.py
@@ -781,7 +781,11 @@ def get_ingestion_log(dataset_id: str, tenant_id: str, log_id: str):

    from api.db.services.pipeline_operation_log_service import PipelineOperationLogService

-    fields = PipelineOperationLogService.get_dataset_logs_fields()
+    # Return the full record (including `dsl`) so the front-end dataflow-result
+    # page can render the pipeline timeline and chunks. The file-level field set
+    # is a superset of the dataset-level fields, so it is valid for both
+    # dataset-level (graph/raptor/mindmap) and per-file logs.
+    fields = PipelineOperationLogService.get_file_logs_fields()
    log = PipelineOperationLogService.model.select(*fields).where((PipelineOperationLogService.model.id == log_id) & (PipelineOperationLogService.model.kb_id == dataset_id)).first()
    if not log:
        return False, "Log not found"
--- a/internal/service/dataset.go
+++ b/internal/service/dataset.go
@@ -1172,8 +1172,12 @@ func (s *DatasetService) ListIngestionLogs(datasetID, userID string, page, pageS
 	}, common.CodeSuccess, nil
 }

-// GetIngestionLog returns a single dataset-level ingestion log, mirroring
-// dataset_api_service.get_ingestion_log.
+// GetIngestionLog returns a single ingestion log, mirroring
+// dataset_api_service.get_ingestion_log. It returns the full record (including
+// `dsl`, `document_id`, `parser_id`, etc.) so that the front-end
+// dataflow-result page can render the pipeline timeline and chunks. The
+// file-level converter is a superset of the dataset-level fields, so it is
+// correct for both dataset-level (graph/raptor/mindmap) and per-file logs.
 func (s *DatasetService) GetIngestionLog(datasetID, userID, logID string) (map[string]interface{}, common.ErrorCode, error) {
 	datasetID = strings.TrimSpace(datasetID)
 	if datasetID == "" {
@@ -1192,7 +1196,7 @@ func (s *DatasetService) GetIngestionLog(datasetID, userID, logID string) (map[s
 		return nil, common.CodeServerError, errors.New("Database operation failed")
 	}

-	return datasetIngestionLogToMap(log), common.CodeSuccess, nil
+	return fileIngestionLogToMap(log), common.CodeSuccess, nil
 }

 func datasetIngestionLogToMap(log *entity.PipelineOperationLog) map[string]interface{} {