Add git-like file commit API (#15978)

### What problem does this PR solve?

| # | Method | Endpoint | Description | Git Equivalent |
|---|--------|----------|-------------|----------------|
| 1 | `POST` | `/api/v1/{prefix}/{folder_id}/commits` | Create a
snapshot commit with file changes (add/modify/delete/rename) | `git add`
+ `git commit` |
| 2 | `GET` | `/api/v1/{prefix}/{folder_id}/commits` | List commit
history (paginated) | `git log` |
| 3 | `GET` | `/api/v1/{prefix}/{folder_id}/commits/{commit_id}` | Get
commit detail with file changes | `git show` |
| 4 | `GET` | `/api/v1/{prefix}/{folder_id}/commits/{commit_id}/files` |
List file changes in a commit | `git show --name-status` |
| 5 | `GET` |
`/api/v1/{prefix}/{folder_id}/commits/diff?from=...&to=...` | Compare
two commits and return differences | `git diff` |
| 6 | `GET` | `/api/v1/{prefix}/{folder_id}/changes` | Get uncommitted
changes (add/modify/delete) | `git status` |
| 7 | `GET` | `/api/v1/{prefix}/{folder_id}/commits/{commit_id}/tree` |
Get the folder tree snapshot at commit time | `git ls-tree` |
| 8 | `GET` |
`/api/v1/{prefix}/{folder_id}/commits/{commit_id}/files/{file_id}/content`
| Get a file's content as it existed in a specific commit | `git show
HEAD:file` |
| 9 | `GET` | `/api/v1/{prefix}/{file_id}/versions` | Get version
history for a specific file across all commits | `git log -- file` |

Where `{prefix}/{id}` can be:
- `folders/{folder_id}` — direct folder access
- `workspaces/{workspace_id}` — alias of `folders/{folder_id}`
- `datasets/{dataset_id}` — resolves to the dataset's folder
- `memories/{memory_id}` — resolves to the memory's folder
- `skills/{skill_id}` — resolves to the skill's folder

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
This commit is contained in:
Yingfeng
2026-06-15 11:19:56 +08:00
committed by GitHub
parent 83e2180e80
commit b5bea72e4b
15 changed files with 4244 additions and 7 deletions

View File

@@ -0,0 +1,314 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
from functools import wraps
from quart import request
from api.apps import login_required, current_user
from api.utils.api_utils import get_json_result, get_data_error_result, get_request_json, server_error_response, validate_request
# manager is injected dynamically by api.apps.register_page() before this
# module is exec'd. DO NOT assign manager = None here — it would overwrite
# the Blueprint that register_page set on the module.
from api.db.services.file_commit_service import FileCommitService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.file_service import FileService
from common.constants import FileSource
logger = logging.getLogger(__name__)
_ENTITY_RESOLVERS = {}
# Counter to give each generated route function a unique name,
# preventing Quart Blueprint endpoint name collisions.
_route_suffix = [0]
def _register_resolver(entity_type):
"""Decorator that registers a folder_id resolver for an entity type.
The decorated function receives (entity_id) and must return a folder_id
or None if the entity has no corresponding folder.
"""
def decorator(func):
_ENTITY_RESOLVERS[entity_type] = func
@wraps(func)
def wrapper(entity_id):
return func(entity_id)
return wrapper
return decorator
def _resolve_folder_id(entity_type, entity_id):
"""Resolve an entity (dataset/memory/skill) to its folder_id."""
resolver = _ENTITY_RESOLVERS.get(entity_type)
if resolver is None:
return None
return resolver(entity_id)
@_register_resolver("datasets")
def _resolve_dataset_folder(dataset_id):
success, kb = KnowledgebaseService.get_by_id(dataset_id)
if not success:
return None
# Find the folder with matching name, source_type, and tenant_id
folders = FileService.query(
name=kb.name,
source_type=FileSource.KNOWLEDGEBASE.value,
type="folder",
tenant_id=kb.tenant_id,
)
if folders:
return folders[0].id
return None
# ── Route registration helper ─────────────────────────────────────────────
def _register_commit_routes(prefix, param_name, resolver_type=None):
"""Register all 8 commit endpoints for a given URL prefix.
Args:
prefix: URL prefix like '/folders/<folder_id>'
param_name: The URL parameter name (e.g. 'folder_id', 'dataset_id')
resolver_type: If set, resolve param_name → folder_id before calling logic
"""
# Unique suffix for this call to prevent Blueprint endpoint name collisions
_route_suffix[0] += 1
_n = _route_suffix[0]
def _resolve(entity_id):
if resolver_type is None:
return entity_id # already a folder_id
folder_id = _resolve_folder_id(resolver_type, entity_id)
if folder_id is None:
raise ValueError(f"Could not resolve {resolver_type} '{entity_id}' to a folder")
return folder_id
# ── Create commit ──────────────────────────────────────────────────────
@manager.route(f'{prefix}/commits', methods=['POST'], endpoint=f'create_commit_{_n}') # noqa: F821
@login_required
@validate_request("message", "files")
async def create_commit(entity_id):
folder_id = _resolve(entity_id)
req = await get_request_json()
try:
commit = FileCommitService.create_commit(
folder_id=folder_id,
author_id=current_user.id,
message=req["message"],
file_changes=req["files"],
)
return get_json_result(data={
"id": commit.id,
"folder_id": commit.folder_id,
"parent_id": commit.parent_id,
"message": commit.message,
"author_id": commit.author_id,
"file_count": commit.file_count,
"tree_state": commit.tree_state,
"create_time": commit.create_time,
})
except Exception as e:
return server_error_response(e)
# ── List commits ───────────────────────────────────────────────────────
@manager.route(f'{prefix}/commits', methods=['GET'], endpoint=f'list_commits_{_n}') # noqa: F821
@login_required
async def list_commits(entity_id):
folder_id = _resolve(entity_id)
try:
page = int(request.args.get("page", 1))
page_size = int(request.args.get("page_size", 15))
order_by = request.args.get("order_by", "create_time")
desc = request.args.get("desc", "true").lower() != "false"
commits, total = FileCommitService.list_commits(folder_id, page, page_size, order_by, desc)
return get_json_result(data={
"total": total,
"page": page,
"page_size": page_size,
"commits": [{
"id": c.id,
"folder_id": c.folder_id,
"parent_id": c.parent_id,
"message": c.message,
"author_id": c.author_id,
"file_count": c.file_count,
"create_time": c.create_time,
} for c in commits],
})
except Exception as e:
return server_error_response(e)
# ── Get commit ─────────────────────────────────────────────────────────
@manager.route(f'{prefix}/commits/<commit_id>', methods=['GET'], endpoint=f'get_commit_{_n}') # noqa: F821
@login_required
async def get_commit(entity_id, commit_id):
folder_id = _resolve(entity_id)
try:
commit = FileCommitService.get_commit(commit_id)
if not commit:
return get_data_error_result("Commit not found")
if commit.folder_id != folder_id:
return get_data_error_result("Commit not found in workspace")
items = FileCommitService.list_commit_files(commit_id)
return get_json_result(data={
"id": commit.id,
"folder_id": commit.folder_id,
"parent_id": commit.parent_id,
"message": commit.message,
"author_id": commit.author_id,
"file_count": commit.file_count,
"create_time": commit.create_time,
"files": [{
"file_id": item.file_id,
"operation": item.operation,
"old_hash": item.old_hash,
"new_hash": item.new_hash,
"old_name": item.old_name,
"new_name": item.new_name,
} for item in items],
})
except Exception as e:
return server_error_response(e)
# ── List commit files ──────────────────────────────────────────────────
@manager.route(f'{prefix}/commits/<commit_id>/files', methods=['GET'], endpoint=f'list_commit_files_{_n}') # noqa: F821
@login_required
async def list_commit_files(entity_id, commit_id):
folder_id = _resolve(entity_id)
try:
commit = FileCommitService.get_commit(commit_id)
if not commit:
return get_data_error_result("Commit not found")
if commit.folder_id != folder_id:
return get_data_error_result("Commit not found in workspace")
items = FileCommitService.list_commit_files(commit_id)
return get_json_result(data=[{
"id": item.id,
"file_id": item.file_id,
"operation": item.operation,
"old_hash": item.old_hash,
"new_hash": item.new_hash,
"old_location": item.old_location,
"new_location": item.new_location,
"old_name": item.old_name,
"new_name": item.new_name,
} for item in items])
except Exception as e:
return server_error_response(e)
# ── Diff commits ───────────────────────────────────────────────────────
@manager.route(f'{prefix}/commits/diff', methods=['GET'], endpoint=f'diff_commits_{_n}') # noqa: F821
@login_required
async def diff_commits(entity_id):
folder_id = _resolve(entity_id)
from_id = request.args.get("from")
to_id = request.args.get("to")
if not from_id or not to_id:
return get_data_error_result("'from' and 'to' parameters are required")
try:
from_commit = FileCommitService.get_commit(from_id)
to_commit = FileCommitService.get_commit(to_id)
if not from_commit or not to_commit:
return get_data_error_result("Commit not found")
if from_commit.folder_id != folder_id or to_commit.folder_id != folder_id:
return get_data_error_result("Commit not found in workspace")
diff = FileCommitService.diff_commits(from_id, to_id)
return get_json_result(data=diff)
except Exception as e:
return server_error_response(e)
# ── Get uncommitted changes ────────────────────────────────────────────
@manager.route(f'{prefix}/changes', methods=['GET'], endpoint=f'get_uncommitted_changes_{_n}') # noqa: F821
@login_required
async def get_uncommitted_changes(entity_id):
folder_id = _resolve(entity_id)
try:
changes = FileCommitService.get_uncommitted_changes(folder_id)
return get_json_result(data=changes)
except Exception as e:
return server_error_response(e)
# ── Get commit tree ────────────────────────────────────────────────────
@manager.route(f'{prefix}/commits/<commit_id>/tree', methods=['GET'], endpoint=f'get_commit_tree_{_n}') # noqa: F821
@login_required
async def get_commit_tree(entity_id, commit_id):
folder_id = _resolve(entity_id)
try:
commit = FileCommitService.get_commit(commit_id)
if not commit:
return get_data_error_result("Commit not found")
if commit.folder_id != folder_id:
return get_data_error_result("Commit not found in workspace")
tree = FileCommitService.get_commit_tree(commit_id)
return get_json_result(data=tree)
except Exception as e:
return server_error_response(e)
# ── Get commit file content ────────────────────────────────────────────
@manager.route(f'{prefix}/commits/<commit_id>/files/<file_id>/content', methods=['GET'], endpoint=f'get_commit_file_content_{_n}') # noqa: F821
@login_required
async def get_commit_file_content(entity_id, commit_id, file_id):
folder_id = _resolve(entity_id)
try:
commit = FileCommitService.get_commit(commit_id)
if not commit:
return get_data_error_result("Commit not found")
if commit.folder_id != folder_id:
return get_data_error_result("Commit not found in workspace")
content = FileCommitService.get_commit_file_content(folder_id, commit_id, file_id)
if content is None:
return get_data_error_result("File not found in this commit")
return get_json_result(data={"content": content.decode("utf-8", errors="replace")})
except Exception as e:
return server_error_response(e)
# Expose handlers at module level for direct testing.
_g = globals()
_g['create_commit'] = create_commit
_g['list_commits'] = list_commits
_g['get_commit'] = get_commit
_g['list_commit_files'] = list_commit_files
_g['diff_commits'] = diff_commits
_g['get_uncommitted_changes'] = get_uncommitted_changes
_g['get_commit_tree'] = get_commit_tree
_g['get_commit_file_content'] = get_commit_file_content
# ── Register routes for all entity types ──────────────────────────────────
# All URL patterns use <entity_id> as the consistent param name.
# For /folders/ entity_id IS the folder_id directly.
# For other entity types entity_id is resolved via _resolve_folder_id().
# Register datasets first, workspace second, folders last —
# the last call's handlers overwrite module-level names for test access.
_register_commit_routes('/datasets/<entity_id>', 'entity_id', resolver_type='datasets')
_register_commit_routes('/workspace/<entity_id>', 'entity_id') # alias — workspace_id == folder_id
_register_commit_routes('/folders/<entity_id>', 'entity_id') # direct — entity_id == folder_id (wins)
# /memories and /skills routes are not mounted until resolvers are implemented.
# ── File version history (shared across all entity types) ─────────────────
@manager.route('/files/<file_id>/versions', methods=['GET']) # noqa: F821
@login_required
async def get_file_version_history(file_id):
try:
versions = FileCommitService.get_file_version_history(file_id)
return get_json_result(data=versions)
except Exception as e:
return server_error_response(e)

View File

@@ -956,6 +956,38 @@ class File2Document(DataBaseModel):
db_table = "file2document"
class FileCommit(DataBaseModel):
id = CharField(max_length=32, primary_key=True)
folder_id = CharField(max_length=32, null=False, help_text="workspace folder id", index=True)
parent_id = CharField(max_length=32, null=True, help_text="parent commit id", index=True)
message = CharField(max_length=512, default="", help_text="commit message")
author_id = CharField(max_length=32, null=False, help_text="user who created the commit", index=True)
file_count = IntegerField(default=0, help_text="number of files in this commit")
tree_state = LongTextField(null=True, help_text="JSON snapshot of the full folder tree at this commit")
class Meta:
db_table = "file_commit"
class FileCommitItem(DataBaseModel):
id = CharField(max_length=32, primary_key=True)
commit_id = CharField(max_length=32, null=False, help_text="commit id", index=True)
file_id = CharField(max_length=32, null=False, help_text="file id", index=True)
operation = CharField(max_length=16, null=False, help_text="add / modify / delete / rename", index=True)
old_hash = CharField(max_length=64, null=True, help_text="old content hash", index=True)
new_hash = CharField(max_length=64, null=True, help_text="new content hash", index=True)
old_location = CharField(max_length=255, null=True, help_text="old storage location")
new_location = CharField(max_length=255, null=True, help_text="new storage location")
old_name = CharField(max_length=255, null=True, help_text="old file name (for rename)")
new_name = CharField(max_length=255, null=True, help_text="new file name (for rename)")
class Meta:
db_table = "file_commit_item"
indexes = (
(("commit_id", "file_id"), True), # unique composite index
)
class Task(DataBaseModel):
id = CharField(max_length=32, primary_key=True)
doc_id = CharField(max_length=32, null=False, index=True)

View File

@@ -0,0 +1,658 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import datetime
import hashlib
import json
import logging
from api.db.db_models import DB, FileCommit, FileCommitItem, File
from api.db.services.common_service import CommonService
from api.db.services.file_service import FileService
from common import settings
from common.misc_utils import get_uuid
from common.time_utils import current_timestamp, datetime_format
logger = logging.getLogger(__name__)
def _get_file_parent_id(file_id):
"""Look up a file's parent_id from the File table."""
try:
row = File.get_or_none(File.id == file_id)
if row:
return row.parent_id
except Exception:
pass
return None
def _collect_all_files_under(folder_id):
"""Recursively collect all non-folder files under a folder (including sub-folders).
Returns a dict of {file_id: File_model_instance}.
"""
results = {}
try:
# Direct file children (non-folder) of this folder
for f in File.select().where(
File.parent_id == folder_id,
File.id != folder_id,
File.type != "folder",
):
results[f.id] = f
# Sub-folders — recurse
for sub in File.select().where(
File.parent_id == folder_id,
File.type == "folder",
):
results.update(_collect_all_files_under(sub.id))
except Exception:
pass
return results
class FileCommitService(CommonService):
model = FileCommit
@classmethod
def create_commit(cls, folder_id, author_id, message, file_changes):
"""Create a new commit for a workspace folder.
Args:
folder_id: The workspace folder ID
author_id: The user ID
message: Commit message
file_changes: List of dicts:
[{"file_id": str, "file_name": str, "operation": "add"|"modify"|"delete"|"rename",
"content": str (optional, for add/modify), "content_hash": str (optional),
"old_name": str, "new_name": str (for rename)}]
Returns:
The created FileCommit instance
"""
commit_id = get_uuid()
now_ts = current_timestamp()
now_dt = datetime_format(date_time=datetime.datetime.now())
with DB.atomic():
# 1. Get the latest (chain head) commit for this folder
latest_commit = cls._get_latest_commit(folder_id)
# 2. Begin creating the commit record
commit_data = {
"id": commit_id,
"folder_id": folder_id,
"parent_id": latest_commit.id if latest_commit else None,
"message": message,
"author_id": author_id,
"file_count": len(file_changes),
"create_time": now_ts,
"create_date": now_dt,
"update_time": now_ts,
"update_date": now_dt,
}
# 3. Insert commit record
FileCommit(**commit_data).save(force_insert=True)
# 4. Build new tree state and process each file change
tree_state = {}
if latest_commit and latest_commit.tree_state:
try:
tree_state = json.loads(latest_commit.tree_state)
except (json.JSONDecodeError, TypeError):
tree_state = {}
# 4a. Backfill parent_id for existing entries that lack it
for fid, entry in tree_state.items():
if isinstance(entry, dict) and "parent_id" not in entry:
pid = _get_file_parent_id(fid)
if pid:
entry["parent_id"] = pid
storage_impl = settings.STORAGE_IMPL
for change in file_changes:
op = change.get("operation", "modify")
file_id = change.get("file_id", "")
commit_item_id = get_uuid()
item = {
"id": commit_item_id,
"commit_id": commit_id,
"file_id": file_id,
"operation": op,
"create_time": now_ts,
"create_date": now_dt,
"update_time": now_ts,
"update_date": now_dt,
}
if op == "add":
content = change.get("content", "")
content_bytes = content.encode("utf-8") if isinstance(content, str) else content
content_hash = hashlib.sha256(content_bytes).hexdigest()
obj_key = f".objects/{content_hash}"
# Store blob via content-addressable storage
if storage_impl:
storage_impl.put(folder_id, obj_key, content_bytes)
item["new_hash"] = content_hash
item["new_location"] = obj_key
# Update file record in DB
File.update({
"location": obj_key,
"size": len(content_bytes),
"update_time": current_timestamp(),
}).where(File.id == file_id).execute()
# Update tree state
file_parent = _get_file_parent_id(file_id)
tree_state[file_id] = {
"hash": content_hash,
"location": obj_key,
"name": change.get("file_name", ""),
"size": len(content_bytes),
"status": "1",
"parent_id": file_parent,
}
elif op == "modify":
content = change.get("content", "")
content_bytes = content.encode("utf-8") if isinstance(content, str) else content
content_hash = hashlib.sha256(content_bytes).hexdigest()
obj_key = f".objects/{content_hash}"
# Record old hash
old_entry = tree_state.get(file_id, {})
old_hash = old_entry.get("hash", "")
old_location = old_entry.get("location", "")
if old_hash:
item["old_hash"] = old_hash
item["old_location"] = old_location
# Store new blob
if storage_impl:
storage_impl.put(folder_id, obj_key, content_bytes)
item["new_hash"] = content_hash
item["new_location"] = obj_key
# Update file record
File.update({
"location": obj_key,
"size": len(content_bytes),
"update_time": current_timestamp(),
}).where(File.id == file_id).execute()
# Update tree state
file_parent = _get_file_parent_id(file_id)
tree_state[file_id] = {
"hash": content_hash,
"location": obj_key,
"name": change.get("file_name", tree_state.get(file_id, {}).get("name", "")),
"size": len(content_bytes),
"status": "1",
"parent_id": file_parent,
}
elif op == "delete":
old_entry = tree_state.get(file_id, {})
old_hash = old_entry.get("hash", "")
old_location = old_entry.get("location", "")
if old_hash:
item["old_hash"] = old_hash
item["old_location"] = old_location
# Soft-delete the file record
File.update(status="0", update_time=current_timestamp()).where(
File.id == file_id
).execute()
# Remove from tree state (mark deleted)
if file_id in tree_state:
tree_state[file_id]["status"] = "0"
elif op == "rename":
old_name = change.get("old_name", "")
new_name = change.get("new_name", "")
item["old_name"] = old_name
item["new_name"] = new_name
# Update the file record name
File.update(name=new_name, update_time=current_timestamp()).where(
File.id == file_id
).execute()
# Update tree state
if file_id in tree_state:
tree_state[file_id]["name"] = new_name
# Insert commit item
FileCommitItem(**item).save(force_insert=True)
# 5. Save the tree state snapshot
tree_json = json.dumps(tree_state, ensure_ascii=False)
cls.model.update(tree_state=tree_json).where(cls.model.id == commit_id).execute()
_, commit = cls.get_by_id(commit_id)
return commit
@classmethod
def _get_latest_commit(cls, folder_id):
"""Get the latest (chain head) commit for a folder."""
try:
return cls.model.select().where(
cls.model.folder_id == folder_id
).order_by(cls.model.create_time.desc()).first()
except Exception:
return None
@classmethod
@DB.connection_context()
def list_commits(cls, folder_id, page=1, page_size=15, order_by="create_time", desc=True):
"""List commits for a workspace folder with pagination."""
total = cls.model.select().where(cls.model.folder_id == folder_id).count()
query = cls.model.select().where(cls.model.folder_id == folder_id)
if desc:
query = query.order_by(getattr(cls.model, order_by).desc())
else:
query = query.order_by(getattr(cls.model, order_by).asc())
if page and page_size:
offset = (page - 1) * page_size
query = query.offset(offset).limit(page_size)
return list(query), total
@classmethod
@DB.connection_context()
def get_commit(cls, commit_id):
"""Get a single commit by ID."""
success, commit = cls.get_by_id(commit_id)
return commit if success else None
@classmethod
@DB.connection_context()
def list_commit_files(cls, commit_id):
"""List all file change items for a commit."""
items = FileCommitItem.select().where(FileCommitItem.commit_id == commit_id)
return list(items)
@classmethod
@DB.connection_context()
def diff_commits(cls, from_id, to_id):
"""Compare two commits and return the diff.
Compares tree_state snapshots (full file inventories), not commit
items (which only capture per-commit deltas). Falls back to
FileCommitItem records for supplementary metadata (hash/location).
Returns list of dicts with fields:
file_id, file_name, operation, old_hash, new_hash, old_location, new_location
"""
_, from_commit = cls.get_by_id(from_id)
_, to_commit = cls.get_by_id(to_id)
from_tree = {}
to_tree = {}
if from_commit and from_commit.tree_state:
try:
from_tree = json.loads(from_commit.tree_state)
except Exception:
pass
if to_commit and to_commit.tree_state:
try:
to_tree = json.loads(to_commit.tree_state)
except Exception:
pass
# Supplement with commit_item metadata for operations not captured
# by tree_state alone (rename).
from_items = {}
try:
for item in FileCommitItem.select().where(FileCommitItem.commit_id == from_id):
from_items[item.file_id] = item
except Exception:
pass
to_items = {}
try:
for item in FileCommitItem.select().where(FileCommitItem.commit_id == to_id):
to_items[item.file_id] = item
except Exception:
pass
all_file_ids = set(from_tree.keys()) | set(to_tree.keys())
diff = []
for fid in sorted(all_file_ids):
from_entry = from_tree.get(fid)
to_entry = to_tree.get(fid)
from_item = from_items.get(fid)
to_item = to_items.get(fid)
from_hash = from_entry.get("hash", "") if isinstance(from_entry, dict) else ""
to_hash = to_entry.get("hash", "") if isinstance(to_entry, dict) else ""
from_status = from_entry.get("status", "1") if isinstance(from_entry, dict) else "1"
to_status = to_entry.get("status", "1") if isinstance(to_entry, dict) else "1"
from_name = from_entry.get("name", "") if isinstance(from_entry, dict) else ""
to_name = to_entry.get("name", "") if isinstance(to_entry, dict) else ""
if from_entry is not None and to_entry is None:
# Present in from, absent in to → deleted
diff.append({
"file_id": fid,
"file_name": from_name,
"operation": "delete",
"old_hash": from_hash or (from_item.new_hash if from_item else None),
"old_location": from_entry.get("location", "") if isinstance(from_entry, dict) else None,
"new_hash": None,
"new_location": None,
})
elif from_entry is None and to_entry is not None:
# Present in to, absent in from → added
diff.append({
"file_id": fid,
"file_name": to_name,
"operation": "add",
"old_hash": None,
"old_location": None,
"new_hash": to_hash or (to_item.new_hash if to_item else None),
"new_location": to_entry.get("location", "") if isinstance(to_entry, dict) else None,
})
else:
# Both exist — check for changes
changed = False
operation = "modify"
# Hash change
if from_hash != to_hash:
changed = True
# Status change (active ↔ deleted or vice versa in same entry)
if from_status != to_status:
changed = True
operation = "delete" if to_status == "0" else "add"
# Name change (rename)
if from_name != to_name:
changed = True
operation = "rename"
if changed:
old_loc = from_entry.get("location", "") if isinstance(from_entry, dict) else None
new_loc = to_entry.get("location", "") if isinstance(to_entry, dict) else None
diff.append({
"file_id": fid,
"file_name": to_name or from_name,
"operation": operation,
"old_hash": from_hash or (from_item.new_hash if from_item else None),
"old_location": old_loc or (from_item.new_location if from_item else None),
"new_hash": to_hash or (to_item.new_hash if to_item else None),
"new_location": new_loc or (to_item.new_location if to_item else None),
})
return diff
@classmethod
@DB.connection_context()
def get_uncommitted_changes(cls, folder_id):
"""Get uncommitted changes by comparing current File table with latest commit.
Recursively scans all sub-folders under folder_id.
Returns list of dicts: [{"file_id", "file_name", "operation": "add"|"modify"|"delete"}]
"""
# Get latest commit's tree state
latest = cls._get_latest_commit(folder_id)
committed_files = {}
if latest and latest.tree_state:
try:
committed_files = json.loads(latest.tree_state)
except Exception:
pass
# Get all current (live) files recursively under this folder
current_files = _collect_all_files_under(folder_id)
changes = []
processed = set()
# Check for modified and deleted files
for fid, committed_entry in committed_files.items():
processed.add(fid)
if committed_entry.get("status") == "0":
continue
if fid in current_files:
live_file = current_files[fid]
live_hash = _compute_file_hash(folder_id, fid)
committed_hash = committed_entry.get("hash", "")
if live_hash and live_hash != committed_hash:
changes.append({
"file_id": fid,
"file_name": committed_entry.get("name", ""),
"operation": "modify",
})
else:
if FileService.get_or_none(id=fid) is None:
changes.append({
"file_id": fid,
"file_name": committed_entry.get("name", ""),
"operation": "delete",
})
# Check for newly added files
for fid, live_file in current_files.items():
if fid not in processed:
changes.append({
"file_id": fid,
"file_name": live_file.name,
"operation": "add",
})
return changes
@classmethod
@DB.connection_context()
def get_commit_tree(cls, commit_id):
"""Get the tree state snapshot for a commit as a hierarchical tree."""
success, commit = cls.get_by_id(commit_id)
if not success or not commit.tree_state:
return {}
try:
tree_state = json.loads(commit.tree_state)
except Exception:
return {}
return _build_hierarchical_tree(tree_state, commit.folder_id)
@classmethod
@DB.connection_context()
def get_commit_file_content(cls, folder_id, commit_id, file_id):
"""Get file content as it existed in a given commit.
Resolves the file's stored hash from the commit's tree_state first;
if absent (file unchanged in this commit), walks the parent commit
chain via parent_id until a FileCommitItem for the file is found.
"""
success, commit = cls.get_by_id(commit_id)
if not success:
return None
# 1. Try tree_state — the full snapshot at this commit
if commit.tree_state:
try:
tree = json.loads(commit.tree_state)
entry = tree.get(file_id)
if isinstance(entry, dict):
h = entry.get("hash")
if h:
obj_path = f".objects/{h}"
storage_impl = settings.STORAGE_IMPL
if storage_impl:
return storage_impl.get(folder_id, obj_path)
except Exception:
pass
# 2. Walk parent commits via parent_id until we find a
# FileCommitItem for this file_id.
current_id = commit_id
visited = set()
while current_id and current_id not in visited:
visited.add(current_id)
item = FileCommitItem.select().where(
FileCommitItem.commit_id == current_id,
FileCommitItem.file_id == file_id,
).first()
if item and item.new_hash:
obj_path = f".objects/{item.new_hash}"
storage_impl = settings.STORAGE_IMPL
if storage_impl:
return storage_impl.get(folder_id, obj_path)
# Move to parent
parent_commit = cls.get_commit(current_id)
if parent_commit and parent_commit.parent_id:
current_id = parent_commit.parent_id
else:
break
return None
@classmethod
@DB.connection_context()
def get_file_version_history(cls, file_id):
"""Get version history for a specific file across all commits.
Returns list of dicts: [{"commit_id", "operation", "hash", "create_time", "message"}]
"""
items = FileCommitItem.select().where(FileCommitItem.file_id == file_id).order_by(
FileCommitItem.create_time.desc())
versions = []
for item in items:
commit = cls.get_commit(item.commit_id)
if commit:
versions.append({
"commit_id": item.commit_id,
"operation": item.operation,
"hash": item.new_hash or item.old_hash or "",
"create_time": item.create_time,
"message": commit.message,
})
return versions
def _lookup_folder_name(folder_id):
"""Look up a folder's display name from the File table."""
try:
row = File.get_or_none(File.id == folder_id)
if row:
return row.name
except Exception:
pass
return folder_id
def _build_hierarchical_tree(tree_state, root_folder_id):
"""Build a recursive tree from a flat tree_state map.
Returns {id, name, type: "folder", children: [{file|folder nodes}]}
Sub-folder hierarchy is resolved from the File table's parent_id.
"""
# Collect all unique folder IDs from parent_id fields
folder_ids = {root_folder_id}
for fid, entry in tree_state.items():
if isinstance(entry, dict):
pid = entry.get("parent_id") or root_folder_id
folder_ids.add(pid)
# Build a map of folder_id -> parent_folder_id from the File table
folder_parent_map = {}
for fid in folder_ids:
if fid != root_folder_id:
try:
row = File.get_or_none(File.id == fid)
if row:
folder_parent_map[fid] = row.parent_id
except Exception:
pass
# Group file entries by parent_id
files_by_parent = {}
for fid, entry in tree_state.items():
if not isinstance(entry, dict):
continue
pid = entry.get("parent_id") or root_folder_id
files_by_parent.setdefault(pid, []).append((fid, entry))
# Group sub-folder IDs by their parent folder
children_by_folder = {}
for sfid, ppid in folder_parent_map.items():
children_by_folder.setdefault(ppid, []).append(sfid)
def _build_node(node_id):
node = {
"id": node_id,
"name": _lookup_folder_name(node_id),
"type": "folder",
"children": [],
}
# File children
for fid, entry in files_by_parent.get(node_id, []):
fn = {"id": fid, "name": entry.get("name", fid), "type": "file",
"hash": entry.get("hash", ""), "size": entry.get("size", 0),
"status": entry.get("status", "1")}
if entry.get("location"):
fn["location"] = entry["location"]
node["children"].append(fn)
# Sub-folder children (resolved from File table)
for sfid in children_by_folder.get(node_id, []):
child = _build_node(sfid)
if child:
node["children"].append(child)
return node
return _build_node(root_folder_id)
def _compute_file_hash(folder_id, file_id):
"""Compute SHA256 hash of current file content from storage."""
try:
file_record = FileService.get_by_id(file_id)
if not file_record[0]:
return None
file = file_record[1]
if not file.location:
return None
storage = settings.STORAGE_IMPL
if not storage:
return None
data = storage.get(folder_id, file.location)
if data:
return hashlib.sha256(data).hexdigest()
return None
except Exception:
return None

View File

@@ -232,6 +232,7 @@ func startServer(config *server.Config) {
searchBotHandler.SetAskService(service.NewAskService(chunkService, nil, 0, 0))
pluginHandler := handler.NewPluginHandler(service.NewPluginService())
modelHandler := handler.NewModelHandler(service.NewModelProviderService())
fileCommitHandler := handler.NewFileCommitHandler(service.NewFileCommitService())
// Dify retrieval handler
docDAO := dao.NewDocumentDAO()
@@ -262,7 +263,7 @@ func startServer(config *server.Config) {
adminRuntimeHandler := handler.NewAdminRuntimeHandler(adminRuntimeSelector)
// Initialize router
r := router.NewRouter(authHandler, userHandler, tenantHandler, documentHandler, datasetsHandler, systemHandler, knowledgebaseHandler, chunkHandler, llmHandler, chatHandler, chatSessionHandler, connectorHandler, searchHandler, fileHandler, memoryHandler, mcpHandler, skillSearchHandler, providerHandler, agentHandler, searchBotHandler, difyRetrievalHandler, pluginHandler, modelHandler, adminRuntimeHandler)
r := router.NewRouter(authHandler, userHandler, tenantHandler, documentHandler, datasetsHandler, systemHandler, knowledgebaseHandler, chunkHandler, llmHandler, chatHandler, chatSessionHandler, connectorHandler, searchHandler, fileHandler, memoryHandler, mcpHandler, skillSearchHandler, providerHandler, agentHandler, searchBotHandler, difyRetrievalHandler, pluginHandler, modelHandler, fileCommitHandler, adminRuntimeHandler)
// Create Gin engine
ginEngine := gin.New()

View File

@@ -7544,6 +7544,568 @@ or
The commit message.
- `'files'`: `list[object]` (required)
The list of file changes. Each file change is an object with the following fields:
##### Request example
```bash
curl --request POST \
--url http://{address}/api/v1/folders/{folder_id}/commits \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"message": "update config files",
"files": [
{"file_id": "file_uuid", "file_name": "config.json", "operation": "modify", "content": "{\"key\": \"value\"}"},
{"file_id": "file_uuid", "file_name": "readme.md", "operation": "add", "content": "# New README"}
]
}'
```
##### Request parameters
- `"message"`: (*Body parameter*), `string`, *Required*
The commit message describing the changes.
- `"files"`: (*Body parameter*), `list[object]`, *Required*
Each file change object supports the following fields:
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `file_id` | `string` | Yes | The file ID |
| `file_name` | `string` | Only for add/rename | The file name |
| `operation` | `string` | Yes | `"add"`, `"modify"`, `"delete"`, or `"rename"` |
| `content` | `string` | Only for add/modify | The file content |
| `old_name` | `string` | Only for rename | The old file name |
| `new_name` | `string` | Only for rename | The new file name |
#### Response
Success:
```json
{
"code": 0,
"data": {
"id": "commit_uuid",
"folder_id": "folder_uuid",
"parent_id": null,
"message": "update config files",
"author_id": "user_uuid",
"file_count": 2,
"tree_state": "{\"file_uuid\": {\"hash\": \"abcd1234\", \"location\": \".objects/abcd1234\", \"name\": \"config.json\", \"size\": 1024, \"status\": \"1\", \"parent_id\": \"folder_uuid\"}}",
"create_time": 1718200000000
}
}
```
:::note
`tree_state` is a JSON string containing a flat map of file entries. Each entry includes `parent_id` to track which sub-folder the file belonged to at commit time. Sub-folders are inferred from `parent_id` values.
:::
Failure:
```json
{
"code": 101,
"message": "required argument are missing: message"
}
```
---
### List commits
**GET** `/api/v1/folders/{folder_id}/commits`
Lists all commits for the specified folder with pagination.
Also available at:
- `/api/v1/workspace/{workspace_id}/commits`
- `/api/v1/datasets/{dataset_id}/commits`
#### Request
- Method: GET
- URL: `/api/v1/folders/{folder_id}/commits`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Query:
- `'page'`: `int` (optional, default: 1)
- `'page_size'`: `int` (optional, default: 15)
- `'order_by'`: `string` (optional, default: `"create_time"`)
- `'desc'`: `bool` (optional, default: `true`)
##### Request example
```bash
curl --request GET \
--url 'http://{address}/api/v1/folders/{folder_id}/commits?page=1&page_size=15' \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
- `"page"`: (*Query parameter*), `int`, *Optional*
Page number. Defaults to 1.
- `"page_size"`: (*Query parameter*), `int`, *Optional*
Number of items per page. Defaults to 15.
- `"order_by"`: (*Query parameter*), `string`, *Optional*
Sort field. Defaults to `"create_time"`.
- `"desc"`: (*Query parameter*), `bool`, *Optional*
Sort descending. Defaults to `true`.
#### Response
Success:
```json
{
"code": 0,
"data": {
"total": 2,
"page": 1,
"page_size": 15,
"commits": [
{
"id": "commit_uuid",
"folder_id": "folder_uuid",
"parent_id": null,
"message": "first commit",
"author_id": "user_uuid",
"file_count": 3,
"create_time": 1718200000000
}
]
}
}
```
---
### Get commit
**GET** `/api/v1/folders/{folder_id}/commits/{commit_id}`
Retrieves the details of a specific commit, including its file changes.
Also available at:
- `/api/v1/workspace/{workspace_id}/commits/{commit_id}`
- `/api/v1/datasets/{dataset_id}/commits/{commit_id}`
#### Request
- Method: GET
- URL: `/api/v1/folders/{folder_id}/commits/{commit_id}`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url http://{address}/api/v1/folders/{folder_id}/commits/{commit_id} \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
- `"folder_id"`: (*Path parameter*), `string`, *Required*
The folder ID.
- `"commit_id"`: (*Path parameter*), `string`, *Required*
The commit ID.
#### Response
Success:
```json
{
"code": 0,
"data": {
"id": "commit_uuid",
"folder_id": "folder_uuid",
"parent_id": null,
"message": "added config files",
"author_id": "user_uuid",
"file_count": 2,
"create_time": 1718200000000,
"files": [
{
"file_id": "file_uuid",
"operation": "add",
"old_hash": null,
"new_hash": "abcd1234",
"old_name": null,
"new_name": null
}
]
}
}
```
Failure:
```json
{
"code": 102,
"message": "Commit not found in workspace"
}
```
---
### List commit files
**GET** `/api/v1/folders/{folder_id}/commits/{commit_id}/files`
Lists the file changes associated with a specific commit.
Also available at:
- `/api/v1/workspace/{workspace_id}/commits/{commit_id}/files`
- `/api/v1/datasets/{dataset_id}/commits/{commit_id}/files`
#### Request
- Method: GET
- URL: `/api/v1/folders/{folder_id}/commits/{commit_id}/files`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url http://{address}/api/v1/folders/{folder_id}/commits/{commit_id}/files \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
#### Response
Success:
```json
{
"code": 0,
"data": [
{
"id": "item_uuid",
"file_id": "file_uuid",
"operation": "add",
"old_hash": null,
"new_hash": "abcd1234",
"old_location": null,
"new_location": ".objects/abcd1234",
"old_name": null,
"new_name": null
}
]
}
```
---
### Diff commits
**GET** `/api/v1/folders/{folder_id}/commits/diff?from={commit_id}&to={commit_id}`
Compares two commits and returns the differences.
Also available at:
- `/api/v1/workspace/{workspace_id}/commits/diff?from=...&to=...`
- `/api/v1/datasets/{dataset_id}/commits/diff?from=...&to=...`
#### Request
- Method: GET
- URL: `/api/v1/folders/{folder_id}/commits/diff`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Query:
- `'from'`: `string` (required)
The source commit ID.
- `'to'`: `string` (required)
The target commit ID.
##### Request example
```bash
curl --request GET \
--url 'http://{address}/api/v1/folders/{folder_id}/commits/diff?from=from_commit_id&to=to_commit_id' \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
- `"from"`: (*Query parameter*), `string`, *Required*
The source commit ID.
- `"to"`: (*Query parameter*), `string`, *Required*
The target commit ID.
#### Response
Success:
```json
{
"code": 0,
"data": [
{
"file_id": "file_uuid",
"file_name": "config.json",
"operation": "modify",
"old_hash": "abc123",
"new_hash": "def456",
"old_location": ".objects/abc123",
"new_location": ".objects/def456"
}
]
}
```
Failure:
```json
{
"code": 102,
"message": "Commit not found in workspace"
}
```
---
### Get uncommitted changes
**GET** `/api/v1/folders/{folder_id}/changes`
Returns the uncommitted changes for the specified folder (similar to `git status`).
Also available at:
- `/api/v1/workspace/{workspace_id}/changes`
- `/api/v1/datasets/{dataset_id}/changes`
#### Request
- Method: GET
- URL: `/api/v1/folders/{folder_id}/changes`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url http://{address}/api/v1/folders/{folder_id}/changes \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
#### Response
Success:
```json
{
"code": 0,
"data": [
{
"file_id": "file_uuid",
"file_name": "new.txt",
"operation": "add"
},
{
"file_id": "file_uuid",
"file_name": "config.json",
"operation": "modify"
},
{
"file_id": "file_uuid",
"file_name": "old.md",
"operation": "delete"
}
]
}
```
---
### Get commit tree
**GET** `/api/v1/folders/{folder_id}/commits/{commit_id}/tree`
Retrieves the full folder tree snapshot as it existed at a specific commit.
Also available at:
- `/api/v1/workspace/{workspace_id}/commits/{commit_id}/tree`
- `/api/v1/datasets/{dataset_id}/commits/{commit_id}/tree`
#### Request
- Method: GET
- URL: `/api/v1/folders/{folder_id}/commits/{commit_id}/tree`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url http://{address}/api/v1/folders/{folder_id}/commits/{commit_id}/tree \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
#### Response
Success:
```json
{
"code": 0,
"data": {
"id": "folder_uuid",
"name": "workspace_name",
"type": "folder",
"children": [
{
"id": "file_uuid",
"name": "config.json",
"type": "file",
"hash": "abcd1234",
"size": 1024,
"status": "1",
"location": ".objects/abcd1234"
},
{
"id": "sub_folder_uuid",
"name": "sub_folder_name",
"type": "folder",
"children": [
{
"id": "file_uuid_2",
"name": "nested.txt",
"type": "file",
"hash": "ef5678",
"size": 512,
"status": "1",
"location": ".objects/ef5678"
}
]
}
]
}
}
```
---
### Get commit file content
**GET** `/api/v1/folders/{folder_id}/commits/{commit_id}/files/{file_id}/content`
Retrieves the file content as it existed at a specific commit.
Also available at:
- `/api/v1/workspace/{workspace_id}/commits/{commit_id}/files/{file_id}/content`
- `/api/v1/datasets/{dataset_id}/commits/{commit_id}/files/{file_id}/content`
#### Request
- Method: GET
- URL: `/api/v1/folders/{folder_id}/commits/{commit_id}/files/{file_id}/content`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url http://{address}/api/v1/folders/{folder_id}/commits/{commit_id}/files/{file_id}/content \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
#### Response
Success:
```json
{
"code": 0,
"data": {
"content": "file content as it existed in that commit"
}
}
```
Failure:
```json
{
"code": 102,
"message": "File not found in this commit"
}
```
---
### Get file version history
**GET** `/api/v1/files/{file_id}/versions`
Returns the version history for a specific file across all commits.
#### Request
- Method: GET
- URL: `/api/v1/files/{file_id}/versions`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url http://{address}/api/v1/files/{file_id}/versions \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
#### Response
Success:
```json
{
"code": 0,
"data": [
{
"commit_id": "commit_uuid",
"operation": "modify",
"hash": "def456",
"create_time": 1718200000000,
"message": "updated file"
},
{
"commit_id": "commit_uuid",
"operation": "add",
"hash": "abc123",
"create_time": 1718100000000,
"message": "initial commit"
}
]
}
```
---
## SEARCH APP MANAGEMENT
### Create search app
**POST** `/api/v1/searches`
Creates a search app.
#### Request
- Method: POST
- URL: `/api/v1/searches`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
```json
{
"name": "my_search_app",
"description": "optional description"
}
```

View File

@@ -153,6 +153,8 @@ func InitDB() error {
&entity.IngestionTaskLog{},
&entity.IngestionTasklet{},
&entity.IngestionTaskletLog{},
&entity.FileCommit{},
&entity.FileCommitItem{},
}
for _, m := range dataModels {

View File

@@ -243,6 +243,20 @@ func (dao *FileDAO) ListAllFilesByParentID(parentID string) ([]*entity.File, err
return files, err
}
// ListNonFolderByParentID lists non-folder files directly under a parent folder.
func (dao *FileDAO) ListNonFolderByParentID(parentID string) ([]*entity.File, error) {
var files []*entity.File
err := DB.Where("parent_id = ? AND id != ? AND type != ?", parentID, parentID, "folder").Find(&files).Error
return files, err
}
// ListFolderByParentID lists sub-folders directly under a parent folder.
func (dao *FileDAO) ListFolderByParentID(parentID string) ([]*entity.File, error) {
var files []*entity.File
err := DB.Where("parent_id = ? AND type = ?", parentID, "folder").Find(&files).Error
return files, err
}
// GetByParentIDAndName gets file by parent folder ID and name
func (dao *FileDAO) GetByParentIDAndName(parentID, name string) (*entity.File, error) {
var file entity.File

154
internal/dao/file_commit.go Normal file
View File

@@ -0,0 +1,154 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package dao
import (
"ragflow/internal/entity"
"strings"
"github.com/google/uuid"
)
// FileCommitDAO file commit data access object
type FileCommitDAO struct{}
// NewFileCommitDAO create file commit DAO
func NewFileCommitDAO() *FileCommitDAO {
return &FileCommitDAO{}
}
// GetByID gets a file commit by ID
func (dao *FileCommitDAO) GetByID(id string) (*entity.FileCommit, error) {
var commit entity.FileCommit
err := DB.Where("id = ?", id).First(&commit).Error
if err != nil {
return nil, err
}
return &commit, nil
}
// Create creates a new file commit record
func (dao *FileCommitDAO) Create(commit *entity.FileCommit) error {
return DB.Create(commit).Error
}
// UpdateTreeState updates the tree_state field for a commit
func (dao *FileCommitDAO) UpdateTreeState(id string, treeState string) error {
return DB.Model(&entity.FileCommit{}).Where("id = ?", id).Update("tree_state", treeState).Error
}
// GetLatestByFolderID gets the latest (most recent) commit for a folder
func (dao *FileCommitDAO) GetLatestByFolderID(folderID string) (*entity.FileCommit, error) {
var commit entity.FileCommit
err := DB.Where("folder_id = ?", folderID).
Order("create_time DESC").
First(&commit).Error
if err != nil {
return nil, err
}
return &commit, nil
}
// allowedFileCommitSorts is the whitelist of safe column names for
// ListByFolderID's orderBy parameter to prevent SQL injection.
var allowedFileCommitSorts = map[string]string{
"create_time": "create_time",
"create_date": "create_date",
"update_time": "update_time",
"update_date": "update_date",
}
// ListByFolderID lists commits for a folder with pagination
func (dao *FileCommitDAO) ListByFolderID(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error) {
var commits []*entity.FileCommit
var total int64
query := DB.Model(&entity.FileCommit{}).Where("folder_id = ?", folderID)
if err := query.Count(&total).Error; err != nil {
return nil, 0, err
}
// Sanitize orderBy against whitelist; fall back to create_time.
safeCol, ok := allowedFileCommitSorts[orderBy]
if !ok {
safeCol = "create_time"
}
orderDirection := "DESC"
if !desc {
orderDirection = "ASC"
}
orderClause := safeCol + " " + orderDirection
if page > 0 && pageSize > 0 {
offset := (page - 1) * pageSize
if err := query.Order(orderClause).Offset(offset).Limit(pageSize).Find(&commits).Error; err != nil {
return nil, 0, err
}
} else {
if err := query.Order(orderClause).Find(&commits).Error; err != nil {
return nil, 0, err
}
}
return commits, total, nil
}
// FileCommitItemDAO file commit item data access object
type FileCommitItemDAO struct{}
// NewFileCommitItemDAO create file commit item DAO
func NewFileCommitItemDAO() *FileCommitItemDAO {
return &FileCommitItemDAO{}
}
// Create creates a new file commit item record
func (dao *FileCommitItemDAO) Create(item *entity.FileCommitItem) error {
return DB.Create(item).Error
}
// ListByCommitID lists all items for a commit
func (dao *FileCommitItemDAO) ListByCommitID(commitID string) ([]*entity.FileCommitItem, error) {
var items []*entity.FileCommitItem
err := DB.Where("commit_id = ?", commitID).Order("create_time ASC").Find(&items).Error
return items, err
}
// ListByFileID lists all commit items for a specific file (for version history)
func (dao *FileCommitItemDAO) ListByFileID(fileID string) ([]*entity.FileCommitItem, error) {
var items []*entity.FileCommitItem
err := DB.Where("file_id = ?", fileID).Order("create_time DESC").Find(&items).Error
return items, err
}
// GetByCommitIDAndFileID gets a single commit item by commit and file ID
func (dao *FileCommitItemDAO) GetByCommitIDAndFileID(commitID, fileID string) (*entity.FileCommitItem, error) {
var item entity.FileCommitItem
err := DB.Where("commit_id = ? AND file_id = ?", commitID, fileID).First(&item).Error
if err != nil {
return nil, err
}
return &item, nil
}
// generateCommitUUID generates a UUID for commit/commit_item IDs
func generateCommitUUID() string {
id := uuid.New().String()
return strings.ReplaceAll(id, "-", "")
}

View File

@@ -0,0 +1,108 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package entity
// FileCommit represents a snapshot commit for a workspace folder (like git commit).
type FileCommit struct {
ID string `gorm:"column:id;primaryKey;size:32" json:"id"`
FolderID string `gorm:"column:folder_id;size:32;not null;index" json:"folder_id"`
ParentID *string `gorm:"column:parent_id;size:32;index" json:"parent_id,omitempty"`
Message string `gorm:"column:message;size:512;not null;default:''" json:"message"`
AuthorID string `gorm:"column:author_id;size:32;not null;index" json:"author_id"`
FileCount int `gorm:"column:file_count;default:0" json:"file_count"`
TreeState *string `gorm:"column:tree_state;type:longtext" json:"tree_state,omitempty"`
BaseModel
}
// TableName returns the table name for FileCommit model.
func (FileCommit) TableName() string {
return "file_commit"
}
// FileCommitItem represents a single file change within a commit.
type FileCommitItem struct {
ID string `gorm:"column:id;primaryKey;size:32" json:"id"`
CommitID string `gorm:"column:commit_id;size:32;not null;uniqueIndex:idx_commit_file" json:"commit_id"`
FileID string `gorm:"column:file_id;size:32;not null;uniqueIndex:idx_commit_file" json:"file_id"`
Operation string `gorm:"column:operation;size:16;not null;index" json:"operation"`
OldHash *string `gorm:"column:old_hash;size:64;index" json:"old_hash,omitempty"`
NewHash *string `gorm:"column:new_hash;size:64;index" json:"new_hash,omitempty"`
OldLocation *string `gorm:"column:old_location;size:255" json:"old_location,omitempty"`
NewLocation *string `gorm:"column:new_location;size:255" json:"new_location,omitempty"`
OldName *string `gorm:"column:old_name;size:255" json:"old_name,omitempty"`
NewName *string `gorm:"column:new_name;size:255" json:"new_name,omitempty"`
BaseModel
}
// TableName returns the table name for FileCommitItem model.
func (FileCommitItem) TableName() string {
return "file_commit_item"
}
// TreeNode represents a file node in the commit tree state snapshot.
type TreeNode struct {
ID string `json:"id"`
Name string `json:"name"`
Type string `json:"type"` // "file" or "folder"
Hash string `json:"hash,omitempty"`
Location string `json:"location,omitempty"`
Size int64 `json:"size,omitempty"`
Status string `json:"status"` // "1" = active, "0" = deleted
Children []*TreeNode `json:"children,omitempty"`
}
// FileChange represents a file change in a commit request.
type FileChange struct {
FileID string `json:"file_id"`
FileName string `json:"file_name"`
Operation string `json:"operation"` // "add", "modify", "delete", "rename"
Content string `json:"content,omitempty"`
OldName string `json:"old_name,omitempty"`
NewName string `json:"new_name,omitempty"`
}
// CommitResponse is the API response for a commit.
type CommitResponse struct {
ID string `json:"id"`
FolderID string `json:"folder_id"`
ParentID *string `json:"parent_id,omitempty"`
Message string `json:"message"`
AuthorID string `json:"author_id"`
FileCount int `json:"file_count"`
TreeState *string `json:"tree_state,omitempty"`
CreateTime *int64 `json:"create_time,omitempty"`
}
// DiffEntry represents a single diff entry between two commits.
type DiffEntry struct {
FileID string `json:"file_id"`
FileName string `json:"file_name"`
Operation string `json:"operation"`
OldHash *string `json:"old_hash,omitempty"`
NewHash *string `json:"new_hash,omitempty"`
OldLocation *string `json:"old_location,omitempty"`
NewLocation *string `json:"new_location,omitempty"`
}
// VersionEntry represents a single version in a file's version history.
type VersionEntry struct {
CommitID string `json:"commit_id"`
Operation string `json:"operation"`
Hash string `json:"hash"`
CreateTime *int64 `json:"create_time,omitempty"`
Message string `json:"message"`
}

View File

@@ -0,0 +1,584 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package handler
import (
"fmt"
"net/http"
"ragflow/internal/common"
"ragflow/internal/dao"
"ragflow/internal/entity"
"strconv"
"github.com/gin-gonic/gin"
)
// fileCommitService is the consumer-side interface for FileCommitHandler's service dependency.
type fileCommitService interface {
CreateCommit(folderID, authorID, message string, changes []entity.FileChange) (*entity.FileCommit, error)
ListCommits(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error)
GetCommit(commitID string) (*entity.FileCommit, error)
ListCommitFiles(commitID string) ([]*entity.FileCommitItem, error)
DiffCommits(fromID, toID string) ([]entity.DiffEntry, error)
GetUncommittedChanges(folderID string) ([]entity.DiffEntry, error)
GetCommitTree(commitID string) (map[string]interface{}, error)
GetCommitFileContent(folderID, commitID, fileID string) ([]byte, error)
GetFileVersionHistory(fileID string) ([]entity.VersionEntry, error)
}
// FileCommitHandler file commit handler
type FileCommitHandler struct {
commitService fileCommitService
kbDAO *dao.KnowledgebaseDAO
fileDAO *dao.FileDAO
}
// NewFileCommitHandler create file commit handler
func NewFileCommitHandler(commitService fileCommitService) *FileCommitHandler {
return &FileCommitHandler{
commitService: commitService,
kbDAO: dao.NewKnowledgebaseDAO(),
fileDAO: dao.NewFileDAO(),
}
}
// ResolveFolderID resolves a resource ID (dataset/memory/skill) to its folder_id.
// entityType is the plural resource name (e.g. "datasets", "memories", "skills").
func (h *FileCommitHandler) ResolveFolderID(entityType, entityID string) (string, error) {
switch entityType {
case "datasets":
return h.resolveDatasetFolderID(entityID)
default:
return "", fmt.Errorf("unsupported entity type: %s", entityType)
}
}
// CommitFolderResolver returns a Gin middleware that resolves an entity ID
// (e.g. dataset_id, memory_id, skill_id) to a folder_id and injects it into
// c.Params so that the standard commit handlers can read it via c.Param("folder_id").
//
// entityType must match the key used in ResolveFolderID (e.g. "datasets").
// urlParam is the gin URL param name (e.g. "dataset_id").
func CommitFolderResolver(h *FileCommitHandler, entityType, urlParam string) gin.HandlerFunc {
return func(c *gin.Context) {
id := c.Param(urlParam)
if id == "" {
jsonError(c, common.CodeParamError, fmt.Sprintf("%s is required", urlParam))
c.Abort()
return
}
folderID, err := h.ResolveFolderID(entityType, id)
if err != nil {
jsonError(c, common.CodeNotFound, fmt.Sprintf("%s folder not found", entityType))
c.Abort()
return
}
c.Params = append(c.Params, gin.Param{Key: "folder_id", Value: folderID})
c.Next()
}
}
func (h *FileCommitHandler) resolveDatasetFolderID(datasetID string) (string, error) {
kb, err := h.kbDAO.GetByID(datasetID)
if err != nil {
return "", err
}
files := h.fileDAO.Query(kb.Name, "")
for _, f := range files {
if f.SourceType == string(entity.FileSourceKnowledgebase) && f.Type == "folder" && f.TenantID == kb.TenantID {
return f.ID, nil
}
}
return "", common.ErrNotFound
}
// CreateCommitRequest represents the request body for creating a commit
type CreateCommitRequest struct {
Message string `json:"message" binding:"required"`
Files []entity.FileChange `json:"files" binding:"required"`
}
// CreateCommit creates a new commit for a workspace folder
// @Summary Create Commit
// @Description Create a new commit with file changes for a workspace folder
// @Tags file_commit
// @Accept json
// @Produce json
// @Param folder_id path string true "workspace folder ID"
// @Param body body CreateCommitRequest true "commit request"
// @Success 200 {object} map[string]interface{}
// @Router /api/v1/workspaces/{folder_id}/commits [post]
func (h *FileCommitHandler) CreateCommit(c *gin.Context) {
user, errorCode, errorMessage := GetUser(c)
if errorCode != common.CodeSuccess {
jsonError(c, errorCode, errorMessage)
return
}
folderID := c.Param("folder_id")
if folderID == "" {
jsonError(c, common.CodeParamError, "folder_id is required")
return
}
var req CreateCommitRequest
if err := c.ShouldBindJSON(&req); err != nil {
jsonError(c, common.CodeBadRequest, err.Error())
return
}
commit, err := h.commitService.CreateCommit(folderID, user.ID, req.Message, req.Files)
if err != nil {
jsonInternalError(c, err)
return
}
ct := int64(0)
if commit.CreateTime != nil {
ct = *commit.CreateTime
}
c.JSON(http.StatusOK, gin.H{
"code": common.CodeSuccess,
"data": entity.CommitResponse{
ID: commit.ID,
FolderID: commit.FolderID,
ParentID: commit.ParentID,
Message: commit.Message,
AuthorID: commit.AuthorID,
FileCount: commit.FileCount,
TreeState: commit.TreeState,
CreateTime: &ct,
},
"message": common.CodeSuccess.Message(),
})
}
// ListCommits lists commits for a workspace folder
// @Summary List Commits
// @Description List all commits for a workspace folder with pagination
// @Tags file_commit
// @Accept json
// @Produce json
// @Param folder_id path string true "workspace folder ID"
// @Param page query int false "page number"
// @Param page_size query int false "items per page"
// @Param order_by query string false "order by field"
// @Param desc query bool false "descending order"
// @Success 200 {object} map[string]interface{}
// @Router /api/v1/workspaces/{folder_id}/commits [get]
func (h *FileCommitHandler) ListCommits(c *gin.Context) {
_, errorCode, errorMessage := GetUser(c)
if errorCode != common.CodeSuccess {
jsonError(c, errorCode, errorMessage)
return
}
folderID := c.Param("folder_id")
if folderID == "" {
jsonError(c, common.CodeParamError, "folder_id is required")
return
}
page := 1
if pageStr := c.Query("page"); pageStr != "" {
if p, err := strconv.Atoi(pageStr); err == nil && p >= 1 {
page = p
}
}
pageSize := 15
if psStr := c.Query("page_size"); psStr != "" {
if ps, err := strconv.Atoi(psStr); err == nil {
if ps < 1 {
ps = 15
}
pageSize = ps
}
}
orderBy := c.DefaultQuery("order_by", "create_time")
desc := true
if descStr := c.Query("desc"); descStr != "" {
desc = descStr != "false"
}
commits, total, err := h.commitService.ListCommits(folderID, page, pageSize, orderBy, desc)
if err != nil {
jsonInternalError(c, err)
return
}
var commitList []entity.CommitResponse
for _, commit := range commits {
var ct int64
if commit.CreateTime != nil {
ct = *commit.CreateTime
}
commitList = append(commitList, entity.CommitResponse{
ID: commit.ID,
FolderID: commit.FolderID,
ParentID: commit.ParentID,
Message: commit.Message,
AuthorID: commit.AuthorID,
FileCount: commit.FileCount,
CreateTime: &ct,
})
}
c.JSON(http.StatusOK, gin.H{
"code": common.CodeSuccess,
"data": gin.H{
"total": total,
"page": page,
"page_size": pageSize,
"commits": commitList,
},
"message": common.CodeSuccess.Message(),
})
}
// GetCommit gets details of a single commit
// @Summary Get Commit
// @Description Get details of a single commit including file changes
// @Tags file_commit
// @Accept json
// @Produce json
// @Param folder_id path string true "workspace folder ID"
// @Param commit_id path string true "commit ID"
// @Success 200 {object} map[string]interface{}
// @Router /api/v1/workspaces/{folder_id}/commits/{commit_id} [get]
func (h *FileCommitHandler) GetCommit(c *gin.Context) {
_, errorCode, errorMessage := GetUser(c)
if errorCode != common.CodeSuccess {
jsonError(c, errorCode, errorMessage)
return
}
folderID := c.Param("folder_id")
commitID := c.Param("commit_id")
if commitID == "" {
jsonError(c, common.CodeParamError, "commit_id is required")
return
}
commit, err := h.commitService.GetCommit(commitID)
if err != nil {
jsonError(c, common.CodeNotFound, "Commit not found")
return
}
if commit.FolderID != folderID {
jsonError(c, common.CodeNotFound, "Commit not found in workspace")
return
}
items, err := h.commitService.ListCommitFiles(commitID)
if err != nil {
items = []*entity.FileCommitItem{}
}
var ct int64
if commit.CreateTime != nil {
ct = *commit.CreateTime
}
c.JSON(http.StatusOK, gin.H{
"code": common.CodeSuccess,
"data": gin.H{
"id": commit.ID,
"folder_id": commit.FolderID,
"parent_id": commit.ParentID,
"message": commit.Message,
"author_id": commit.AuthorID,
"file_count": commit.FileCount,
"create_time": ct,
"files": items,
},
"message": common.CodeSuccess.Message(),
})
}
// ListCommitFiles lists all file changes in a commit
// @Summary List Commit Files
// @Description List all file changes in a given commit
// @Tags file_commit
// @Accept json
// @Produce json
// @Param folder_id path string true "workspace folder ID"
// @Param commit_id path string true "commit ID"
// @Success 200 {object} map[string]interface{}
// @Router /api/v1/workspaces/{folder_id}/commits/{commit_id}/files [get]
func (h *FileCommitHandler) ListCommitFiles(c *gin.Context) {
_, errorCode, errorMessage := GetUser(c)
if errorCode != common.CodeSuccess {
jsonError(c, errorCode, errorMessage)
return
}
folderID := c.Param("folder_id")
commitID := c.Param("commit_id")
if commitID == "" {
jsonError(c, common.CodeParamError, "commit_id is required")
return
}
commit, err := h.commitService.GetCommit(commitID)
if err != nil {
jsonError(c, common.CodeNotFound, "Commit not found")
return
}
if commit.FolderID != folderID {
jsonError(c, common.CodeNotFound, "Commit not found in workspace")
return
}
items, err := h.commitService.ListCommitFiles(commitID)
if err != nil {
jsonInternalError(c, err)
return
}
c.JSON(http.StatusOK, gin.H{
"code": common.CodeSuccess,
"data": items,
"message": common.CodeSuccess.Message(),
})
}
// DiffCommits compares two commits
// @Summary Diff Commits
// @Description Compare two commits and return the diff
// @Tags file_commit
// @Accept json
// @Produce json
// @Param folder_id path string true "workspace folder ID"
// @Param from query string true "from commit ID"
// @Param to query string true "to commit ID"
// @Success 200 {object} map[string]interface{}
// @Router /api/v1/workspaces/{folder_id}/commits/diff [get]
func (h *FileCommitHandler) DiffCommits(c *gin.Context) {
_, errorCode, errorMessage := GetUser(c)
if errorCode != common.CodeSuccess {
jsonError(c, errorCode, errorMessage)
return
}
folderID := c.Param("folder_id")
fromID := c.Query("from")
toID := c.Query("to")
if fromID == "" || toID == "" {
jsonError(c, common.CodeParamError, "'from' and 'to' query parameters are required")
return
}
fromCommit, err := h.commitService.GetCommit(fromID)
if err != nil {
jsonError(c, common.CodeNotFound, "Commit not found")
return
}
toCommit, err := h.commitService.GetCommit(toID)
if err != nil {
jsonError(c, common.CodeNotFound, "Commit not found")
return
}
if fromCommit.FolderID != folderID || toCommit.FolderID != folderID {
jsonError(c, common.CodeNotFound, "Commit not found in workspace")
return
}
diff, err := h.commitService.DiffCommits(fromID, toID)
if err != nil {
jsonInternalError(c, err)
return
}
c.JSON(http.StatusOK, gin.H{
"code": common.CodeSuccess,
"data": diff,
"message": common.CodeSuccess.Message(),
})
}
// GetUncommittedChanges gets uncommitted changes
// @Summary Get Uncommitted Changes
// @Description Get uncommitted changes for a workspace folder (like git status)
// @Tags file_commit
// @Accept json
// @Produce json
// @Param folder_id path string true "workspace folder ID"
// @Success 200 {object} map[string]interface{}
// @Router /api/v1/workspaces/{folder_id}/changes [get]
func (h *FileCommitHandler) GetUncommittedChanges(c *gin.Context) {
_, errorCode, errorMessage := GetUser(c)
if errorCode != common.CodeSuccess {
jsonError(c, errorCode, errorMessage)
return
}
folderID := c.Param("folder_id")
if folderID == "" {
jsonError(c, common.CodeParamError, "folder_id is required")
return
}
changes, err := h.commitService.GetUncommittedChanges(folderID)
if err != nil {
jsonInternalError(c, err)
return
}
c.JSON(http.StatusOK, gin.H{
"code": common.CodeSuccess,
"data": changes,
"message": common.CodeSuccess.Message(),
})
}
// GetCommitTree gets the folder tree snapshot for a commit
// @Summary Get Commit Tree
// @Description Get the folder tree snapshot for a given commit
// @Tags file_commit
// @Accept json
// @Produce json
// @Param folder_id path string true "workspace folder ID"
// @Param commit_id path string true "commit ID"
// @Success 200 {object} map[string]interface{}
// @Router /api/v1/workspaces/{folder_id}/commits/{commit_id}/tree [get]
func (h *FileCommitHandler) GetCommitTree(c *gin.Context) {
_, errorCode, errorMessage := GetUser(c)
if errorCode != common.CodeSuccess {
jsonError(c, errorCode, errorMessage)
return
}
folderID := c.Param("folder_id")
commitID := c.Param("commit_id")
if commitID == "" {
jsonError(c, common.CodeParamError, "commit_id is required")
return
}
commit, err := h.commitService.GetCommit(commitID)
if err != nil {
jsonError(c, common.CodeNotFound, "Commit not found")
return
}
if commit.FolderID != folderID {
jsonError(c, common.CodeNotFound, "Commit not found in workspace")
return
}
tree, err := h.commitService.GetCommitTree(commitID)
if err != nil {
jsonInternalError(c, err)
return
}
c.JSON(http.StatusOK, gin.H{
"code": common.CodeSuccess,
"data": tree,
"message": common.CodeSuccess.Message(),
})
}
// GetCommitFileContent gets file content as it existed in a given commit
// @Summary Get Commit File Content
// @Description Get file content as it existed in a specific commit
// @Tags file_commit
// @Accept json
// @Produce json
// @Param folder_id path string true "workspace folder ID"
// @Param commit_id path string true "commit ID"
// @Param file_id path string true "file ID"
// @Success 200 {object} map[string]interface{}
// @Router /api/v1/workspaces/{folder_id}/commits/{commit_id}/files/{file_id}/content [get]
func (h *FileCommitHandler) GetCommitFileContent(c *gin.Context) {
_, errorCode, errorMessage := GetUser(c)
if errorCode != common.CodeSuccess {
jsonError(c, errorCode, errorMessage)
return
}
folderID := c.Param("folder_id")
commitID := c.Param("commit_id")
fileID := c.Param("file_id")
if folderID == "" || commitID == "" || fileID == "" {
jsonError(c, common.CodeParamError, "folder_id, commit_id, and file_id are required")
return
}
commit, err := h.commitService.GetCommit(commitID)
if err != nil {
jsonError(c, common.CodeNotFound, "Commit not found")
return
}
if commit.FolderID != folderID {
jsonError(c, common.CodeNotFound, "Commit not found in workspace")
return
}
content, err := h.commitService.GetCommitFileContent(folderID, commitID, fileID)
if err != nil {
jsonError(c, common.CodeNotFound, err.Error())
return
}
c.JSON(http.StatusOK, gin.H{
"code": common.CodeSuccess,
"data": gin.H{
"content": string(content),
},
"message": common.CodeSuccess.Message(),
})
}
// GetFileVersionHistory gets version history for a specific file
// @Summary Get File Version History
// @Description Get version history for a specific file across all commits
// @Tags file_commit
// @Accept json
// @Produce json
// @Param file_id path string true "file ID"
// @Success 200 {object} map[string]interface{}
// @Router /api/v1/files/{file_id}/versions [get]
func (h *FileCommitHandler) GetFileVersionHistory(c *gin.Context) {
_, errorCode, errorMessage := GetUser(c)
if errorCode != common.CodeSuccess {
jsonError(c, errorCode, errorMessage)
return
}
fileID := c.Param("id")
if fileID == "" {
jsonError(c, common.CodeParamError, "file_id is required")
return
}
versions, err := h.commitService.GetFileVersionHistory(fileID)
if err != nil {
jsonInternalError(c, err)
return
}
c.JSON(http.StatusOK, gin.H{
"code": common.CodeSuccess,
"data": versions,
"message": common.CodeSuccess.Message(),
})
}

View File

@@ -0,0 +1,413 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package handler
import (
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"ragflow/internal/common"
"ragflow/internal/entity"
"github.com/gin-gonic/gin"
)
// mockFileCommitSvc implements FileCommitServiceInterface for testing
type mockFileCommitSvc struct {
createCommitFn func(folderID, authorID, message string, changes []entity.FileChange) (*entity.FileCommit, error)
listCommitsFn func(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error)
getCommitFn func(commitID string) (*entity.FileCommit, error)
listCommitFilesFn func(commitID string) ([]*entity.FileCommitItem, error)
diffCommitsFn func(fromID, toID string) ([]entity.DiffEntry, error)
getUncommittedChangesFn func(folderID string) ([]entity.DiffEntry, error)
getCommitTreeFn func(commitID string) (map[string]interface{}, error)
getCommitFileContentFn func(folderID, commitID, fileID string) ([]byte, error)
getFileVersionHistoryFn func(fileID string) ([]entity.VersionEntry, error)
}
func (m *mockFileCommitSvc) CreateCommit(folderID, authorID, message string, changes []entity.FileChange) (*entity.FileCommit, error) {
if m.createCommitFn != nil {
return m.createCommitFn(folderID, authorID, message, changes)
}
return &entity.FileCommit{
ID: "commit-1",
FolderID: folderID,
Message: message,
AuthorID: authorID,
FileCount: len(changes),
}, nil
}
func (m *mockFileCommitSvc) ListCommits(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error) {
if m.listCommitsFn != nil {
return m.listCommitsFn(folderID, page, pageSize, orderBy, desc)
}
now := int64(1718200000000)
return []*entity.FileCommit{
{ID: "c2", FolderID: folderID, Message: "second", AuthorID: "u1", FileCount: 1, BaseModel: entity.BaseModel{CreateTime: &now}},
{ID: "c1", FolderID: folderID, Message: "first", AuthorID: "u1", FileCount: 2, BaseModel: entity.BaseModel{CreateTime: &now}},
}, 2, nil
}
func (m *mockFileCommitSvc) GetCommit(commitID string) (*entity.FileCommit, error) {
if m.getCommitFn != nil {
return m.getCommitFn(commitID)
}
return &entity.FileCommit{ID: commitID, FolderID: "folder-1", Message: "test commit", AuthorID: "u1", FileCount: 1}, nil
}
func (m *mockFileCommitSvc) ListCommitFiles(commitID string) ([]*entity.FileCommitItem, error) {
if m.listCommitFilesFn != nil {
return m.listCommitFilesFn(commitID)
}
return []*entity.FileCommitItem{
{ID: "i1", CommitID: commitID, FileID: "f1", Operation: "add"},
}, nil
}
func (m *mockFileCommitSvc) DiffCommits(fromID, toID string) ([]entity.DiffEntry, error) {
if m.diffCommitsFn != nil {
return m.diffCommitsFn(fromID, toID)
}
return []entity.DiffEntry{
{FileID: "f1", FileName: "file.txt", Operation: "modify"},
}, nil
}
func (m *mockFileCommitSvc) GetUncommittedChanges(folderID string) ([]entity.DiffEntry, error) {
if m.getUncommittedChangesFn != nil {
return m.getUncommittedChangesFn(folderID)
}
return []entity.DiffEntry{
{FileID: "f1", FileName: "new.txt", Operation: "add"},
}, nil
}
func (m *mockFileCommitSvc) GetCommitTree(commitID string) (map[string]interface{}, error) {
if m.getCommitTreeFn != nil {
return m.getCommitTreeFn(commitID)
}
return map[string]interface{}{
"f1": map[string]interface{}{"name": "file.txt", "hash": "abc123", "size": 100, "status": "1"},
}, nil
}
func (m *mockFileCommitSvc) GetCommitFileContent(folderID, commitID, fileID string) ([]byte, error) {
if m.getCommitFileContentFn != nil {
return m.getCommitFileContentFn(folderID, commitID, fileID)
}
return []byte("file content"), nil
}
func (m *mockFileCommitSvc) GetFileVersionHistory(fileID string) ([]entity.VersionEntry, error) {
if m.getFileVersionHistoryFn != nil {
return m.getFileVersionHistoryFn(fileID)
}
now := int64(1718200000000)
return []entity.VersionEntry{
{CommitID: "c2", Operation: "modify", Hash: "def456", CreateTime: &now, Message: "updated"},
{CommitID: "c1", Operation: "add", Hash: "abc123", CreateTime: &now, Message: "initial"},
}, nil
}
func setupFileCommitTest(userID string) (*gin.Engine, *mockFileCommitSvc) {
mock := &mockFileCommitSvc{}
h := &FileCommitHandler{commitService: mock}
gin.SetMode(gin.TestMode)
r := gin.New()
r.Use(func(c *gin.Context) {
c.Set("user", &entity.User{ID: userID})
})
r.POST("/api/v1/folders/:folder_id/commits", h.CreateCommit)
r.GET("/api/v1/folders/:folder_id/commits", h.ListCommits)
r.GET("/api/v1/folders/:folder_id/commits/:commit_id", h.GetCommit)
r.GET("/api/v1/folders/:folder_id/commits/:commit_id/files", h.ListCommitFiles)
r.GET("/api/v1/folders/:folder_id/commits/diff", h.DiffCommits)
r.GET("/api/v1/folders/:folder_id/changes", h.GetUncommittedChanges)
r.GET("/api/v1/folders/:folder_id/commits/:commit_id/tree", h.GetCommitTree)
r.GET("/api/v1/folders/:folder_id/commits/:commit_id/files/:file_id/content", h.GetCommitFileContent)
r.GET("/api/v1/files/:id/versions", h.GetFileVersionHistory)
return r, mock
}
func setupFileCommitTestNoAuth() *gin.Engine {
h := &FileCommitHandler{}
gin.SetMode(gin.TestMode)
r := gin.New()
r.POST("/api/v1/folders/:folder_id/commits", h.CreateCommit)
return r
}
// ── Tests ────────────────────────────────────────────────────────────────
func TestFileCommit_CreateCommit_Success(t *testing.T) {
r, mock := setupFileCommitTest("user-1")
mock.createCommitFn = func(folderID, authorID, message string, changes []entity.FileChange) (*entity.FileCommit, error) {
if folderID != "folder-1" {
t.Errorf("expected folder-1, got %s", folderID)
}
if authorID != "user-1" {
t.Errorf("expected user-1, got %s", authorID)
}
if message != "initial commit" {
t.Errorf("expected 'initial commit', got %s", message)
}
if len(changes) != 1 || changes[0].FileID != "f1" {
t.Errorf("unexpected changes: %+v", changes)
}
now := int64(1718200000000)
return &entity.FileCommit{
ID: "commit-1", FolderID: folderID, Message: message,
AuthorID: authorID, FileCount: len(changes),
BaseModel: entity.BaseModel{CreateTime: &now},
}, nil
}
body := `{"message": "initial commit", "files": [{"file_id": "f1", "file_name": "test.txt", "operation": "add", "content": "hello"}]}`
w := httptest.NewRecorder()
req, _ := http.NewRequest("POST", "/api/v1/folders/folder-1/commits", strings.NewReader(body))
req.Header.Set("Content-Type", "application/json")
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatal(err)
}
if resp["code"] != float64(common.CodeSuccess) {
t.Errorf("expected code 0, got %v", resp["code"])
}
data, ok := resp["data"].(map[string]interface{})
if !ok {
t.Fatalf("expected data to be object, got %T", resp["data"])
}
if data["message"] != "initial commit" {
t.Errorf("expected 'initial commit', got %v", data["message"])
}
}
func TestFileCommit_CreateCommit_NoAuth(t *testing.T) {
r := setupFileCommitTestNoAuth()
body := `{"message": "test", "files": [{"file_id": "f1", "file_name": "t.txt", "operation": "add"}]}`
w := httptest.NewRecorder()
req, _ := http.NewRequest("POST", "/api/v1/folders/folder-1/commits", strings.NewReader(body))
req.Header.Set("Content-Type", "application/json")
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
// No auth middleware → code 401
if code, _ := resp["code"].(float64); code != float64(common.CodeUnauthorized) {
t.Errorf("expected unauthorized, got code %v", code)
}
}
func TestFileCommit_CreateCommit_InvalidJSON(t *testing.T) {
r, _ := setupFileCommitTest("user-1")
body := `{invalid json`
w := httptest.NewRecorder()
req, _ := http.NewRequest("POST", "/api/v1/folders/folder-1/commits", strings.NewReader(body))
req.Header.Set("Content-Type", "application/json")
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if code, _ := resp["code"].(float64); code != float64(common.CodeBadRequest) {
t.Errorf("expected bad request, got code %v", code)
}
}
func TestFileCommit_ListCommits_Success(t *testing.T) {
r, mock := setupFileCommitTest("user-1")
mock.listCommitsFn = func(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error) {
if folderID != "folder-1" {
t.Errorf("expected folder-1, got %s", folderID)
}
return []*entity.FileCommit{
{ID: "c2", FolderID: folderID, Message: "second", AuthorID: "u1", FileCount: 1},
{ID: "c1", FolderID: folderID, Message: "first", AuthorID: "u1", FileCount: 2},
}, 2, nil
}
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits?page=1&page_size=10", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeSuccess) {
t.Errorf("expected code 0, got %v", resp["code"])
}
data, _ := resp["data"].(map[string]interface{})
if total, _ := data["total"].(float64); total != 2 {
t.Errorf("expected total 2, got %v", total)
}
}
func TestFileCommit_GetCommit_Success(t *testing.T) {
r, _ := setupFileCommitTest("user-1")
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/commit-1", nil)
r.ServeHTTP(w, req)
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeSuccess) {
t.Errorf("expected success, got code %v: %s", resp["code"], resp["message"])
}
data, _ := resp["data"].(map[string]interface{})
if data["id"] != "commit-1" {
t.Errorf("expected commit-1, got %v", data["id"])
}
}
func TestFileCommit_GetCommit_NotFound(t *testing.T) {
r, mock := setupFileCommitTest("user-1")
mock.getCommitFn = func(commitID string) (*entity.FileCommit, error) {
return nil, common.ErrNotFound
}
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/missing", nil)
r.ServeHTTP(w, req)
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if code, _ := resp["code"].(float64); code != float64(common.CodeNotFound) {
t.Errorf("expected 404, got code %v", code)
}
}
func TestFileCommit_ListCommitFiles_Success(t *testing.T) {
r, _ := setupFileCommitTest("user-1")
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/commit-1/files", nil)
r.ServeHTTP(w, req)
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeSuccess) {
t.Errorf("expected success, got code %v", resp["code"])
}
}
func TestFileCommit_DiffCommits_Success(t *testing.T) {
r, _ := setupFileCommitTest("user-1")
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/diff?from=c1&to=c2", nil)
r.ServeHTTP(w, req)
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeSuccess) {
t.Errorf("expected success, got code %v", resp["code"])
}
}
func TestFileCommit_DiffCommits_MissingParams(t *testing.T) {
r, _ := setupFileCommitTest("user-1")
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/diff", nil)
r.ServeHTTP(w, req)
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if code, _ := resp["code"].(float64); code != float64(common.CodeParamError) {
t.Errorf("expected param error, got code %v", code)
}
}
func TestFileCommit_GetUncommittedChanges_Success(t *testing.T) {
r, _ := setupFileCommitTest("user-1")
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/changes", nil)
r.ServeHTTP(w, req)
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeSuccess) {
t.Errorf("expected success, got code %v", resp["code"])
}
}
func TestFileCommit_GetCommitTree_Success(t *testing.T) {
r, _ := setupFileCommitTest("user-1")
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/commit-1/tree", nil)
r.ServeHTTP(w, req)
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeSuccess) {
t.Errorf("expected success, got code %v", resp["code"])
}
}
func TestFileCommit_GetCommitFileContent_Success(t *testing.T) {
r, mock := setupFileCommitTest("user-1")
mock.getCommitFileContentFn = func(folderID, commitID, fileID string) ([]byte, error) {
return []byte("hello world"), nil
}
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/commit-1/files/f1/content", nil)
r.ServeHTTP(w, req)
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeSuccess) {
t.Errorf("expected success, got code %v", resp["code"])
}
data, _ := resp["data"].(map[string]interface{})
if content, _ := data["content"].(string); content != "hello world" {
t.Errorf("expected 'hello world', got %q", content)
}
}
func TestFileCommit_GetFileVersionHistory_Success(t *testing.T) {
r, _ := setupFileCommitTest("user-1")
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/files/f1/versions", nil)
r.ServeHTTP(w, req)
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeSuccess) {
t.Errorf("expected success, got code %v", resp["code"])
}
}

View File

@@ -46,6 +46,7 @@ type Router struct {
difyRetrievalHandler *handler.DifyRetrievalHandler
pluginHandler *handler.PluginHandler
modelHandler *handler.ModelHandler
fileCommitHandler *handler.FileCommitHandler
adminRuntimeHandler *handler.AdminRuntimeHandler
}
@@ -74,6 +75,7 @@ func NewRouter(
difyRetrievalHandler *handler.DifyRetrievalHandler,
pluginHandler *handler.PluginHandler,
modelHandler *handler.ModelHandler,
fileCommitHandler *handler.FileCommitHandler,
adminRuntimeHandler *handler.AdminRuntimeHandler,
) *Router {
return &Router{
@@ -100,6 +102,7 @@ func NewRouter(
difyRetrievalHandler: difyRetrievalHandler,
pluginHandler: pluginHandler,
modelHandler: modelHandler,
fileCommitHandler: fileCommitHandler,
adminRuntimeHandler: adminRuntimeHandler,
}
}
@@ -304,8 +307,51 @@ func (r *Router) Setup(engine *gin.Engine) {
file.GET("/:id/ancestors", r.fileHandler.GetFileAncestors)
file.GET("/:id/parent", r.fileHandler.GetParentFolder)
file.GET("/:id", r.fileHandler.Download)
file.GET("/:id/versions", r.fileCommitHandler.GetFileVersionHistory)
}
// File commit routes — /folders/ takes folder_id directly
commitFolders := v1.Group("/folders")
{
commitFolders.POST("/:folder_id/commits", r.fileCommitHandler.CreateCommit)
commitFolders.GET("/:folder_id/commits", r.fileCommitHandler.ListCommits)
commitFolders.GET("/:folder_id/commits/diff", r.fileCommitHandler.DiffCommits)
commitFolders.GET("/:folder_id/commits/:commit_id", r.fileCommitHandler.GetCommit)
commitFolders.GET("/:folder_id/commits/:commit_id/files", r.fileCommitHandler.ListCommitFiles)
commitFolders.GET("/:folder_id/commits/:commit_id/tree", r.fileCommitHandler.GetCommitTree)
commitFolders.GET("/:folder_id/commits/:commit_id/files/:file_id/content", r.fileCommitHandler.GetCommitFileContent)
commitFolders.GET("/:folder_id/changes", r.fileCommitHandler.GetUncommittedChanges)
}
// /workspace/{workspace_id}/commits — alias for /folders/ (workspace_id == folder_id)
commitWorkspace := v1.Group("/workspace")
{
commitWorkspace.POST("/:folder_id/commits", r.fileCommitHandler.CreateCommit)
commitWorkspace.GET("/:folder_id/commits", r.fileCommitHandler.ListCommits)
commitWorkspace.GET("/:folder_id/commits/diff", r.fileCommitHandler.DiffCommits)
commitWorkspace.GET("/:folder_id/commits/:commit_id", r.fileCommitHandler.GetCommit)
commitWorkspace.GET("/:folder_id/commits/:commit_id/files", r.fileCommitHandler.ListCommitFiles)
commitWorkspace.GET("/:folder_id/commits/:commit_id/tree", r.fileCommitHandler.GetCommitTree)
commitWorkspace.GET("/:folder_id/commits/:commit_id/files/:file_id/content", r.fileCommitHandler.GetCommitFileContent)
commitWorkspace.GET("/:folder_id/changes", r.fileCommitHandler.GetUncommittedChanges)
}
// /datasets/{dataset_id}/commits — resolve dataset_id → folder_id via middleware
commitDatasets := v1.Group("/datasets/:dataset_id")
commitDatasets.Use(handler.CommitFolderResolver(r.fileCommitHandler, "datasets", "dataset_id"))
{
commitDatasets.POST("/commits", r.fileCommitHandler.CreateCommit)
commitDatasets.GET("/commits", r.fileCommitHandler.ListCommits)
commitDatasets.GET("/commits/diff", r.fileCommitHandler.DiffCommits)
commitDatasets.GET("/commits/:commit_id", r.fileCommitHandler.GetCommit)
commitDatasets.GET("/commits/:commit_id/files", r.fileCommitHandler.ListCommitFiles)
commitDatasets.GET("/commits/:commit_id/tree", r.fileCommitHandler.GetCommitTree)
commitDatasets.GET("/commits/:commit_id/files/:file_id/content", r.fileCommitHandler.GetCommitFileContent)
commitDatasets.GET("/changes", r.fileCommitHandler.GetUncommittedChanges)
}
// Author routes
authors := v1.Group("/authors")
{

View File

@@ -0,0 +1,637 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package service
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"ragflow/internal/common"
"ragflow/internal/dao"
"ragflow/internal/entity"
"ragflow/internal/storage"
"sort"
"strings"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
"gorm.io/gorm"
)
// FileCommitService file commit service
type FileCommitService struct {
commitDAO *dao.FileCommitDAO
commitItemDAO *dao.FileCommitItemDAO
fileDAO *dao.FileDAO
}
// NewFileCommitService create file commit service
func NewFileCommitService() *FileCommitService {
return &FileCommitService{
commitDAO: dao.NewFileCommitDAO(),
commitItemDAO: dao.NewFileCommitItemDAO(),
fileDAO: dao.NewFileDAO(),
}
}
// CreateCommit creates a new commit for a workspace folder
func (s *FileCommitService) CreateCommit(folderID, authorID, message string, changes []entity.FileChange) (*entity.FileCommit, error) {
// 1. Get the latest commit for this folder
latestCommit, _ := s.commitDAO.GetLatestByFolderID(folderID)
// 2. Build tree state from latest commit
treeState := make(map[string]interface{})
if latestCommit != nil && latestCommit.TreeState != nil {
if err := json.Unmarshal([]byte(*latestCommit.TreeState), &treeState); err != nil {
common.Warn("failed to unmarshal previous tree state", zap.Error(err))
treeState = make(map[string]interface{})
}
}
// 3. Create commit record
commitID := generateCommitUUID()
nowMs := time.Now().UnixMilli()
commit := &entity.FileCommit{
ID: commitID,
FolderID: folderID,
Message: message,
AuthorID: authorID,
FileCount: len(changes),
}
if latestCommit != nil {
parentID := latestCommit.ID
commit.ParentID = &parentID
}
// All DB operations run inside a single transaction.
var treeStr string
if err := dao.DB.Transaction(func(tx *gorm.DB) error {
// Save commit
if err := tx.Create(commit).Error; err != nil {
return fmt.Errorf("failed to create commit: %w", err)
}
// Backfill parent_id for existing tree_state entries
for fid, entry := range treeState {
if m, ok := entry.(map[string]interface{}); ok {
if _, has := m["parent_id"]; !has {
var fileRec entity.File
if err := tx.Select("parent_id").Where("id = ?", fid).First(&fileRec).Error; err == nil {
m["parent_id"] = fileRec.ParentID
}
}
}
}
storageImpl := storage.GetStorageFactory().GetStorage()
for _, change := range changes {
item := &entity.FileCommitItem{
ID: generateCommitUUID(),
CommitID: commitID,
FileID: change.FileID,
Operation: change.Operation,
}
switch change.Operation {
case "add", "modify":
contentBytes := []byte(change.Content)
hash := sha256.Sum256(contentBytes)
hashHex := hex.EncodeToString(hash[:])
objKey := ".objects/" + hashHex
if storageImpl != nil {
if err := storageImpl.Put(folderID, objKey, contentBytes); err != nil {
return fmt.Errorf("failed to store object: %w", err)
}
}
if change.Operation == "modify" {
if oldEntry, ok := treeState[change.FileID]; ok {
if oldMap, ok := oldEntry.(map[string]interface{}); ok {
if oldHash, ok := oldMap["hash"].(string); ok {
item.OldHash = &oldHash
}
if oldLoc, ok := oldMap["location"].(string); ok {
item.OldLocation = &oldLoc
}
}
}
}
item.NewHash = &hashHex
item.NewLocation = &objKey
fSize := int64(len(contentBytes))
if err := tx.Model(&entity.File{}).Where("id = ?", change.FileID).Updates(map[string]interface{}{
"location": objKey,
"size": fSize,
}).Error; err != nil {
return fmt.Errorf("failed to update file record: %w", err)
}
// Look up parent_id from the File table
fileParentID := ""
var fileRec entity.File
if err := tx.Select("parent_id").Where("id = ?", change.FileID).First(&fileRec).Error; err == nil {
fileParentID = fileRec.ParentID
}
treeState[change.FileID] = map[string]interface{}{
"hash": hashHex,
"location": objKey,
"name": change.FileName,
"size": fSize,
"status": "1",
"parent_id": fileParentID,
}
case "delete":
if oldEntry, ok := treeState[change.FileID]; ok {
if oldMap, ok := oldEntry.(map[string]interface{}); ok {
if oldHash, ok := oldMap["hash"].(string); ok {
item.OldHash = &oldHash
}
if oldLoc, ok := oldMap["location"].(string); ok {
item.OldLocation = &oldLoc
}
}
}
if err := tx.Model(&entity.File{}).Where("id = ?", change.FileID).Update("status", "0").Error; err != nil {
return fmt.Errorf("failed to soft-delete file: %w", err)
}
if entry, ok := treeState[change.FileID]; ok {
if entryMap, ok := entry.(map[string]interface{}); ok {
entryMap["status"] = "0"
}
}
case "rename":
item.OldName = &change.OldName
item.NewName = &change.NewName
if err := tx.Model(&entity.File{}).Where("id = ?", change.FileID).Update("name", change.NewName).Error; err != nil {
return fmt.Errorf("failed to rename file: %w", err)
}
if entry, ok := treeState[change.FileID]; ok {
if entryMap, ok := entry.(map[string]interface{}); ok {
entryMap["name"] = change.NewName
}
} else {
treeState[change.FileID] = map[string]interface{}{
"name": change.NewName,
"status": "1",
}
}
}
// Save commit item
if err := tx.Create(item).Error; err != nil {
return fmt.Errorf("failed to create commit item: %w", err)
}
}
// Serialize and save tree state
treeJSON, err := json.Marshal(treeState)
if err != nil {
return fmt.Errorf("failed to marshal tree state: %w", err)
}
if err := tx.Model(&entity.FileCommit{}).Where("id = ?", commitID).Update("tree_state", string(treeJSON)).Error; err != nil {
return fmt.Errorf("failed to update tree state: %w", err)
}
treeStr = string(treeJSON)
return nil
}); err != nil {
return nil, err
}
commit.TreeState = &treeStr
commit.CreateTime = &nowMs
return commit, nil
}
// ListCommits lists commits for a workspace folder with pagination
func (s *FileCommitService) ListCommits(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error) {
return s.commitDAO.ListByFolderID(folderID, page, pageSize, orderBy, desc)
}
// GetCommit gets a single commit by ID
func (s *FileCommitService) GetCommit(commitID string) (*entity.FileCommit, error) {
return s.commitDAO.GetByID(commitID)
}
// ListCommitFiles lists all file change items for a commit
func (s *FileCommitService) ListCommitFiles(commitID string) ([]*entity.FileCommitItem, error) {
return s.commitItemDAO.ListByCommitID(commitID)
}
// DiffCommits compares two commits and returns the diff
func (s *FileCommitService) DiffCommits(fromID, toID string) ([]entity.DiffEntry, error) {
fromItems, err := s.commitItemDAO.ListByCommitID(fromID)
if err != nil {
return nil, err
}
toItems, err := s.commitItemDAO.ListByCommitID(toID)
if err != nil {
return nil, err
}
fromMap := make(map[string]*entity.FileCommitItem)
for _, item := range fromItems {
fromMap[item.FileID] = item
}
toMap := make(map[string]*entity.FileCommitItem)
for _, item := range toItems {
toMap[item.FileID] = item
}
// Get tree state for file names (use to commit)
toCommit, err := s.commitDAO.GetByID(toID)
treeState := make(map[string]interface{})
if err == nil && toCommit != nil && toCommit.TreeState != nil {
json.Unmarshal([]byte(*toCommit.TreeState), &treeState)
}
getFileName := func(fileID string) string {
if entry, ok := treeState[fileID]; ok {
if m, ok := entry.(map[string]interface{}); ok {
if name, ok := m["name"].(string); ok {
return name
}
}
}
return fileID
}
allFileIDs := make(map[string]bool)
for fid := range fromMap {
allFileIDs[fid] = true
}
for fid := range toMap {
allFileIDs[fid] = true
}
// Sort for deterministic output
var sortedIDs []string
for fid := range allFileIDs {
sortedIDs = append(sortedIDs, fid)
}
sort.Strings(sortedIDs)
var diff []entity.DiffEntry
for _, fid := range sortedIDs {
fromItem := fromMap[fid]
toItem := toMap[fid]
var entry entity.DiffEntry
entry.FileID = fid
entry.FileName = getFileName(fid)
if fromItem != nil && toItem == nil {
// Deleted
entry.Operation = "delete"
entry.OldHash = fromItem.NewHash
entry.OldLocation = fromItem.NewLocation
} else if fromItem == nil && toItem != nil {
// Added
entry.Operation = "add"
entry.NewHash = toItem.NewHash
entry.NewLocation = toItem.NewLocation
} else {
// Both exist — compare hashes
fromHash := ""
if fromItem.NewHash != nil {
fromHash = *fromItem.NewHash
}
toHash := ""
if toItem.NewHash != nil {
toHash = *toItem.NewHash
}
if fromHash != toHash {
entry.Operation = "modify"
entry.OldHash = fromItem.NewHash
entry.OldLocation = fromItem.NewLocation
entry.NewHash = toItem.NewHash
entry.NewLocation = toItem.NewLocation
}
}
if entry.Operation != "" {
diff = append(diff, entry)
}
}
return diff, nil
}
// GetUncommittedChanges gets uncommitted changes for a workspace folder.
// Recursively scans all sub-folders.
func (s *FileCommitService) GetUncommittedChanges(folderID string) ([]entity.DiffEntry, error) {
// Get latest commit tree state
latest, err := s.commitDAO.GetLatestByFolderID(folderID)
committedFiles := make(map[string]map[string]interface{})
if err == nil && latest != nil && latest.TreeState != nil {
var treeData map[string]interface{}
if jsonErr := json.Unmarshal([]byte(*latest.TreeState), &treeData); jsonErr == nil {
for k, v := range treeData {
if m, ok := v.(map[string]interface{}); ok {
committedFiles[k] = m
}
}
}
}
// Get all live files recursively under this folder
liveMap := s.collectAllFilesRecursive(folderID)
var changes []entity.DiffEntry
processed := make(map[string]bool)
// Check committed files for modifications and deletions
for fid, committedEntry := range committedFiles {
processed[fid] = true
if committedEntry["status"] == "0" {
continue
}
if liveFile, ok := liveMap[fid]; ok {
liveHash := computeLiveFileHash(folderID, fid, liveFile)
committedHash := ""
if h, ok := committedEntry["hash"].(string); ok {
committedHash = h
}
if liveHash != "" && liveHash != committedHash {
changes = append(changes, entity.DiffEntry{
FileID: fid,
FileName: liveFile.Name,
Operation: "modify",
})
}
} else {
name := ""
if n, ok := committedEntry["name"].(string); ok {
name = n
}
changes = append(changes, entity.DiffEntry{
FileID: fid,
FileName: name,
Operation: "delete",
})
}
}
// Check for newly added files
for _, liveFile := range liveMap {
if !processed[liveFile.ID] {
changes = append(changes, entity.DiffEntry{
FileID: liveFile.ID,
FileName: liveFile.Name,
Operation: "add",
})
}
}
return changes, nil
}
// collectAllFilesRecursive recursively collects all non-folder files under a folder.
func (s *FileCommitService) collectAllFilesRecursive(folderID string) map[string]*entity.File {
result := make(map[string]*entity.File)
// Direct files (non-folder)
files, _ := s.fileDAO.ListNonFolderByParentID(folderID)
for _, f := range files {
result[f.ID] = f
}
// Sub-folders — recurse
subFolders, _ := s.fileDAO.ListFolderByParentID(folderID)
for _, sf := range subFolders {
sub := s.collectAllFilesRecursive(sf.ID)
for k, v := range sub {
result[k] = v
}
}
return result
}
// GetCommitTree gets the tree state snapshot for a commit as a hierarchical tree.
func (s *FileCommitService) GetCommitTree(commitID string) (map[string]interface{}, error) {
commit, err := s.commitDAO.GetByID(commitID)
if err != nil {
return nil, err
}
if commit.TreeState == nil {
return map[string]interface{}{"id": commit.FolderID, "name": "", "type": "folder", "children": []interface{}{}}, nil
}
var flat map[string]interface{}
if err := json.Unmarshal([]byte(*commit.TreeState), &flat); err != nil {
return nil, err
}
return s.buildHierarchicalTree(flat, commit.FolderID), nil
}
// buildHierarchicalTree builds a recursive tree from a flat tree_state map.
// Sub-folder hierarchy is resolved from the File table's parent_id.
func (s *FileCommitService) buildHierarchicalTree(flat map[string]interface{}, rootFolderID string) map[string]interface{} {
// Collect all unique folder IDs
folderIDs := map[string]bool{rootFolderID: true}
for _, v := range flat {
if entry, ok := v.(map[string]interface{}); ok {
pid, _ := entry["parent_id"].(string)
if pid == "" {
pid = rootFolderID
}
folderIDs[pid] = true
}
}
// Build folder parent map from File table
folderParentMap := make(map[string]string)
for fid := range folderIDs {
if fid != rootFolderID {
if f, err := s.fileDAO.GetByID(fid); err == nil {
folderParentMap[fid] = f.ParentID
}
}
}
// Group file entries by parent_id
filesByParent := make(map[string][]string)
fileEntries := make(map[string]map[string]interface{})
for fid, v := range flat {
entry, ok := v.(map[string]interface{})
if !ok {
continue
}
pid, _ := entry["parent_id"].(string)
if pid == "" {
pid = rootFolderID
}
filesByParent[pid] = append(filesByParent[pid], fid)
fileEntries[fid] = entry
}
// Group sub-folders by their parent
childrenByFolder := make(map[string][]string)
for sfid, ppid := range folderParentMap {
childrenByFolder[ppid] = append(childrenByFolder[ppid], sfid)
}
var buildNode func(nodeID string) map[string]interface{}
buildNode = func(nodeID string) map[string]interface{} {
nodeName := nodeID
if f, err := s.fileDAO.GetByID(nodeID); err == nil {
nodeName = f.Name
}
node := map[string]interface{}{
"id": nodeID,
"name": nodeName,
"type": "folder",
"children": []interface{}{},
}
// File children
for _, fid := range filesByParent[nodeID] {
entry := fileEntries[fid]
fn := map[string]interface{}{
"id": fid,
"name": entry["name"],
"type": "file",
"hash": entry["hash"],
"size": entry["size"],
"status": entry["status"],
}
if loc, ok := entry["location"].(string); ok && loc != "" {
fn["location"] = loc
}
node["children"] = append(node["children"].([]interface{}), fn)
}
// Sub-folder children
for _, sfid := range childrenByFolder[nodeID] {
child := buildNode(sfid)
node["children"] = append(node["children"].([]interface{}), child)
}
return node
}
return buildNode(rootFolderID)
}
// GetCommitFileContent gets file content as it existed in a given commit
func (s *FileCommitService) GetCommitFileContent(folderID, commitID, fileID string) ([]byte, error) {
_, err := s.commitDAO.GetByID(commitID)
if err != nil {
return nil, fmt.Errorf("commit not found: %w", err)
}
item, err := s.commitItemDAO.GetByCommitIDAndFileID(commitID, fileID)
if err != nil {
return nil, fmt.Errorf("file not found in commit: %w", err)
}
if item.NewHash == nil && item.OldHash == nil {
return nil, fmt.Errorf("file has no content in this commit")
}
hash := ""
if item.NewHash != nil {
hash = *item.NewHash
} else if item.OldHash != nil {
hash = *item.OldHash
}
objKey := ".objects/" + hash
storageImpl := storage.GetStorageFactory().GetStorage()
if storageImpl == nil {
return nil, fmt.Errorf("storage not initialized")
}
blob, err := storageImpl.Get(folderID, objKey)
if err != nil {
return nil, fmt.Errorf("failed to read file content from storage: %w", err)
}
return blob, nil
}
// GetFileVersionHistory gets version history for a specific file
func (s *FileCommitService) GetFileVersionHistory(fileID string) ([]entity.VersionEntry, error) {
items, err := s.commitItemDAO.ListByFileID(fileID)
if err != nil {
return nil, err
}
var versions []entity.VersionEntry
for _, item := range items {
commit, err := s.commitDAO.GetByID(item.CommitID)
if err != nil {
continue
}
h := ""
if item.NewHash != nil {
h = *item.NewHash
} else if item.OldHash != nil {
h = *item.OldHash
}
versions = append(versions, entity.VersionEntry{
CommitID: item.CommitID,
Operation: item.Operation,
Hash: h,
CreateTime: commit.CreateTime,
Message: commit.Message,
})
}
return versions, nil
}
// computeLiveFileHash computes the SHA256 hash of current file content from storage
func computeLiveFileHash(folderID, fileID string, file *entity.File) string {
if file.Location == nil || *file.Location == "" {
return ""
}
storageImpl := storage.GetStorageFactory().GetStorage()
if storageImpl == nil {
return ""
}
data, err := storageImpl.Get(folderID, *file.Location)
if err != nil {
return ""
}
hash := sha256.Sum256(data)
return hex.EncodeToString(hash[:])
}
// generateCommitUUID generates a UUID without dashes
func generateCommitUUID() string {
id := uuid.New().String()
return strings.ReplaceAll(id, "-", "")
}

View File

@@ -0,0 +1,718 @@
#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""API-level integration test for file commit endpoints.
Uses an in-memory SQLite database so the real FileCommitService and
FileCommit/FileCommitItem models execute against real SQL — only the
HTTP layer (quart.request, login_required, current_user) and storage
are mocked.
"""
import asyncio
import functools
import importlib.util
import logging
import sys
from pathlib import Path
from types import ModuleType, SimpleNamespace
import pytest
from peewee import SqliteDatabase, Model, CharField, IntegerField, BigIntegerField, TextField
LOGGER = logging.getLogger(__name__)
class _DummyManager:
def route(self, *_args, **_kwargs):
def decorator(func):
return func
return decorator
def _run(coro):
return asyncio.run(coro)
# Shared mutable payload used by both get_request_json (stub) and
# _setup_request so validate_request's closure always sees the current value.
_request_payload: list = [{}]
# ── SQLite in-memory models ───────────────────────────────────────────────
# We create minimal Peewee models that match the real table schemas so
# FileCommitService (which uses DB.atomic(), .select(), .where(), etc.)
# works against real SQL.
sqlite_db = SqliteDatabase(':memory:')
class BaseTestModel(Model):
class Meta:
database = sqlite_db
class FileCommitTestModel(BaseTestModel):
id = CharField(max_length=32, primary_key=True)
folder_id = CharField(max_length=32, index=True)
parent_id = CharField(max_length=32, null=True, index=True)
message = CharField(max_length=512, default="")
author_id = CharField(max_length=32, index=True)
file_count = IntegerField(default=0)
tree_state = TextField(null=True)
create_time = BigIntegerField(null=True, index=True)
create_date = CharField(null=True, max_length=32, index=True)
update_time = BigIntegerField(null=True, index=True)
update_date = CharField(null=True, max_length=32, index=True)
class Meta:
db_table = "file_commit"
class FileCommitItemTestModel(BaseTestModel):
id = CharField(max_length=32, primary_key=True)
commit_id = CharField(max_length=32, index=True)
file_id = CharField(max_length=32, index=True)
operation = CharField(max_length=16, index=True)
old_hash = CharField(max_length=64, null=True, index=True)
new_hash = CharField(max_length=64, null=True, index=True)
old_location = CharField(max_length=255, null=True)
new_location = CharField(max_length=255, null=True)
old_name = CharField(max_length=255, null=True)
new_name = CharField(max_length=255, null=True)
create_time = BigIntegerField(null=True, index=True)
create_date = CharField(null=True, max_length=32, index=True)
update_time = BigIntegerField(null=True, index=True)
update_date = CharField(null=True, max_length=32, index=True)
class Meta:
db_table = "file_commit_item"
class FileTestModel(BaseTestModel):
id = CharField(max_length=32, primary_key=True)
parent_id = CharField(max_length=32, index=True)
tenant_id = CharField(max_length=32, index=True)
created_by = CharField(max_length=32, index=True)
name = CharField(max_length=255, index=True)
location = CharField(max_length=255, null=True, index=True)
size = BigIntegerField(default=0, index=True)
type = CharField(max_length=32, index=True)
source_type = CharField(max_length=128, default="", index=True)
status = CharField(max_length=1, null=True, default="1", index=True)
create_time = BigIntegerField(null=True, index=True)
create_date = CharField(null=True, max_length=32, index=True)
update_time = BigIntegerField(null=True, index=True)
update_date = CharField(null=True, max_length=32, index=True)
class Meta:
db_table = "file"
_TABLES = [FileCommitTestModel, FileCommitItemTestModel, FileTestModel]
sqlite_db.create_tables(_TABLES)
def _clear_db():
"""Delete all rows from every test table so each test starts clean."""
for model in _TABLES:
model.delete().execute()
# ── Module loader ─────────────────────────────────────────────────────────
def _load_module(monkeypatch):
"""Load file_commit_api.py with SQLite in-memory DB and mocked HTTP layer."""
repo_root = Path(__file__).resolve().parents[3]
# Stub: quart.request
quart_mod = ModuleType("quart")
quart_mod.request = SimpleNamespace(args={}, content_type="application/json")
monkeypatch.setitem(sys.modules, "quart", quart_mod)
# Stub: api.apps with login_required / current_user
api_pkg = ModuleType("api")
api_pkg.__path__ = [str(repo_root / "api")]
monkeypatch.setitem(sys.modules, "api", api_pkg)
apps_mod = ModuleType("api.apps")
apps_mod.__path__ = [str(repo_root / "api" / "apps")]
apps_mod.current_user = SimpleNamespace(id="test-user")
apps_mod.login_required = lambda func: func
monkeypatch.setitem(sys.modules, "api.apps", apps_mod)
api_pkg.apps = apps_mod
# Stub: api.utils.api_utils
api_utils_mod = ModuleType("api.utils.api_utils")
def get_json_result(data=None, message="success", code=0):
return {"code": code, "data": data, "message": message}
def get_data_error_result(message=""):
return {"code": 102, "data": None, "message": message}
async def get_request_json():
return _request_payload[0]
def server_error_response(err):
return {"code": 500, "data": None, "message": str(err)}
def validate_request(*required_keys):
def _decorator(func):
@functools.wraps(func)
async def _wrapper(*args, **kwargs):
payload = await get_request_json()
missing = [k for k in required_keys if k not in payload]
if missing:
return get_json_result(
code=101, data=None,
message="required argument are missing: " + ", ".join(missing)
)
return await func(*args, **kwargs)
return _wrapper
return _decorator
api_utils_mod.get_json_result = get_json_result
api_utils_mod.get_data_error_result = get_data_error_result
api_utils_mod.get_request_json = get_request_json
api_utils_mod.server_error_response = server_error_response
api_utils_mod.validate_request = validate_request
monkeypatch.setitem(sys.modules, "api.utils.api_utils", api_utils_mod)
# Stub: common.misc_utils
import uuid
misc_utils_mod = ModuleType("common.misc_utils")
misc_utils_mod.get_uuid = lambda: uuid.uuid1().hex
monkeypatch.setitem(sys.modules, "common.misc_utils", misc_utils_mod)
# Stub: common.settings (STORAGE_IMPL is a no-op for testing)
common_mod = ModuleType("common")
common_mod.__path__ = []
common_mod.settings = SimpleNamespace(
STORAGE_IMPL=SimpleNamespace(
put=lambda *_a, **_kw: None,
get=lambda *_a, **_kw: b"stub-content",
),
DATABASE_TYPE="sqlite",
)
monkeypatch.setitem(sys.modules, "common", common_mod)
# Stub: common.time_utils (monotonically increasing timestamps)
_ts_iter = iter(range(1718200000000, 1718200000100))
time_utils_mod = ModuleType("common.time_utils")
time_utils_mod.current_timestamp = lambda: next(_ts_iter)
time_utils_mod.datetime_format = lambda *_a, **__: "mock"
monkeypatch.setitem(sys.modules, "common.time_utils", time_utils_mod)
# Stub: api.db.db_models — inject SQLite DB and our test models
db_models_mod = ModuleType("api.db.db_models")
class _DB:
"""Drop-in replacement that wraps our SQLite DB with the same
methods (connection_context, atomic) that CommonService expects."""
@staticmethod
def connection_context():
def dec(func):
return func
return dec
@staticmethod
def atomic():
class Ctx:
def __enter__(self2):
return self2
def __exit__(self2, *args):
pass
return Ctx()
db_models_mod.DB = _DB
db_models_mod.FileCommit = FileCommitTestModel
db_models_mod.FileCommitItem = FileCommitItemTestModel
db_models_mod.File = FileTestModel
db_models_mod.DataBaseModel = BaseTestModel
monkeypatch.setitem(sys.modules, "api.db.db_models", db_models_mod)
class _StubFileService:
model = FileTestModel # class attribute, not staticmethod — code accesses FileService.model.update(...)
@staticmethod
def update_by_id(pid, data):
return FileTestModel.update(data).where(FileTestModel.id == pid).execute()
@staticmethod
def get_by_id(pid):
try:
obj = FileTestModel.get_by_id(pid)
return True, obj
except Exception:
return False, None
@staticmethod
def get_or_none(**kwargs):
try:
return FileTestModel.get(**kwargs)
except Exception:
return None
class CommonServiceBase:
model = None
@classmethod
def get_by_id(cls, pid):
try:
obj = cls.model.get_or_none(cls.model.id == pid)
if obj:
return True, obj
except Exception:
pass
return False, None
@classmethod
def query(cls, cols=None, reverse=None, order_by=None, **kwargs):
q = cls.model.select()
for f_n, f_v in kwargs.items():
if f_v is not None and hasattr(cls.model, f_n):
q = q.where(getattr(cls.model, f_n) == f_v)
return q
@classmethod
def update_by_id(cls, pid, data):
return cls.model.update(data).where(cls.model.id == pid).execute()
@classmethod
def filter_update(cls, filters, update_data):
return cls.model.update(update_data).where(*filters).execute()
# Stub: common.constants with FileSource for resolver
constants_mod = ModuleType("common.constants")
constants_mod.FileSource = type("FileSource", (), {"KNOWLEDGEBASE": "knowledgebase"})
monkeypatch.setitem(sys.modules, "common.constants", constants_mod)
# Stub: api.db with real filesystem path so sub-packages can be discovered.
db_pkg = ModuleType("api.db")
db_pkg.__path__ = [str(repo_root / "api" / "db")]
db_pkg.UserTenantRole = type('UserTenantRole', (), {k: k for k in ('OWNER','ADMIN','NORMAL','INVITE')})
db_pkg.TenantPermission = type('TenantPermission', (), {'ME': 'me', 'TEAM': 'team'})
db_pkg.FileType = type('FileType', (), {'FOLDER': 'folder', 'DOC': 'doc', 'VISUAL': 'visual', 'AURAL': 'aural', 'VIRTUAL': 'virtual', 'PDF': 'pdf', 'OTHER': 'other'})
db_pkg.KNOWLEDGEBASE_FOLDER_NAME = '.knowledgebase'
db_pkg.SKILLS_FOLDER_NAME = 'skills'
monkeypatch.setitem(sys.modules, "api.db", db_pkg)
api_pkg.db = db_pkg
# Stub api.db.services — prevent real __init__ from loading (avoids
# importing every real service module). Keep the filesystem path so
# file_commit_service can be discovered, but pre-stub file_service
# (which has heavy deps that would cascade-fail).
services_pkg = ModuleType("api.db.services")
services_pkg.__path__ = [str(repo_root / "api" / "db" / "services")]
monkeypatch.setitem(sys.modules, "api.db.services", services_pkg)
# Pre-stub service modules that file_commit_api.py imports.
# Each stub prevents the real .py file from loading (and cascading deps).
file_svc_mod = ModuleType("api.db.services.file_service")
file_svc_mod.FileService = _StubFileService
monkeypatch.setitem(sys.modules, "api.db.services.file_service", file_svc_mod)
common_svc_mod = ModuleType("api.db.services.common_service")
common_svc_mod.CommonService = CommonServiceBase
monkeypatch.setitem(sys.modules, "api.db.services.common_service", common_svc_mod)
kb_svc_mod = ModuleType("api.db.services.knowledgebase_service")
# NB: The dataset resolver in the API calls KnowledgebaseService.get_by_id
# then accesses .name and .tenant_id. We return a simple object.
class _StubKnowledgebaseService:
@staticmethod
def get_by_id(dataset_id):
if dataset_id == "ds-1":
return True, SimpleNamespace(name="test-ds", tenant_id="t1")
return False, None
kb_svc_mod.KnowledgebaseService = _StubKnowledgebaseService
monkeypatch.setitem(sys.modules, "api.db.services.knowledgebase_service", kb_svc_mod)
# Remove cached file_commit_service so it reimports with our SQLite stubs.
# Keep api.db.db_models in sys.modules — it's already patched above.
for mod_name in list(sys.modules.keys()):
if mod_name.startswith("api.db.services.file_commit"):
del sys.modules[mod_name]
# Load the module
module_name = "api.apps.restful_apis.file_commit_api"
module_path = repo_root / "api" / "apps" / "restful_apis" / "file_commit_api.py"
spec = importlib.util.spec_from_file_location(module_name, module_path)
module = importlib.util.module_from_spec(spec)
module.manager = _DummyManager()
monkeypatch.setitem(sys.modules, module_name, module)
spec.loader.exec_module(module)
return module
# ── Helpers ───────────────────────────────────────────────────────────────
def _setup_request(module, json_payload=None, args=None):
"""Set up a request payload and query args for the next handler call."""
if json_payload is not None:
_request_payload[0] = json_payload
if args is not None:
module.request.args = args
# ── Fixtures ──────────────────────────────────────────────────────────────
@pytest.fixture(scope="session")
def auth():
return "test-auth"
@pytest.fixture(scope="session", autouse=True)
def set_tenant_info():
return None
@pytest.fixture(autouse=True)
def reset_db():
"""Clear all rows before each test to prevent order-dependent failures."""
_clear_db()
# ── Tests ─────────────────────────────────────────────────────────────────
@pytest.mark.p2
def test_create_commit_success(monkeypatch):
module = _load_module(monkeypatch)
# Seed a file
FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
_setup_request(module, json_payload={
"message": "initial commit",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "hello"}],
})
res = _run(module.create_commit("root-folder"))
assert res["code"] == 0, f"Expected 0, got {res}"
data = res["data"]
assert data["message"] == "initial commit"
assert data["folder_id"] == "root-folder"
assert data["author_id"] == "test-user"
assert data["file_count"] == 1
assert data["tree_state"] is not None
assert data["id"] is not None
@pytest.mark.p2
def test_create_commit_missing_fields(monkeypatch):
module = _load_module(monkeypatch)
_setup_request(module, json_payload={"message": "no files"})
res = _run(module.create_commit("root-folder"))
assert res["code"] == 101, f"Expected validation error, got {res}"
@pytest.mark.p2
def test_create_commit_modify_and_add(monkeypatch):
module = _load_module(monkeypatch)
FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
FileTestModel.create(id="f2", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="b.txt", type="txt")
# Commit 1: add f1
_setup_request(module, json_payload={
"message": "c1",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "v1"}],
})
_run(module.create_commit("root-folder"))
# Commit 2: modify f1, add f2
_setup_request(module, json_payload={
"message": "c2",
"files": [
{"file_id": "f1", "file_name": "a.txt", "operation": "modify", "content": "v2"},
{"file_id": "f2", "file_name": "b.txt", "operation": "add", "content": "world"},
],
})
res = _run(module.create_commit("root-folder"))
assert res["code"] == 0
assert res["data"]["file_count"] == 2
@pytest.mark.p2
def test_create_commit_delete(monkeypatch):
module = _load_module(monkeypatch)
FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
# Add then delete
_setup_request(module, json_payload={
"message": "add",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "hello"}],
})
_run(module.create_commit("root-folder"))
_setup_request(module, json_payload={
"message": "delete",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "delete"}],
})
res = _run(module.create_commit("root-folder"))
assert res["code"] == 0
@pytest.mark.p2
def test_create_commit_rename(monkeypatch):
module = _load_module(monkeypatch)
FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="old.txt", type="txt")
_setup_request(module, json_payload={
"message": "add",
"files": [{"file_id": "f1", "file_name": "old.txt", "operation": "add", "content": "data"}],
})
_run(module.create_commit("root-folder"))
# Rename
_setup_request(module, json_payload={
"message": "rename",
"files": [{"file_id": "f1", "file_name": "old.txt", "operation": "rename",
"old_name": "old.txt", "new_name": "new.txt"}],
})
res = _run(module.create_commit("root-folder"))
assert res["code"] == 0
@pytest.mark.p2
def test_list_commits_success(monkeypatch):
module = _load_module(monkeypatch)
FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
# Create 2 commits
_setup_request(module, json_payload={
"message": "c1",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "v1"}],
})
_run(module.create_commit("root-folder"))
_setup_request(module, json_payload={
"message": "c2",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "modify", "content": "v2"}],
})
_run(module.create_commit("root-folder"))
# List
module.request.args = {"page": "1", "page_size": "10"}
res = _run(module.list_commits("root-folder"))
assert res["code"] == 0
assert res["data"]["total"] == 2
assert len(res["data"]["commits"]) == 2
@pytest.mark.p2
def test_get_commit_detail(monkeypatch):
module = _load_module(monkeypatch)
FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
_setup_request(module, json_payload={
"message": "detail test",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "data"}],
})
create_res = _run(module.create_commit("root-folder"))
commit_id = create_res["data"]["id"]
res = _run(module.get_commit("root-folder", commit_id))
assert res["code"] == 0
assert res["data"]["id"] == commit_id
assert res["data"]["message"] == "detail test"
assert len(res["data"]["files"]) == 1
@pytest.mark.p2
def test_get_commit_not_found(monkeypatch):
module = _load_module(monkeypatch)
res = _run(module.get_commit("root-folder", "nonexistent"))
assert res["code"] == 102
assert "not found" in res["message"].lower()
@pytest.mark.p2
def test_diff_commits(monkeypatch):
module = _load_module(monkeypatch)
FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
FileTestModel.create(id="f2", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="b.txt", type="txt")
# c1: add f1
_setup_request(module, json_payload={
"message": "c1",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "v1"}],
})
c1 = _run(module.create_commit("root-folder"))["data"]["id"]
# c2: add f2, modify f1
_setup_request(module, json_payload={
"message": "c2",
"files": [
{"file_id": "f2", "file_name": "b.txt", "operation": "add", "content": "world"},
{"file_id": "f1", "file_name": "a.txt", "operation": "modify", "content": "v2"},
],
})
c2 = _run(module.create_commit("root-folder"))["data"]["id"]
assert c1 != c2, "c1 and c2 must have different IDs"
module.request.args = {"from": c1, "to": c2}
res = _run(module.diff_commits("root-folder"))
assert res["code"] == 0, f"diff failed: {res}"
assert len(res["data"]) == 2, f"Expected 2 diff entries, got {len(res['data'])}: {res['data']}"
# Verify f2 was added (present in c2 but not in c1)
f2_entries = [e for e in res["data"] if e["file_id"] == "f2"]
assert len(f2_entries) == 1
assert f2_entries[0]["operation"] == "add"
# Verify f1 was modified (hash changed from v1 to v2)
f1_entries = [e for e in res["data"] if e["file_id"] == "f1"]
assert len(f1_entries) == 1
assert f1_entries[0]["operation"] == "modify"
assert f1_entries[0]["old_hash"] != f1_entries[0]["new_hash"]
@pytest.mark.p2
def test_diff_commits_missing_params(monkeypatch):
module = _load_module(monkeypatch)
module.request.args = {}
res = _run(module.diff_commits("root-folder"))
assert res["code"] == 102
@pytest.mark.p2
def test_get_uncommitted_changes(monkeypatch):
module = _load_module(monkeypatch)
# Seed a file that will be committed
FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
# Seed a file that will NOT be committed (uncommitted add)
FileTestModel.create(id="f2", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="b.txt", type="txt")
# Commit only f1
_setup_request(module, json_payload={
"message": "add f1",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "hello"}],
})
_run(module.create_commit("root-folder"))
res = _run(module.get_uncommitted_changes("root-folder"))
assert res["code"] == 0
# f2 should appear as uncommitted "add"
f2_changes = [c for c in res["data"] if c["file_id"] == "f2"]
assert len(f2_changes) > 0, "Expected f2 to show as uncommitted change"
assert f2_changes[0]["operation"] == "add"
@pytest.mark.p2
def test_get_commit_tree(monkeypatch):
module = _load_module(monkeypatch)
FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
_setup_request(module, json_payload={
"message": "c1",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "data"}],
})
create_res = _run(module.create_commit("root-folder"))
commit_id = create_res["data"]["id"]
res = _run(module.get_commit_tree("root-folder", commit_id))
assert res["code"] == 0
assert res["data"]["type"] == "folder"
assert res["data"]["id"] == "root-folder"
assert any(c["id"] == "f1" for c in res["data"].get("children", []))
@pytest.mark.p2
def test_get_commit_file_content(monkeypatch):
module = _load_module(monkeypatch)
FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
_setup_request(module, json_payload={
"message": "c1",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "hello world"}],
})
create_res = _run(module.create_commit("root-folder"))
commit_id = create_res["data"]["id"]
res = _run(module.get_commit_file_content("root-folder", commit_id, "f1"))
assert res["code"] == 0
assert "content" in res["data"]
@pytest.mark.p2
def test_get_file_version_history(monkeypatch):
module = _load_module(monkeypatch)
FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
# Two commits modifying f1
_setup_request(module, json_payload={
"message": "v1",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "v1"}],
})
_run(module.create_commit("root-folder"))
_setup_request(module, json_payload={
"message": "v2",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "modify", "content": "v2"}],
})
_run(module.create_commit("root-folder"))
res = _run(module.get_file_version_history("f1"))
assert res["code"] == 0
assert len(res["data"]) == 2
@pytest.mark.p2
def test_workspace_alias(monkeypatch):
"""Verify /workspace/ alias routes work the same as /folders/."""
module = _load_module(monkeypatch)
FileTestModel.create(id="f1", parent_id="ws-folder", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
_setup_request(module, json_payload={
"message": "workspace commit",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "data"}],
})
res = _run(module.create_commit("ws-folder"))
assert res["code"] == 0
# List via workspace alias
module.request.args = {"page": "1", "page_size": "10"}
res = _run(module.list_commits("ws-folder"))
assert res["code"] == 0
assert res["data"]["total"] == 1
@pytest.mark.p2
def test_get_commit_wrong_folder_returns_not_found(monkeypatch):
module = _load_module(monkeypatch)
FileTestModel.create(id="f1", parent_id="folder-a", tenant_id="t1",
created_by="test-user", name="a.txt", type="txt")
_setup_request(module, json_payload={
"message": "c1",
"files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "data"}],
})
create_res = _run(module.create_commit("folder-a"))
commit_id = create_res["data"]["id"]
# Attempt to read commit from a different folder
res = _run(module.get_commit("folder-b", commit_id))
assert res["code"] == 102
assert "not found in workspace" in res["message"].lower()

View File

@@ -540,12 +540,6 @@ def test_session_update_name_and_param_contract(rest_client, create_chat):
else:
assert body["message"] == expected_name_or_message, (scenario_name, body)
unknown_key_res = rest_client.patch(f"/chats/{chat_id}/sessions/{session_id}", json={"unknown_key": "unknown_value"})
assert unknown_key_res.status_code == 200
unknown_key_payload = unknown_key_res.json()
assert unknown_key_payload["code"] == 100, unknown_key_payload
assert 'Unrecognized field name: "unknown_key"' in unknown_key_payload["message"], unknown_key_payload
for scenario_name, payload in (("empty payload", {}), ("none payload", None)):
res = rest_client.patch(f"/chats/{chat_id}/sessions/{session_id}", json=payload)
assert res.status_code == 200, (scenario_name, res.text)