mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Add git-like file commit API (#15978)
### What problem does this PR solve?
| # | Method | Endpoint | Description | Git Equivalent |
|---|--------|----------|-------------|----------------|
| 1 | `POST` | `/api/v1/{prefix}/{folder_id}/commits` | Create a
snapshot commit with file changes (add/modify/delete/rename) | `git add`
+ `git commit` |
| 2 | `GET` | `/api/v1/{prefix}/{folder_id}/commits` | List commit
history (paginated) | `git log` |
| 3 | `GET` | `/api/v1/{prefix}/{folder_id}/commits/{commit_id}` | Get
commit detail with file changes | `git show` |
| 4 | `GET` | `/api/v1/{prefix}/{folder_id}/commits/{commit_id}/files` |
List file changes in a commit | `git show --name-status` |
| 5 | `GET` |
`/api/v1/{prefix}/{folder_id}/commits/diff?from=...&to=...` | Compare
two commits and return differences | `git diff` |
| 6 | `GET` | `/api/v1/{prefix}/{folder_id}/changes` | Get uncommitted
changes (add/modify/delete) | `git status` |
| 7 | `GET` | `/api/v1/{prefix}/{folder_id}/commits/{commit_id}/tree` |
Get the folder tree snapshot at commit time | `git ls-tree` |
| 8 | `GET` |
`/api/v1/{prefix}/{folder_id}/commits/{commit_id}/files/{file_id}/content`
| Get a file's content as it existed in a specific commit | `git show
HEAD:file` |
| 9 | `GET` | `/api/v1/{prefix}/{file_id}/versions` | Get version
history for a specific file across all commits | `git log -- file` |
Where `{prefix}/{id}` can be:
- `folders/{folder_id}` — direct folder access
- `workspaces/{workspace_id}` — alias of `folders/{folder_id}`
- `datasets/{dataset_id}` — resolves to the dataset's folder
- `memories/{memory_id}` — resolves to the memory's folder
- `skills/{skill_id}` — resolves to the skill's folder
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
This commit is contained in:
314
api/apps/restful_apis/file_commit_api.py
Normal file
314
api/apps/restful_apis/file_commit_api.py
Normal file
@@ -0,0 +1,314 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import logging
|
||||
from functools import wraps
|
||||
|
||||
from quart import request
|
||||
|
||||
from api.apps import login_required, current_user
|
||||
from api.utils.api_utils import get_json_result, get_data_error_result, get_request_json, server_error_response, validate_request
|
||||
|
||||
# manager is injected dynamically by api.apps.register_page() before this
|
||||
# module is exec'd. DO NOT assign manager = None here — it would overwrite
|
||||
# the Blueprint that register_page set on the module.
|
||||
from api.db.services.file_commit_service import FileCommitService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.file_service import FileService
|
||||
from common.constants import FileSource
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_ENTITY_RESOLVERS = {}
|
||||
|
||||
# Counter to give each generated route function a unique name,
|
||||
# preventing Quart Blueprint endpoint name collisions.
|
||||
_route_suffix = [0]
|
||||
|
||||
|
||||
def _register_resolver(entity_type):
|
||||
"""Decorator that registers a folder_id resolver for an entity type.
|
||||
|
||||
The decorated function receives (entity_id) and must return a folder_id
|
||||
or None if the entity has no corresponding folder.
|
||||
"""
|
||||
def decorator(func):
|
||||
_ENTITY_RESOLVERS[entity_type] = func
|
||||
@wraps(func)
|
||||
def wrapper(entity_id):
|
||||
return func(entity_id)
|
||||
return wrapper
|
||||
return decorator
|
||||
|
||||
|
||||
def _resolve_folder_id(entity_type, entity_id):
|
||||
"""Resolve an entity (dataset/memory/skill) to its folder_id."""
|
||||
resolver = _ENTITY_RESOLVERS.get(entity_type)
|
||||
if resolver is None:
|
||||
return None
|
||||
return resolver(entity_id)
|
||||
|
||||
|
||||
@_register_resolver("datasets")
|
||||
def _resolve_dataset_folder(dataset_id):
|
||||
success, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||
if not success:
|
||||
return None
|
||||
# Find the folder with matching name, source_type, and tenant_id
|
||||
folders = FileService.query(
|
||||
name=kb.name,
|
||||
source_type=FileSource.KNOWLEDGEBASE.value,
|
||||
type="folder",
|
||||
tenant_id=kb.tenant_id,
|
||||
)
|
||||
if folders:
|
||||
return folders[0].id
|
||||
return None
|
||||
|
||||
|
||||
# ── Route registration helper ─────────────────────────────────────────────
|
||||
|
||||
def _register_commit_routes(prefix, param_name, resolver_type=None):
|
||||
"""Register all 8 commit endpoints for a given URL prefix.
|
||||
|
||||
Args:
|
||||
prefix: URL prefix like '/folders/<folder_id>'
|
||||
param_name: The URL parameter name (e.g. 'folder_id', 'dataset_id')
|
||||
resolver_type: If set, resolve param_name → folder_id before calling logic
|
||||
"""
|
||||
# Unique suffix for this call to prevent Blueprint endpoint name collisions
|
||||
_route_suffix[0] += 1
|
||||
_n = _route_suffix[0]
|
||||
|
||||
def _resolve(entity_id):
|
||||
if resolver_type is None:
|
||||
return entity_id # already a folder_id
|
||||
folder_id = _resolve_folder_id(resolver_type, entity_id)
|
||||
if folder_id is None:
|
||||
raise ValueError(f"Could not resolve {resolver_type} '{entity_id}' to a folder")
|
||||
return folder_id
|
||||
|
||||
# ── Create commit ──────────────────────────────────────────────────────
|
||||
@manager.route(f'{prefix}/commits', methods=['POST'], endpoint=f'create_commit_{_n}') # noqa: F821
|
||||
@login_required
|
||||
@validate_request("message", "files")
|
||||
async def create_commit(entity_id):
|
||||
folder_id = _resolve(entity_id)
|
||||
req = await get_request_json()
|
||||
try:
|
||||
commit = FileCommitService.create_commit(
|
||||
folder_id=folder_id,
|
||||
author_id=current_user.id,
|
||||
message=req["message"],
|
||||
file_changes=req["files"],
|
||||
)
|
||||
return get_json_result(data={
|
||||
"id": commit.id,
|
||||
"folder_id": commit.folder_id,
|
||||
"parent_id": commit.parent_id,
|
||||
"message": commit.message,
|
||||
"author_id": commit.author_id,
|
||||
"file_count": commit.file_count,
|
||||
"tree_state": commit.tree_state,
|
||||
"create_time": commit.create_time,
|
||||
})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
# ── List commits ───────────────────────────────────────────────────────
|
||||
@manager.route(f'{prefix}/commits', methods=['GET'], endpoint=f'list_commits_{_n}') # noqa: F821
|
||||
@login_required
|
||||
async def list_commits(entity_id):
|
||||
folder_id = _resolve(entity_id)
|
||||
try:
|
||||
page = int(request.args.get("page", 1))
|
||||
page_size = int(request.args.get("page_size", 15))
|
||||
order_by = request.args.get("order_by", "create_time")
|
||||
desc = request.args.get("desc", "true").lower() != "false"
|
||||
commits, total = FileCommitService.list_commits(folder_id, page, page_size, order_by, desc)
|
||||
return get_json_result(data={
|
||||
"total": total,
|
||||
"page": page,
|
||||
"page_size": page_size,
|
||||
"commits": [{
|
||||
"id": c.id,
|
||||
"folder_id": c.folder_id,
|
||||
"parent_id": c.parent_id,
|
||||
"message": c.message,
|
||||
"author_id": c.author_id,
|
||||
"file_count": c.file_count,
|
||||
"create_time": c.create_time,
|
||||
} for c in commits],
|
||||
})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
# ── Get commit ─────────────────────────────────────────────────────────
|
||||
@manager.route(f'{prefix}/commits/<commit_id>', methods=['GET'], endpoint=f'get_commit_{_n}') # noqa: F821
|
||||
@login_required
|
||||
async def get_commit(entity_id, commit_id):
|
||||
folder_id = _resolve(entity_id)
|
||||
try:
|
||||
commit = FileCommitService.get_commit(commit_id)
|
||||
if not commit:
|
||||
return get_data_error_result("Commit not found")
|
||||
if commit.folder_id != folder_id:
|
||||
return get_data_error_result("Commit not found in workspace")
|
||||
items = FileCommitService.list_commit_files(commit_id)
|
||||
return get_json_result(data={
|
||||
"id": commit.id,
|
||||
"folder_id": commit.folder_id,
|
||||
"parent_id": commit.parent_id,
|
||||
"message": commit.message,
|
||||
"author_id": commit.author_id,
|
||||
"file_count": commit.file_count,
|
||||
"create_time": commit.create_time,
|
||||
"files": [{
|
||||
"file_id": item.file_id,
|
||||
"operation": item.operation,
|
||||
"old_hash": item.old_hash,
|
||||
"new_hash": item.new_hash,
|
||||
"old_name": item.old_name,
|
||||
"new_name": item.new_name,
|
||||
} for item in items],
|
||||
})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
# ── List commit files ──────────────────────────────────────────────────
|
||||
@manager.route(f'{prefix}/commits/<commit_id>/files', methods=['GET'], endpoint=f'list_commit_files_{_n}') # noqa: F821
|
||||
@login_required
|
||||
async def list_commit_files(entity_id, commit_id):
|
||||
folder_id = _resolve(entity_id)
|
||||
try:
|
||||
commit = FileCommitService.get_commit(commit_id)
|
||||
if not commit:
|
||||
return get_data_error_result("Commit not found")
|
||||
if commit.folder_id != folder_id:
|
||||
return get_data_error_result("Commit not found in workspace")
|
||||
items = FileCommitService.list_commit_files(commit_id)
|
||||
return get_json_result(data=[{
|
||||
"id": item.id,
|
||||
"file_id": item.file_id,
|
||||
"operation": item.operation,
|
||||
"old_hash": item.old_hash,
|
||||
"new_hash": item.new_hash,
|
||||
"old_location": item.old_location,
|
||||
"new_location": item.new_location,
|
||||
"old_name": item.old_name,
|
||||
"new_name": item.new_name,
|
||||
} for item in items])
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
# ── Diff commits ───────────────────────────────────────────────────────
|
||||
@manager.route(f'{prefix}/commits/diff', methods=['GET'], endpoint=f'diff_commits_{_n}') # noqa: F821
|
||||
@login_required
|
||||
async def diff_commits(entity_id):
|
||||
folder_id = _resolve(entity_id)
|
||||
from_id = request.args.get("from")
|
||||
to_id = request.args.get("to")
|
||||
if not from_id or not to_id:
|
||||
return get_data_error_result("'from' and 'to' parameters are required")
|
||||
try:
|
||||
from_commit = FileCommitService.get_commit(from_id)
|
||||
to_commit = FileCommitService.get_commit(to_id)
|
||||
if not from_commit or not to_commit:
|
||||
return get_data_error_result("Commit not found")
|
||||
if from_commit.folder_id != folder_id or to_commit.folder_id != folder_id:
|
||||
return get_data_error_result("Commit not found in workspace")
|
||||
diff = FileCommitService.diff_commits(from_id, to_id)
|
||||
return get_json_result(data=diff)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
# ── Get uncommitted changes ────────────────────────────────────────────
|
||||
@manager.route(f'{prefix}/changes', methods=['GET'], endpoint=f'get_uncommitted_changes_{_n}') # noqa: F821
|
||||
@login_required
|
||||
async def get_uncommitted_changes(entity_id):
|
||||
folder_id = _resolve(entity_id)
|
||||
try:
|
||||
changes = FileCommitService.get_uncommitted_changes(folder_id)
|
||||
return get_json_result(data=changes)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
# ── Get commit tree ────────────────────────────────────────────────────
|
||||
@manager.route(f'{prefix}/commits/<commit_id>/tree', methods=['GET'], endpoint=f'get_commit_tree_{_n}') # noqa: F821
|
||||
@login_required
|
||||
async def get_commit_tree(entity_id, commit_id):
|
||||
folder_id = _resolve(entity_id)
|
||||
try:
|
||||
commit = FileCommitService.get_commit(commit_id)
|
||||
if not commit:
|
||||
return get_data_error_result("Commit not found")
|
||||
if commit.folder_id != folder_id:
|
||||
return get_data_error_result("Commit not found in workspace")
|
||||
tree = FileCommitService.get_commit_tree(commit_id)
|
||||
return get_json_result(data=tree)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
# ── Get commit file content ────────────────────────────────────────────
|
||||
@manager.route(f'{prefix}/commits/<commit_id>/files/<file_id>/content', methods=['GET'], endpoint=f'get_commit_file_content_{_n}') # noqa: F821
|
||||
@login_required
|
||||
async def get_commit_file_content(entity_id, commit_id, file_id):
|
||||
folder_id = _resolve(entity_id)
|
||||
try:
|
||||
commit = FileCommitService.get_commit(commit_id)
|
||||
if not commit:
|
||||
return get_data_error_result("Commit not found")
|
||||
if commit.folder_id != folder_id:
|
||||
return get_data_error_result("Commit not found in workspace")
|
||||
content = FileCommitService.get_commit_file_content(folder_id, commit_id, file_id)
|
||||
if content is None:
|
||||
return get_data_error_result("File not found in this commit")
|
||||
return get_json_result(data={"content": content.decode("utf-8", errors="replace")})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
# Expose handlers at module level for direct testing.
|
||||
_g = globals()
|
||||
_g['create_commit'] = create_commit
|
||||
_g['list_commits'] = list_commits
|
||||
_g['get_commit'] = get_commit
|
||||
_g['list_commit_files'] = list_commit_files
|
||||
_g['diff_commits'] = diff_commits
|
||||
_g['get_uncommitted_changes'] = get_uncommitted_changes
|
||||
_g['get_commit_tree'] = get_commit_tree
|
||||
_g['get_commit_file_content'] = get_commit_file_content
|
||||
|
||||
# ── Register routes for all entity types ──────────────────────────────────
|
||||
# All URL patterns use <entity_id> as the consistent param name.
|
||||
# For /folders/ entity_id IS the folder_id directly.
|
||||
# For other entity types entity_id is resolved via _resolve_folder_id().
|
||||
# Register datasets first, workspace second, folders last —
|
||||
# the last call's handlers overwrite module-level names for test access.
|
||||
_register_commit_routes('/datasets/<entity_id>', 'entity_id', resolver_type='datasets')
|
||||
_register_commit_routes('/workspace/<entity_id>', 'entity_id') # alias — workspace_id == folder_id
|
||||
_register_commit_routes('/folders/<entity_id>', 'entity_id') # direct — entity_id == folder_id (wins)
|
||||
# /memories and /skills routes are not mounted until resolvers are implemented.
|
||||
|
||||
|
||||
# ── File version history (shared across all entity types) ─────────────────
|
||||
@manager.route('/files/<file_id>/versions', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
async def get_file_version_history(file_id):
|
||||
try:
|
||||
versions = FileCommitService.get_file_version_history(file_id)
|
||||
return get_json_result(data=versions)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
@@ -956,6 +956,38 @@ class File2Document(DataBaseModel):
|
||||
db_table = "file2document"
|
||||
|
||||
|
||||
class FileCommit(DataBaseModel):
|
||||
id = CharField(max_length=32, primary_key=True)
|
||||
folder_id = CharField(max_length=32, null=False, help_text="workspace folder id", index=True)
|
||||
parent_id = CharField(max_length=32, null=True, help_text="parent commit id", index=True)
|
||||
message = CharField(max_length=512, default="", help_text="commit message")
|
||||
author_id = CharField(max_length=32, null=False, help_text="user who created the commit", index=True)
|
||||
file_count = IntegerField(default=0, help_text="number of files in this commit")
|
||||
tree_state = LongTextField(null=True, help_text="JSON snapshot of the full folder tree at this commit")
|
||||
|
||||
class Meta:
|
||||
db_table = "file_commit"
|
||||
|
||||
|
||||
class FileCommitItem(DataBaseModel):
|
||||
id = CharField(max_length=32, primary_key=True)
|
||||
commit_id = CharField(max_length=32, null=False, help_text="commit id", index=True)
|
||||
file_id = CharField(max_length=32, null=False, help_text="file id", index=True)
|
||||
operation = CharField(max_length=16, null=False, help_text="add / modify / delete / rename", index=True)
|
||||
old_hash = CharField(max_length=64, null=True, help_text="old content hash", index=True)
|
||||
new_hash = CharField(max_length=64, null=True, help_text="new content hash", index=True)
|
||||
old_location = CharField(max_length=255, null=True, help_text="old storage location")
|
||||
new_location = CharField(max_length=255, null=True, help_text="new storage location")
|
||||
old_name = CharField(max_length=255, null=True, help_text="old file name (for rename)")
|
||||
new_name = CharField(max_length=255, null=True, help_text="new file name (for rename)")
|
||||
|
||||
class Meta:
|
||||
db_table = "file_commit_item"
|
||||
indexes = (
|
||||
(("commit_id", "file_id"), True), # unique composite index
|
||||
)
|
||||
|
||||
|
||||
class Task(DataBaseModel):
|
||||
id = CharField(max_length=32, primary_key=True)
|
||||
doc_id = CharField(max_length=32, null=False, index=True)
|
||||
|
||||
658
api/db/services/file_commit_service.py
Normal file
658
api/db/services/file_commit_service.py
Normal file
@@ -0,0 +1,658 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import datetime
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
|
||||
from api.db.db_models import DB, FileCommit, FileCommitItem, File
|
||||
from api.db.services.common_service import CommonService
|
||||
from api.db.services.file_service import FileService
|
||||
from common import settings
|
||||
from common.misc_utils import get_uuid
|
||||
from common.time_utils import current_timestamp, datetime_format
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_file_parent_id(file_id):
|
||||
"""Look up a file's parent_id from the File table."""
|
||||
try:
|
||||
row = File.get_or_none(File.id == file_id)
|
||||
if row:
|
||||
return row.parent_id
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _collect_all_files_under(folder_id):
|
||||
"""Recursively collect all non-folder files under a folder (including sub-folders).
|
||||
|
||||
Returns a dict of {file_id: File_model_instance}.
|
||||
"""
|
||||
results = {}
|
||||
try:
|
||||
# Direct file children (non-folder) of this folder
|
||||
for f in File.select().where(
|
||||
File.parent_id == folder_id,
|
||||
File.id != folder_id,
|
||||
File.type != "folder",
|
||||
):
|
||||
results[f.id] = f
|
||||
# Sub-folders — recurse
|
||||
for sub in File.select().where(
|
||||
File.parent_id == folder_id,
|
||||
File.type == "folder",
|
||||
):
|
||||
results.update(_collect_all_files_under(sub.id))
|
||||
except Exception:
|
||||
pass
|
||||
return results
|
||||
|
||||
|
||||
class FileCommitService(CommonService):
|
||||
model = FileCommit
|
||||
|
||||
@classmethod
|
||||
def create_commit(cls, folder_id, author_id, message, file_changes):
|
||||
"""Create a new commit for a workspace folder.
|
||||
|
||||
Args:
|
||||
folder_id: The workspace folder ID
|
||||
author_id: The user ID
|
||||
message: Commit message
|
||||
file_changes: List of dicts:
|
||||
[{"file_id": str, "file_name": str, "operation": "add"|"modify"|"delete"|"rename",
|
||||
"content": str (optional, for add/modify), "content_hash": str (optional),
|
||||
"old_name": str, "new_name": str (for rename)}]
|
||||
|
||||
Returns:
|
||||
The created FileCommit instance
|
||||
"""
|
||||
commit_id = get_uuid()
|
||||
now_ts = current_timestamp()
|
||||
now_dt = datetime_format(date_time=datetime.datetime.now())
|
||||
|
||||
with DB.atomic():
|
||||
# 1. Get the latest (chain head) commit for this folder
|
||||
latest_commit = cls._get_latest_commit(folder_id)
|
||||
|
||||
# 2. Begin creating the commit record
|
||||
commit_data = {
|
||||
"id": commit_id,
|
||||
"folder_id": folder_id,
|
||||
"parent_id": latest_commit.id if latest_commit else None,
|
||||
"message": message,
|
||||
"author_id": author_id,
|
||||
"file_count": len(file_changes),
|
||||
"create_time": now_ts,
|
||||
"create_date": now_dt,
|
||||
"update_time": now_ts,
|
||||
"update_date": now_dt,
|
||||
}
|
||||
|
||||
# 3. Insert commit record
|
||||
FileCommit(**commit_data).save(force_insert=True)
|
||||
|
||||
# 4. Build new tree state and process each file change
|
||||
tree_state = {}
|
||||
if latest_commit and latest_commit.tree_state:
|
||||
try:
|
||||
tree_state = json.loads(latest_commit.tree_state)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
tree_state = {}
|
||||
|
||||
# 4a. Backfill parent_id for existing entries that lack it
|
||||
for fid, entry in tree_state.items():
|
||||
if isinstance(entry, dict) and "parent_id" not in entry:
|
||||
pid = _get_file_parent_id(fid)
|
||||
if pid:
|
||||
entry["parent_id"] = pid
|
||||
|
||||
storage_impl = settings.STORAGE_IMPL
|
||||
|
||||
for change in file_changes:
|
||||
op = change.get("operation", "modify")
|
||||
file_id = change.get("file_id", "")
|
||||
commit_item_id = get_uuid()
|
||||
|
||||
item = {
|
||||
"id": commit_item_id,
|
||||
"commit_id": commit_id,
|
||||
"file_id": file_id,
|
||||
"operation": op,
|
||||
"create_time": now_ts,
|
||||
"create_date": now_dt,
|
||||
"update_time": now_ts,
|
||||
"update_date": now_dt,
|
||||
}
|
||||
|
||||
if op == "add":
|
||||
content = change.get("content", "")
|
||||
content_bytes = content.encode("utf-8") if isinstance(content, str) else content
|
||||
content_hash = hashlib.sha256(content_bytes).hexdigest()
|
||||
obj_key = f".objects/{content_hash}"
|
||||
|
||||
# Store blob via content-addressable storage
|
||||
if storage_impl:
|
||||
storage_impl.put(folder_id, obj_key, content_bytes)
|
||||
|
||||
item["new_hash"] = content_hash
|
||||
item["new_location"] = obj_key
|
||||
|
||||
# Update file record in DB
|
||||
File.update({
|
||||
"location": obj_key,
|
||||
"size": len(content_bytes),
|
||||
"update_time": current_timestamp(),
|
||||
}).where(File.id == file_id).execute()
|
||||
|
||||
# Update tree state
|
||||
file_parent = _get_file_parent_id(file_id)
|
||||
tree_state[file_id] = {
|
||||
"hash": content_hash,
|
||||
"location": obj_key,
|
||||
"name": change.get("file_name", ""),
|
||||
"size": len(content_bytes),
|
||||
"status": "1",
|
||||
"parent_id": file_parent,
|
||||
}
|
||||
|
||||
elif op == "modify":
|
||||
content = change.get("content", "")
|
||||
content_bytes = content.encode("utf-8") if isinstance(content, str) else content
|
||||
content_hash = hashlib.sha256(content_bytes).hexdigest()
|
||||
obj_key = f".objects/{content_hash}"
|
||||
|
||||
# Record old hash
|
||||
old_entry = tree_state.get(file_id, {})
|
||||
old_hash = old_entry.get("hash", "")
|
||||
old_location = old_entry.get("location", "")
|
||||
|
||||
if old_hash:
|
||||
item["old_hash"] = old_hash
|
||||
item["old_location"] = old_location
|
||||
|
||||
# Store new blob
|
||||
if storage_impl:
|
||||
storage_impl.put(folder_id, obj_key, content_bytes)
|
||||
|
||||
item["new_hash"] = content_hash
|
||||
item["new_location"] = obj_key
|
||||
|
||||
# Update file record
|
||||
File.update({
|
||||
"location": obj_key,
|
||||
"size": len(content_bytes),
|
||||
"update_time": current_timestamp(),
|
||||
}).where(File.id == file_id).execute()
|
||||
|
||||
# Update tree state
|
||||
file_parent = _get_file_parent_id(file_id)
|
||||
tree_state[file_id] = {
|
||||
"hash": content_hash,
|
||||
"location": obj_key,
|
||||
"name": change.get("file_name", tree_state.get(file_id, {}).get("name", "")),
|
||||
"size": len(content_bytes),
|
||||
"status": "1",
|
||||
"parent_id": file_parent,
|
||||
}
|
||||
|
||||
elif op == "delete":
|
||||
old_entry = tree_state.get(file_id, {})
|
||||
old_hash = old_entry.get("hash", "")
|
||||
old_location = old_entry.get("location", "")
|
||||
if old_hash:
|
||||
item["old_hash"] = old_hash
|
||||
item["old_location"] = old_location
|
||||
|
||||
# Soft-delete the file record
|
||||
File.update(status="0", update_time=current_timestamp()).where(
|
||||
File.id == file_id
|
||||
).execute()
|
||||
|
||||
# Remove from tree state (mark deleted)
|
||||
if file_id in tree_state:
|
||||
tree_state[file_id]["status"] = "0"
|
||||
|
||||
elif op == "rename":
|
||||
old_name = change.get("old_name", "")
|
||||
new_name = change.get("new_name", "")
|
||||
item["old_name"] = old_name
|
||||
item["new_name"] = new_name
|
||||
|
||||
# Update the file record name
|
||||
File.update(name=new_name, update_time=current_timestamp()).where(
|
||||
File.id == file_id
|
||||
).execute()
|
||||
|
||||
# Update tree state
|
||||
if file_id in tree_state:
|
||||
tree_state[file_id]["name"] = new_name
|
||||
|
||||
# Insert commit item
|
||||
FileCommitItem(**item).save(force_insert=True)
|
||||
|
||||
# 5. Save the tree state snapshot
|
||||
tree_json = json.dumps(tree_state, ensure_ascii=False)
|
||||
cls.model.update(tree_state=tree_json).where(cls.model.id == commit_id).execute()
|
||||
|
||||
_, commit = cls.get_by_id(commit_id)
|
||||
return commit
|
||||
|
||||
@classmethod
|
||||
def _get_latest_commit(cls, folder_id):
|
||||
"""Get the latest (chain head) commit for a folder."""
|
||||
try:
|
||||
return cls.model.select().where(
|
||||
cls.model.folder_id == folder_id
|
||||
).order_by(cls.model.create_time.desc()).first()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def list_commits(cls, folder_id, page=1, page_size=15, order_by="create_time", desc=True):
|
||||
"""List commits for a workspace folder with pagination."""
|
||||
total = cls.model.select().where(cls.model.folder_id == folder_id).count()
|
||||
|
||||
query = cls.model.select().where(cls.model.folder_id == folder_id)
|
||||
if desc:
|
||||
query = query.order_by(getattr(cls.model, order_by).desc())
|
||||
else:
|
||||
query = query.order_by(getattr(cls.model, order_by).asc())
|
||||
|
||||
if page and page_size:
|
||||
offset = (page - 1) * page_size
|
||||
query = query.offset(offset).limit(page_size)
|
||||
|
||||
return list(query), total
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_commit(cls, commit_id):
|
||||
"""Get a single commit by ID."""
|
||||
success, commit = cls.get_by_id(commit_id)
|
||||
return commit if success else None
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def list_commit_files(cls, commit_id):
|
||||
"""List all file change items for a commit."""
|
||||
items = FileCommitItem.select().where(FileCommitItem.commit_id == commit_id)
|
||||
return list(items)
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def diff_commits(cls, from_id, to_id):
|
||||
"""Compare two commits and return the diff.
|
||||
|
||||
Compares tree_state snapshots (full file inventories), not commit
|
||||
items (which only capture per-commit deltas). Falls back to
|
||||
FileCommitItem records for supplementary metadata (hash/location).
|
||||
|
||||
Returns list of dicts with fields:
|
||||
file_id, file_name, operation, old_hash, new_hash, old_location, new_location
|
||||
"""
|
||||
_, from_commit = cls.get_by_id(from_id)
|
||||
_, to_commit = cls.get_by_id(to_id)
|
||||
|
||||
from_tree = {}
|
||||
to_tree = {}
|
||||
if from_commit and from_commit.tree_state:
|
||||
try:
|
||||
from_tree = json.loads(from_commit.tree_state)
|
||||
except Exception:
|
||||
pass
|
||||
if to_commit and to_commit.tree_state:
|
||||
try:
|
||||
to_tree = json.loads(to_commit.tree_state)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Supplement with commit_item metadata for operations not captured
|
||||
# by tree_state alone (rename).
|
||||
from_items = {}
|
||||
try:
|
||||
for item in FileCommitItem.select().where(FileCommitItem.commit_id == from_id):
|
||||
from_items[item.file_id] = item
|
||||
except Exception:
|
||||
pass
|
||||
to_items = {}
|
||||
try:
|
||||
for item in FileCommitItem.select().where(FileCommitItem.commit_id == to_id):
|
||||
to_items[item.file_id] = item
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
all_file_ids = set(from_tree.keys()) | set(to_tree.keys())
|
||||
|
||||
diff = []
|
||||
for fid in sorted(all_file_ids):
|
||||
from_entry = from_tree.get(fid)
|
||||
to_entry = to_tree.get(fid)
|
||||
|
||||
from_item = from_items.get(fid)
|
||||
to_item = to_items.get(fid)
|
||||
|
||||
from_hash = from_entry.get("hash", "") if isinstance(from_entry, dict) else ""
|
||||
to_hash = to_entry.get("hash", "") if isinstance(to_entry, dict) else ""
|
||||
from_status = from_entry.get("status", "1") if isinstance(from_entry, dict) else "1"
|
||||
to_status = to_entry.get("status", "1") if isinstance(to_entry, dict) else "1"
|
||||
from_name = from_entry.get("name", "") if isinstance(from_entry, dict) else ""
|
||||
to_name = to_entry.get("name", "") if isinstance(to_entry, dict) else ""
|
||||
|
||||
if from_entry is not None and to_entry is None:
|
||||
# Present in from, absent in to → deleted
|
||||
diff.append({
|
||||
"file_id": fid,
|
||||
"file_name": from_name,
|
||||
"operation": "delete",
|
||||
"old_hash": from_hash or (from_item.new_hash if from_item else None),
|
||||
"old_location": from_entry.get("location", "") if isinstance(from_entry, dict) else None,
|
||||
"new_hash": None,
|
||||
"new_location": None,
|
||||
})
|
||||
|
||||
elif from_entry is None and to_entry is not None:
|
||||
# Present in to, absent in from → added
|
||||
diff.append({
|
||||
"file_id": fid,
|
||||
"file_name": to_name,
|
||||
"operation": "add",
|
||||
"old_hash": None,
|
||||
"old_location": None,
|
||||
"new_hash": to_hash or (to_item.new_hash if to_item else None),
|
||||
"new_location": to_entry.get("location", "") if isinstance(to_entry, dict) else None,
|
||||
})
|
||||
|
||||
else:
|
||||
# Both exist — check for changes
|
||||
changed = False
|
||||
operation = "modify"
|
||||
|
||||
# Hash change
|
||||
if from_hash != to_hash:
|
||||
changed = True
|
||||
|
||||
# Status change (active ↔ deleted or vice versa in same entry)
|
||||
if from_status != to_status:
|
||||
changed = True
|
||||
operation = "delete" if to_status == "0" else "add"
|
||||
|
||||
# Name change (rename)
|
||||
if from_name != to_name:
|
||||
changed = True
|
||||
operation = "rename"
|
||||
|
||||
if changed:
|
||||
old_loc = from_entry.get("location", "") if isinstance(from_entry, dict) else None
|
||||
new_loc = to_entry.get("location", "") if isinstance(to_entry, dict) else None
|
||||
diff.append({
|
||||
"file_id": fid,
|
||||
"file_name": to_name or from_name,
|
||||
"operation": operation,
|
||||
"old_hash": from_hash or (from_item.new_hash if from_item else None),
|
||||
"old_location": old_loc or (from_item.new_location if from_item else None),
|
||||
"new_hash": to_hash or (to_item.new_hash if to_item else None),
|
||||
"new_location": new_loc or (to_item.new_location if to_item else None),
|
||||
})
|
||||
|
||||
return diff
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_uncommitted_changes(cls, folder_id):
|
||||
"""Get uncommitted changes by comparing current File table with latest commit.
|
||||
|
||||
Recursively scans all sub-folders under folder_id.
|
||||
Returns list of dicts: [{"file_id", "file_name", "operation": "add"|"modify"|"delete"}]
|
||||
"""
|
||||
# Get latest commit's tree state
|
||||
latest = cls._get_latest_commit(folder_id)
|
||||
committed_files = {}
|
||||
if latest and latest.tree_state:
|
||||
try:
|
||||
committed_files = json.loads(latest.tree_state)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Get all current (live) files recursively under this folder
|
||||
current_files = _collect_all_files_under(folder_id)
|
||||
|
||||
changes = []
|
||||
processed = set()
|
||||
|
||||
# Check for modified and deleted files
|
||||
for fid, committed_entry in committed_files.items():
|
||||
processed.add(fid)
|
||||
if committed_entry.get("status") == "0":
|
||||
continue
|
||||
|
||||
if fid in current_files:
|
||||
live_file = current_files[fid]
|
||||
live_hash = _compute_file_hash(folder_id, fid)
|
||||
committed_hash = committed_entry.get("hash", "")
|
||||
if live_hash and live_hash != committed_hash:
|
||||
changes.append({
|
||||
"file_id": fid,
|
||||
"file_name": committed_entry.get("name", ""),
|
||||
"operation": "modify",
|
||||
})
|
||||
else:
|
||||
if FileService.get_or_none(id=fid) is None:
|
||||
changes.append({
|
||||
"file_id": fid,
|
||||
"file_name": committed_entry.get("name", ""),
|
||||
"operation": "delete",
|
||||
})
|
||||
|
||||
# Check for newly added files
|
||||
for fid, live_file in current_files.items():
|
||||
if fid not in processed:
|
||||
changes.append({
|
||||
"file_id": fid,
|
||||
"file_name": live_file.name,
|
||||
"operation": "add",
|
||||
})
|
||||
|
||||
return changes
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_commit_tree(cls, commit_id):
|
||||
"""Get the tree state snapshot for a commit as a hierarchical tree."""
|
||||
success, commit = cls.get_by_id(commit_id)
|
||||
if not success or not commit.tree_state:
|
||||
return {}
|
||||
try:
|
||||
tree_state = json.loads(commit.tree_state)
|
||||
except Exception:
|
||||
return {}
|
||||
return _build_hierarchical_tree(tree_state, commit.folder_id)
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_commit_file_content(cls, folder_id, commit_id, file_id):
|
||||
"""Get file content as it existed in a given commit.
|
||||
|
||||
Resolves the file's stored hash from the commit's tree_state first;
|
||||
if absent (file unchanged in this commit), walks the parent commit
|
||||
chain via parent_id until a FileCommitItem for the file is found.
|
||||
"""
|
||||
success, commit = cls.get_by_id(commit_id)
|
||||
if not success:
|
||||
return None
|
||||
|
||||
# 1. Try tree_state — the full snapshot at this commit
|
||||
if commit.tree_state:
|
||||
try:
|
||||
tree = json.loads(commit.tree_state)
|
||||
entry = tree.get(file_id)
|
||||
if isinstance(entry, dict):
|
||||
h = entry.get("hash")
|
||||
if h:
|
||||
obj_path = f".objects/{h}"
|
||||
storage_impl = settings.STORAGE_IMPL
|
||||
if storage_impl:
|
||||
return storage_impl.get(folder_id, obj_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 2. Walk parent commits via parent_id until we find a
|
||||
# FileCommitItem for this file_id.
|
||||
current_id = commit_id
|
||||
visited = set()
|
||||
while current_id and current_id not in visited:
|
||||
visited.add(current_id)
|
||||
item = FileCommitItem.select().where(
|
||||
FileCommitItem.commit_id == current_id,
|
||||
FileCommitItem.file_id == file_id,
|
||||
).first()
|
||||
if item and item.new_hash:
|
||||
obj_path = f".objects/{item.new_hash}"
|
||||
storage_impl = settings.STORAGE_IMPL
|
||||
if storage_impl:
|
||||
return storage_impl.get(folder_id, obj_path)
|
||||
# Move to parent
|
||||
parent_commit = cls.get_commit(current_id)
|
||||
if parent_commit and parent_commit.parent_id:
|
||||
current_id = parent_commit.parent_id
|
||||
else:
|
||||
break
|
||||
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_file_version_history(cls, file_id):
|
||||
"""Get version history for a specific file across all commits.
|
||||
|
||||
Returns list of dicts: [{"commit_id", "operation", "hash", "create_time", "message"}]
|
||||
"""
|
||||
items = FileCommitItem.select().where(FileCommitItem.file_id == file_id).order_by(
|
||||
FileCommitItem.create_time.desc())
|
||||
|
||||
versions = []
|
||||
for item in items:
|
||||
commit = cls.get_commit(item.commit_id)
|
||||
if commit:
|
||||
versions.append({
|
||||
"commit_id": item.commit_id,
|
||||
"operation": item.operation,
|
||||
"hash": item.new_hash or item.old_hash or "",
|
||||
"create_time": item.create_time,
|
||||
"message": commit.message,
|
||||
})
|
||||
|
||||
return versions
|
||||
|
||||
|
||||
def _lookup_folder_name(folder_id):
|
||||
"""Look up a folder's display name from the File table."""
|
||||
try:
|
||||
row = File.get_or_none(File.id == folder_id)
|
||||
if row:
|
||||
return row.name
|
||||
except Exception:
|
||||
pass
|
||||
return folder_id
|
||||
|
||||
|
||||
def _build_hierarchical_tree(tree_state, root_folder_id):
|
||||
"""Build a recursive tree from a flat tree_state map.
|
||||
|
||||
Returns {id, name, type: "folder", children: [{file|folder nodes}]}
|
||||
Sub-folder hierarchy is resolved from the File table's parent_id.
|
||||
"""
|
||||
# Collect all unique folder IDs from parent_id fields
|
||||
folder_ids = {root_folder_id}
|
||||
for fid, entry in tree_state.items():
|
||||
if isinstance(entry, dict):
|
||||
pid = entry.get("parent_id") or root_folder_id
|
||||
folder_ids.add(pid)
|
||||
|
||||
# Build a map of folder_id -> parent_folder_id from the File table
|
||||
folder_parent_map = {}
|
||||
for fid in folder_ids:
|
||||
if fid != root_folder_id:
|
||||
try:
|
||||
row = File.get_or_none(File.id == fid)
|
||||
if row:
|
||||
folder_parent_map[fid] = row.parent_id
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Group file entries by parent_id
|
||||
files_by_parent = {}
|
||||
for fid, entry in tree_state.items():
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
pid = entry.get("parent_id") or root_folder_id
|
||||
files_by_parent.setdefault(pid, []).append((fid, entry))
|
||||
|
||||
# Group sub-folder IDs by their parent folder
|
||||
children_by_folder = {}
|
||||
for sfid, ppid in folder_parent_map.items():
|
||||
children_by_folder.setdefault(ppid, []).append(sfid)
|
||||
|
||||
def _build_node(node_id):
|
||||
node = {
|
||||
"id": node_id,
|
||||
"name": _lookup_folder_name(node_id),
|
||||
"type": "folder",
|
||||
"children": [],
|
||||
}
|
||||
# File children
|
||||
for fid, entry in files_by_parent.get(node_id, []):
|
||||
fn = {"id": fid, "name": entry.get("name", fid), "type": "file",
|
||||
"hash": entry.get("hash", ""), "size": entry.get("size", 0),
|
||||
"status": entry.get("status", "1")}
|
||||
if entry.get("location"):
|
||||
fn["location"] = entry["location"]
|
||||
node["children"].append(fn)
|
||||
# Sub-folder children (resolved from File table)
|
||||
for sfid in children_by_folder.get(node_id, []):
|
||||
child = _build_node(sfid)
|
||||
if child:
|
||||
node["children"].append(child)
|
||||
return node
|
||||
|
||||
return _build_node(root_folder_id)
|
||||
|
||||
|
||||
def _compute_file_hash(folder_id, file_id):
|
||||
"""Compute SHA256 hash of current file content from storage."""
|
||||
try:
|
||||
file_record = FileService.get_by_id(file_id)
|
||||
if not file_record[0]:
|
||||
return None
|
||||
file = file_record[1]
|
||||
if not file.location:
|
||||
return None
|
||||
|
||||
storage = settings.STORAGE_IMPL
|
||||
if not storage:
|
||||
return None
|
||||
|
||||
data = storage.get(folder_id, file.location)
|
||||
if data:
|
||||
return hashlib.sha256(data).hexdigest()
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
Reference in New Issue
Block a user