From b5bea72e4b1c581f5cdc749518fb44c60980da23 Mon Sep 17 00:00:00 2001 From: Yingfeng Date: Mon, 15 Jun 2026 11:19:56 +0800 Subject: [PATCH] Add git-like file commit API (#15978) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? | # | Method | Endpoint | Description | Git Equivalent | |---|--------|----------|-------------|----------------| | 1 | `POST` | `/api/v1/{prefix}/{folder_id}/commits` | Create a snapshot commit with file changes (add/modify/delete/rename) | `git add` + `git commit` | | 2 | `GET` | `/api/v1/{prefix}/{folder_id}/commits` | List commit history (paginated) | `git log` | | 3 | `GET` | `/api/v1/{prefix}/{folder_id}/commits/{commit_id}` | Get commit detail with file changes | `git show` | | 4 | `GET` | `/api/v1/{prefix}/{folder_id}/commits/{commit_id}/files` | List file changes in a commit | `git show --name-status` | | 5 | `GET` | `/api/v1/{prefix}/{folder_id}/commits/diff?from=...&to=...` | Compare two commits and return differences | `git diff` | | 6 | `GET` | `/api/v1/{prefix}/{folder_id}/changes` | Get uncommitted changes (add/modify/delete) | `git status` | | 7 | `GET` | `/api/v1/{prefix}/{folder_id}/commits/{commit_id}/tree` | Get the folder tree snapshot at commit time | `git ls-tree` | | 8 | `GET` | `/api/v1/{prefix}/{folder_id}/commits/{commit_id}/files/{file_id}/content` | Get a file's content as it existed in a specific commit | `git show HEAD:file` | | 9 | `GET` | `/api/v1/{prefix}/{file_id}/versions` | Get version history for a specific file across all commits | `git log -- file` | Where `{prefix}/{id}` can be: - `folders/{folder_id}` — direct folder access - `workspaces/{workspace_id}` — alias of `folders/{folder_id}` - `datasets/{dataset_id}` — resolves to the dataset's folder - `memories/{memory_id}` — resolves to the memory's folder - `skills/{skill_id}` — resolves to the skill's folder ### Type of change - [x] New Feature (non-breaking change which adds functionality) - [x] Documentation Update --- api/apps/restful_apis/file_commit_api.py | 314 ++++++++ api/db/db_models.py | 32 + api/db/services/file_commit_service.py | 658 ++++++++++++++++ cmd/server_main.go | 3 +- docs/references/http_api_reference.md | 562 ++++++++++++++ internal/dao/database.go | 2 + internal/dao/file.go | 14 + internal/dao/file_commit.go | 154 ++++ internal/entity/file_commit.go | 108 +++ internal/handler/file_commit.go | 584 ++++++++++++++ internal/handler/file_commit_test.go | 413 ++++++++++ internal/router/router.go | 46 ++ internal/service/file_commit.go | 637 ++++++++++++++++ .../test_file_commit_routes_unit.py | 718 ++++++++++++++++++ test/testcases/restful_api/test_sessions.py | 6 - 15 files changed, 4244 insertions(+), 7 deletions(-) create mode 100644 api/apps/restful_apis/file_commit_api.py create mode 100644 api/db/services/file_commit_service.py create mode 100644 internal/dao/file_commit.go create mode 100644 internal/entity/file_commit.go create mode 100644 internal/handler/file_commit.go create mode 100644 internal/handler/file_commit_test.go create mode 100644 internal/service/file_commit.go create mode 100644 test/testcases/restful_api/test_file_commit_routes_unit.py diff --git a/api/apps/restful_apis/file_commit_api.py b/api/apps/restful_apis/file_commit_api.py new file mode 100644 index 0000000000..99ab3edc4d --- /dev/null +++ b/api/apps/restful_apis/file_commit_api.py @@ -0,0 +1,314 @@ +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +from functools import wraps + +from quart import request + +from api.apps import login_required, current_user +from api.utils.api_utils import get_json_result, get_data_error_result, get_request_json, server_error_response, validate_request + +# manager is injected dynamically by api.apps.register_page() before this +# module is exec'd. DO NOT assign manager = None here — it would overwrite +# the Blueprint that register_page set on the module. +from api.db.services.file_commit_service import FileCommitService +from api.db.services.knowledgebase_service import KnowledgebaseService +from api.db.services.file_service import FileService +from common.constants import FileSource + +logger = logging.getLogger(__name__) + +_ENTITY_RESOLVERS = {} + +# Counter to give each generated route function a unique name, +# preventing Quart Blueprint endpoint name collisions. +_route_suffix = [0] + + +def _register_resolver(entity_type): + """Decorator that registers a folder_id resolver for an entity type. + + The decorated function receives (entity_id) and must return a folder_id + or None if the entity has no corresponding folder. + """ + def decorator(func): + _ENTITY_RESOLVERS[entity_type] = func + @wraps(func) + def wrapper(entity_id): + return func(entity_id) + return wrapper + return decorator + + +def _resolve_folder_id(entity_type, entity_id): + """Resolve an entity (dataset/memory/skill) to its folder_id.""" + resolver = _ENTITY_RESOLVERS.get(entity_type) + if resolver is None: + return None + return resolver(entity_id) + + +@_register_resolver("datasets") +def _resolve_dataset_folder(dataset_id): + success, kb = KnowledgebaseService.get_by_id(dataset_id) + if not success: + return None + # Find the folder with matching name, source_type, and tenant_id + folders = FileService.query( + name=kb.name, + source_type=FileSource.KNOWLEDGEBASE.value, + type="folder", + tenant_id=kb.tenant_id, + ) + if folders: + return folders[0].id + return None + + +# ── Route registration helper ───────────────────────────────────────────── + +def _register_commit_routes(prefix, param_name, resolver_type=None): + """Register all 8 commit endpoints for a given URL prefix. + + Args: + prefix: URL prefix like '/folders/' + param_name: The URL parameter name (e.g. 'folder_id', 'dataset_id') + resolver_type: If set, resolve param_name → folder_id before calling logic + """ + # Unique suffix for this call to prevent Blueprint endpoint name collisions + _route_suffix[0] += 1 + _n = _route_suffix[0] + + def _resolve(entity_id): + if resolver_type is None: + return entity_id # already a folder_id + folder_id = _resolve_folder_id(resolver_type, entity_id) + if folder_id is None: + raise ValueError(f"Could not resolve {resolver_type} '{entity_id}' to a folder") + return folder_id + + # ── Create commit ────────────────────────────────────────────────────── + @manager.route(f'{prefix}/commits', methods=['POST'], endpoint=f'create_commit_{_n}') # noqa: F821 + @login_required + @validate_request("message", "files") + async def create_commit(entity_id): + folder_id = _resolve(entity_id) + req = await get_request_json() + try: + commit = FileCommitService.create_commit( + folder_id=folder_id, + author_id=current_user.id, + message=req["message"], + file_changes=req["files"], + ) + return get_json_result(data={ + "id": commit.id, + "folder_id": commit.folder_id, + "parent_id": commit.parent_id, + "message": commit.message, + "author_id": commit.author_id, + "file_count": commit.file_count, + "tree_state": commit.tree_state, + "create_time": commit.create_time, + }) + except Exception as e: + return server_error_response(e) + + # ── List commits ─────────────────────────────────────────────────────── + @manager.route(f'{prefix}/commits', methods=['GET'], endpoint=f'list_commits_{_n}') # noqa: F821 + @login_required + async def list_commits(entity_id): + folder_id = _resolve(entity_id) + try: + page = int(request.args.get("page", 1)) + page_size = int(request.args.get("page_size", 15)) + order_by = request.args.get("order_by", "create_time") + desc = request.args.get("desc", "true").lower() != "false" + commits, total = FileCommitService.list_commits(folder_id, page, page_size, order_by, desc) + return get_json_result(data={ + "total": total, + "page": page, + "page_size": page_size, + "commits": [{ + "id": c.id, + "folder_id": c.folder_id, + "parent_id": c.parent_id, + "message": c.message, + "author_id": c.author_id, + "file_count": c.file_count, + "create_time": c.create_time, + } for c in commits], + }) + except Exception as e: + return server_error_response(e) + + # ── Get commit ───────────────────────────────────────────────────────── + @manager.route(f'{prefix}/commits/', methods=['GET'], endpoint=f'get_commit_{_n}') # noqa: F821 + @login_required + async def get_commit(entity_id, commit_id): + folder_id = _resolve(entity_id) + try: + commit = FileCommitService.get_commit(commit_id) + if not commit: + return get_data_error_result("Commit not found") + if commit.folder_id != folder_id: + return get_data_error_result("Commit not found in workspace") + items = FileCommitService.list_commit_files(commit_id) + return get_json_result(data={ + "id": commit.id, + "folder_id": commit.folder_id, + "parent_id": commit.parent_id, + "message": commit.message, + "author_id": commit.author_id, + "file_count": commit.file_count, + "create_time": commit.create_time, + "files": [{ + "file_id": item.file_id, + "operation": item.operation, + "old_hash": item.old_hash, + "new_hash": item.new_hash, + "old_name": item.old_name, + "new_name": item.new_name, + } for item in items], + }) + except Exception as e: + return server_error_response(e) + + # ── List commit files ────────────────────────────────────────────────── + @manager.route(f'{prefix}/commits//files', methods=['GET'], endpoint=f'list_commit_files_{_n}') # noqa: F821 + @login_required + async def list_commit_files(entity_id, commit_id): + folder_id = _resolve(entity_id) + try: + commit = FileCommitService.get_commit(commit_id) + if not commit: + return get_data_error_result("Commit not found") + if commit.folder_id != folder_id: + return get_data_error_result("Commit not found in workspace") + items = FileCommitService.list_commit_files(commit_id) + return get_json_result(data=[{ + "id": item.id, + "file_id": item.file_id, + "operation": item.operation, + "old_hash": item.old_hash, + "new_hash": item.new_hash, + "old_location": item.old_location, + "new_location": item.new_location, + "old_name": item.old_name, + "new_name": item.new_name, + } for item in items]) + except Exception as e: + return server_error_response(e) + + # ── Diff commits ─────────────────────────────────────────────────────── + @manager.route(f'{prefix}/commits/diff', methods=['GET'], endpoint=f'diff_commits_{_n}') # noqa: F821 + @login_required + async def diff_commits(entity_id): + folder_id = _resolve(entity_id) + from_id = request.args.get("from") + to_id = request.args.get("to") + if not from_id or not to_id: + return get_data_error_result("'from' and 'to' parameters are required") + try: + from_commit = FileCommitService.get_commit(from_id) + to_commit = FileCommitService.get_commit(to_id) + if not from_commit or not to_commit: + return get_data_error_result("Commit not found") + if from_commit.folder_id != folder_id or to_commit.folder_id != folder_id: + return get_data_error_result("Commit not found in workspace") + diff = FileCommitService.diff_commits(from_id, to_id) + return get_json_result(data=diff) + except Exception as e: + return server_error_response(e) + + # ── Get uncommitted changes ──────────────────────────────────────────── + @manager.route(f'{prefix}/changes', methods=['GET'], endpoint=f'get_uncommitted_changes_{_n}') # noqa: F821 + @login_required + async def get_uncommitted_changes(entity_id): + folder_id = _resolve(entity_id) + try: + changes = FileCommitService.get_uncommitted_changes(folder_id) + return get_json_result(data=changes) + except Exception as e: + return server_error_response(e) + + # ── Get commit tree ──────────────────────────────────────────────────── + @manager.route(f'{prefix}/commits//tree', methods=['GET'], endpoint=f'get_commit_tree_{_n}') # noqa: F821 + @login_required + async def get_commit_tree(entity_id, commit_id): + folder_id = _resolve(entity_id) + try: + commit = FileCommitService.get_commit(commit_id) + if not commit: + return get_data_error_result("Commit not found") + if commit.folder_id != folder_id: + return get_data_error_result("Commit not found in workspace") + tree = FileCommitService.get_commit_tree(commit_id) + return get_json_result(data=tree) + except Exception as e: + return server_error_response(e) + + # ── Get commit file content ──────────────────────────────────────────── + @manager.route(f'{prefix}/commits//files//content', methods=['GET'], endpoint=f'get_commit_file_content_{_n}') # noqa: F821 + @login_required + async def get_commit_file_content(entity_id, commit_id, file_id): + folder_id = _resolve(entity_id) + try: + commit = FileCommitService.get_commit(commit_id) + if not commit: + return get_data_error_result("Commit not found") + if commit.folder_id != folder_id: + return get_data_error_result("Commit not found in workspace") + content = FileCommitService.get_commit_file_content(folder_id, commit_id, file_id) + if content is None: + return get_data_error_result("File not found in this commit") + return get_json_result(data={"content": content.decode("utf-8", errors="replace")}) + except Exception as e: + return server_error_response(e) + + # Expose handlers at module level for direct testing. + _g = globals() + _g['create_commit'] = create_commit + _g['list_commits'] = list_commits + _g['get_commit'] = get_commit + _g['list_commit_files'] = list_commit_files + _g['diff_commits'] = diff_commits + _g['get_uncommitted_changes'] = get_uncommitted_changes + _g['get_commit_tree'] = get_commit_tree + _g['get_commit_file_content'] = get_commit_file_content + +# ── Register routes for all entity types ────────────────────────────────── +# All URL patterns use as the consistent param name. +# For /folders/ entity_id IS the folder_id directly. +# For other entity types entity_id is resolved via _resolve_folder_id(). +# Register datasets first, workspace second, folders last — +# the last call's handlers overwrite module-level names for test access. +_register_commit_routes('/datasets/', 'entity_id', resolver_type='datasets') +_register_commit_routes('/workspace/', 'entity_id') # alias — workspace_id == folder_id +_register_commit_routes('/folders/', 'entity_id') # direct — entity_id == folder_id (wins) +# /memories and /skills routes are not mounted until resolvers are implemented. + + +# ── File version history (shared across all entity types) ───────────────── +@manager.route('/files//versions', methods=['GET']) # noqa: F821 +@login_required +async def get_file_version_history(file_id): + try: + versions = FileCommitService.get_file_version_history(file_id) + return get_json_result(data=versions) + except Exception as e: + return server_error_response(e) diff --git a/api/db/db_models.py b/api/db/db_models.py index 0575179ebd..e274d1157d 100644 --- a/api/db/db_models.py +++ b/api/db/db_models.py @@ -956,6 +956,38 @@ class File2Document(DataBaseModel): db_table = "file2document" +class FileCommit(DataBaseModel): + id = CharField(max_length=32, primary_key=True) + folder_id = CharField(max_length=32, null=False, help_text="workspace folder id", index=True) + parent_id = CharField(max_length=32, null=True, help_text="parent commit id", index=True) + message = CharField(max_length=512, default="", help_text="commit message") + author_id = CharField(max_length=32, null=False, help_text="user who created the commit", index=True) + file_count = IntegerField(default=0, help_text="number of files in this commit") + tree_state = LongTextField(null=True, help_text="JSON snapshot of the full folder tree at this commit") + + class Meta: + db_table = "file_commit" + + +class FileCommitItem(DataBaseModel): + id = CharField(max_length=32, primary_key=True) + commit_id = CharField(max_length=32, null=False, help_text="commit id", index=True) + file_id = CharField(max_length=32, null=False, help_text="file id", index=True) + operation = CharField(max_length=16, null=False, help_text="add / modify / delete / rename", index=True) + old_hash = CharField(max_length=64, null=True, help_text="old content hash", index=True) + new_hash = CharField(max_length=64, null=True, help_text="new content hash", index=True) + old_location = CharField(max_length=255, null=True, help_text="old storage location") + new_location = CharField(max_length=255, null=True, help_text="new storage location") + old_name = CharField(max_length=255, null=True, help_text="old file name (for rename)") + new_name = CharField(max_length=255, null=True, help_text="new file name (for rename)") + + class Meta: + db_table = "file_commit_item" + indexes = ( + (("commit_id", "file_id"), True), # unique composite index + ) + + class Task(DataBaseModel): id = CharField(max_length=32, primary_key=True) doc_id = CharField(max_length=32, null=False, index=True) diff --git a/api/db/services/file_commit_service.py b/api/db/services/file_commit_service.py new file mode 100644 index 0000000000..e22321ca9d --- /dev/null +++ b/api/db/services/file_commit_service.py @@ -0,0 +1,658 @@ +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import datetime +import hashlib +import json +import logging + +from api.db.db_models import DB, FileCommit, FileCommitItem, File +from api.db.services.common_service import CommonService +from api.db.services.file_service import FileService +from common import settings +from common.misc_utils import get_uuid +from common.time_utils import current_timestamp, datetime_format + +logger = logging.getLogger(__name__) + + +def _get_file_parent_id(file_id): + """Look up a file's parent_id from the File table.""" + try: + row = File.get_or_none(File.id == file_id) + if row: + return row.parent_id + except Exception: + pass + return None + + +def _collect_all_files_under(folder_id): + """Recursively collect all non-folder files under a folder (including sub-folders). + + Returns a dict of {file_id: File_model_instance}. + """ + results = {} + try: + # Direct file children (non-folder) of this folder + for f in File.select().where( + File.parent_id == folder_id, + File.id != folder_id, + File.type != "folder", + ): + results[f.id] = f + # Sub-folders — recurse + for sub in File.select().where( + File.parent_id == folder_id, + File.type == "folder", + ): + results.update(_collect_all_files_under(sub.id)) + except Exception: + pass + return results + + +class FileCommitService(CommonService): + model = FileCommit + + @classmethod + def create_commit(cls, folder_id, author_id, message, file_changes): + """Create a new commit for a workspace folder. + + Args: + folder_id: The workspace folder ID + author_id: The user ID + message: Commit message + file_changes: List of dicts: + [{"file_id": str, "file_name": str, "operation": "add"|"modify"|"delete"|"rename", + "content": str (optional, for add/modify), "content_hash": str (optional), + "old_name": str, "new_name": str (for rename)}] + + Returns: + The created FileCommit instance + """ + commit_id = get_uuid() + now_ts = current_timestamp() + now_dt = datetime_format(date_time=datetime.datetime.now()) + + with DB.atomic(): + # 1. Get the latest (chain head) commit for this folder + latest_commit = cls._get_latest_commit(folder_id) + + # 2. Begin creating the commit record + commit_data = { + "id": commit_id, + "folder_id": folder_id, + "parent_id": latest_commit.id if latest_commit else None, + "message": message, + "author_id": author_id, + "file_count": len(file_changes), + "create_time": now_ts, + "create_date": now_dt, + "update_time": now_ts, + "update_date": now_dt, + } + + # 3. Insert commit record + FileCommit(**commit_data).save(force_insert=True) + + # 4. Build new tree state and process each file change + tree_state = {} + if latest_commit and latest_commit.tree_state: + try: + tree_state = json.loads(latest_commit.tree_state) + except (json.JSONDecodeError, TypeError): + tree_state = {} + + # 4a. Backfill parent_id for existing entries that lack it + for fid, entry in tree_state.items(): + if isinstance(entry, dict) and "parent_id" not in entry: + pid = _get_file_parent_id(fid) + if pid: + entry["parent_id"] = pid + + storage_impl = settings.STORAGE_IMPL + + for change in file_changes: + op = change.get("operation", "modify") + file_id = change.get("file_id", "") + commit_item_id = get_uuid() + + item = { + "id": commit_item_id, + "commit_id": commit_id, + "file_id": file_id, + "operation": op, + "create_time": now_ts, + "create_date": now_dt, + "update_time": now_ts, + "update_date": now_dt, + } + + if op == "add": + content = change.get("content", "") + content_bytes = content.encode("utf-8") if isinstance(content, str) else content + content_hash = hashlib.sha256(content_bytes).hexdigest() + obj_key = f".objects/{content_hash}" + + # Store blob via content-addressable storage + if storage_impl: + storage_impl.put(folder_id, obj_key, content_bytes) + + item["new_hash"] = content_hash + item["new_location"] = obj_key + + # Update file record in DB + File.update({ + "location": obj_key, + "size": len(content_bytes), + "update_time": current_timestamp(), + }).where(File.id == file_id).execute() + + # Update tree state + file_parent = _get_file_parent_id(file_id) + tree_state[file_id] = { + "hash": content_hash, + "location": obj_key, + "name": change.get("file_name", ""), + "size": len(content_bytes), + "status": "1", + "parent_id": file_parent, + } + + elif op == "modify": + content = change.get("content", "") + content_bytes = content.encode("utf-8") if isinstance(content, str) else content + content_hash = hashlib.sha256(content_bytes).hexdigest() + obj_key = f".objects/{content_hash}" + + # Record old hash + old_entry = tree_state.get(file_id, {}) + old_hash = old_entry.get("hash", "") + old_location = old_entry.get("location", "") + + if old_hash: + item["old_hash"] = old_hash + item["old_location"] = old_location + + # Store new blob + if storage_impl: + storage_impl.put(folder_id, obj_key, content_bytes) + + item["new_hash"] = content_hash + item["new_location"] = obj_key + + # Update file record + File.update({ + "location": obj_key, + "size": len(content_bytes), + "update_time": current_timestamp(), + }).where(File.id == file_id).execute() + + # Update tree state + file_parent = _get_file_parent_id(file_id) + tree_state[file_id] = { + "hash": content_hash, + "location": obj_key, + "name": change.get("file_name", tree_state.get(file_id, {}).get("name", "")), + "size": len(content_bytes), + "status": "1", + "parent_id": file_parent, + } + + elif op == "delete": + old_entry = tree_state.get(file_id, {}) + old_hash = old_entry.get("hash", "") + old_location = old_entry.get("location", "") + if old_hash: + item["old_hash"] = old_hash + item["old_location"] = old_location + + # Soft-delete the file record + File.update(status="0", update_time=current_timestamp()).where( + File.id == file_id + ).execute() + + # Remove from tree state (mark deleted) + if file_id in tree_state: + tree_state[file_id]["status"] = "0" + + elif op == "rename": + old_name = change.get("old_name", "") + new_name = change.get("new_name", "") + item["old_name"] = old_name + item["new_name"] = new_name + + # Update the file record name + File.update(name=new_name, update_time=current_timestamp()).where( + File.id == file_id + ).execute() + + # Update tree state + if file_id in tree_state: + tree_state[file_id]["name"] = new_name + + # Insert commit item + FileCommitItem(**item).save(force_insert=True) + + # 5. Save the tree state snapshot + tree_json = json.dumps(tree_state, ensure_ascii=False) + cls.model.update(tree_state=tree_json).where(cls.model.id == commit_id).execute() + + _, commit = cls.get_by_id(commit_id) + return commit + + @classmethod + def _get_latest_commit(cls, folder_id): + """Get the latest (chain head) commit for a folder.""" + try: + return cls.model.select().where( + cls.model.folder_id == folder_id + ).order_by(cls.model.create_time.desc()).first() + except Exception: + return None + + @classmethod + @DB.connection_context() + def list_commits(cls, folder_id, page=1, page_size=15, order_by="create_time", desc=True): + """List commits for a workspace folder with pagination.""" + total = cls.model.select().where(cls.model.folder_id == folder_id).count() + + query = cls.model.select().where(cls.model.folder_id == folder_id) + if desc: + query = query.order_by(getattr(cls.model, order_by).desc()) + else: + query = query.order_by(getattr(cls.model, order_by).asc()) + + if page and page_size: + offset = (page - 1) * page_size + query = query.offset(offset).limit(page_size) + + return list(query), total + + @classmethod + @DB.connection_context() + def get_commit(cls, commit_id): + """Get a single commit by ID.""" + success, commit = cls.get_by_id(commit_id) + return commit if success else None + + @classmethod + @DB.connection_context() + def list_commit_files(cls, commit_id): + """List all file change items for a commit.""" + items = FileCommitItem.select().where(FileCommitItem.commit_id == commit_id) + return list(items) + + @classmethod + @DB.connection_context() + def diff_commits(cls, from_id, to_id): + """Compare two commits and return the diff. + + Compares tree_state snapshots (full file inventories), not commit + items (which only capture per-commit deltas). Falls back to + FileCommitItem records for supplementary metadata (hash/location). + + Returns list of dicts with fields: + file_id, file_name, operation, old_hash, new_hash, old_location, new_location + """ + _, from_commit = cls.get_by_id(from_id) + _, to_commit = cls.get_by_id(to_id) + + from_tree = {} + to_tree = {} + if from_commit and from_commit.tree_state: + try: + from_tree = json.loads(from_commit.tree_state) + except Exception: + pass + if to_commit and to_commit.tree_state: + try: + to_tree = json.loads(to_commit.tree_state) + except Exception: + pass + + # Supplement with commit_item metadata for operations not captured + # by tree_state alone (rename). + from_items = {} + try: + for item in FileCommitItem.select().where(FileCommitItem.commit_id == from_id): + from_items[item.file_id] = item + except Exception: + pass + to_items = {} + try: + for item in FileCommitItem.select().where(FileCommitItem.commit_id == to_id): + to_items[item.file_id] = item + except Exception: + pass + + all_file_ids = set(from_tree.keys()) | set(to_tree.keys()) + + diff = [] + for fid in sorted(all_file_ids): + from_entry = from_tree.get(fid) + to_entry = to_tree.get(fid) + + from_item = from_items.get(fid) + to_item = to_items.get(fid) + + from_hash = from_entry.get("hash", "") if isinstance(from_entry, dict) else "" + to_hash = to_entry.get("hash", "") if isinstance(to_entry, dict) else "" + from_status = from_entry.get("status", "1") if isinstance(from_entry, dict) else "1" + to_status = to_entry.get("status", "1") if isinstance(to_entry, dict) else "1" + from_name = from_entry.get("name", "") if isinstance(from_entry, dict) else "" + to_name = to_entry.get("name", "") if isinstance(to_entry, dict) else "" + + if from_entry is not None and to_entry is None: + # Present in from, absent in to → deleted + diff.append({ + "file_id": fid, + "file_name": from_name, + "operation": "delete", + "old_hash": from_hash or (from_item.new_hash if from_item else None), + "old_location": from_entry.get("location", "") if isinstance(from_entry, dict) else None, + "new_hash": None, + "new_location": None, + }) + + elif from_entry is None and to_entry is not None: + # Present in to, absent in from → added + diff.append({ + "file_id": fid, + "file_name": to_name, + "operation": "add", + "old_hash": None, + "old_location": None, + "new_hash": to_hash or (to_item.new_hash if to_item else None), + "new_location": to_entry.get("location", "") if isinstance(to_entry, dict) else None, + }) + + else: + # Both exist — check for changes + changed = False + operation = "modify" + + # Hash change + if from_hash != to_hash: + changed = True + + # Status change (active ↔ deleted or vice versa in same entry) + if from_status != to_status: + changed = True + operation = "delete" if to_status == "0" else "add" + + # Name change (rename) + if from_name != to_name: + changed = True + operation = "rename" + + if changed: + old_loc = from_entry.get("location", "") if isinstance(from_entry, dict) else None + new_loc = to_entry.get("location", "") if isinstance(to_entry, dict) else None + diff.append({ + "file_id": fid, + "file_name": to_name or from_name, + "operation": operation, + "old_hash": from_hash or (from_item.new_hash if from_item else None), + "old_location": old_loc or (from_item.new_location if from_item else None), + "new_hash": to_hash or (to_item.new_hash if to_item else None), + "new_location": new_loc or (to_item.new_location if to_item else None), + }) + + return diff + + @classmethod + @DB.connection_context() + def get_uncommitted_changes(cls, folder_id): + """Get uncommitted changes by comparing current File table with latest commit. + + Recursively scans all sub-folders under folder_id. + Returns list of dicts: [{"file_id", "file_name", "operation": "add"|"modify"|"delete"}] + """ + # Get latest commit's tree state + latest = cls._get_latest_commit(folder_id) + committed_files = {} + if latest and latest.tree_state: + try: + committed_files = json.loads(latest.tree_state) + except Exception: + pass + + # Get all current (live) files recursively under this folder + current_files = _collect_all_files_under(folder_id) + + changes = [] + processed = set() + + # Check for modified and deleted files + for fid, committed_entry in committed_files.items(): + processed.add(fid) + if committed_entry.get("status") == "0": + continue + + if fid in current_files: + live_file = current_files[fid] + live_hash = _compute_file_hash(folder_id, fid) + committed_hash = committed_entry.get("hash", "") + if live_hash and live_hash != committed_hash: + changes.append({ + "file_id": fid, + "file_name": committed_entry.get("name", ""), + "operation": "modify", + }) + else: + if FileService.get_or_none(id=fid) is None: + changes.append({ + "file_id": fid, + "file_name": committed_entry.get("name", ""), + "operation": "delete", + }) + + # Check for newly added files + for fid, live_file in current_files.items(): + if fid not in processed: + changes.append({ + "file_id": fid, + "file_name": live_file.name, + "operation": "add", + }) + + return changes + + @classmethod + @DB.connection_context() + def get_commit_tree(cls, commit_id): + """Get the tree state snapshot for a commit as a hierarchical tree.""" + success, commit = cls.get_by_id(commit_id) + if not success or not commit.tree_state: + return {} + try: + tree_state = json.loads(commit.tree_state) + except Exception: + return {} + return _build_hierarchical_tree(tree_state, commit.folder_id) + + @classmethod + @DB.connection_context() + def get_commit_file_content(cls, folder_id, commit_id, file_id): + """Get file content as it existed in a given commit. + + Resolves the file's stored hash from the commit's tree_state first; + if absent (file unchanged in this commit), walks the parent commit + chain via parent_id until a FileCommitItem for the file is found. + """ + success, commit = cls.get_by_id(commit_id) + if not success: + return None + + # 1. Try tree_state — the full snapshot at this commit + if commit.tree_state: + try: + tree = json.loads(commit.tree_state) + entry = tree.get(file_id) + if isinstance(entry, dict): + h = entry.get("hash") + if h: + obj_path = f".objects/{h}" + storage_impl = settings.STORAGE_IMPL + if storage_impl: + return storage_impl.get(folder_id, obj_path) + except Exception: + pass + + # 2. Walk parent commits via parent_id until we find a + # FileCommitItem for this file_id. + current_id = commit_id + visited = set() + while current_id and current_id not in visited: + visited.add(current_id) + item = FileCommitItem.select().where( + FileCommitItem.commit_id == current_id, + FileCommitItem.file_id == file_id, + ).first() + if item and item.new_hash: + obj_path = f".objects/{item.new_hash}" + storage_impl = settings.STORAGE_IMPL + if storage_impl: + return storage_impl.get(folder_id, obj_path) + # Move to parent + parent_commit = cls.get_commit(current_id) + if parent_commit and parent_commit.parent_id: + current_id = parent_commit.parent_id + else: + break + + return None + + @classmethod + @DB.connection_context() + def get_file_version_history(cls, file_id): + """Get version history for a specific file across all commits. + + Returns list of dicts: [{"commit_id", "operation", "hash", "create_time", "message"}] + """ + items = FileCommitItem.select().where(FileCommitItem.file_id == file_id).order_by( + FileCommitItem.create_time.desc()) + + versions = [] + for item in items: + commit = cls.get_commit(item.commit_id) + if commit: + versions.append({ + "commit_id": item.commit_id, + "operation": item.operation, + "hash": item.new_hash or item.old_hash or "", + "create_time": item.create_time, + "message": commit.message, + }) + + return versions + + +def _lookup_folder_name(folder_id): + """Look up a folder's display name from the File table.""" + try: + row = File.get_or_none(File.id == folder_id) + if row: + return row.name + except Exception: + pass + return folder_id + + +def _build_hierarchical_tree(tree_state, root_folder_id): + """Build a recursive tree from a flat tree_state map. + + Returns {id, name, type: "folder", children: [{file|folder nodes}]} + Sub-folder hierarchy is resolved from the File table's parent_id. + """ + # Collect all unique folder IDs from parent_id fields + folder_ids = {root_folder_id} + for fid, entry in tree_state.items(): + if isinstance(entry, dict): + pid = entry.get("parent_id") or root_folder_id + folder_ids.add(pid) + + # Build a map of folder_id -> parent_folder_id from the File table + folder_parent_map = {} + for fid in folder_ids: + if fid != root_folder_id: + try: + row = File.get_or_none(File.id == fid) + if row: + folder_parent_map[fid] = row.parent_id + except Exception: + pass + + # Group file entries by parent_id + files_by_parent = {} + for fid, entry in tree_state.items(): + if not isinstance(entry, dict): + continue + pid = entry.get("parent_id") or root_folder_id + files_by_parent.setdefault(pid, []).append((fid, entry)) + + # Group sub-folder IDs by their parent folder + children_by_folder = {} + for sfid, ppid in folder_parent_map.items(): + children_by_folder.setdefault(ppid, []).append(sfid) + + def _build_node(node_id): + node = { + "id": node_id, + "name": _lookup_folder_name(node_id), + "type": "folder", + "children": [], + } + # File children + for fid, entry in files_by_parent.get(node_id, []): + fn = {"id": fid, "name": entry.get("name", fid), "type": "file", + "hash": entry.get("hash", ""), "size": entry.get("size", 0), + "status": entry.get("status", "1")} + if entry.get("location"): + fn["location"] = entry["location"] + node["children"].append(fn) + # Sub-folder children (resolved from File table) + for sfid in children_by_folder.get(node_id, []): + child = _build_node(sfid) + if child: + node["children"].append(child) + return node + + return _build_node(root_folder_id) + + +def _compute_file_hash(folder_id, file_id): + """Compute SHA256 hash of current file content from storage.""" + try: + file_record = FileService.get_by_id(file_id) + if not file_record[0]: + return None + file = file_record[1] + if not file.location: + return None + + storage = settings.STORAGE_IMPL + if not storage: + return None + + data = storage.get(folder_id, file.location) + if data: + return hashlib.sha256(data).hexdigest() + return None + except Exception: + return None diff --git a/cmd/server_main.go b/cmd/server_main.go index 164212e403..6a0db93ab2 100644 --- a/cmd/server_main.go +++ b/cmd/server_main.go @@ -232,6 +232,7 @@ func startServer(config *server.Config) { searchBotHandler.SetAskService(service.NewAskService(chunkService, nil, 0, 0)) pluginHandler := handler.NewPluginHandler(service.NewPluginService()) modelHandler := handler.NewModelHandler(service.NewModelProviderService()) + fileCommitHandler := handler.NewFileCommitHandler(service.NewFileCommitService()) // Dify retrieval handler docDAO := dao.NewDocumentDAO() @@ -262,7 +263,7 @@ func startServer(config *server.Config) { adminRuntimeHandler := handler.NewAdminRuntimeHandler(adminRuntimeSelector) // Initialize router - r := router.NewRouter(authHandler, userHandler, tenantHandler, documentHandler, datasetsHandler, systemHandler, knowledgebaseHandler, chunkHandler, llmHandler, chatHandler, chatSessionHandler, connectorHandler, searchHandler, fileHandler, memoryHandler, mcpHandler, skillSearchHandler, providerHandler, agentHandler, searchBotHandler, difyRetrievalHandler, pluginHandler, modelHandler, adminRuntimeHandler) + r := router.NewRouter(authHandler, userHandler, tenantHandler, documentHandler, datasetsHandler, systemHandler, knowledgebaseHandler, chunkHandler, llmHandler, chatHandler, chatSessionHandler, connectorHandler, searchHandler, fileHandler, memoryHandler, mcpHandler, skillSearchHandler, providerHandler, agentHandler, searchBotHandler, difyRetrievalHandler, pluginHandler, modelHandler, fileCommitHandler, adminRuntimeHandler) // Create Gin engine ginEngine := gin.New() diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index 94d38b4429..f7dd43d2a5 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -7544,6 +7544,568 @@ or --- +### Create commit + +**POST** `/api/v1/folders/{folder_id}/commits` + +Creates a new snapshot commit for the specified folder. +This endpoint also supports: +- `/api/v1/workspace/{workspace_id}/commits` (alias, workspace_id == folder_id) +- `/api/v1/datasets/{dataset_id}/commits` (resolves dataset to its folder) + +#### Request + +- Method: POST +- URL: `/api/v1/folders/{folder_id}/commits` +- Headers: + - `'Authorization: Bearer '` +- Body: + - `'message'`: `string` (required) + The commit message. + - `'files'`: `list[object]` (required) + The list of file changes. Each file change is an object with the following fields: + +##### Request example + +```bash +curl --request POST \ + --url http://{address}/api/v1/folders/{folder_id}/commits \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer ' \ + --data '{ + "message": "update config files", + "files": [ + {"file_id": "file_uuid", "file_name": "config.json", "operation": "modify", "content": "{\"key\": \"value\"}"}, + {"file_id": "file_uuid", "file_name": "readme.md", "operation": "add", "content": "# New README"} + ] + }' +``` + +##### Request parameters + +- `"message"`: (*Body parameter*), `string`, *Required* + The commit message describing the changes. +- `"files"`: (*Body parameter*), `list[object]`, *Required* + Each file change object supports the following fields: + + | Field | Type | Required | Description | + |-------|------|----------|-------------| + | `file_id` | `string` | Yes | The file ID | + | `file_name` | `string` | Only for add/rename | The file name | + | `operation` | `string` | Yes | `"add"`, `"modify"`, `"delete"`, or `"rename"` | + | `content` | `string` | Only for add/modify | The file content | + | `old_name` | `string` | Only for rename | The old file name | + | `new_name` | `string` | Only for rename | The new file name | + +#### Response + +Success: + +```json +{ + "code": 0, + "data": { + "id": "commit_uuid", + "folder_id": "folder_uuid", + "parent_id": null, + "message": "update config files", + "author_id": "user_uuid", + "file_count": 2, + "tree_state": "{\"file_uuid\": {\"hash\": \"abcd1234\", \"location\": \".objects/abcd1234\", \"name\": \"config.json\", \"size\": 1024, \"status\": \"1\", \"parent_id\": \"folder_uuid\"}}", + "create_time": 1718200000000 + } +} +``` + +:::note +`tree_state` is a JSON string containing a flat map of file entries. Each entry includes `parent_id` to track which sub-folder the file belonged to at commit time. Sub-folders are inferred from `parent_id` values. +::: + +Failure: + +```json +{ + "code": 101, + "message": "required argument are missing: message" +} +``` + +--- + +### List commits + +**GET** `/api/v1/folders/{folder_id}/commits` + +Lists all commits for the specified folder with pagination. +Also available at: +- `/api/v1/workspace/{workspace_id}/commits` +- `/api/v1/datasets/{dataset_id}/commits` + +#### Request + +- Method: GET +- URL: `/api/v1/folders/{folder_id}/commits` +- Headers: + - `'Authorization: Bearer '` +- Query: + - `'page'`: `int` (optional, default: 1) + - `'page_size'`: `int` (optional, default: 15) + - `'order_by'`: `string` (optional, default: `"create_time"`) + - `'desc'`: `bool` (optional, default: `true`) + +##### Request example + +```bash +curl --request GET \ + --url 'http://{address}/api/v1/folders/{folder_id}/commits?page=1&page_size=15' \ + --header 'Authorization: Bearer ' +``` + +##### Request parameters + +- `"page"`: (*Query parameter*), `int`, *Optional* + Page number. Defaults to 1. +- `"page_size"`: (*Query parameter*), `int`, *Optional* + Number of items per page. Defaults to 15. +- `"order_by"`: (*Query parameter*), `string`, *Optional* + Sort field. Defaults to `"create_time"`. +- `"desc"`: (*Query parameter*), `bool`, *Optional* + Sort descending. Defaults to `true`. + +#### Response + +Success: + +```json +{ + "code": 0, + "data": { + "total": 2, + "page": 1, + "page_size": 15, + "commits": [ + { + "id": "commit_uuid", + "folder_id": "folder_uuid", + "parent_id": null, + "message": "first commit", + "author_id": "user_uuid", + "file_count": 3, + "create_time": 1718200000000 + } + ] + } +} +``` + +--- + +### Get commit + +**GET** `/api/v1/folders/{folder_id}/commits/{commit_id}` + +Retrieves the details of a specific commit, including its file changes. +Also available at: +- `/api/v1/workspace/{workspace_id}/commits/{commit_id}` +- `/api/v1/datasets/{dataset_id}/commits/{commit_id}` + +#### Request + +- Method: GET +- URL: `/api/v1/folders/{folder_id}/commits/{commit_id}` +- Headers: + - `'Authorization: Bearer '` + +##### Request example + +```bash +curl --request GET \ + --url http://{address}/api/v1/folders/{folder_id}/commits/{commit_id} \ + --header 'Authorization: Bearer ' +``` + +##### Request parameters + +- `"folder_id"`: (*Path parameter*), `string`, *Required* + The folder ID. +- `"commit_id"`: (*Path parameter*), `string`, *Required* + The commit ID. + +#### Response + +Success: + +```json +{ + "code": 0, + "data": { + "id": "commit_uuid", + "folder_id": "folder_uuid", + "parent_id": null, + "message": "added config files", + "author_id": "user_uuid", + "file_count": 2, + "create_time": 1718200000000, + "files": [ + { + "file_id": "file_uuid", + "operation": "add", + "old_hash": null, + "new_hash": "abcd1234", + "old_name": null, + "new_name": null + } + ] + } +} +``` + +Failure: + +```json +{ + "code": 102, + "message": "Commit not found in workspace" +} +``` + +--- + +### List commit files + +**GET** `/api/v1/folders/{folder_id}/commits/{commit_id}/files` + +Lists the file changes associated with a specific commit. +Also available at: +- `/api/v1/workspace/{workspace_id}/commits/{commit_id}/files` +- `/api/v1/datasets/{dataset_id}/commits/{commit_id}/files` + +#### Request + +- Method: GET +- URL: `/api/v1/folders/{folder_id}/commits/{commit_id}/files` +- Headers: + - `'Authorization: Bearer '` + +##### Request example + +```bash +curl --request GET \ + --url http://{address}/api/v1/folders/{folder_id}/commits/{commit_id}/files \ + --header 'Authorization: Bearer ' +``` + +#### Response + +Success: + +```json +{ + "code": 0, + "data": [ + { + "id": "item_uuid", + "file_id": "file_uuid", + "operation": "add", + "old_hash": null, + "new_hash": "abcd1234", + "old_location": null, + "new_location": ".objects/abcd1234", + "old_name": null, + "new_name": null + } + ] +} +``` + +--- + +### Diff commits + +**GET** `/api/v1/folders/{folder_id}/commits/diff?from={commit_id}&to={commit_id}` + +Compares two commits and returns the differences. +Also available at: +- `/api/v1/workspace/{workspace_id}/commits/diff?from=...&to=...` +- `/api/v1/datasets/{dataset_id}/commits/diff?from=...&to=...` + +#### Request + +- Method: GET +- URL: `/api/v1/folders/{folder_id}/commits/diff` +- Headers: + - `'Authorization: Bearer '` +- Query: + - `'from'`: `string` (required) + The source commit ID. + - `'to'`: `string` (required) + The target commit ID. + +##### Request example + +```bash +curl --request GET \ + --url 'http://{address}/api/v1/folders/{folder_id}/commits/diff?from=from_commit_id&to=to_commit_id' \ + --header 'Authorization: Bearer ' +``` + +##### Request parameters + +- `"from"`: (*Query parameter*), `string`, *Required* + The source commit ID. +- `"to"`: (*Query parameter*), `string`, *Required* + The target commit ID. + +#### Response + +Success: + +```json +{ + "code": 0, + "data": [ + { + "file_id": "file_uuid", + "file_name": "config.json", + "operation": "modify", + "old_hash": "abc123", + "new_hash": "def456", + "old_location": ".objects/abc123", + "new_location": ".objects/def456" + } + ] +} +``` + +Failure: + +```json +{ + "code": 102, + "message": "Commit not found in workspace" +} +``` + +--- + +### Get uncommitted changes + +**GET** `/api/v1/folders/{folder_id}/changes` + +Returns the uncommitted changes for the specified folder (similar to `git status`). +Also available at: +- `/api/v1/workspace/{workspace_id}/changes` +- `/api/v1/datasets/{dataset_id}/changes` + +#### Request + +- Method: GET +- URL: `/api/v1/folders/{folder_id}/changes` +- Headers: + - `'Authorization: Bearer '` + +##### Request example + +```bash +curl --request GET \ + --url http://{address}/api/v1/folders/{folder_id}/changes \ + --header 'Authorization: Bearer ' +``` + +#### Response + +Success: + +```json +{ + "code": 0, + "data": [ + { + "file_id": "file_uuid", + "file_name": "new.txt", + "operation": "add" + }, + { + "file_id": "file_uuid", + "file_name": "config.json", + "operation": "modify" + }, + { + "file_id": "file_uuid", + "file_name": "old.md", + "operation": "delete" + } + ] +} +``` + +--- + +### Get commit tree + +**GET** `/api/v1/folders/{folder_id}/commits/{commit_id}/tree` + +Retrieves the full folder tree snapshot as it existed at a specific commit. +Also available at: +- `/api/v1/workspace/{workspace_id}/commits/{commit_id}/tree` +- `/api/v1/datasets/{dataset_id}/commits/{commit_id}/tree` + +#### Request + +- Method: GET +- URL: `/api/v1/folders/{folder_id}/commits/{commit_id}/tree` +- Headers: + - `'Authorization: Bearer '` + +##### Request example + +```bash +curl --request GET \ + --url http://{address}/api/v1/folders/{folder_id}/commits/{commit_id}/tree \ + --header 'Authorization: Bearer ' +``` + +#### Response + +Success: + +```json +{ + "code": 0, + "data": { + "id": "folder_uuid", + "name": "workspace_name", + "type": "folder", + "children": [ + { + "id": "file_uuid", + "name": "config.json", + "type": "file", + "hash": "abcd1234", + "size": 1024, + "status": "1", + "location": ".objects/abcd1234" + }, + { + "id": "sub_folder_uuid", + "name": "sub_folder_name", + "type": "folder", + "children": [ + { + "id": "file_uuid_2", + "name": "nested.txt", + "type": "file", + "hash": "ef5678", + "size": 512, + "status": "1", + "location": ".objects/ef5678" + } + ] + } + ] + } +} +``` + +--- + +### Get commit file content + +**GET** `/api/v1/folders/{folder_id}/commits/{commit_id}/files/{file_id}/content` + +Retrieves the file content as it existed at a specific commit. +Also available at: +- `/api/v1/workspace/{workspace_id}/commits/{commit_id}/files/{file_id}/content` +- `/api/v1/datasets/{dataset_id}/commits/{commit_id}/files/{file_id}/content` + +#### Request + +- Method: GET +- URL: `/api/v1/folders/{folder_id}/commits/{commit_id}/files/{file_id}/content` +- Headers: + - `'Authorization: Bearer '` + +##### Request example + +```bash +curl --request GET \ + --url http://{address}/api/v1/folders/{folder_id}/commits/{commit_id}/files/{file_id}/content \ + --header 'Authorization: Bearer ' +``` + +#### Response + +Success: + +```json +{ + "code": 0, + "data": { + "content": "file content as it existed in that commit" + } +} +``` + +Failure: + +```json +{ + "code": 102, + "message": "File not found in this commit" +} +``` + +--- + +### Get file version history + +**GET** `/api/v1/files/{file_id}/versions` + +Returns the version history for a specific file across all commits. + +#### Request + +- Method: GET +- URL: `/api/v1/files/{file_id}/versions` +- Headers: + - `'Authorization: Bearer '` + +##### Request example + +```bash +curl --request GET \ + --url http://{address}/api/v1/files/{file_id}/versions \ + --header 'Authorization: Bearer ' +``` + +#### Response + +Success: + +```json +{ + "code": 0, + "data": [ + { + "commit_id": "commit_uuid", + "operation": "modify", + "hash": "def456", + "create_time": 1718200000000, + "message": "updated file" + }, + { + "commit_id": "commit_uuid", + "operation": "add", + "hash": "abc123", + "create_time": 1718100000000, + "message": "initial commit" + } + ] +} +``` + +--- + ## SEARCH APP MANAGEMENT ### Create search app diff --git a/internal/dao/database.go b/internal/dao/database.go index 6466a7743b..58977d8bca 100644 --- a/internal/dao/database.go +++ b/internal/dao/database.go @@ -153,6 +153,8 @@ func InitDB() error { &entity.IngestionTaskLog{}, &entity.IngestionTasklet{}, &entity.IngestionTaskletLog{}, + &entity.FileCommit{}, + &entity.FileCommitItem{}, } for _, m := range dataModels { diff --git a/internal/dao/file.go b/internal/dao/file.go index 28f1fe15f7..7576d42e9d 100644 --- a/internal/dao/file.go +++ b/internal/dao/file.go @@ -243,6 +243,20 @@ func (dao *FileDAO) ListAllFilesByParentID(parentID string) ([]*entity.File, err return files, err } +// ListNonFolderByParentID lists non-folder files directly under a parent folder. +func (dao *FileDAO) ListNonFolderByParentID(parentID string) ([]*entity.File, error) { + var files []*entity.File + err := DB.Where("parent_id = ? AND id != ? AND type != ?", parentID, parentID, "folder").Find(&files).Error + return files, err +} + +// ListFolderByParentID lists sub-folders directly under a parent folder. +func (dao *FileDAO) ListFolderByParentID(parentID string) ([]*entity.File, error) { + var files []*entity.File + err := DB.Where("parent_id = ? AND type = ?", parentID, "folder").Find(&files).Error + return files, err +} + // GetByParentIDAndName gets file by parent folder ID and name func (dao *FileDAO) GetByParentIDAndName(parentID, name string) (*entity.File, error) { var file entity.File diff --git a/internal/dao/file_commit.go b/internal/dao/file_commit.go new file mode 100644 index 0000000000..e8ab24122e --- /dev/null +++ b/internal/dao/file_commit.go @@ -0,0 +1,154 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package dao + +import ( + "ragflow/internal/entity" + "strings" + + "github.com/google/uuid" +) + +// FileCommitDAO file commit data access object +type FileCommitDAO struct{} + +// NewFileCommitDAO create file commit DAO +func NewFileCommitDAO() *FileCommitDAO { + return &FileCommitDAO{} +} + +// GetByID gets a file commit by ID +func (dao *FileCommitDAO) GetByID(id string) (*entity.FileCommit, error) { + var commit entity.FileCommit + err := DB.Where("id = ?", id).First(&commit).Error + if err != nil { + return nil, err + } + return &commit, nil +} + +// Create creates a new file commit record +func (dao *FileCommitDAO) Create(commit *entity.FileCommit) error { + return DB.Create(commit).Error +} + +// UpdateTreeState updates the tree_state field for a commit +func (dao *FileCommitDAO) UpdateTreeState(id string, treeState string) error { + return DB.Model(&entity.FileCommit{}).Where("id = ?", id).Update("tree_state", treeState).Error +} + +// GetLatestByFolderID gets the latest (most recent) commit for a folder +func (dao *FileCommitDAO) GetLatestByFolderID(folderID string) (*entity.FileCommit, error) { + var commit entity.FileCommit + err := DB.Where("folder_id = ?", folderID). + Order("create_time DESC"). + First(&commit).Error + if err != nil { + return nil, err + } + return &commit, nil +} + +// allowedFileCommitSorts is the whitelist of safe column names for +// ListByFolderID's orderBy parameter to prevent SQL injection. +var allowedFileCommitSorts = map[string]string{ + "create_time": "create_time", + "create_date": "create_date", + "update_time": "update_time", + "update_date": "update_date", +} + +// ListByFolderID lists commits for a folder with pagination +func (dao *FileCommitDAO) ListByFolderID(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error) { + var commits []*entity.FileCommit + var total int64 + + query := DB.Model(&entity.FileCommit{}).Where("folder_id = ?", folderID) + + if err := query.Count(&total).Error; err != nil { + return nil, 0, err + } + + // Sanitize orderBy against whitelist; fall back to create_time. + safeCol, ok := allowedFileCommitSorts[orderBy] + if !ok { + safeCol = "create_time" + } + + orderDirection := "DESC" + if !desc { + orderDirection = "ASC" + } + + orderClause := safeCol + " " + orderDirection + + if page > 0 && pageSize > 0 { + offset := (page - 1) * pageSize + if err := query.Order(orderClause).Offset(offset).Limit(pageSize).Find(&commits).Error; err != nil { + return nil, 0, err + } + } else { + if err := query.Order(orderClause).Find(&commits).Error; err != nil { + return nil, 0, err + } + } + + return commits, total, nil +} + +// FileCommitItemDAO file commit item data access object +type FileCommitItemDAO struct{} + +// NewFileCommitItemDAO create file commit item DAO +func NewFileCommitItemDAO() *FileCommitItemDAO { + return &FileCommitItemDAO{} +} + +// Create creates a new file commit item record +func (dao *FileCommitItemDAO) Create(item *entity.FileCommitItem) error { + return DB.Create(item).Error +} + +// ListByCommitID lists all items for a commit +func (dao *FileCommitItemDAO) ListByCommitID(commitID string) ([]*entity.FileCommitItem, error) { + var items []*entity.FileCommitItem + err := DB.Where("commit_id = ?", commitID).Order("create_time ASC").Find(&items).Error + return items, err +} + +// ListByFileID lists all commit items for a specific file (for version history) +func (dao *FileCommitItemDAO) ListByFileID(fileID string) ([]*entity.FileCommitItem, error) { + var items []*entity.FileCommitItem + err := DB.Where("file_id = ?", fileID).Order("create_time DESC").Find(&items).Error + return items, err +} + +// GetByCommitIDAndFileID gets a single commit item by commit and file ID +func (dao *FileCommitItemDAO) GetByCommitIDAndFileID(commitID, fileID string) (*entity.FileCommitItem, error) { + var item entity.FileCommitItem + err := DB.Where("commit_id = ? AND file_id = ?", commitID, fileID).First(&item).Error + if err != nil { + return nil, err + } + return &item, nil +} + +// generateCommitUUID generates a UUID for commit/commit_item IDs +func generateCommitUUID() string { + id := uuid.New().String() + return strings.ReplaceAll(id, "-", "") +} diff --git a/internal/entity/file_commit.go b/internal/entity/file_commit.go new file mode 100644 index 0000000000..1ebb344690 --- /dev/null +++ b/internal/entity/file_commit.go @@ -0,0 +1,108 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package entity + +// FileCommit represents a snapshot commit for a workspace folder (like git commit). +type FileCommit struct { + ID string `gorm:"column:id;primaryKey;size:32" json:"id"` + FolderID string `gorm:"column:folder_id;size:32;not null;index" json:"folder_id"` + ParentID *string `gorm:"column:parent_id;size:32;index" json:"parent_id,omitempty"` + Message string `gorm:"column:message;size:512;not null;default:''" json:"message"` + AuthorID string `gorm:"column:author_id;size:32;not null;index" json:"author_id"` + FileCount int `gorm:"column:file_count;default:0" json:"file_count"` + TreeState *string `gorm:"column:tree_state;type:longtext" json:"tree_state,omitempty"` + BaseModel +} + +// TableName returns the table name for FileCommit model. +func (FileCommit) TableName() string { + return "file_commit" +} + +// FileCommitItem represents a single file change within a commit. +type FileCommitItem struct { + ID string `gorm:"column:id;primaryKey;size:32" json:"id"` + CommitID string `gorm:"column:commit_id;size:32;not null;uniqueIndex:idx_commit_file" json:"commit_id"` + FileID string `gorm:"column:file_id;size:32;not null;uniqueIndex:idx_commit_file" json:"file_id"` + Operation string `gorm:"column:operation;size:16;not null;index" json:"operation"` + OldHash *string `gorm:"column:old_hash;size:64;index" json:"old_hash,omitempty"` + NewHash *string `gorm:"column:new_hash;size:64;index" json:"new_hash,omitempty"` + OldLocation *string `gorm:"column:old_location;size:255" json:"old_location,omitempty"` + NewLocation *string `gorm:"column:new_location;size:255" json:"new_location,omitempty"` + OldName *string `gorm:"column:old_name;size:255" json:"old_name,omitempty"` + NewName *string `gorm:"column:new_name;size:255" json:"new_name,omitempty"` + BaseModel +} + +// TableName returns the table name for FileCommitItem model. +func (FileCommitItem) TableName() string { + return "file_commit_item" +} + +// TreeNode represents a file node in the commit tree state snapshot. +type TreeNode struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` // "file" or "folder" + Hash string `json:"hash,omitempty"` + Location string `json:"location,omitempty"` + Size int64 `json:"size,omitempty"` + Status string `json:"status"` // "1" = active, "0" = deleted + Children []*TreeNode `json:"children,omitempty"` +} + +// FileChange represents a file change in a commit request. +type FileChange struct { + FileID string `json:"file_id"` + FileName string `json:"file_name"` + Operation string `json:"operation"` // "add", "modify", "delete", "rename" + Content string `json:"content,omitempty"` + OldName string `json:"old_name,omitempty"` + NewName string `json:"new_name,omitempty"` +} + +// CommitResponse is the API response for a commit. +type CommitResponse struct { + ID string `json:"id"` + FolderID string `json:"folder_id"` + ParentID *string `json:"parent_id,omitempty"` + Message string `json:"message"` + AuthorID string `json:"author_id"` + FileCount int `json:"file_count"` + TreeState *string `json:"tree_state,omitempty"` + CreateTime *int64 `json:"create_time,omitempty"` +} + +// DiffEntry represents a single diff entry between two commits. +type DiffEntry struct { + FileID string `json:"file_id"` + FileName string `json:"file_name"` + Operation string `json:"operation"` + OldHash *string `json:"old_hash,omitempty"` + NewHash *string `json:"new_hash,omitempty"` + OldLocation *string `json:"old_location,omitempty"` + NewLocation *string `json:"new_location,omitempty"` +} + +// VersionEntry represents a single version in a file's version history. +type VersionEntry struct { + CommitID string `json:"commit_id"` + Operation string `json:"operation"` + Hash string `json:"hash"` + CreateTime *int64 `json:"create_time,omitempty"` + Message string `json:"message"` +} diff --git a/internal/handler/file_commit.go b/internal/handler/file_commit.go new file mode 100644 index 0000000000..bca0359fd1 --- /dev/null +++ b/internal/handler/file_commit.go @@ -0,0 +1,584 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package handler + +import ( + "fmt" + "net/http" + "ragflow/internal/common" + "ragflow/internal/dao" + "ragflow/internal/entity" + "strconv" + + "github.com/gin-gonic/gin" +) + +// fileCommitService is the consumer-side interface for FileCommitHandler's service dependency. +type fileCommitService interface { + CreateCommit(folderID, authorID, message string, changes []entity.FileChange) (*entity.FileCommit, error) + ListCommits(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error) + GetCommit(commitID string) (*entity.FileCommit, error) + ListCommitFiles(commitID string) ([]*entity.FileCommitItem, error) + DiffCommits(fromID, toID string) ([]entity.DiffEntry, error) + GetUncommittedChanges(folderID string) ([]entity.DiffEntry, error) + GetCommitTree(commitID string) (map[string]interface{}, error) + GetCommitFileContent(folderID, commitID, fileID string) ([]byte, error) + GetFileVersionHistory(fileID string) ([]entity.VersionEntry, error) +} + +// FileCommitHandler file commit handler +type FileCommitHandler struct { + commitService fileCommitService + kbDAO *dao.KnowledgebaseDAO + fileDAO *dao.FileDAO +} + +// NewFileCommitHandler create file commit handler +func NewFileCommitHandler(commitService fileCommitService) *FileCommitHandler { + return &FileCommitHandler{ + commitService: commitService, + kbDAO: dao.NewKnowledgebaseDAO(), + fileDAO: dao.NewFileDAO(), + } +} + +// ResolveFolderID resolves a resource ID (dataset/memory/skill) to its folder_id. +// entityType is the plural resource name (e.g. "datasets", "memories", "skills"). +func (h *FileCommitHandler) ResolveFolderID(entityType, entityID string) (string, error) { + switch entityType { + case "datasets": + return h.resolveDatasetFolderID(entityID) + default: + return "", fmt.Errorf("unsupported entity type: %s", entityType) + } +} + +// CommitFolderResolver returns a Gin middleware that resolves an entity ID +// (e.g. dataset_id, memory_id, skill_id) to a folder_id and injects it into +// c.Params so that the standard commit handlers can read it via c.Param("folder_id"). +// +// entityType must match the key used in ResolveFolderID (e.g. "datasets"). +// urlParam is the gin URL param name (e.g. "dataset_id"). +func CommitFolderResolver(h *FileCommitHandler, entityType, urlParam string) gin.HandlerFunc { + return func(c *gin.Context) { + id := c.Param(urlParam) + if id == "" { + jsonError(c, common.CodeParamError, fmt.Sprintf("%s is required", urlParam)) + c.Abort() + return + } + folderID, err := h.ResolveFolderID(entityType, id) + if err != nil { + jsonError(c, common.CodeNotFound, fmt.Sprintf("%s folder not found", entityType)) + c.Abort() + return + } + c.Params = append(c.Params, gin.Param{Key: "folder_id", Value: folderID}) + c.Next() + } +} + +func (h *FileCommitHandler) resolveDatasetFolderID(datasetID string) (string, error) { + kb, err := h.kbDAO.GetByID(datasetID) + if err != nil { + return "", err + } + files := h.fileDAO.Query(kb.Name, "") + for _, f := range files { + if f.SourceType == string(entity.FileSourceKnowledgebase) && f.Type == "folder" && f.TenantID == kb.TenantID { + return f.ID, nil + } + } + return "", common.ErrNotFound +} + +// CreateCommitRequest represents the request body for creating a commit +type CreateCommitRequest struct { + Message string `json:"message" binding:"required"` + Files []entity.FileChange `json:"files" binding:"required"` +} + +// CreateCommit creates a new commit for a workspace folder +// @Summary Create Commit +// @Description Create a new commit with file changes for a workspace folder +// @Tags file_commit +// @Accept json +// @Produce json +// @Param folder_id path string true "workspace folder ID" +// @Param body body CreateCommitRequest true "commit request" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/workspaces/{folder_id}/commits [post] +func (h *FileCommitHandler) CreateCommit(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + folderID := c.Param("folder_id") + if folderID == "" { + jsonError(c, common.CodeParamError, "folder_id is required") + return + } + + var req CreateCommitRequest + if err := c.ShouldBindJSON(&req); err != nil { + jsonError(c, common.CodeBadRequest, err.Error()) + return + } + + commit, err := h.commitService.CreateCommit(folderID, user.ID, req.Message, req.Files) + if err != nil { + jsonInternalError(c, err) + return + } + + ct := int64(0) + if commit.CreateTime != nil { + ct = *commit.CreateTime + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": entity.CommitResponse{ + ID: commit.ID, + FolderID: commit.FolderID, + ParentID: commit.ParentID, + Message: commit.Message, + AuthorID: commit.AuthorID, + FileCount: commit.FileCount, + TreeState: commit.TreeState, + CreateTime: &ct, + }, + "message": common.CodeSuccess.Message(), + }) +} + +// ListCommits lists commits for a workspace folder +// @Summary List Commits +// @Description List all commits for a workspace folder with pagination +// @Tags file_commit +// @Accept json +// @Produce json +// @Param folder_id path string true "workspace folder ID" +// @Param page query int false "page number" +// @Param page_size query int false "items per page" +// @Param order_by query string false "order by field" +// @Param desc query bool false "descending order" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/workspaces/{folder_id}/commits [get] +func (h *FileCommitHandler) ListCommits(c *gin.Context) { + _, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + folderID := c.Param("folder_id") + if folderID == "" { + jsonError(c, common.CodeParamError, "folder_id is required") + return + } + + page := 1 + if pageStr := c.Query("page"); pageStr != "" { + if p, err := strconv.Atoi(pageStr); err == nil && p >= 1 { + page = p + } + } + + pageSize := 15 + if psStr := c.Query("page_size"); psStr != "" { + if ps, err := strconv.Atoi(psStr); err == nil { + if ps < 1 { + ps = 15 + } + pageSize = ps + } + } + + orderBy := c.DefaultQuery("order_by", "create_time") + desc := true + if descStr := c.Query("desc"); descStr != "" { + desc = descStr != "false" + } + + commits, total, err := h.commitService.ListCommits(folderID, page, pageSize, orderBy, desc) + if err != nil { + jsonInternalError(c, err) + return + } + + var commitList []entity.CommitResponse + for _, commit := range commits { + var ct int64 + if commit.CreateTime != nil { + ct = *commit.CreateTime + } + commitList = append(commitList, entity.CommitResponse{ + ID: commit.ID, + FolderID: commit.FolderID, + ParentID: commit.ParentID, + Message: commit.Message, + AuthorID: commit.AuthorID, + FileCount: commit.FileCount, + CreateTime: &ct, + }) + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": gin.H{ + "total": total, + "page": page, + "page_size": pageSize, + "commits": commitList, + }, + "message": common.CodeSuccess.Message(), + }) +} + +// GetCommit gets details of a single commit +// @Summary Get Commit +// @Description Get details of a single commit including file changes +// @Tags file_commit +// @Accept json +// @Produce json +// @Param folder_id path string true "workspace folder ID" +// @Param commit_id path string true "commit ID" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/workspaces/{folder_id}/commits/{commit_id} [get] +func (h *FileCommitHandler) GetCommit(c *gin.Context) { + _, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + folderID := c.Param("folder_id") + commitID := c.Param("commit_id") + if commitID == "" { + jsonError(c, common.CodeParamError, "commit_id is required") + return + } + + commit, err := h.commitService.GetCommit(commitID) + if err != nil { + jsonError(c, common.CodeNotFound, "Commit not found") + return + } + + if commit.FolderID != folderID { + jsonError(c, common.CodeNotFound, "Commit not found in workspace") + return + } + + items, err := h.commitService.ListCommitFiles(commitID) + if err != nil { + items = []*entity.FileCommitItem{} + } + + var ct int64 + if commit.CreateTime != nil { + ct = *commit.CreateTime + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": gin.H{ + "id": commit.ID, + "folder_id": commit.FolderID, + "parent_id": commit.ParentID, + "message": commit.Message, + "author_id": commit.AuthorID, + "file_count": commit.FileCount, + "create_time": ct, + "files": items, + }, + "message": common.CodeSuccess.Message(), + }) +} + +// ListCommitFiles lists all file changes in a commit +// @Summary List Commit Files +// @Description List all file changes in a given commit +// @Tags file_commit +// @Accept json +// @Produce json +// @Param folder_id path string true "workspace folder ID" +// @Param commit_id path string true "commit ID" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/workspaces/{folder_id}/commits/{commit_id}/files [get] +func (h *FileCommitHandler) ListCommitFiles(c *gin.Context) { + _, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + folderID := c.Param("folder_id") + commitID := c.Param("commit_id") + if commitID == "" { + jsonError(c, common.CodeParamError, "commit_id is required") + return + } + + commit, err := h.commitService.GetCommit(commitID) + if err != nil { + jsonError(c, common.CodeNotFound, "Commit not found") + return + } + if commit.FolderID != folderID { + jsonError(c, common.CodeNotFound, "Commit not found in workspace") + return + } + + items, err := h.commitService.ListCommitFiles(commitID) + if err != nil { + jsonInternalError(c, err) + return + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": items, + "message": common.CodeSuccess.Message(), + }) +} + +// DiffCommits compares two commits +// @Summary Diff Commits +// @Description Compare two commits and return the diff +// @Tags file_commit +// @Accept json +// @Produce json +// @Param folder_id path string true "workspace folder ID" +// @Param from query string true "from commit ID" +// @Param to query string true "to commit ID" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/workspaces/{folder_id}/commits/diff [get] +func (h *FileCommitHandler) DiffCommits(c *gin.Context) { + _, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + folderID := c.Param("folder_id") + fromID := c.Query("from") + toID := c.Query("to") + if fromID == "" || toID == "" { + jsonError(c, common.CodeParamError, "'from' and 'to' query parameters are required") + return + } + + fromCommit, err := h.commitService.GetCommit(fromID) + if err != nil { + jsonError(c, common.CodeNotFound, "Commit not found") + return + } + toCommit, err := h.commitService.GetCommit(toID) + if err != nil { + jsonError(c, common.CodeNotFound, "Commit not found") + return + } + if fromCommit.FolderID != folderID || toCommit.FolderID != folderID { + jsonError(c, common.CodeNotFound, "Commit not found in workspace") + return + } + + diff, err := h.commitService.DiffCommits(fromID, toID) + if err != nil { + jsonInternalError(c, err) + return + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": diff, + "message": common.CodeSuccess.Message(), + }) +} + +// GetUncommittedChanges gets uncommitted changes +// @Summary Get Uncommitted Changes +// @Description Get uncommitted changes for a workspace folder (like git status) +// @Tags file_commit +// @Accept json +// @Produce json +// @Param folder_id path string true "workspace folder ID" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/workspaces/{folder_id}/changes [get] +func (h *FileCommitHandler) GetUncommittedChanges(c *gin.Context) { + _, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + folderID := c.Param("folder_id") + if folderID == "" { + jsonError(c, common.CodeParamError, "folder_id is required") + return + } + + changes, err := h.commitService.GetUncommittedChanges(folderID) + if err != nil { + jsonInternalError(c, err) + return + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": changes, + "message": common.CodeSuccess.Message(), + }) +} + +// GetCommitTree gets the folder tree snapshot for a commit +// @Summary Get Commit Tree +// @Description Get the folder tree snapshot for a given commit +// @Tags file_commit +// @Accept json +// @Produce json +// @Param folder_id path string true "workspace folder ID" +// @Param commit_id path string true "commit ID" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/workspaces/{folder_id}/commits/{commit_id}/tree [get] +func (h *FileCommitHandler) GetCommitTree(c *gin.Context) { + _, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + folderID := c.Param("folder_id") + commitID := c.Param("commit_id") + if commitID == "" { + jsonError(c, common.CodeParamError, "commit_id is required") + return + } + + commit, err := h.commitService.GetCommit(commitID) + if err != nil { + jsonError(c, common.CodeNotFound, "Commit not found") + return + } + if commit.FolderID != folderID { + jsonError(c, common.CodeNotFound, "Commit not found in workspace") + return + } + + tree, err := h.commitService.GetCommitTree(commitID) + if err != nil { + jsonInternalError(c, err) + return + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": tree, + "message": common.CodeSuccess.Message(), + }) +} + +// GetCommitFileContent gets file content as it existed in a given commit +// @Summary Get Commit File Content +// @Description Get file content as it existed in a specific commit +// @Tags file_commit +// @Accept json +// @Produce json +// @Param folder_id path string true "workspace folder ID" +// @Param commit_id path string true "commit ID" +// @Param file_id path string true "file ID" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/workspaces/{folder_id}/commits/{commit_id}/files/{file_id}/content [get] +func (h *FileCommitHandler) GetCommitFileContent(c *gin.Context) { + _, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + folderID := c.Param("folder_id") + commitID := c.Param("commit_id") + fileID := c.Param("file_id") + + if folderID == "" || commitID == "" || fileID == "" { + jsonError(c, common.CodeParamError, "folder_id, commit_id, and file_id are required") + return + } + + commit, err := h.commitService.GetCommit(commitID) + if err != nil { + jsonError(c, common.CodeNotFound, "Commit not found") + return + } + if commit.FolderID != folderID { + jsonError(c, common.CodeNotFound, "Commit not found in workspace") + return + } + + content, err := h.commitService.GetCommitFileContent(folderID, commitID, fileID) + if err != nil { + jsonError(c, common.CodeNotFound, err.Error()) + return + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": gin.H{ + "content": string(content), + }, + "message": common.CodeSuccess.Message(), + }) +} + +// GetFileVersionHistory gets version history for a specific file +// @Summary Get File Version History +// @Description Get version history for a specific file across all commits +// @Tags file_commit +// @Accept json +// @Produce json +// @Param file_id path string true "file ID" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/files/{file_id}/versions [get] +func (h *FileCommitHandler) GetFileVersionHistory(c *gin.Context) { + _, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + fileID := c.Param("id") + if fileID == "" { + jsonError(c, common.CodeParamError, "file_id is required") + return + } + + versions, err := h.commitService.GetFileVersionHistory(fileID) + if err != nil { + jsonInternalError(c, err) + return + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": versions, + "message": common.CodeSuccess.Message(), + }) +} diff --git a/internal/handler/file_commit_test.go b/internal/handler/file_commit_test.go new file mode 100644 index 0000000000..ba20988739 --- /dev/null +++ b/internal/handler/file_commit_test.go @@ -0,0 +1,413 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package handler + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "ragflow/internal/common" + "ragflow/internal/entity" + + "github.com/gin-gonic/gin" +) + +// mockFileCommitSvc implements FileCommitServiceInterface for testing +type mockFileCommitSvc struct { + createCommitFn func(folderID, authorID, message string, changes []entity.FileChange) (*entity.FileCommit, error) + listCommitsFn func(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error) + getCommitFn func(commitID string) (*entity.FileCommit, error) + listCommitFilesFn func(commitID string) ([]*entity.FileCommitItem, error) + diffCommitsFn func(fromID, toID string) ([]entity.DiffEntry, error) + getUncommittedChangesFn func(folderID string) ([]entity.DiffEntry, error) + getCommitTreeFn func(commitID string) (map[string]interface{}, error) + getCommitFileContentFn func(folderID, commitID, fileID string) ([]byte, error) + getFileVersionHistoryFn func(fileID string) ([]entity.VersionEntry, error) +} + +func (m *mockFileCommitSvc) CreateCommit(folderID, authorID, message string, changes []entity.FileChange) (*entity.FileCommit, error) { + if m.createCommitFn != nil { + return m.createCommitFn(folderID, authorID, message, changes) + } + return &entity.FileCommit{ + ID: "commit-1", + FolderID: folderID, + Message: message, + AuthorID: authorID, + FileCount: len(changes), + }, nil +} + +func (m *mockFileCommitSvc) ListCommits(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error) { + if m.listCommitsFn != nil { + return m.listCommitsFn(folderID, page, pageSize, orderBy, desc) + } + now := int64(1718200000000) + return []*entity.FileCommit{ + {ID: "c2", FolderID: folderID, Message: "second", AuthorID: "u1", FileCount: 1, BaseModel: entity.BaseModel{CreateTime: &now}}, + {ID: "c1", FolderID: folderID, Message: "first", AuthorID: "u1", FileCount: 2, BaseModel: entity.BaseModel{CreateTime: &now}}, + }, 2, nil +} + +func (m *mockFileCommitSvc) GetCommit(commitID string) (*entity.FileCommit, error) { + if m.getCommitFn != nil { + return m.getCommitFn(commitID) + } + return &entity.FileCommit{ID: commitID, FolderID: "folder-1", Message: "test commit", AuthorID: "u1", FileCount: 1}, nil +} + +func (m *mockFileCommitSvc) ListCommitFiles(commitID string) ([]*entity.FileCommitItem, error) { + if m.listCommitFilesFn != nil { + return m.listCommitFilesFn(commitID) + } + return []*entity.FileCommitItem{ + {ID: "i1", CommitID: commitID, FileID: "f1", Operation: "add"}, + }, nil +} + +func (m *mockFileCommitSvc) DiffCommits(fromID, toID string) ([]entity.DiffEntry, error) { + if m.diffCommitsFn != nil { + return m.diffCommitsFn(fromID, toID) + } + return []entity.DiffEntry{ + {FileID: "f1", FileName: "file.txt", Operation: "modify"}, + }, nil +} + +func (m *mockFileCommitSvc) GetUncommittedChanges(folderID string) ([]entity.DiffEntry, error) { + if m.getUncommittedChangesFn != nil { + return m.getUncommittedChangesFn(folderID) + } + return []entity.DiffEntry{ + {FileID: "f1", FileName: "new.txt", Operation: "add"}, + }, nil +} + +func (m *mockFileCommitSvc) GetCommitTree(commitID string) (map[string]interface{}, error) { + if m.getCommitTreeFn != nil { + return m.getCommitTreeFn(commitID) + } + return map[string]interface{}{ + "f1": map[string]interface{}{"name": "file.txt", "hash": "abc123", "size": 100, "status": "1"}, + }, nil +} + +func (m *mockFileCommitSvc) GetCommitFileContent(folderID, commitID, fileID string) ([]byte, error) { + if m.getCommitFileContentFn != nil { + return m.getCommitFileContentFn(folderID, commitID, fileID) + } + return []byte("file content"), nil +} + +func (m *mockFileCommitSvc) GetFileVersionHistory(fileID string) ([]entity.VersionEntry, error) { + if m.getFileVersionHistoryFn != nil { + return m.getFileVersionHistoryFn(fileID) + } + now := int64(1718200000000) + return []entity.VersionEntry{ + {CommitID: "c2", Operation: "modify", Hash: "def456", CreateTime: &now, Message: "updated"}, + {CommitID: "c1", Operation: "add", Hash: "abc123", CreateTime: &now, Message: "initial"}, + }, nil +} + +func setupFileCommitTest(userID string) (*gin.Engine, *mockFileCommitSvc) { + mock := &mockFileCommitSvc{} + h := &FileCommitHandler{commitService: mock} + gin.SetMode(gin.TestMode) + r := gin.New() + r.Use(func(c *gin.Context) { + c.Set("user", &entity.User{ID: userID}) + }) + r.POST("/api/v1/folders/:folder_id/commits", h.CreateCommit) + r.GET("/api/v1/folders/:folder_id/commits", h.ListCommits) + r.GET("/api/v1/folders/:folder_id/commits/:commit_id", h.GetCommit) + r.GET("/api/v1/folders/:folder_id/commits/:commit_id/files", h.ListCommitFiles) + r.GET("/api/v1/folders/:folder_id/commits/diff", h.DiffCommits) + r.GET("/api/v1/folders/:folder_id/changes", h.GetUncommittedChanges) + r.GET("/api/v1/folders/:folder_id/commits/:commit_id/tree", h.GetCommitTree) + r.GET("/api/v1/folders/:folder_id/commits/:commit_id/files/:file_id/content", h.GetCommitFileContent) + r.GET("/api/v1/files/:id/versions", h.GetFileVersionHistory) + return r, mock +} + +func setupFileCommitTestNoAuth() *gin.Engine { + h := &FileCommitHandler{} + gin.SetMode(gin.TestMode) + r := gin.New() + r.POST("/api/v1/folders/:folder_id/commits", h.CreateCommit) + return r +} + +// ── Tests ──────────────────────────────────────────────────────────────── + +func TestFileCommit_CreateCommit_Success(t *testing.T) { + r, mock := setupFileCommitTest("user-1") + mock.createCommitFn = func(folderID, authorID, message string, changes []entity.FileChange) (*entity.FileCommit, error) { + if folderID != "folder-1" { + t.Errorf("expected folder-1, got %s", folderID) + } + if authorID != "user-1" { + t.Errorf("expected user-1, got %s", authorID) + } + if message != "initial commit" { + t.Errorf("expected 'initial commit', got %s", message) + } + if len(changes) != 1 || changes[0].FileID != "f1" { + t.Errorf("unexpected changes: %+v", changes) + } + now := int64(1718200000000) + return &entity.FileCommit{ + ID: "commit-1", FolderID: folderID, Message: message, + AuthorID: authorID, FileCount: len(changes), + BaseModel: entity.BaseModel{CreateTime: &now}, + }, nil + } + + body := `{"message": "initial commit", "files": [{"file_id": "f1", "file_name": "test.txt", "operation": "add", "content": "hello"}]}` + w := httptest.NewRecorder() + req, _ := http.NewRequest("POST", "/api/v1/folders/folder-1/commits", strings.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + r.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + var resp map[string]interface{} + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatal(err) + } + if resp["code"] != float64(common.CodeSuccess) { + t.Errorf("expected code 0, got %v", resp["code"]) + } + data, ok := resp["data"].(map[string]interface{}) + if !ok { + t.Fatalf("expected data to be object, got %T", resp["data"]) + } + if data["message"] != "initial commit" { + t.Errorf("expected 'initial commit', got %v", data["message"]) + } +} + +func TestFileCommit_CreateCommit_NoAuth(t *testing.T) { + r := setupFileCommitTestNoAuth() + body := `{"message": "test", "files": [{"file_id": "f1", "file_name": "t.txt", "operation": "add"}]}` + w := httptest.NewRecorder() + req, _ := http.NewRequest("POST", "/api/v1/folders/folder-1/commits", strings.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + r.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", w.Code) + } + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + // No auth middleware → code 401 + if code, _ := resp["code"].(float64); code != float64(common.CodeUnauthorized) { + t.Errorf("expected unauthorized, got code %v", code) + } +} + +func TestFileCommit_CreateCommit_InvalidJSON(t *testing.T) { + r, _ := setupFileCommitTest("user-1") + body := `{invalid json` + w := httptest.NewRecorder() + req, _ := http.NewRequest("POST", "/api/v1/folders/folder-1/commits", strings.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + r.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", w.Code) + } + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if code, _ := resp["code"].(float64); code != float64(common.CodeBadRequest) { + t.Errorf("expected bad request, got code %v", code) + } +} + +func TestFileCommit_ListCommits_Success(t *testing.T) { + r, mock := setupFileCommitTest("user-1") + mock.listCommitsFn = func(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error) { + if folderID != "folder-1" { + t.Errorf("expected folder-1, got %s", folderID) + } + return []*entity.FileCommit{ + {ID: "c2", FolderID: folderID, Message: "second", AuthorID: "u1", FileCount: 1}, + {ID: "c1", FolderID: folderID, Message: "first", AuthorID: "u1", FileCount: 2}, + }, 2, nil + } + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits?page=1&page_size=10", nil) + r.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if resp["code"] != float64(common.CodeSuccess) { + t.Errorf("expected code 0, got %v", resp["code"]) + } + data, _ := resp["data"].(map[string]interface{}) + if total, _ := data["total"].(float64); total != 2 { + t.Errorf("expected total 2, got %v", total) + } +} + +func TestFileCommit_GetCommit_Success(t *testing.T) { + r, _ := setupFileCommitTest("user-1") + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/commit-1", nil) + r.ServeHTTP(w, req) + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if resp["code"] != float64(common.CodeSuccess) { + t.Errorf("expected success, got code %v: %s", resp["code"], resp["message"]) + } + data, _ := resp["data"].(map[string]interface{}) + if data["id"] != "commit-1" { + t.Errorf("expected commit-1, got %v", data["id"]) + } +} + +func TestFileCommit_GetCommit_NotFound(t *testing.T) { + r, mock := setupFileCommitTest("user-1") + mock.getCommitFn = func(commitID string) (*entity.FileCommit, error) { + return nil, common.ErrNotFound + } + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/missing", nil) + r.ServeHTTP(w, req) + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if code, _ := resp["code"].(float64); code != float64(common.CodeNotFound) { + t.Errorf("expected 404, got code %v", code) + } +} + +func TestFileCommit_ListCommitFiles_Success(t *testing.T) { + r, _ := setupFileCommitTest("user-1") + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/commit-1/files", nil) + r.ServeHTTP(w, req) + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if resp["code"] != float64(common.CodeSuccess) { + t.Errorf("expected success, got code %v", resp["code"]) + } +} + +func TestFileCommit_DiffCommits_Success(t *testing.T) { + r, _ := setupFileCommitTest("user-1") + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/diff?from=c1&to=c2", nil) + r.ServeHTTP(w, req) + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if resp["code"] != float64(common.CodeSuccess) { + t.Errorf("expected success, got code %v", resp["code"]) + } +} + +func TestFileCommit_DiffCommits_MissingParams(t *testing.T) { + r, _ := setupFileCommitTest("user-1") + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/diff", nil) + r.ServeHTTP(w, req) + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if code, _ := resp["code"].(float64); code != float64(common.CodeParamError) { + t.Errorf("expected param error, got code %v", code) + } +} + +func TestFileCommit_GetUncommittedChanges_Success(t *testing.T) { + r, _ := setupFileCommitTest("user-1") + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/changes", nil) + r.ServeHTTP(w, req) + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if resp["code"] != float64(common.CodeSuccess) { + t.Errorf("expected success, got code %v", resp["code"]) + } +} + +func TestFileCommit_GetCommitTree_Success(t *testing.T) { + r, _ := setupFileCommitTest("user-1") + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/commit-1/tree", nil) + r.ServeHTTP(w, req) + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if resp["code"] != float64(common.CodeSuccess) { + t.Errorf("expected success, got code %v", resp["code"]) + } +} + +func TestFileCommit_GetCommitFileContent_Success(t *testing.T) { + r, mock := setupFileCommitTest("user-1") + mock.getCommitFileContentFn = func(folderID, commitID, fileID string) ([]byte, error) { + return []byte("hello world"), nil + } + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/folders/folder-1/commits/commit-1/files/f1/content", nil) + r.ServeHTTP(w, req) + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if resp["code"] != float64(common.CodeSuccess) { + t.Errorf("expected success, got code %v", resp["code"]) + } + data, _ := resp["data"].(map[string]interface{}) + if content, _ := data["content"].(string); content != "hello world" { + t.Errorf("expected 'hello world', got %q", content) + } +} + +func TestFileCommit_GetFileVersionHistory_Success(t *testing.T) { + r, _ := setupFileCommitTest("user-1") + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/files/f1/versions", nil) + r.ServeHTTP(w, req) + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if resp["code"] != float64(common.CodeSuccess) { + t.Errorf("expected success, got code %v", resp["code"]) + } +} diff --git a/internal/router/router.go b/internal/router/router.go index d587edb4a1..70f35dd65c 100644 --- a/internal/router/router.go +++ b/internal/router/router.go @@ -46,6 +46,7 @@ type Router struct { difyRetrievalHandler *handler.DifyRetrievalHandler pluginHandler *handler.PluginHandler modelHandler *handler.ModelHandler + fileCommitHandler *handler.FileCommitHandler adminRuntimeHandler *handler.AdminRuntimeHandler } @@ -74,6 +75,7 @@ func NewRouter( difyRetrievalHandler *handler.DifyRetrievalHandler, pluginHandler *handler.PluginHandler, modelHandler *handler.ModelHandler, + fileCommitHandler *handler.FileCommitHandler, adminRuntimeHandler *handler.AdminRuntimeHandler, ) *Router { return &Router{ @@ -100,6 +102,7 @@ func NewRouter( difyRetrievalHandler: difyRetrievalHandler, pluginHandler: pluginHandler, modelHandler: modelHandler, + fileCommitHandler: fileCommitHandler, adminRuntimeHandler: adminRuntimeHandler, } } @@ -304,8 +307,51 @@ func (r *Router) Setup(engine *gin.Engine) { file.GET("/:id/ancestors", r.fileHandler.GetFileAncestors) file.GET("/:id/parent", r.fileHandler.GetParentFolder) file.GET("/:id", r.fileHandler.Download) + file.GET("/:id/versions", r.fileCommitHandler.GetFileVersionHistory) } + // File commit routes — /folders/ takes folder_id directly + commitFolders := v1.Group("/folders") + { + commitFolders.POST("/:folder_id/commits", r.fileCommitHandler.CreateCommit) + commitFolders.GET("/:folder_id/commits", r.fileCommitHandler.ListCommits) + commitFolders.GET("/:folder_id/commits/diff", r.fileCommitHandler.DiffCommits) + commitFolders.GET("/:folder_id/commits/:commit_id", r.fileCommitHandler.GetCommit) + commitFolders.GET("/:folder_id/commits/:commit_id/files", r.fileCommitHandler.ListCommitFiles) + commitFolders.GET("/:folder_id/commits/:commit_id/tree", r.fileCommitHandler.GetCommitTree) + commitFolders.GET("/:folder_id/commits/:commit_id/files/:file_id/content", r.fileCommitHandler.GetCommitFileContent) + commitFolders.GET("/:folder_id/changes", r.fileCommitHandler.GetUncommittedChanges) + } + + // /workspace/{workspace_id}/commits — alias for /folders/ (workspace_id == folder_id) + commitWorkspace := v1.Group("/workspace") + { + commitWorkspace.POST("/:folder_id/commits", r.fileCommitHandler.CreateCommit) + commitWorkspace.GET("/:folder_id/commits", r.fileCommitHandler.ListCommits) + commitWorkspace.GET("/:folder_id/commits/diff", r.fileCommitHandler.DiffCommits) + commitWorkspace.GET("/:folder_id/commits/:commit_id", r.fileCommitHandler.GetCommit) + commitWorkspace.GET("/:folder_id/commits/:commit_id/files", r.fileCommitHandler.ListCommitFiles) + commitWorkspace.GET("/:folder_id/commits/:commit_id/tree", r.fileCommitHandler.GetCommitTree) + commitWorkspace.GET("/:folder_id/commits/:commit_id/files/:file_id/content", r.fileCommitHandler.GetCommitFileContent) + commitWorkspace.GET("/:folder_id/changes", r.fileCommitHandler.GetUncommittedChanges) + } + + // /datasets/{dataset_id}/commits — resolve dataset_id → folder_id via middleware + commitDatasets := v1.Group("/datasets/:dataset_id") + commitDatasets.Use(handler.CommitFolderResolver(r.fileCommitHandler, "datasets", "dataset_id")) + { + commitDatasets.POST("/commits", r.fileCommitHandler.CreateCommit) + commitDatasets.GET("/commits", r.fileCommitHandler.ListCommits) + commitDatasets.GET("/commits/diff", r.fileCommitHandler.DiffCommits) + commitDatasets.GET("/commits/:commit_id", r.fileCommitHandler.GetCommit) + commitDatasets.GET("/commits/:commit_id/files", r.fileCommitHandler.ListCommitFiles) + commitDatasets.GET("/commits/:commit_id/tree", r.fileCommitHandler.GetCommitTree) + commitDatasets.GET("/commits/:commit_id/files/:file_id/content", r.fileCommitHandler.GetCommitFileContent) + commitDatasets.GET("/changes", r.fileCommitHandler.GetUncommittedChanges) + } + + + // Author routes authors := v1.Group("/authors") { diff --git a/internal/service/file_commit.go b/internal/service/file_commit.go new file mode 100644 index 0000000000..4f38e2bc8a --- /dev/null +++ b/internal/service/file_commit.go @@ -0,0 +1,637 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package service + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "ragflow/internal/common" + "ragflow/internal/dao" + "ragflow/internal/entity" + "ragflow/internal/storage" + "sort" + "strings" + "time" + + "github.com/google/uuid" + "go.uber.org/zap" + "gorm.io/gorm" +) + +// FileCommitService file commit service +type FileCommitService struct { + commitDAO *dao.FileCommitDAO + commitItemDAO *dao.FileCommitItemDAO + fileDAO *dao.FileDAO +} + +// NewFileCommitService create file commit service +func NewFileCommitService() *FileCommitService { + return &FileCommitService{ + commitDAO: dao.NewFileCommitDAO(), + commitItemDAO: dao.NewFileCommitItemDAO(), + fileDAO: dao.NewFileDAO(), + } +} + +// CreateCommit creates a new commit for a workspace folder +func (s *FileCommitService) CreateCommit(folderID, authorID, message string, changes []entity.FileChange) (*entity.FileCommit, error) { + // 1. Get the latest commit for this folder + latestCommit, _ := s.commitDAO.GetLatestByFolderID(folderID) + + // 2. Build tree state from latest commit + treeState := make(map[string]interface{}) + if latestCommit != nil && latestCommit.TreeState != nil { + if err := json.Unmarshal([]byte(*latestCommit.TreeState), &treeState); err != nil { + common.Warn("failed to unmarshal previous tree state", zap.Error(err)) + treeState = make(map[string]interface{}) + } + } + + // 3. Create commit record + commitID := generateCommitUUID() + nowMs := time.Now().UnixMilli() + + commit := &entity.FileCommit{ + ID: commitID, + FolderID: folderID, + Message: message, + AuthorID: authorID, + FileCount: len(changes), + } + + if latestCommit != nil { + parentID := latestCommit.ID + commit.ParentID = &parentID + } + + // All DB operations run inside a single transaction. + var treeStr string + if err := dao.DB.Transaction(func(tx *gorm.DB) error { + // Save commit + if err := tx.Create(commit).Error; err != nil { + return fmt.Errorf("failed to create commit: %w", err) + } + + // Backfill parent_id for existing tree_state entries + for fid, entry := range treeState { + if m, ok := entry.(map[string]interface{}); ok { + if _, has := m["parent_id"]; !has { + var fileRec entity.File + if err := tx.Select("parent_id").Where("id = ?", fid).First(&fileRec).Error; err == nil { + m["parent_id"] = fileRec.ParentID + } + } + } + } + + storageImpl := storage.GetStorageFactory().GetStorage() + + for _, change := range changes { + item := &entity.FileCommitItem{ + ID: generateCommitUUID(), + CommitID: commitID, + FileID: change.FileID, + Operation: change.Operation, + } + + switch change.Operation { + case "add", "modify": + contentBytes := []byte(change.Content) + hash := sha256.Sum256(contentBytes) + hashHex := hex.EncodeToString(hash[:]) + objKey := ".objects/" + hashHex + + if storageImpl != nil { + if err := storageImpl.Put(folderID, objKey, contentBytes); err != nil { + return fmt.Errorf("failed to store object: %w", err) + } + } + + if change.Operation == "modify" { + if oldEntry, ok := treeState[change.FileID]; ok { + if oldMap, ok := oldEntry.(map[string]interface{}); ok { + if oldHash, ok := oldMap["hash"].(string); ok { + item.OldHash = &oldHash + } + if oldLoc, ok := oldMap["location"].(string); ok { + item.OldLocation = &oldLoc + } + } + } + } + + item.NewHash = &hashHex + item.NewLocation = &objKey + + fSize := int64(len(contentBytes)) + if err := tx.Model(&entity.File{}).Where("id = ?", change.FileID).Updates(map[string]interface{}{ + "location": objKey, + "size": fSize, + }).Error; err != nil { + return fmt.Errorf("failed to update file record: %w", err) + } + + // Look up parent_id from the File table + fileParentID := "" + var fileRec entity.File + if err := tx.Select("parent_id").Where("id = ?", change.FileID).First(&fileRec).Error; err == nil { + fileParentID = fileRec.ParentID + } + + treeState[change.FileID] = map[string]interface{}{ + "hash": hashHex, + "location": objKey, + "name": change.FileName, + "size": fSize, + "status": "1", + "parent_id": fileParentID, + } + + case "delete": + if oldEntry, ok := treeState[change.FileID]; ok { + if oldMap, ok := oldEntry.(map[string]interface{}); ok { + if oldHash, ok := oldMap["hash"].(string); ok { + item.OldHash = &oldHash + } + if oldLoc, ok := oldMap["location"].(string); ok { + item.OldLocation = &oldLoc + } + } + } + + if err := tx.Model(&entity.File{}).Where("id = ?", change.FileID).Update("status", "0").Error; err != nil { + return fmt.Errorf("failed to soft-delete file: %w", err) + } + + if entry, ok := treeState[change.FileID]; ok { + if entryMap, ok := entry.(map[string]interface{}); ok { + entryMap["status"] = "0" + } + } + + case "rename": + item.OldName = &change.OldName + item.NewName = &change.NewName + + if err := tx.Model(&entity.File{}).Where("id = ?", change.FileID).Update("name", change.NewName).Error; err != nil { + return fmt.Errorf("failed to rename file: %w", err) + } + + if entry, ok := treeState[change.FileID]; ok { + if entryMap, ok := entry.(map[string]interface{}); ok { + entryMap["name"] = change.NewName + } + } else { + treeState[change.FileID] = map[string]interface{}{ + "name": change.NewName, + "status": "1", + } + } + } + + // Save commit item + if err := tx.Create(item).Error; err != nil { + return fmt.Errorf("failed to create commit item: %w", err) + } + } + + // Serialize and save tree state + treeJSON, err := json.Marshal(treeState) + if err != nil { + return fmt.Errorf("failed to marshal tree state: %w", err) + } + if err := tx.Model(&entity.FileCommit{}).Where("id = ?", commitID).Update("tree_state", string(treeJSON)).Error; err != nil { + return fmt.Errorf("failed to update tree state: %w", err) + } + treeStr = string(treeJSON) + + return nil + }); err != nil { + return nil, err + } + + commit.TreeState = &treeStr + commit.CreateTime = &nowMs + + return commit, nil +} + +// ListCommits lists commits for a workspace folder with pagination +func (s *FileCommitService) ListCommits(folderID string, page, pageSize int, orderBy string, desc bool) ([]*entity.FileCommit, int64, error) { + return s.commitDAO.ListByFolderID(folderID, page, pageSize, orderBy, desc) +} + +// GetCommit gets a single commit by ID +func (s *FileCommitService) GetCommit(commitID string) (*entity.FileCommit, error) { + return s.commitDAO.GetByID(commitID) +} + +// ListCommitFiles lists all file change items for a commit +func (s *FileCommitService) ListCommitFiles(commitID string) ([]*entity.FileCommitItem, error) { + return s.commitItemDAO.ListByCommitID(commitID) +} + +// DiffCommits compares two commits and returns the diff +func (s *FileCommitService) DiffCommits(fromID, toID string) ([]entity.DiffEntry, error) { + fromItems, err := s.commitItemDAO.ListByCommitID(fromID) + if err != nil { + return nil, err + } + toItems, err := s.commitItemDAO.ListByCommitID(toID) + if err != nil { + return nil, err + } + + fromMap := make(map[string]*entity.FileCommitItem) + for _, item := range fromItems { + fromMap[item.FileID] = item + } + toMap := make(map[string]*entity.FileCommitItem) + for _, item := range toItems { + toMap[item.FileID] = item + } + + // Get tree state for file names (use to commit) + toCommit, err := s.commitDAO.GetByID(toID) + treeState := make(map[string]interface{}) + if err == nil && toCommit != nil && toCommit.TreeState != nil { + json.Unmarshal([]byte(*toCommit.TreeState), &treeState) + } + + getFileName := func(fileID string) string { + if entry, ok := treeState[fileID]; ok { + if m, ok := entry.(map[string]interface{}); ok { + if name, ok := m["name"].(string); ok { + return name + } + } + } + return fileID + } + + allFileIDs := make(map[string]bool) + for fid := range fromMap { + allFileIDs[fid] = true + } + for fid := range toMap { + allFileIDs[fid] = true + } + + // Sort for deterministic output + var sortedIDs []string + for fid := range allFileIDs { + sortedIDs = append(sortedIDs, fid) + } + sort.Strings(sortedIDs) + + var diff []entity.DiffEntry + for _, fid := range sortedIDs { + fromItem := fromMap[fid] + toItem := toMap[fid] + + var entry entity.DiffEntry + entry.FileID = fid + entry.FileName = getFileName(fid) + + if fromItem != nil && toItem == nil { + // Deleted + entry.Operation = "delete" + entry.OldHash = fromItem.NewHash + entry.OldLocation = fromItem.NewLocation + } else if fromItem == nil && toItem != nil { + // Added + entry.Operation = "add" + entry.NewHash = toItem.NewHash + entry.NewLocation = toItem.NewLocation + } else { + // Both exist — compare hashes + fromHash := "" + if fromItem.NewHash != nil { + fromHash = *fromItem.NewHash + } + toHash := "" + if toItem.NewHash != nil { + toHash = *toItem.NewHash + } + if fromHash != toHash { + entry.Operation = "modify" + entry.OldHash = fromItem.NewHash + entry.OldLocation = fromItem.NewLocation + entry.NewHash = toItem.NewHash + entry.NewLocation = toItem.NewLocation + } + } + + if entry.Operation != "" { + diff = append(diff, entry) + } + } + + return diff, nil +} + +// GetUncommittedChanges gets uncommitted changes for a workspace folder. +// Recursively scans all sub-folders. +func (s *FileCommitService) GetUncommittedChanges(folderID string) ([]entity.DiffEntry, error) { + // Get latest commit tree state + latest, err := s.commitDAO.GetLatestByFolderID(folderID) + committedFiles := make(map[string]map[string]interface{}) + if err == nil && latest != nil && latest.TreeState != nil { + var treeData map[string]interface{} + if jsonErr := json.Unmarshal([]byte(*latest.TreeState), &treeData); jsonErr == nil { + for k, v := range treeData { + if m, ok := v.(map[string]interface{}); ok { + committedFiles[k] = m + } + } + } + } + + // Get all live files recursively under this folder + liveMap := s.collectAllFilesRecursive(folderID) + + var changes []entity.DiffEntry + processed := make(map[string]bool) + + // Check committed files for modifications and deletions + for fid, committedEntry := range committedFiles { + processed[fid] = true + if committedEntry["status"] == "0" { + continue + } + + if liveFile, ok := liveMap[fid]; ok { + liveHash := computeLiveFileHash(folderID, fid, liveFile) + committedHash := "" + if h, ok := committedEntry["hash"].(string); ok { + committedHash = h + } + if liveHash != "" && liveHash != committedHash { + changes = append(changes, entity.DiffEntry{ + FileID: fid, + FileName: liveFile.Name, + Operation: "modify", + }) + } + } else { + name := "" + if n, ok := committedEntry["name"].(string); ok { + name = n + } + changes = append(changes, entity.DiffEntry{ + FileID: fid, + FileName: name, + Operation: "delete", + }) + } + } + + // Check for newly added files + for _, liveFile := range liveMap { + if !processed[liveFile.ID] { + changes = append(changes, entity.DiffEntry{ + FileID: liveFile.ID, + FileName: liveFile.Name, + Operation: "add", + }) + } + } + + return changes, nil +} + +// collectAllFilesRecursive recursively collects all non-folder files under a folder. +func (s *FileCommitService) collectAllFilesRecursive(folderID string) map[string]*entity.File { + result := make(map[string]*entity.File) + // Direct files (non-folder) + files, _ := s.fileDAO.ListNonFolderByParentID(folderID) + for _, f := range files { + result[f.ID] = f + } + // Sub-folders — recurse + subFolders, _ := s.fileDAO.ListFolderByParentID(folderID) + for _, sf := range subFolders { + sub := s.collectAllFilesRecursive(sf.ID) + for k, v := range sub { + result[k] = v + } + } + return result +} + +// GetCommitTree gets the tree state snapshot for a commit as a hierarchical tree. +func (s *FileCommitService) GetCommitTree(commitID string) (map[string]interface{}, error) { + commit, err := s.commitDAO.GetByID(commitID) + if err != nil { + return nil, err + } + if commit.TreeState == nil { + return map[string]interface{}{"id": commit.FolderID, "name": "", "type": "folder", "children": []interface{}{}}, nil + } + var flat map[string]interface{} + if err := json.Unmarshal([]byte(*commit.TreeState), &flat); err != nil { + return nil, err + } + return s.buildHierarchicalTree(flat, commit.FolderID), nil +} + +// buildHierarchicalTree builds a recursive tree from a flat tree_state map. +// Sub-folder hierarchy is resolved from the File table's parent_id. +func (s *FileCommitService) buildHierarchicalTree(flat map[string]interface{}, rootFolderID string) map[string]interface{} { + // Collect all unique folder IDs + folderIDs := map[string]bool{rootFolderID: true} + for _, v := range flat { + if entry, ok := v.(map[string]interface{}); ok { + pid, _ := entry["parent_id"].(string) + if pid == "" { + pid = rootFolderID + } + folderIDs[pid] = true + } + } + + // Build folder parent map from File table + folderParentMap := make(map[string]string) + for fid := range folderIDs { + if fid != rootFolderID { + if f, err := s.fileDAO.GetByID(fid); err == nil { + folderParentMap[fid] = f.ParentID + } + } + } + + // Group file entries by parent_id + filesByParent := make(map[string][]string) + fileEntries := make(map[string]map[string]interface{}) + for fid, v := range flat { + entry, ok := v.(map[string]interface{}) + if !ok { + continue + } + pid, _ := entry["parent_id"].(string) + if pid == "" { + pid = rootFolderID + } + filesByParent[pid] = append(filesByParent[pid], fid) + fileEntries[fid] = entry + } + + // Group sub-folders by their parent + childrenByFolder := make(map[string][]string) + for sfid, ppid := range folderParentMap { + childrenByFolder[ppid] = append(childrenByFolder[ppid], sfid) + } + + var buildNode func(nodeID string) map[string]interface{} + buildNode = func(nodeID string) map[string]interface{} { + nodeName := nodeID + if f, err := s.fileDAO.GetByID(nodeID); err == nil { + nodeName = f.Name + } + node := map[string]interface{}{ + "id": nodeID, + "name": nodeName, + "type": "folder", + "children": []interface{}{}, + } + + // File children + for _, fid := range filesByParent[nodeID] { + entry := fileEntries[fid] + fn := map[string]interface{}{ + "id": fid, + "name": entry["name"], + "type": "file", + "hash": entry["hash"], + "size": entry["size"], + "status": entry["status"], + } + if loc, ok := entry["location"].(string); ok && loc != "" { + fn["location"] = loc + } + node["children"] = append(node["children"].([]interface{}), fn) + } + // Sub-folder children + for _, sfid := range childrenByFolder[nodeID] { + child := buildNode(sfid) + node["children"] = append(node["children"].([]interface{}), child) + } + return node + } + + return buildNode(rootFolderID) +} + +// GetCommitFileContent gets file content as it existed in a given commit +func (s *FileCommitService) GetCommitFileContent(folderID, commitID, fileID string) ([]byte, error) { + _, err := s.commitDAO.GetByID(commitID) + if err != nil { + return nil, fmt.Errorf("commit not found: %w", err) + } + + item, err := s.commitItemDAO.GetByCommitIDAndFileID(commitID, fileID) + if err != nil { + return nil, fmt.Errorf("file not found in commit: %w", err) + } + + if item.NewHash == nil && item.OldHash == nil { + return nil, fmt.Errorf("file has no content in this commit") + } + + hash := "" + if item.NewHash != nil { + hash = *item.NewHash + } else if item.OldHash != nil { + hash = *item.OldHash + } + + objKey := ".objects/" + hash + + storageImpl := storage.GetStorageFactory().GetStorage() + if storageImpl == nil { + return nil, fmt.Errorf("storage not initialized") + } + + blob, err := storageImpl.Get(folderID, objKey) + if err != nil { + return nil, fmt.Errorf("failed to read file content from storage: %w", err) + } + + return blob, nil +} + +// GetFileVersionHistory gets version history for a specific file +func (s *FileCommitService) GetFileVersionHistory(fileID string) ([]entity.VersionEntry, error) { + items, err := s.commitItemDAO.ListByFileID(fileID) + if err != nil { + return nil, err + } + + var versions []entity.VersionEntry + for _, item := range items { + commit, err := s.commitDAO.GetByID(item.CommitID) + if err != nil { + continue + } + + h := "" + if item.NewHash != nil { + h = *item.NewHash + } else if item.OldHash != nil { + h = *item.OldHash + } + + versions = append(versions, entity.VersionEntry{ + CommitID: item.CommitID, + Operation: item.Operation, + Hash: h, + CreateTime: commit.CreateTime, + Message: commit.Message, + }) + } + + return versions, nil +} + +// computeLiveFileHash computes the SHA256 hash of current file content from storage +func computeLiveFileHash(folderID, fileID string, file *entity.File) string { + if file.Location == nil || *file.Location == "" { + return "" + } + + storageImpl := storage.GetStorageFactory().GetStorage() + if storageImpl == nil { + return "" + } + + data, err := storageImpl.Get(folderID, *file.Location) + if err != nil { + return "" + } + + hash := sha256.Sum256(data) + return hex.EncodeToString(hash[:]) +} + +// generateCommitUUID generates a UUID without dashes +func generateCommitUUID() string { + id := uuid.New().String() + return strings.ReplaceAll(id, "-", "") +} diff --git a/test/testcases/restful_api/test_file_commit_routes_unit.py b/test/testcases/restful_api/test_file_commit_routes_unit.py new file mode 100644 index 0000000000..1dce6eaf24 --- /dev/null +++ b/test/testcases/restful_api/test_file_commit_routes_unit.py @@ -0,0 +1,718 @@ +# +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""API-level integration test for file commit endpoints. + +Uses an in-memory SQLite database so the real FileCommitService and +FileCommit/FileCommitItem models execute against real SQL — only the +HTTP layer (quart.request, login_required, current_user) and storage +are mocked. +""" + +import asyncio +import functools +import importlib.util +import logging +import sys +from pathlib import Path +from types import ModuleType, SimpleNamespace + +import pytest +from peewee import SqliteDatabase, Model, CharField, IntegerField, BigIntegerField, TextField + +LOGGER = logging.getLogger(__name__) + + +class _DummyManager: + def route(self, *_args, **_kwargs): + def decorator(func): + return func + return decorator + + +def _run(coro): + return asyncio.run(coro) + + +# Shared mutable payload used by both get_request_json (stub) and +# _setup_request so validate_request's closure always sees the current value. +_request_payload: list = [{}] + + +# ── SQLite in-memory models ─────────────────────────────────────────────── +# We create minimal Peewee models that match the real table schemas so +# FileCommitService (which uses DB.atomic(), .select(), .where(), etc.) +# works against real SQL. + +sqlite_db = SqliteDatabase(':memory:') + + +class BaseTestModel(Model): + class Meta: + database = sqlite_db + + +class FileCommitTestModel(BaseTestModel): + id = CharField(max_length=32, primary_key=True) + folder_id = CharField(max_length=32, index=True) + parent_id = CharField(max_length=32, null=True, index=True) + message = CharField(max_length=512, default="") + author_id = CharField(max_length=32, index=True) + file_count = IntegerField(default=0) + tree_state = TextField(null=True) + create_time = BigIntegerField(null=True, index=True) + create_date = CharField(null=True, max_length=32, index=True) + update_time = BigIntegerField(null=True, index=True) + update_date = CharField(null=True, max_length=32, index=True) + + class Meta: + db_table = "file_commit" + + +class FileCommitItemTestModel(BaseTestModel): + id = CharField(max_length=32, primary_key=True) + commit_id = CharField(max_length=32, index=True) + file_id = CharField(max_length=32, index=True) + operation = CharField(max_length=16, index=True) + old_hash = CharField(max_length=64, null=True, index=True) + new_hash = CharField(max_length=64, null=True, index=True) + old_location = CharField(max_length=255, null=True) + new_location = CharField(max_length=255, null=True) + old_name = CharField(max_length=255, null=True) + new_name = CharField(max_length=255, null=True) + create_time = BigIntegerField(null=True, index=True) + create_date = CharField(null=True, max_length=32, index=True) + update_time = BigIntegerField(null=True, index=True) + update_date = CharField(null=True, max_length=32, index=True) + + class Meta: + db_table = "file_commit_item" + + +class FileTestModel(BaseTestModel): + id = CharField(max_length=32, primary_key=True) + parent_id = CharField(max_length=32, index=True) + tenant_id = CharField(max_length=32, index=True) + created_by = CharField(max_length=32, index=True) + name = CharField(max_length=255, index=True) + location = CharField(max_length=255, null=True, index=True) + size = BigIntegerField(default=0, index=True) + type = CharField(max_length=32, index=True) + source_type = CharField(max_length=128, default="", index=True) + status = CharField(max_length=1, null=True, default="1", index=True) + create_time = BigIntegerField(null=True, index=True) + create_date = CharField(null=True, max_length=32, index=True) + update_time = BigIntegerField(null=True, index=True) + update_date = CharField(null=True, max_length=32, index=True) + + class Meta: + db_table = "file" + + +_TABLES = [FileCommitTestModel, FileCommitItemTestModel, FileTestModel] +sqlite_db.create_tables(_TABLES) + + +def _clear_db(): + """Delete all rows from every test table so each test starts clean.""" + for model in _TABLES: + model.delete().execute() + + +# ── Module loader ───────────────────────────────────────────────────────── + +def _load_module(monkeypatch): + """Load file_commit_api.py with SQLite in-memory DB and mocked HTTP layer.""" + repo_root = Path(__file__).resolve().parents[3] + + # Stub: quart.request + quart_mod = ModuleType("quart") + quart_mod.request = SimpleNamespace(args={}, content_type="application/json") + monkeypatch.setitem(sys.modules, "quart", quart_mod) + + # Stub: api.apps with login_required / current_user + api_pkg = ModuleType("api") + api_pkg.__path__ = [str(repo_root / "api")] + monkeypatch.setitem(sys.modules, "api", api_pkg) + + apps_mod = ModuleType("api.apps") + apps_mod.__path__ = [str(repo_root / "api" / "apps")] + apps_mod.current_user = SimpleNamespace(id="test-user") + apps_mod.login_required = lambda func: func + monkeypatch.setitem(sys.modules, "api.apps", apps_mod) + api_pkg.apps = apps_mod + + # Stub: api.utils.api_utils + api_utils_mod = ModuleType("api.utils.api_utils") + + def get_json_result(data=None, message="success", code=0): + return {"code": code, "data": data, "message": message} + + def get_data_error_result(message=""): + return {"code": 102, "data": None, "message": message} + + async def get_request_json(): + return _request_payload[0] + + def server_error_response(err): + return {"code": 500, "data": None, "message": str(err)} + + def validate_request(*required_keys): + def _decorator(func): + @functools.wraps(func) + async def _wrapper(*args, **kwargs): + payload = await get_request_json() + missing = [k for k in required_keys if k not in payload] + if missing: + return get_json_result( + code=101, data=None, + message="required argument are missing: " + ", ".join(missing) + ) + return await func(*args, **kwargs) + return _wrapper + return _decorator + + api_utils_mod.get_json_result = get_json_result + api_utils_mod.get_data_error_result = get_data_error_result + api_utils_mod.get_request_json = get_request_json + api_utils_mod.server_error_response = server_error_response + api_utils_mod.validate_request = validate_request + monkeypatch.setitem(sys.modules, "api.utils.api_utils", api_utils_mod) + + # Stub: common.misc_utils + import uuid + misc_utils_mod = ModuleType("common.misc_utils") + misc_utils_mod.get_uuid = lambda: uuid.uuid1().hex + monkeypatch.setitem(sys.modules, "common.misc_utils", misc_utils_mod) + + # Stub: common.settings (STORAGE_IMPL is a no-op for testing) + common_mod = ModuleType("common") + common_mod.__path__ = [] + common_mod.settings = SimpleNamespace( + STORAGE_IMPL=SimpleNamespace( + put=lambda *_a, **_kw: None, + get=lambda *_a, **_kw: b"stub-content", + ), + DATABASE_TYPE="sqlite", + ) + monkeypatch.setitem(sys.modules, "common", common_mod) + + # Stub: common.time_utils (monotonically increasing timestamps) + _ts_iter = iter(range(1718200000000, 1718200000100)) + time_utils_mod = ModuleType("common.time_utils") + time_utils_mod.current_timestamp = lambda: next(_ts_iter) + time_utils_mod.datetime_format = lambda *_a, **__: "mock" + monkeypatch.setitem(sys.modules, "common.time_utils", time_utils_mod) + + # Stub: api.db.db_models — inject SQLite DB and our test models + db_models_mod = ModuleType("api.db.db_models") + + class _DB: + """Drop-in replacement that wraps our SQLite DB with the same + methods (connection_context, atomic) that CommonService expects.""" + + @staticmethod + def connection_context(): + def dec(func): + return func + return dec + + @staticmethod + def atomic(): + class Ctx: + def __enter__(self2): + return self2 + def __exit__(self2, *args): + pass + return Ctx() + + db_models_mod.DB = _DB + db_models_mod.FileCommit = FileCommitTestModel + db_models_mod.FileCommitItem = FileCommitItemTestModel + db_models_mod.File = FileTestModel + db_models_mod.DataBaseModel = BaseTestModel + monkeypatch.setitem(sys.modules, "api.db.db_models", db_models_mod) + + class _StubFileService: + model = FileTestModel # class attribute, not staticmethod — code accesses FileService.model.update(...) + @staticmethod + def update_by_id(pid, data): + return FileTestModel.update(data).where(FileTestModel.id == pid).execute() + @staticmethod + def get_by_id(pid): + try: + obj = FileTestModel.get_by_id(pid) + return True, obj + except Exception: + return False, None + @staticmethod + def get_or_none(**kwargs): + try: + return FileTestModel.get(**kwargs) + except Exception: + return None + + class CommonServiceBase: + model = None + @classmethod + def get_by_id(cls, pid): + try: + obj = cls.model.get_or_none(cls.model.id == pid) + if obj: + return True, obj + except Exception: + pass + return False, None + @classmethod + def query(cls, cols=None, reverse=None, order_by=None, **kwargs): + q = cls.model.select() + for f_n, f_v in kwargs.items(): + if f_v is not None and hasattr(cls.model, f_n): + q = q.where(getattr(cls.model, f_n) == f_v) + return q + @classmethod + def update_by_id(cls, pid, data): + return cls.model.update(data).where(cls.model.id == pid).execute() + @classmethod + def filter_update(cls, filters, update_data): + return cls.model.update(update_data).where(*filters).execute() + + # Stub: common.constants with FileSource for resolver + constants_mod = ModuleType("common.constants") + constants_mod.FileSource = type("FileSource", (), {"KNOWLEDGEBASE": "knowledgebase"}) + monkeypatch.setitem(sys.modules, "common.constants", constants_mod) + + # Stub: api.db with real filesystem path so sub-packages can be discovered. + db_pkg = ModuleType("api.db") + db_pkg.__path__ = [str(repo_root / "api" / "db")] + db_pkg.UserTenantRole = type('UserTenantRole', (), {k: k for k in ('OWNER','ADMIN','NORMAL','INVITE')}) + db_pkg.TenantPermission = type('TenantPermission', (), {'ME': 'me', 'TEAM': 'team'}) + db_pkg.FileType = type('FileType', (), {'FOLDER': 'folder', 'DOC': 'doc', 'VISUAL': 'visual', 'AURAL': 'aural', 'VIRTUAL': 'virtual', 'PDF': 'pdf', 'OTHER': 'other'}) + db_pkg.KNOWLEDGEBASE_FOLDER_NAME = '.knowledgebase' + db_pkg.SKILLS_FOLDER_NAME = 'skills' + monkeypatch.setitem(sys.modules, "api.db", db_pkg) + api_pkg.db = db_pkg + + # Stub api.db.services — prevent real __init__ from loading (avoids + # importing every real service module). Keep the filesystem path so + # file_commit_service can be discovered, but pre-stub file_service + # (which has heavy deps that would cascade-fail). + services_pkg = ModuleType("api.db.services") + services_pkg.__path__ = [str(repo_root / "api" / "db" / "services")] + monkeypatch.setitem(sys.modules, "api.db.services", services_pkg) + + # Pre-stub service modules that file_commit_api.py imports. + # Each stub prevents the real .py file from loading (and cascading deps). + file_svc_mod = ModuleType("api.db.services.file_service") + file_svc_mod.FileService = _StubFileService + monkeypatch.setitem(sys.modules, "api.db.services.file_service", file_svc_mod) + + common_svc_mod = ModuleType("api.db.services.common_service") + common_svc_mod.CommonService = CommonServiceBase + monkeypatch.setitem(sys.modules, "api.db.services.common_service", common_svc_mod) + + kb_svc_mod = ModuleType("api.db.services.knowledgebase_service") + # NB: The dataset resolver in the API calls KnowledgebaseService.get_by_id + # then accesses .name and .tenant_id. We return a simple object. + class _StubKnowledgebaseService: + @staticmethod + def get_by_id(dataset_id): + if dataset_id == "ds-1": + return True, SimpleNamespace(name="test-ds", tenant_id="t1") + return False, None + kb_svc_mod.KnowledgebaseService = _StubKnowledgebaseService + monkeypatch.setitem(sys.modules, "api.db.services.knowledgebase_service", kb_svc_mod) + + # Remove cached file_commit_service so it reimports with our SQLite stubs. + # Keep api.db.db_models in sys.modules — it's already patched above. + for mod_name in list(sys.modules.keys()): + if mod_name.startswith("api.db.services.file_commit"): + del sys.modules[mod_name] + + # Load the module + module_name = "api.apps.restful_apis.file_commit_api" + module_path = repo_root / "api" / "apps" / "restful_apis" / "file_commit_api.py" + spec = importlib.util.spec_from_file_location(module_name, module_path) + module = importlib.util.module_from_spec(spec) + module.manager = _DummyManager() + monkeypatch.setitem(sys.modules, module_name, module) + spec.loader.exec_module(module) + + return module + + +# ── Helpers ─────────────────────────────────────────────────────────────── + +def _setup_request(module, json_payload=None, args=None): + """Set up a request payload and query args for the next handler call.""" + if json_payload is not None: + _request_payload[0] = json_payload + if args is not None: + module.request.args = args + + +# ── Fixtures ────────────────────────────────────────────────────────────── + +@pytest.fixture(scope="session") +def auth(): + return "test-auth" + + +@pytest.fixture(scope="session", autouse=True) +def set_tenant_info(): + return None + + +@pytest.fixture(autouse=True) +def reset_db(): + """Clear all rows before each test to prevent order-dependent failures.""" + _clear_db() + + +# ── Tests ───────────────────────────────────────────────────────────────── + +@pytest.mark.p2 +def test_create_commit_success(monkeypatch): + module = _load_module(monkeypatch) + # Seed a file + FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + + _setup_request(module, json_payload={ + "message": "initial commit", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "hello"}], + }) + + res = _run(module.create_commit("root-folder")) + assert res["code"] == 0, f"Expected 0, got {res}" + data = res["data"] + assert data["message"] == "initial commit" + assert data["folder_id"] == "root-folder" + assert data["author_id"] == "test-user" + assert data["file_count"] == 1 + assert data["tree_state"] is not None + assert data["id"] is not None + + +@pytest.mark.p2 +def test_create_commit_missing_fields(monkeypatch): + module = _load_module(monkeypatch) + _setup_request(module, json_payload={"message": "no files"}) + + res = _run(module.create_commit("root-folder")) + assert res["code"] == 101, f"Expected validation error, got {res}" + + +@pytest.mark.p2 +def test_create_commit_modify_and_add(monkeypatch): + module = _load_module(monkeypatch) + FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + FileTestModel.create(id="f2", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="b.txt", type="txt") + + # Commit 1: add f1 + _setup_request(module, json_payload={ + "message": "c1", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "v1"}], + }) + _run(module.create_commit("root-folder")) + + # Commit 2: modify f1, add f2 + _setup_request(module, json_payload={ + "message": "c2", + "files": [ + {"file_id": "f1", "file_name": "a.txt", "operation": "modify", "content": "v2"}, + {"file_id": "f2", "file_name": "b.txt", "operation": "add", "content": "world"}, + ], + }) + res = _run(module.create_commit("root-folder")) + assert res["code"] == 0 + assert res["data"]["file_count"] == 2 + + +@pytest.mark.p2 +def test_create_commit_delete(monkeypatch): + module = _load_module(monkeypatch) + FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + + # Add then delete + _setup_request(module, json_payload={ + "message": "add", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "hello"}], + }) + _run(module.create_commit("root-folder")) + + _setup_request(module, json_payload={ + "message": "delete", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "delete"}], + }) + res = _run(module.create_commit("root-folder")) + assert res["code"] == 0 + + +@pytest.mark.p2 +def test_create_commit_rename(monkeypatch): + module = _load_module(monkeypatch) + FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="old.txt", type="txt") + + _setup_request(module, json_payload={ + "message": "add", + "files": [{"file_id": "f1", "file_name": "old.txt", "operation": "add", "content": "data"}], + }) + _run(module.create_commit("root-folder")) + + # Rename + _setup_request(module, json_payload={ + "message": "rename", + "files": [{"file_id": "f1", "file_name": "old.txt", "operation": "rename", + "old_name": "old.txt", "new_name": "new.txt"}], + }) + res = _run(module.create_commit("root-folder")) + assert res["code"] == 0 + + +@pytest.mark.p2 +def test_list_commits_success(monkeypatch): + module = _load_module(monkeypatch) + FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + + # Create 2 commits + _setup_request(module, json_payload={ + "message": "c1", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "v1"}], + }) + _run(module.create_commit("root-folder")) + + _setup_request(module, json_payload={ + "message": "c2", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "modify", "content": "v2"}], + }) + _run(module.create_commit("root-folder")) + + # List + module.request.args = {"page": "1", "page_size": "10"} + res = _run(module.list_commits("root-folder")) + assert res["code"] == 0 + assert res["data"]["total"] == 2 + assert len(res["data"]["commits"]) == 2 + + +@pytest.mark.p2 +def test_get_commit_detail(monkeypatch): + module = _load_module(monkeypatch) + FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + + _setup_request(module, json_payload={ + "message": "detail test", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "data"}], + }) + create_res = _run(module.create_commit("root-folder")) + commit_id = create_res["data"]["id"] + + res = _run(module.get_commit("root-folder", commit_id)) + assert res["code"] == 0 + assert res["data"]["id"] == commit_id + assert res["data"]["message"] == "detail test" + assert len(res["data"]["files"]) == 1 + + +@pytest.mark.p2 +def test_get_commit_not_found(monkeypatch): + module = _load_module(monkeypatch) + res = _run(module.get_commit("root-folder", "nonexistent")) + assert res["code"] == 102 + assert "not found" in res["message"].lower() + + +@pytest.mark.p2 +def test_diff_commits(monkeypatch): + module = _load_module(monkeypatch) + FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + FileTestModel.create(id="f2", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="b.txt", type="txt") + + # c1: add f1 + _setup_request(module, json_payload={ + "message": "c1", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "v1"}], + }) + c1 = _run(module.create_commit("root-folder"))["data"]["id"] + + # c2: add f2, modify f1 + _setup_request(module, json_payload={ + "message": "c2", + "files": [ + {"file_id": "f2", "file_name": "b.txt", "operation": "add", "content": "world"}, + {"file_id": "f1", "file_name": "a.txt", "operation": "modify", "content": "v2"}, + ], + }) + c2 = _run(module.create_commit("root-folder"))["data"]["id"] + assert c1 != c2, "c1 and c2 must have different IDs" + + module.request.args = {"from": c1, "to": c2} + res = _run(module.diff_commits("root-folder")) + assert res["code"] == 0, f"diff failed: {res}" + assert len(res["data"]) == 2, f"Expected 2 diff entries, got {len(res['data'])}: {res['data']}" + + # Verify f2 was added (present in c2 but not in c1) + f2_entries = [e for e in res["data"] if e["file_id"] == "f2"] + assert len(f2_entries) == 1 + assert f2_entries[0]["operation"] == "add" + + # Verify f1 was modified (hash changed from v1 to v2) + f1_entries = [e for e in res["data"] if e["file_id"] == "f1"] + assert len(f1_entries) == 1 + assert f1_entries[0]["operation"] == "modify" + assert f1_entries[0]["old_hash"] != f1_entries[0]["new_hash"] + + +@pytest.mark.p2 +def test_diff_commits_missing_params(monkeypatch): + module = _load_module(monkeypatch) + module.request.args = {} + res = _run(module.diff_commits("root-folder")) + assert res["code"] == 102 + + +@pytest.mark.p2 +def test_get_uncommitted_changes(monkeypatch): + module = _load_module(monkeypatch) + # Seed a file that will be committed + FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + # Seed a file that will NOT be committed (uncommitted add) + FileTestModel.create(id="f2", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="b.txt", type="txt") + + # Commit only f1 + _setup_request(module, json_payload={ + "message": "add f1", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "hello"}], + }) + _run(module.create_commit("root-folder")) + + res = _run(module.get_uncommitted_changes("root-folder")) + assert res["code"] == 0 + # f2 should appear as uncommitted "add" + f2_changes = [c for c in res["data"] if c["file_id"] == "f2"] + assert len(f2_changes) > 0, "Expected f2 to show as uncommitted change" + assert f2_changes[0]["operation"] == "add" + + +@pytest.mark.p2 +def test_get_commit_tree(monkeypatch): + module = _load_module(monkeypatch) + FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + + _setup_request(module, json_payload={ + "message": "c1", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "data"}], + }) + create_res = _run(module.create_commit("root-folder")) + commit_id = create_res["data"]["id"] + + res = _run(module.get_commit_tree("root-folder", commit_id)) + assert res["code"] == 0 + assert res["data"]["type"] == "folder" + assert res["data"]["id"] == "root-folder" + assert any(c["id"] == "f1" for c in res["data"].get("children", [])) + + +@pytest.mark.p2 +def test_get_commit_file_content(monkeypatch): + module = _load_module(monkeypatch) + FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + + _setup_request(module, json_payload={ + "message": "c1", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "hello world"}], + }) + create_res = _run(module.create_commit("root-folder")) + commit_id = create_res["data"]["id"] + + res = _run(module.get_commit_file_content("root-folder", commit_id, "f1")) + assert res["code"] == 0 + assert "content" in res["data"] + + +@pytest.mark.p2 +def test_get_file_version_history(monkeypatch): + module = _load_module(monkeypatch) + FileTestModel.create(id="f1", parent_id="root-folder", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + + # Two commits modifying f1 + _setup_request(module, json_payload={ + "message": "v1", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "v1"}], + }) + _run(module.create_commit("root-folder")) + + _setup_request(module, json_payload={ + "message": "v2", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "modify", "content": "v2"}], + }) + _run(module.create_commit("root-folder")) + + res = _run(module.get_file_version_history("f1")) + assert res["code"] == 0 + assert len(res["data"]) == 2 + + +@pytest.mark.p2 +def test_workspace_alias(monkeypatch): + """Verify /workspace/ alias routes work the same as /folders/.""" + module = _load_module(monkeypatch) + FileTestModel.create(id="f1", parent_id="ws-folder", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + + _setup_request(module, json_payload={ + "message": "workspace commit", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "data"}], + }) + res = _run(module.create_commit("ws-folder")) + assert res["code"] == 0 + + # List via workspace alias + module.request.args = {"page": "1", "page_size": "10"} + res = _run(module.list_commits("ws-folder")) + assert res["code"] == 0 + assert res["data"]["total"] == 1 + + +@pytest.mark.p2 +def test_get_commit_wrong_folder_returns_not_found(monkeypatch): + module = _load_module(monkeypatch) + FileTestModel.create(id="f1", parent_id="folder-a", tenant_id="t1", + created_by="test-user", name="a.txt", type="txt") + + _setup_request(module, json_payload={ + "message": "c1", + "files": [{"file_id": "f1", "file_name": "a.txt", "operation": "add", "content": "data"}], + }) + create_res = _run(module.create_commit("folder-a")) + commit_id = create_res["data"]["id"] + + # Attempt to read commit from a different folder + res = _run(module.get_commit("folder-b", commit_id)) + assert res["code"] == 102 + assert "not found in workspace" in res["message"].lower() diff --git a/test/testcases/restful_api/test_sessions.py b/test/testcases/restful_api/test_sessions.py index 7a23a39790..224d573cd4 100644 --- a/test/testcases/restful_api/test_sessions.py +++ b/test/testcases/restful_api/test_sessions.py @@ -540,12 +540,6 @@ def test_session_update_name_and_param_contract(rest_client, create_chat): else: assert body["message"] == expected_name_or_message, (scenario_name, body) - unknown_key_res = rest_client.patch(f"/chats/{chat_id}/sessions/{session_id}", json={"unknown_key": "unknown_value"}) - assert unknown_key_res.status_code == 200 - unknown_key_payload = unknown_key_res.json() - assert unknown_key_payload["code"] == 100, unknown_key_payload - assert 'Unrecognized field name: "unknown_key"' in unknown_key_payload["message"], unknown_key_payload - for scenario_name, payload in (("empty payload", {}), ("none payload", None)): res = rest_client.patch(f"/chats/{chat_id}/sessions/{session_id}", json=payload) assert res.status_code == 200, (scenario_name, res.text)