From 4ee0702aed1395e88fccf4b8b0aea68ce76b9d62 Mon Sep 17 00:00:00 2001 From: Yingfeng Date: Thu, 30 Apr 2026 12:36:03 +0800 Subject: [PATCH] Feat: add skills space to context engine (#13908) ### What problem does this PR solve? issue #13714 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- api/apps/restful_apis/file_api.py | 4 +- api/apps/services/file_api_service.py | 170 +- api/db/__init__.py | 1 + api/db/services/file_service.py | 33 +- cmd/{ => cli}/ragflow_cli.go | 10 + cmd/server_main.go | 16 +- conf/skill_es_mapping.json | 136 ++ conf/skill_infinity_mapping.json | 64 + docker/nginx/ragflow.conf.golang | 5 + docker/nginx/ragflow.conf.hybrid | 5 + internal/cli/README.md | 46 +- internal/cli/cli.go | 447 +++- internal/cli/client.go | 19 +- internal/cli/context_command.go | 135 -- internal/cli/context_parser.go | 6 +- internal/cli/contextengine/README.md | 49 - internal/cli/filesystem/README.md | 195 ++ .../provider.go => filesystem/base.go} | 2 +- .../dataset.go} | 6 +- .../{contextengine => filesystem}/engine.go | 24 +- .../file_provider.go => filesystem/file.go} | 88 +- internal/cli/filesystem/skill.go | 2154 +++++++++++++++++ .../filesystem/skill_hub/security/guard.go | 164 ++ .../filesystem/skill_hub/security/patterns.go | 284 +++ .../filesystem/skill_hub/security/scanner.go | 150 ++ .../filesystem/skill_hub/source/clawhub.go | 933 +++++++ .../cli/filesystem/skill_hub/source/github.go | 260 ++ .../filesystem/skill_hub/source/interface.go | 177 ++ .../cli/filesystem/skill_hub/source/local.go | 206 ++ .../filesystem/skill_hub/source/skillssh.go | 574 +++++ .../cli/filesystem/skill_hub/source/types.go | 47 + internal/cli/filesystem/skill_install.go | 437 ++++ internal/cli/filesystem/skill_uninstall.go | 166 ++ .../{contextengine => filesystem}/types.go | 6 +- .../{contextengine => filesystem}/utils.go | 2 +- internal/cli/http_client.go | 44 + internal/cli/lexer.go | 2 +- internal/cli/parser.go | 174 +- 
internal/cli/response.go | 23 + internal/cli/types.go | 2 + internal/cli/user_command.go | 36 +- internal/cli/user_parser.go | 2 +- internal/dao/database.go | 1 + internal/dao/file.go | 10 +- internal/dao/migration.go | 161 ++ internal/dao/skill_search_config.go | 196 ++ internal/dao/skill_space.go | 141 ++ internal/engine/elasticsearch/client.go | 46 +- internal/engine/elasticsearch/document.go | 259 ++ internal/engine/elasticsearch/index.go | 195 +- internal/engine/elasticsearch/search.go | 239 +- internal/engine/engine.go | 8 + internal/engine/infinity/client.go | 10 +- internal/engine/infinity/common.go | 24 + internal/engine/infinity/dataset.go | 127 +- internal/engine/infinity/document.go | 239 ++ internal/engine/infinity/search.go | 187 +- internal/entity/models/types.go | 4 +- internal/entity/skill_search.go | 112 + internal/entity/skill_space.go | 90 + internal/handler/skill_search.go | 573 +++++ internal/router/router.go | 63 +- internal/server/config.go | 30 +- internal/service/model_service.go | 69 +- internal/service/skill_indexer.go | 1036 ++++++++ internal/service/skill_search.go | 1039 ++++++++ internal/service/skill_space.go | 645 +++++ .../test_file_app/test_file_routes.py | 2 +- web/.env.production | 3 +- web/src/assets/svg/home-icon/skill-folder.svg | 19 + web/src/assets/svg/home-icon/skill-space.svg | 14 + web/src/assets/svg/home-icon/skills.svg | 19 + web/src/components/empty/constant.tsx | 6 + web/src/components/icon-font.tsx | 9 + web/src/components/list-filter-bar/index.tsx | 16 +- web/src/components/ui/sheet.tsx | 4 +- web/src/components/ui/slider.tsx | 8 +- web/src/components/ui/spin.tsx | 9 +- web/src/locales/en.ts | 165 ++ web/src/locales/zh.ts | 157 ++ web/src/pages/files/action-cell.tsx | 5 + web/src/pages/files/files-table.tsx | 59 +- .../pages/skills/components/code-viewer.tsx | 118 + .../skills/components/create-space-dialog.tsx | 75 + .../delete-selected-spaces-dialog.tsx | 50 + .../skills/components/delete-space-dialog.tsx | 60 + 
.../skills/components/markdown-viewer.tsx | 147 ++ .../skills/components/rename-space-dialog.tsx | 80 + .../skills/components/search-config-modal.tsx | 429 ++++ .../pages/skills/components/skill-card.tsx | 168 ++ .../pages/skills/components/skill-detail.tsx | 505 ++++ .../pages/skills/components/upload-modal.tsx | 1055 ++++++++ web/src/pages/skills/hooks.ts | 1567 ++++++++++++ web/src/pages/skills/index.tsx | 1199 +++++++++ web/src/pages/skills/types.ts | 166 ++ web/src/pages/skills/utils.ts | 14 + web/src/pages/skills/validation.ts | 563 +++++ web/src/routes.tsx | 5 + web/src/services/skill-space-service.ts | 257 ++ web/src/utils/api.ts | 9 + web/vite.config.ts | 24 +- 101 files changed, 19161 insertions(+), 633 deletions(-) rename cmd/{ => cli}/ragflow_cli.go (78%) create mode 100644 conf/skill_es_mapping.json create mode 100644 conf/skill_infinity_mapping.json delete mode 100644 internal/cli/context_command.go delete mode 100644 internal/cli/contextengine/README.md create mode 100644 internal/cli/filesystem/README.md rename internal/cli/{contextengine/provider.go => filesystem/base.go} (99%) rename internal/cli/{contextengine/dataset_provider.go => filesystem/dataset.go} (99%) rename internal/cli/{contextengine => filesystem}/engine.go (94%) rename internal/cli/{contextengine/file_provider.go => filesystem/file.go} (87%) create mode 100644 internal/cli/filesystem/skill.go create mode 100644 internal/cli/filesystem/skill_hub/security/guard.go create mode 100644 internal/cli/filesystem/skill_hub/security/patterns.go create mode 100644 internal/cli/filesystem/skill_hub/security/scanner.go create mode 100644 internal/cli/filesystem/skill_hub/source/clawhub.go create mode 100644 internal/cli/filesystem/skill_hub/source/github.go create mode 100644 internal/cli/filesystem/skill_hub/source/interface.go create mode 100644 internal/cli/filesystem/skill_hub/source/local.go create mode 100644 internal/cli/filesystem/skill_hub/source/skillssh.go create mode 100644 
internal/cli/filesystem/skill_hub/source/types.go create mode 100644 internal/cli/filesystem/skill_install.go create mode 100644 internal/cli/filesystem/skill_uninstall.go rename internal/cli/{contextengine => filesystem}/types.go (96%) rename internal/cli/{contextengine => filesystem}/utils.go (99%) create mode 100644 internal/dao/skill_search_config.go create mode 100644 internal/dao/skill_space.go create mode 100644 internal/engine/elasticsearch/document.go create mode 100644 internal/engine/infinity/document.go create mode 100644 internal/entity/skill_search.go create mode 100644 internal/entity/skill_space.go create mode 100644 internal/handler/skill_search.go create mode 100644 internal/service/skill_indexer.go create mode 100644 internal/service/skill_search.go create mode 100644 internal/service/skill_space.go create mode 100644 web/src/assets/svg/home-icon/skill-folder.svg create mode 100644 web/src/assets/svg/home-icon/skill-space.svg create mode 100644 web/src/assets/svg/home-icon/skills.svg create mode 100644 web/src/pages/skills/components/code-viewer.tsx create mode 100644 web/src/pages/skills/components/create-space-dialog.tsx create mode 100644 web/src/pages/skills/components/delete-selected-spaces-dialog.tsx create mode 100644 web/src/pages/skills/components/delete-space-dialog.tsx create mode 100644 web/src/pages/skills/components/markdown-viewer.tsx create mode 100644 web/src/pages/skills/components/rename-space-dialog.tsx create mode 100644 web/src/pages/skills/components/search-config-modal.tsx create mode 100644 web/src/pages/skills/components/skill-card.tsx create mode 100644 web/src/pages/skills/components/skill-detail.tsx create mode 100644 web/src/pages/skills/components/upload-modal.tsx create mode 100644 web/src/pages/skills/hooks.ts create mode 100644 web/src/pages/skills/index.tsx create mode 100644 web/src/pages/skills/types.ts create mode 100644 web/src/pages/skills/utils.ts create mode 100644 web/src/pages/skills/validation.ts 
create mode 100644 web/src/services/skill-space-service.ts diff --git a/api/apps/restful_apis/file_api.py b/api/apps/restful_apis/file_api.py index 306c64c651..58c6cde727 100644 --- a/api/apps/restful_apis/file_api.py +++ b/api/apps/restful_apis/file_api.py @@ -187,7 +187,9 @@ async def delete(tenant_id: str = None): return get_error_argument_result(err) try: - success, result = await file_api_service.delete_files(tenant_id, req["ids"]) + # Get Authorization header to pass to Go backend + auth_header = request.headers.get("Authorization", "") + success, result = await file_api_service.delete_files(tenant_id, req["ids"], auth_header) if success: return get_result(data=result) else: diff --git a/api/apps/services/file_api_service.py b/api/apps/services/file_api_service.py index da4df98773..ac5fb800c9 100644 --- a/api/apps/services/file_api_service.py +++ b/api/apps/services/file_api_service.py @@ -67,14 +67,14 @@ async def upload_file(tenant_id: str, pf_id: str, file_objs: list): if not e: return False, "Folder not found!" last_folder = await thread_pool_exec( - FileService.create_folder, file, file_id_list[len_id_list - 1], file_obj_names, len_id_list + FileService.create_folder, file, file_id_list[len_id_list - 1], file_obj_names, len_id_list, tenant_id, tenant_id ) else: e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 2]) if not e: return False, "Folder not found!" 
last_folder = await thread_pool_exec( - FileService.create_folder, file, file_id_list[len_id_list - 2], file_obj_names, len_id_list + FileService.create_folder, file, file_id_list[len_id_list - 2], file_obj_names, len_id_list, tenant_id, tenant_id ) filetype = filename_type(file_obj_names[file_len - 1]) @@ -158,6 +158,7 @@ def list_files(tenant_id: str, args: dict): root_folder = FileService.get_root_folder(tenant_id) pf_id = root_folder["id"] FileService.init_knowledgebase_docs(pf_id, tenant_id) + FileService.init_skills_folder(pf_id, tenant_id) e, file = FileService.get_by_id(pf_id) if not e: @@ -203,17 +204,110 @@ def get_all_parent_folders(file_id: str): return True, {"parent_folders": [pf.to_json() for pf in parent_folders]} -async def delete_files(uid: str, file_ids: list): +async def delete_files(uid: str, file_ids: list, auth_header: str = ""): """ Delete files/folders with team permission check and recursive deletion. :param uid: user ID :param file_ids: list of file IDs to delete + :param auth_header: Authorization header for Go backend API calls :return: (success, result) or (success, error_message) """ errors: list[str] = [] success_count = 0 + def _get_space_uuid_by_name(tenant_id, space_name, authorization): + """Get space UUID by space name from Go backend""" + try: + import requests + + host = getattr(settings, 'HOST_IP', '127.0.0.1') + # Go service runs on port+4 (9384 by default) + port = getattr(settings, 'HOST_PORT', 9380) + 4 + service_url = f"http://{host}:{port}" + + # List all spaces and find the one matching the name + url = f"{service_url}/api/v1/skills/spaces" + headers = {"Content-Type": "application/json"} + if authorization: + headers["Authorization"] = authorization + + response = requests.get(url, headers=headers, timeout=10) + + if response.status_code == 200: + data = response.json() + if data.get("code") == 0: + spaces = data.get("data", {}).get("spaces", []) + for space in spaces: + if space.get("name") == space_name: + return 
space.get("id") + except Exception as e: + logging.warning(f"Error getting space UUID: {e}") + return None + + def _delete_skill_index(tenant_id, space_name, skill_name, authorization): + """Delete skill index from Go backend. + + Returns: + bool: True if deletion succeeded (HTTP 200), False otherwise. + """ + try: + import requests + from urllib.parse import quote + + # Construct service URL from settings + host = getattr(settings, 'HOST_IP', '127.0.0.1') + # Go service runs on port+4 (9384 by default) + port = getattr(settings, 'HOST_PORT', 9380) + 4 + service_url = f"http://{host}:{port}" + + # Get space UUID from space name + space_uuid = _get_space_uuid_by_name(tenant_id, space_name, authorization) + space_id = space_uuid if space_uuid else space_name + + url = f"{service_url}/api/v1/skills/index?skill_id={quote(skill_name)}&space_id={quote(space_id)}" + headers = {"Content-Type": "application/json"} + if authorization: + headers["Authorization"] = authorization + + response = requests.delete(url, headers=headers, timeout=10) + if response.status_code == 200: + try: + data = response.json() + if data.get("code") == 0: + logging.info( + f"Successfully deleted skill index: space={space_name}, skill={skill_name}, " + f"status={response.status_code}, code=0" + ) + return True + else: + app_code = data.get("code", "unknown") + app_msg = data.get("message", "no message") + logging.error( + f"Failed to delete skill index: space={space_name}, skill={skill_name}, " + f"status={response.status_code}, app_code={app_code}, app_msg={app_msg}, " + f"response={response.text}" + ) + return False + except ValueError as json_err: + # JSON decode error - treat as failure + logging.error( + f"Failed to parse delete response JSON: space={space_name}, skill={skill_name}, " + f"error={json_err}, raw_response={response.text}" + ) + return False + else: + logging.error( + f"Failed to delete skill index: space={space_name}, skill={skill_name}, " + f"status={response.status_code}, 
response={response.text}" + ) + return False + except Exception as e: + logging.error( + f"Exception deleting skill index: space={space_name}, skill={skill_name}, error={e}" + ) + return False + def _delete_single_file(file) -> int: try: if file.location: @@ -254,9 +348,64 @@ async def delete_files(uid: str, file_ids: list): return 0 - def _delete_folder_recursive(folder, tenant_id): + def _find_ancestor_skill_space(folder_id, tenant_id): + """Walk up the folder hierarchy to find an ancestor with source_type == 'skill_space'. + + Returns: + tuple: (success, folder) where folder has source_type == 'skill_space', or (False, None) + """ + visited = set() + current_id = folder_id + while current_id and current_id not in visited: + visited.add(current_id) + success, folder = FileService.get_by_id(current_id) + if not success or not folder: + return False, None + if folder.source_type == "skill_space": + return True, folder + # Move to parent + current_id = folder.parent_id + return False, None + + def _delete_folder_recursive(folder, tenant_id) -> int: deleted = 0 + current_space_name = None + is_space_folder = folder.source_type == "skill_space" + is_skill_folder = False + + if not is_space_folder: + parent_success, parent_folder = FileService.get_by_id(folder.parent_id) + if parent_success and parent_folder and parent_folder.source_type == "skill_space": + is_skill_folder = True + current_space_name = parent_folder.name + logging.info(f"Identified skill folder '{folder.name}' (parent space: {current_space_name})") + else: + ancestor_success, ancestor_folder = _find_ancestor_skill_space(folder.parent_id, tenant_id) + if ancestor_success and ancestor_folder: + is_skill_folder = True + current_space_name = ancestor_folder.name + logging.info(f"Identified skill folder '{folder.name}' (ancestor space: {current_space_name})") + + if is_space_folder: + current_space_name = folder.name + logging.info(f"Processing space folder '{folder.name}' - will delete all skill indexes 
within") + + if is_skill_folder and current_space_name and not is_space_folder: + logging.info(f"Deleting skill index for skill '{folder.name}' in space '{current_space_name}'") + index_deleted = _delete_skill_index(tenant_id, current_space_name, folder.name, auth_header) + if not index_deleted: + logging.error( + f"Aborting folder deletion due to index deletion failure: " + f"folder={folder.name}, space={current_space_name}" + ) + errors.append( + f"Failed to delete skill index for folder '{folder.name}' in space '{current_space_name}'. " + f"Folder deletion aborted to prevent orphaned indexes." + ) + return deleted sub_files = FileService.list_all_files_by_parent_id(folder.id) + logging.info(f"Folder '{folder.name}': found {len(sub_files)} children to delete") + for sub_file in sub_files: if sub_file.type == FileType.FOLDER.value: deleted += _delete_folder_recursive(sub_file, tenant_id) @@ -269,6 +418,16 @@ async def delete_files(uid: str, file_ids: list): errors.append(f"Failed to delete folder record {folder.id}: {e}") else: deleted += 1 + + try: + if hasattr(settings.STORAGE_IMPL, 'remove_bucket'): + logging.info(f"Removing storage bucket for folder '{folder.name}' (id={folder.id})") + settings.STORAGE_IMPL.remove_bucket(folder.id) + else: + logging.debug(f"Storage implementation does not support remove_bucket, skipping for folder '{folder.name}'") + except Exception as e: + logging.warning(f"Failed to remove storage bucket for folder '{folder.name}' (id={folder.id}): {e}") + return deleted def _rm_sync(): @@ -288,6 +447,9 @@ async def delete_files(uid: str, file_ids: list): if file.source_type == FileSource.KNOWLEDGEBASE: continue + if file.source_type == "skill_space": + continue + if file.type == FileType.FOLDER.value: success_count += _delete_folder_recursive(file, uid) continue diff --git a/api/db/__init__.py b/api/db/__init__.py index 0ebd9f56f3..6d7ed9fcb9 100644 --- a/api/db/__init__.py +++ b/api/db/__init__.py @@ -74,3 +74,4 @@ 
PIPELINE_SPECIAL_PROGRESS_FREEZE_TASK_TYPES = {PipelineTaskType.RAPTOR.lower(), KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase" +SKILLS_FOLDER_NAME="skills" diff --git a/api/db/services/file_service.py b/api/db/services/file_service.py index 11a5565b38..db8ae4b72f 100644 --- a/api/db/services/file_service.py +++ b/api/db/services/file_service.py @@ -28,7 +28,7 @@ logger = logging.getLogger(__name__) import xxhash from peewee import fn -from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileType +from api.db import KNOWLEDGEBASE_FOLDER_NAME, SKILLS_FOLDER_NAME, FileType from api.db.db_models import DB, Document, File, File2Document, Knowledgebase, Task from api.db.services import duplicate_name from api.db.services.common_service import CommonService @@ -191,23 +191,24 @@ class FileService(CommonService): @classmethod @DB.connection_context() - def create_folder(cls, file, parent_id, name, count): - from api.apps import current_user + def create_folder(cls, file, parent_id, name, count, tenant_id, created_by): # Recursively create folder structure # Args: # file: Current file object # parent_id: Parent folder ID # name: List of folder names to create # count: Current depth in creation + # tenant_id: Tenant ID + # created_by: Created by user ID # Returns: # Created file object if count > len(name) - 2: return file else: file = cls.insert( - {"id": get_uuid(), "parent_id": parent_id, "tenant_id": current_user.id, "created_by": current_user.id, "name": name[count], "location": "", "size": 0, "type": FileType.FOLDER.value} + {"id": get_uuid(), "parent_id": parent_id, "tenant_id": tenant_id, "created_by": created_by, "name": name[count], "location": "", "size": 0, "type": FileType.FOLDER.value} ) - return cls.create_folder(file, file.id, name, count + 1) + return cls.create_folder(file, file.id, name, count + 1, tenant_id, created_by) @classmethod @DB.connection_context() @@ -293,6 +294,28 @@ class FileService(CommonService): cls.save(**file) return file + @classmethod + 
@DB.connection_context() + def init_skills_folder(cls, root_id, tenant_id): + # Initialize skills folder if not exists + # Args: + # root_id: Root folder ID + # tenant_id: Tenant ID + for _ in cls.model.select().where((cls.model.name == SKILLS_FOLDER_NAME) & (cls.model.parent_id == root_id)): + return + file_id = get_uuid() + file = { + "id": file_id, + "parent_id": root_id, + "tenant_id": tenant_id, + "created_by": tenant_id, + "name": SKILLS_FOLDER_NAME, + "type": FileType.FOLDER.value, + "size": 0, + "location": "", + } + cls.save(**file) + @classmethod @DB.connection_context() def init_knowledgebase_docs(cls, root_id, tenant_id): diff --git a/cmd/ragflow_cli.go b/cmd/cli/ragflow_cli.go similarity index 78% rename from cmd/ragflow_cli.go rename to cmd/cli/ragflow_cli.go index bb18a5a44e..0b27397ffa 100644 --- a/cmd/ragflow_cli.go +++ b/cmd/cli/ragflow_cli.go @@ -7,6 +7,7 @@ import ( "syscall" "ragflow/internal/cli" + "ragflow/internal/logger" ) func main() { @@ -17,6 +18,15 @@ func main() { os.Exit(1) } + // Initialize logger with appropriate level + logLevel := "warn" // Default to warn (quiet mode) + if args.Verbose { + logLevel = "info" + } + if err := logger.Init(logLevel); err != nil { + fmt.Printf("Warning: Failed to initialize logger: %v\n", err) + } + // Show help and exit if args.ShowHelp { cli.PrintUsage() diff --git a/cmd/server_main.go b/cmd/server_main.go index 66a56e789a..0da92fb032 100644 --- a/cmd/server_main.go +++ b/cmd/server_main.go @@ -181,6 +181,9 @@ func startServer(config *server.Config) { memoryService := service.NewMemoryService() modelProviderService := service.NewModelProviderService() + // Initialize doc engine for skill search + docEngine := engine.Get() + // Initialize handler layer authHandler := handler.NewAuthHandler() userHandler := handler.NewUserHandler(userService) @@ -197,10 +200,11 @@ func startServer(config *server.Config) { searchHandler := handler.NewSearchHandler(searchService, userService) fileHandler := 
handler.NewFileHandler(fileService, userService) memoryHandler := handler.NewMemoryHandler(memoryService) + skillSearchHandler := handler.NewSkillSearchHandler(docEngine) providerHandler := handler.NewProviderHandler(userService, modelProviderService) // Initialize router - r := router.NewRouter(authHandler, userHandler, tenantHandler, documentHandler, datasetsHandler, systemHandler, kbHandler, chunkHandler, llmHandler, chatHandler, chatSessionHandler, connectorHandler, searchHandler, fileHandler, memoryHandler, providerHandler) + r := router.NewRouter(authHandler, userHandler, tenantHandler, documentHandler, datasetsHandler, systemHandler, kbHandler, chunkHandler, llmHandler, chatHandler, chatSessionHandler, connectorHandler, searchHandler, fileHandler, memoryHandler, skillSearchHandler, providerHandler) // Create Gin engine ginEngine := gin.New() @@ -214,11 +218,15 @@ func startServer(config *server.Config) { // Setup routes r.Setup(ginEngine) - // Create HTTP server + // Create HTTP server with timeouts to prevent slow clients from blocking shutdown addr := fmt.Sprintf(":%d", config.Server.Port) srv := &http.Server{ - Addr: addr, - Handler: ginEngine, + Addr: addr, + Handler: ginEngine, + ReadHeaderTimeout: 10 * time.Second, + ReadTimeout: 60 * time.Second, + WriteTimeout: 120 * time.Second, + IdleTimeout: 120 * time.Second, } // Start server in a goroutine diff --git a/conf/skill_es_mapping.json b/conf/skill_es_mapping.json new file mode 100644 index 0000000000..a9d3cba869 --- /dev/null +++ b/conf/skill_es_mapping.json @@ -0,0 +1,136 @@ +{ + "settings": { + "index": { + "number_of_shards": 1, + "number_of_replicas": 0, + "refresh_interval": "1000ms" + }, + "similarity": { + "scripted_sim": { + "type": "scripted", + "script": { + "source": "double idf = Math.log(1+(field.docCount-term.docFreq+0.5)/(term.docFreq + 0.5))/Math.log(1+((field.docCount-0.5)/1.5)); return query.boost * idf * Math.min(doc.freq, 1);" + } + } + } + }, + "mappings": { + "dynamic": false, + 
"properties": { + "skill_id": { + "type": "keyword", + "store": true + }, + "space_id": { + "type": "keyword", + "store": true + }, + "folder_id": { + "type": "keyword", + "store": true + }, + "name": { + "type": "text", + "index": false, + "store": true + }, + "name_tks": { + "type": "text", + "similarity": "scripted_sim", + "analyzer": "whitespace", + "store": true + }, + "tags": { + "type": "text", + "index": false, + "store": true + }, + "tags_tks": { + "type": "text", + "similarity": "scripted_sim", + "analyzer": "whitespace", + "store": true + }, + "description": { + "type": "text", + "index": false, + "store": true + }, + "description_tks": { + "type": "text", + "similarity": "scripted_sim", + "analyzer": "whitespace", + "store": true + }, + "content": { + "type": "text", + "index": false, + "store": true + }, + "content_tks": { + "type": "text", + "similarity": "scripted_sim", + "analyzer": "whitespace", + "store": true + }, + "q_3072_vec": { + "type": "dense_vector", + "dims": 3072, + "index": true, + "similarity": "cosine" + }, + "q_2560_vec": { + "type": "dense_vector", + "dims": 2560, + "index": true, + "similarity": "cosine" + }, + "q_1536_vec": { + "type": "dense_vector", + "dims": 1536, + "index": true, + "similarity": "cosine" + }, + "q_1024_vec": { + "type": "dense_vector", + "dims": 1024, + "index": true, + "similarity": "cosine" + }, + "q_768_vec": { + "type": "dense_vector", + "dims": 768, + "index": true, + "similarity": "cosine" + }, + "q_512_vec": { + "type": "dense_vector", + "dims": 512, + "index": true, + "similarity": "cosine" + }, + "q_256_vec": { + "type": "dense_vector", + "dims": 256, + "index": true, + "similarity": "cosine" + }, + "version": { + "type": "keyword", + "store": true + }, + "status": { + "type": "keyword", + "store": true + }, + "create_time": { + "type": "long", + "store": true + }, + "update_time": { + "type": "long", + "store": true + } + } + } +} diff --git a/conf/skill_infinity_mapping.json 
b/conf/skill_infinity_mapping.json new file mode 100644 index 0000000000..4e4766ea8f --- /dev/null +++ b/conf/skill_infinity_mapping.json @@ -0,0 +1,64 @@ +{ + "skill_id": { + "type": "varchar", + "default": "", + "index_type": "secondary" + }, + "space_id": { + "type": "varchar", + "default": "", + "index_type": "secondary" + }, + "folder_id": { + "type": "varchar", + "default": "" + }, + "name": { + "type": "varchar", + "default": "", + "analyzer": [ + "rag-coarse", + "rag-fine" + ] + }, + "tags": { + "type": "varchar", + "default": "", + "analyzer": [ + "rag-coarse", + "rag-fine" + ] + }, + "description": { + "type": "varchar", + "default": "", + "analyzer": [ + "rag-coarse", + "rag-fine" + ] + }, + "content": { + "type": "varchar", + "default": "", + "analyzer": [ + "rag-coarse", + "rag-fine" + ] + }, + "version": { + "type": "varchar", + "default": "1.0.0" + }, + "status": { + "type": "varchar", + "default": "1" + }, + "create_time": { + "type": "bigint", + "default": 0 + }, + "update_time": { + "type": "bigint", + "default": 0 + } +} \ No newline at end of file diff --git a/docker/nginx/ragflow.conf.golang b/docker/nginx/ragflow.conf.golang index d5c9bb1292..f63e7d819f 100644 --- a/docker/nginx/ragflow.conf.golang +++ b/docker/nginx/ragflow.conf.golang @@ -15,6 +15,11 @@ server { include proxy.conf; } + location ~ ^/api/v1/skills { + proxy_pass http://127.0.0.1:9384; + include proxy.conf; + } + location ~ ^/(v1|api) { proxy_pass http://127.0.0.1:9382; include proxy.conf; diff --git a/docker/nginx/ragflow.conf.hybrid b/docker/nginx/ragflow.conf.hybrid index 0fc5f50808..1f68187063 100644 --- a/docker/nginx/ragflow.conf.hybrid +++ b/docker/nginx/ragflow.conf.hybrid @@ -40,6 +40,11 @@ server { include proxy.conf; } + location ~ ^/api/v1/skills { + proxy_pass http://127.0.0.1:9384; + include proxy.conf; + } + location ~ ^/v1/system/config { proxy_pass http://127.0.0.1:9384; include proxy.conf; diff --git a/internal/cli/README.md b/internal/cli/README.md index 
c626b57f00..f55dc21e14 100644 --- a/internal/cli/README.md +++ b/internal/cli/README.md @@ -7,12 +7,12 @@ This is the Go implementation of the RAGFlow command-line interface, compatible - Interactive mode and single command execution - Full compatibility with Python CLI syntax - Recursive descent parser for SQL-like commands -- Context Engine (Virtual Filesystem) for intuitive resource management +- Virtual Filesystem for intuitive resource management - Support for all major commands: - User management: LOGIN, REGISTER, CREATE USER, DROP USER, LIST USERS, etc. - Service management: LIST SERVICES, SHOW SERVICE, STARTUP/SHUTDOWN/RESTART SERVICE - Role management: CREATE ROLE, DROP ROLE, LIST ROLES, GRANT/REVOKE PERMISSION - - Dataset management via Context Engine: `ls`, `search`, `mkdir`, `cat`, `rm` + - Dataset management via Virtual Filesystem: `ls`, `search`, `mkdir`, `cat`, `rm` - Model management: SET/RESET DEFAULT LLM/VLM/EMBEDDING/etc. - And more... @@ -30,24 +30,24 @@ go build -o ragflow_cli ./cmd/ragflow_cli.go ``` internal/cli/ ├── cli.go # Main CLI loop and interaction -├── client.go # RAGFlowClient with Context Engine integration +├── client.go # RAGFlowClient with Filesystem integration ├── http_client.go # HTTP client for API communication ├── parser/ # Command parser package │ ├── types.go # Token and Command types │ ├── lexer.go # Lexical analyzer │ └── parser.go # Recursive descent parser -└── contextengine/ # Context Engine (Virtual Filesystem) +└── filesystem/ # Virtual Filesystem ├── engine.go # Core engine: path resolution, command routing ├── types.go # Node, Command, Result types - ├── provider.go # Provider interface definition - ├── dataset_provider.go # Dataset provider implementation - ├── file_provider.go # File manager provider implementation + ├── base.go # Provider interface definition + ├── dataset.go # Dataset provider implementation + ├── file.go # File manager provider implementation └── utils.go # Helper functions ``` -## Context 
Engine +## Virtual Filesystem -The Context Engine provides a unified virtual filesystem interface over RAGFlow's RESTful APIs. +The Virtual Filesystem provides a unified filesystem interface over RAGFlow's RESTful APIs. ### Design Principles @@ -90,11 +90,7 @@ ls datasets/kb1 -n 50 # List 50 files in kb1 dataset Semantic search in datasets. **Options:** -- `-d, --dir ` - Directory to search in (can be specified multiple times) -- `-q, --query ` - Search query (required) -- `-k, --top-k ` - Number of top results to return (default: 10) -- `-t, --threshold ` - Similarity threshold, 0.0-1.0 (default: 0.2) -- `-h, --help` - Show search help message +- `-n, --number` - Number of top results to return (default: 10) **Output Formats:** - Default: JSON format @@ -103,10 +99,10 @@ Semantic search in datasets. **Examples:** ```bash -search -q "machine learning" # Search all datasets (JSON output) -search -d datasets/kb1 -q "neural networks" # Search in kb1 -search -d datasets/kb1 -q "AI" --output plain # Plain text output -search -q "RAG" -k 20 -t 0.5 # Return 20 results with threshold 0.5 +search "machine learning" # Search all datasets (JSON output) +search "neural networks" datasets/kb1 # Search in kb1 +search "AI" datasets/kb1 --output plain # Plain text output +search "RAG" -n 20 # Return 20 results ``` #### `cat ` - Display content @@ -155,20 +151,6 @@ SET DEFAULT LLM 'gpt-4'; SET DEFAULT EMBEDDING 'text-embedding-ada-002'; RESET DEFAULT LLM; --- Context Engine (Virtual Filesystem) -ls; -- List all datasets (default 10) -ls -n 20; -- List 20 datasets -ls datasets/my_dataset; -- List documents in dataset -ls datasets/my_dataset -n 50; -- List 50 documents -ls datasets/my_dataset/info; -- Show dataset info -search -q "test"; -- Search all datasets (JSON output) -search -d datasets/my_dataset -q "test"; -- Search in specific dataset - --- Meta commands -\? 
-- Show help -\q -- Quit -\c -- Clear screen -``` ## Parser Implementation diff --git a/internal/cli/cli.go b/internal/cli/cli.go index eb4b29c8f5..770ca98341 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -23,6 +23,7 @@ import ( "fmt" "os" "os/signal" + "path/filepath" "strconv" "strings" "syscall" @@ -31,7 +32,7 @@ import ( "github.com/peterh/liner" "gopkg.in/yaml.v3" - "ragflow/internal/cli/contextengine" + "ragflow/internal/cli/filesystem" ) // ConfigFile represents the rf.yml configuration file structure @@ -53,17 +54,19 @@ const ( // ConnectionArgs holds the parsed command line arguments type ConnectionArgs struct { - Host string - Port int - Password string - APIToken string - UserName string - Command *string // Original command string (for SQL mode) - CommandArgs []string // Split command arguments (for ContextEngine mode) - IsSQLMode bool // true=SQL mode (quoted), false=ContextEngine mode (unquoted) - ShowHelp bool - AdminMode bool - OutputFormat OutputFormat // Output format: table, plain, json + Host string + Port int + Password string + APIToken string + UserName string + ConfigFilePath string // Path to the config file (e.g., rf.yml) + Command *string // Original command string (for SQL mode) + CommandArgs []string // Split command arguments (for ContextEngine mode) + IsSQLMode bool // true=SQL mode (quoted), false= ContextEngine mode (unquoted) + ShowHelp bool + AdminMode bool + OutputFormat OutputFormat // Output format: table, plain, json + Verbose bool // Enable verbose logging } // LoadDefaultConfigFile reads the rf.yml file from current directory if it exists @@ -124,9 +127,10 @@ func parseHostPort(hostPort string) (string, int, error) { // ParseConnectionArgs parses command line arguments similar to Python's parse_connection_args func ParseConnectionArgs(args []string) (*ConnectionArgs, error) { - // First, scan args to check for help, config file, and admin mode + // First, scan args to check for help, config file, admin mode, 
and verbose flag var configFilePath string var adminMode bool = false + var verboseMode bool = false foundCommand := false for i := 0; i < len(args); i++ { arg := args[i] @@ -138,9 +142,16 @@ func ParseConnectionArgs(args []string) (*ConnectionArgs, error) { } // Only process --help as global help if it's before any command if !foundCommand && (arg == "--help" || arg == "-help") { - return &ConnectionArgs{ShowHelp: true}, nil + return &ConnectionArgs{ShowHelp: true, Verbose: verboseMode}, nil } else if (arg == "-f" || arg == "--config") && i+1 < len(args) { configFilePath = args[i+1] + // Convert to absolute path immediately + if !filepath.IsAbs(configFilePath) { + absPath, err := filepath.Abs(configFilePath) + if err == nil { + configFilePath = absPath + } + } i++ } else if (arg == "-o" || arg == "--output") && i+1 < len(args) { // -o/--output is allowed with config file, skip it and its value @@ -148,6 +159,8 @@ func ParseConnectionArgs(args []string) (*ConnectionArgs, error) { continue } else if arg == "--admin" { adminMode = true + } else if arg == "-v" || arg == "--verbose" { + verboseMode = true } } @@ -158,7 +171,10 @@ func ParseConnectionArgs(args []string) (*ConnectionArgs, error) { // Parse arguments manually to support both short and long forms // and to handle priority: command line > config file > defaults - result := &ConnectionArgs{} + result := &ConnectionArgs{ + Verbose: verboseMode, + ConfigFilePath: configFilePath, + } if !adminMode { // Only user mode read config file @@ -256,6 +272,8 @@ func ParseConnectionArgs(args []string) (*ConnectionArgs, error) { } i++ } + case "-v", "--verbose": + result.Verbose = true case "--admin", "-admin": result.AdminMode = true case "--help", "-help": @@ -303,12 +321,24 @@ func ParseConnectionArgs(args []string) (*ConnectionArgs, error) { } } - // Get command from remaining args (non-flag arguments) // Get command from remaining args (non-flag arguments) if len(nonFlagArgs) > 0 { - command := 
strings.Join(nonFlagArgs, " ") - result.Command = &command - fmt.Printf("COMMAND: %s\n", command) + // Check if this is SQL mode or ContextEngine mode + // SQL mode: single argument that looks like SQL (e.g., "LIST DATASETS") + // ContextEngine mode: multiple arguments (e.g., "ls", "datasets") + if len(nonFlagArgs) == 1 && looksLikeSQL(nonFlagArgs[0]) { + // SQL mode: single argument that looks like SQL + result.IsSQLMode = true + command := nonFlagArgs[0] + result.Command = &command + } else { + // ContextEngine mode: multiple arguments + result.IsSQLMode = false + result.CommandArgs = nonFlagArgs + // Also store joined version for backward compatibility + command := strings.Join(nonFlagArgs, " ") + result.Command = &command + } } return result, nil @@ -345,6 +375,7 @@ Options: -p, --password string Password for authentication -f, --config string Path to config file (YAML format) -o, --output string Output format: table, plain, json (search defaults to json) + -v, --verbose Enable verbose logging (shows debug info) --admin, -admin Run in admin mode --help Show this help message @@ -373,7 +404,11 @@ Configuration File: Commands: SQL commands (use quotes): "LIST USERS", "CREATE USER 'email' 'password'", etc. - Context Engine commands (no quotes): ls datasets, search "keyword", cat path, etc. + Filesystem commands (no quotes): ls datasets, search "keyword", cat path, etc. 
+ Skill commands: + install-skill <space-id> <source> [options] Install a skill from local path or remote URL + uninstall-skill <space-id> <skill-name> Remove an installed skill + search <query> skills[/<space-id>] [-n <number>] Search skills in a space If no command is provided, CLI runs in interactive mode.`) } @@ -386,13 +421,13 @@ const historyFileName = ".ragflow_cli_history" // CLI represents the command line interface type CLI struct { - client *RAGFlowClient - contextEngine *contextengine.Engine - prompt string - running bool - line *liner.State - args *ConnectionArgs - outputFormat OutputFormat // Output format + client *RAGFlowClient + contextEngine *filesystem.Engine + prompt string + running bool + line *liner.State + args *ConnectionArgs + outputFormat OutputFormat // Output format } // NewCLI creates a new CLI instance @@ -451,10 +486,11 @@ func NewCLIWithArgs(args *ConnectionArgs) (*CLI, error) { prompt = "RAGFlow(admin)> " } - // Create context engine and register providers - engine := contextengine.NewEngine() - engine.RegisterProvider(contextengine.NewDatasetProvider(&httpClientAdapter{client: client.HTTPClient})) - engine.RegisterProvider(contextengine.NewFileProvider(&httpClientAdapter{client: client.HTTPClient})) + // Create filesystem engine and register providers + engine := filesystem.NewEngine() + engine.RegisterProvider(filesystem.NewDatasetProvider(&httpClientAdapter{client: client.HTTPClient})) + engine.RegisterProvider(filesystem.NewFileProvider(&httpClientAdapter{client: client.HTTPClient})) + engine.RegisterProvider(filesystem.NewSkillProvider(&httpClientAdapter{client: client.HTTPClient})) return &CLI{ prompt: prompt, @@ -587,7 +623,7 @@ func (c *CLI) execute(input string) error { } } - // Check if we should use SQL mode or ContextEngine mode + // Check if we should use SQL mode or Filesystem mode isSQLMode := false if c.args != nil && len(c.args.CommandArgs) > 0 { // Non-interactive mode: use pre-determined mode from args @@ -617,12 +653,12 @@ func (c *CLI) execute(input string) error { 
return err } - // ContextEngine mode: execute context engine command - return c.executeContextEngine(input) + // Filesystem mode: execute filesystem command + return c.executeFilesystem(input) } -// executeContextEngine executes a Context Engine command -func (c *CLI) executeContextEngine(input string) error { +// executeFilesystem executes a Filesystem command +func (c *CLI) executeFilesystem(input string) error { // Parse input into arguments var args []string if c.args != nil && len(c.args.CommandArgs) > 0 { @@ -630,23 +666,23 @@ func (c *CLI) executeContextEngine(input string) error { args = c.args.CommandArgs } else { // Interactive mode: parse input - args = parseContextEngineArgs(input) + args = parseFilesystemArgs(input) } if len(args) == 0 { return fmt.Errorf("no command provided") } - // Check if we have a context engine + // Check if we have a filesystem engine if c.contextEngine == nil { - return fmt.Errorf("context engine not available") + return fmt.Errorf("filesystem engine not available") } cmdType := args[0] cmdArgs := args[1:] - // Build context engine command - var ceCmd *contextengine.Command + // Build filesystem command + var ceCmd *filesystem.Command switch cmdType { case "ls", "list": @@ -659,8 +695,8 @@ func (c *CLI) executeContextEngine(input string) error { // Help was printed return nil } - ceCmd = &contextengine.Command{ - Type: contextengine.CommandList, + ceCmd = &filesystem.Command{ + Type: filesystem.CommandList, Path: listOpts.Path, Params: map[string]interface{}{ "limit": listOpts.Limit, @@ -682,8 +718,45 @@ func (c *CLI) executeContextEngine(input string) error { if len(searchOpts.Dirs) > 0 { searchPath = searchOpts.Dirs[0] } - ceCmd = &contextengine.Command{ - Type: contextengine.CommandSearch, + // Check if searching skills (supports: "skills" or "skills/space1") + if searchPath == "skills" || strings.HasPrefix(searchPath, "skills/") { + // Parse space ID from path (e.g., "skills/space1" -> "space1") + spaceID := "default" + if 
strings.HasPrefix(searchPath, "skills/") { + spaceID = strings.TrimPrefix(searchPath, "skills/") + if spaceID == "" { + spaceID = "default" + } + } + // Get skill provider and perform search + provider := c.contextEngine.GetProvider("skills") + if provider == nil { + return fmt.Errorf("skill provider not available") + } + skillProvider, ok := provider.(*filesystem.SkillProvider) + if !ok { + return fmt.Errorf("invalid skill provider type") + } + pageSize := searchOpts.TopK + if pageSize <= 0 { + pageSize = 10 + } + searchOptions := &filesystem.SearchOptions{ + Query: searchOpts.Query, + Limit: pageSize, + Offset: 0, + TopK: pageSize, + } + result, err := skillProvider.Search(context.Background(), spaceID, searchOptions) + if err != nil { + return err + } + // Print skill search results with full details + c.printSkillSearchResults(result, c.outputFormat) + return nil + } + ceCmd = &filesystem.Command{ + Type: filesystem.CommandSearch, Path: searchPath, Params: map[string]interface{}{ "query": searchOpts.Query, @@ -709,8 +782,66 @@ func (c *CLI) executeContextEngine(input string) error { fmt.Println(string(content)) return nil + case "install-skill": + // Get the file provider and skill provider from the engine + fileProvider, ok := c.contextEngine.GetProvider("files").(*filesystem.FileProvider) + if !ok { + return fmt.Errorf("file provider not available") + } + skillProvider := c.contextEngine.GetProvider("skills") + if skillProvider == nil { + return fmt.Errorf("skill provider not available") + } + // Create adapter for HTTPClient + httpAdapter := &httpClientAdapter{client: c.client.HTTPClient} + cmd := filesystem.NewInstallSkillCommand(httpAdapter, fileProvider, skillProvider) + return cmd.Execute(cmdArgs) + case "uninstall-skill": + skillProvider := c.contextEngine.GetProvider("skills") + if skillProvider == nil { + return fmt.Errorf("skill provider not available") + } + fileProvider := c.contextEngine.GetProvider("files") + if fileProvider == nil { + return 
fmt.Errorf("file provider not available") + } + // Create adapter for HTTPClient + httpAdapter := &httpClientAdapter{client: c.client.HTTPClient} + fileProv, _ := fileProvider.(*filesystem.FileProvider) + cmd := filesystem.NewUninstallSkillCommand(httpAdapter, skillProvider, fileProv) + return cmd.Execute(cmdArgs) + case "add-skill": + fmt.Println("⚠ Warning: 'add-skill' is deprecated. Use 'install-skill' instead.") + // Forward to install-skill + fileProvider, ok := c.contextEngine.GetProvider("files").(*filesystem.FileProvider) + if !ok { + return fmt.Errorf("file provider not available") + } + skillProvider := c.contextEngine.GetProvider("skills") + if skillProvider == nil { + return fmt.Errorf("skill provider not available") + } + httpAdapter := &httpClientAdapter{client: c.client.HTTPClient} + cmd := filesystem.NewInstallSkillCommand(httpAdapter, fileProvider, skillProvider) + return cmd.Execute(cmdArgs) + case "delete-skill": + fmt.Println("⚠ Warning: 'delete-skill' is deprecated. 
Use 'uninstall-skill' instead.") + // Forward to uninstall-skill + skillProvider := c.contextEngine.GetProvider("skills") + if skillProvider == nil { + return fmt.Errorf("skill provider not available") + } + fileProvider := c.contextEngine.GetProvider("files") + if fileProvider == nil { + return fmt.Errorf("file provider not available") + } + httpAdapter := &httpClientAdapter{client: c.client.HTTPClient} + fileProv, _ := fileProvider.(*filesystem.FileProvider) + cmd := filesystem.NewUninstallSkillCommand(httpAdapter, skillProvider, fileProv) + return cmd.Execute(cmdArgs) + default: - return fmt.Errorf("unknown context engine command: %s", cmdType) + return fmt.Errorf("unknown filesystem command: %s", cmdType) } // Execute the command @@ -722,23 +853,23 @@ func (c *CLI) executeContextEngine(input string) error { // Print result // For search command, default to JSON format if not explicitly set to plain/table format := c.outputFormat - if ceCmd.Type == contextengine.CommandSearch && format != OutputFormatPlain && format != OutputFormatTable { + if ceCmd.Type == filesystem.CommandSearch && format != OutputFormatPlain && format != OutputFormatTable { format = OutputFormatJSON } // Get limit for list command limit := 0 - if ceCmd.Type == contextengine.CommandList { + if ceCmd.Type == filesystem.CommandList { if l, ok := ceCmd.Params["limit"].(int); ok { limit = l } } - c.printContextEngineResult(result, ceCmd.Type, format, limit) + c.printFilesystemResult(result, ceCmd.Type, format, limit) return nil } -// parseContextEngineArgs parses Context Engine command arguments +// parseFilesystemArgs parses Filesystem command arguments // Supports simple space-separated args and quoted strings -func parseContextEngineArgs(input string) []string { +func parseFilesystemArgs(input string) []string { var args []string var current strings.Builder inQuote := false @@ -780,14 +911,14 @@ func parseContextEngineArgs(input string) []string { return args } -// printContextEngineResult 
prints the result of a context engine command -func (c *CLI) printContextEngineResult(result *contextengine.Result, cmdType contextengine.CommandType, format OutputFormat, limit int) { +// printFilesystemResult prints the result of a filesystem command +func (c *CLI) printFilesystemResult(result *filesystem.Result, cmdType filesystem.CommandType, format OutputFormat, limit int) { if result == nil { return } switch cmdType { - case contextengine.CommandList: + case filesystem.CommandList: if len(result.Nodes) == 0 { fmt.Println("(empty)") return @@ -824,7 +955,7 @@ func (c *CLI) printContextEngineResult(result *contextengine.Result, cmdType con fmt.Printf("\n... and %d more (use -n to show more)\n", result.Total-limit) } fmt.Printf("Total: %d\n", result.Total) - case contextengine.CommandSearch: + case filesystem.CommandSearch: if len(result.Nodes) == 0 { if format == OutputFormatJSON { fmt.Println("[]") @@ -921,13 +1052,103 @@ func (c *CLI) printContextEngineResult(result *contextengine.Result, cmdType con fmt.Println(sep) fmt.Printf("Total: %d\n", result.Total) } - case contextengine.CommandCat: + case filesystem.CommandCat: // Cat output is handled differently - it returns []byte, not *Result // This case should not be reached in normal flow since Cat returns []byte directly fmt.Println("Content retrieved") } } +// printSkillSearchResults prints skill search results with full details +func (c *CLI) printSkillSearchResults(result *filesystem.Result, format OutputFormat) { + if result == nil || len(result.Nodes) == 0 { + if format == OutputFormatJSON { + fmt.Println("[]") + } else { + fmt.Println("No skills found") + } + return + } + + // Skill search result structure + type skillSearchResult struct { + SkillID string `json:"skill_id"` + Name string `json:"name"` + Description string `json:"description"` + Tags string `json:"tags"` + Score float64 `json:"score"` + BM25Score float64 `json:"bm25_score"` + VectorScore float64 `json:"vector_score"` + } + + results := 
make([]skillSearchResult, 0, len(result.Nodes)) + for _, node := range result.Nodes { + // Extract metadata + skillID := "" + if id, ok := node.Metadata["skill_id"].(string); ok { + skillID = id + } + description := "" + if desc, ok := node.Metadata["description"].(string); ok { + description = desc + } + tags := "" + if t, ok := node.Metadata["tags"].([]string); ok { + tags = strings.Join(t, ", ") + } + var score, bm25Score, vectorScore float64 + if s, ok := node.Metadata["score"].(float64); ok { + score = s + } + if b, ok := node.Metadata["bm25_score"].(float64); ok { + bm25Score = b + } + if v, ok := node.Metadata["vector_score"].(float64); ok { + vectorScore = v + } + + results = append(results, skillSearchResult{ + SkillID: skillID, + Name: node.Name, + Description: description, + Tags: tags, + Score: score, + BM25Score: bm25Score, + VectorScore: vectorScore, + }) + } + + if format == OutputFormatJSON { + jsonData, err := json.MarshalIndent(results, "", " ") + if err != nil { + fmt.Printf("Error marshaling JSON: %v\n", err) + return + } + fmt.Println(string(jsonData)) + } else if format == OutputFormatPlain { + fmt.Printf("Found %d skill(s):\n", len(results)) + for _, sr := range results { + fmt.Printf("\nName: %s\n", sr.Name) + fmt.Printf("Skill ID: %s\n", sr.SkillID) + fmt.Printf("Description: %s\n", sr.Description) + fmt.Printf("Tags: %s\n", sr.Tags) + fmt.Printf("Score: %.6f (BM25: %.6f, Vector: %.6f)\n", sr.Score, sr.BM25Score, sr.VectorScore) + } + } else { + // Table format + fmt.Printf("Found %d skill(s):\n", len(results)) + fmt.Println() + for _, sr := range results { + fmt.Printf("Name: %s\n", sr.Name) + fmt.Printf("Skill ID: %s\n", sr.SkillID) + fmt.Printf("Description: %s\n", sr.Description) + fmt.Printf("Tags: %s\n", sr.Tags) + fmt.Printf("Score: %.6f (BM25: %.6f, Vector: %.6f)\n", sr.Score, sr.BM25Score, sr.VectorScore) + fmt.Println() + } + } +} + func (c *CLI) handleMetaCommand(cmd *Command) error { command := cmd.Params["command"].(string) 
args, _ := cmd.Params["args"].([]string) @@ -1021,7 +1242,7 @@ Commands (User Mode): CHAT 'message'; - Chat using current model CHAT 'provider/instance/model' 'message'; - Chat with specified model -Context Engine Commands (no quotes): +Filesystem Commands (no quotes): ls [path] - List resources e.g., ls - List root (providers and folders) e.g., ls datasets - List all datasets @@ -1036,7 +1257,7 @@ Context Engine Commands (no quotes): Examples: ragflow_cli -f rf.yml "LIST USERS" # SQL mode (with quotes) - ragflow_cli -f rf.yml ls datasets # Context Engine mode (no quotes) + ragflow_cli -f rf.yml ls datasets # Filesystem mode (no quotes) ragflow_cli -f rf.yml ls files # List files in root ragflow_cli -f rf.yml cat datasets # Error: datasets is a directory ragflow_cli -f rf.yml ls files/myfolder # List folder contents @@ -1079,7 +1300,7 @@ func (c *CLI) RunSingleCommand(command *string) error { defer c.Cleanup() // Execute the command - if err := c.executeNew(*command); err != nil { + if err := c.execute(*command); err != nil { return err } return nil @@ -1141,7 +1362,7 @@ type ListCommandOptions struct { } // parseSearchCommandArgs parses search command arguments -// Format: search [-d dir1] [-d dir2] ... 
-q query [-k top_k] [-t threshold] +// Format: search [path] [-n number] // // search -h|--help (shows help) func parseSearchCommandArgs(args []string) (*SearchCommandOptions, error) { @@ -1160,77 +1381,45 @@ func parseSearchCommandArgs(args []string) (*SearchCommandOptions, error) { } // Parse arguments + // Format: search [path] [-n number] i := 0 for i < len(args) { arg := args[i] - switch arg { - case "-d", "--dir": - if i+1 >= len(args) { - return nil, fmt.Errorf("missing value for %s flag", arg) - } - opts.Dirs = append(opts.Dirs, args[i+1]) - i += 2 - case "-q", "--query": - if i+1 >= len(args) { - return nil, fmt.Errorf("missing value for %s flag", arg) - } - opts.Query = args[i+1] - i += 2 - case "-k", "--top-k": + // Handle -n flag for number of results + if arg == "-n" || arg == "--number" { if i+1 >= len(args) { return nil, fmt.Errorf("missing value for %s flag", arg) } topK, err := strconv.Atoi(args[i+1]) if err != nil { - return nil, fmt.Errorf("invalid top-k value: %s", args[i+1]) + return nil, fmt.Errorf("invalid number value: %s", args[i+1]) } opts.TopK = topK i += 2 - case "-t", "--threshold": - if i+1 >= len(args) { - return nil, fmt.Errorf("missing value for %s flag", arg) - } - threshold, err := strconv.ParseFloat(args[i+1], 64) - if err != nil { - return nil, fmt.Errorf("invalid threshold value: %s", args[i+1]) - } - opts.Threshold = threshold - i += 2 - default: - // If it doesn't start with -, it might be a positional argument - if !strings.HasPrefix(arg, "-") { - // For backwards compatibility: if no -q flag and this is the last arg, treat as query - if opts.Query == "" && i == len(args)-1 { - opts.Query = arg - } else if opts.Query == "" && len(args) > 0 && i < len(args)-1 { - // Old format: search [path] query - // Treat first non-flag as path, rest as query - opts.Dirs = append(opts.Dirs, arg) - // Join remaining args as query - remainingArgs := args[i+1:] - queryParts := []string{} - for _, part := range remainingArgs { - if 
!strings.HasPrefix(part, "-") { - queryParts = append(queryParts, part) - } - } - opts.Query = strings.Join(queryParts, " ") - break - } - } else { - return nil, fmt.Errorf("unknown flag: %s", arg) - } - i++ + continue } + + // If it starts with -, it's an unknown flag + if strings.HasPrefix(arg, "-") { + return nil, fmt.Errorf("unknown flag: %s", arg) + } + + // Non-flag arguments: first is query, second is path + if opts.Query == "" { + opts.Query = arg + } else if len(opts.Dirs) == 0 { + opts.Dirs = append(opts.Dirs, arg) + } + i++ } // Validate required parameters if opts.Query == "" { - return nil, fmt.Errorf("query is required (use -q or --query)") + return nil, fmt.Errorf("query is required") } - // If no directories specified, search in all datasets (empty path means all) + // If no path specified, default to "datasets" if len(opts.Dirs) == 0 { opts.Dirs = []string{"datasets"} } @@ -1240,30 +1429,34 @@ func parseSearchCommandArgs(args []string) (*SearchCommandOptions, error) { // printSearchHelp prints help for the search command func printSearchHelp() { - help := `Search command usage: search [options] + help := `Search command usage: search [path] [-n number] -Search for content in datasets. Currently only supports searching in datasets. +Search for content in datasets or skills. 
+ +Arguments: + <query> Search query (required) + Example: "machine learning" + [path] Path to search in (default: datasets) + Supports: + - 'datasets' (all datasets) + - 'datasets/<name>' (specific dataset) + - 'skills' (default skill space) + - 'skills/<space-id>' (specific skill space) + Example: skills/space1 Options: - -d, --dir Directory to search in (can be specified multiple times) - Currently only supports paths under 'datasets/' - Example: -d datasets/kb1 -d datasets/kb2 - -q, --query Search query (required) - Example: -q "machine learning" - -k, --top-k Number of top results to return (default: 10) - Example: -k 20 - -t, --threshold Similarity threshold, 0.0-1.0 (default: 0.2) - Example: -t 0.5 + -n, --number Number of results to return (default: 10) + Example: -n 20 -h, --help Show this help message Output: Default output format is JSON. Use --output plain or --output table for other formats. Examples: - search -d datasets/kb1 -q "neural networks" # Search in kb1 (JSON output) - search -d datasets/kb1 -q "AI" --output plain # Search with plain text output - search -q "data mining" # Search all datasets - search -q "RAG" -k 20 -t 0.5 # Return 20 results with threshold 0.5 + search "neural networks" # Search all datasets + search "AI" datasets/kb1 # Search in kb1 + search "RAG" skills/space1 -n 20 # Search skills in space1, return 20 results + search "data processing" skills # Search skills (default space) ` fmt.Println(help) } diff --git a/internal/cli/client.go b/internal/cli/client.go index f92aeb2d9c..861a265c1e 100644 --- a/internal/cli/client.go +++ b/internal/cli/client.go @@ -18,7 +18,9 @@ package cli import ( "fmt" - ce "ragflow/internal/cli/contextengine" + "io" + + ce "ragflow/internal/cli/filesystem" ) // PasswordPromptFunc is a function type for password input @@ -41,7 +43,6 @@ type RAGFlowClient struct { CurrentModel *CurrentModel // Current model configuration } -// NewRAGFlowClient creates a new RAGFlow client func NewRAGFlowClient(serverType string) *RAGFlowClient 
{ httpClient := NewHTTPClient() // Set port from configuration file based on server type @@ -68,6 +69,8 @@ func (c *RAGFlowClient) initContextEngine() { // Register providers engine.RegisterProvider(ce.NewDatasetProvider(&httpClientAdapter{c.HTTPClient})) + engine.RegisterProvider(ce.NewFileProvider(&httpClientAdapter{c.HTTPClient})) + engine.RegisterProvider(ce.NewSkillProvider(&httpClientAdapter{c.HTTPClient})) c.ContextEngine = engine } @@ -101,6 +104,10 @@ func (a *httpClientAdapter) Request(method, path string, useAPIBase bool, authKi }, nil } +func (a *httpClientAdapter) UploadMultipart(path string, contentType string, body io.Reader) error { + return a.client.UploadMultipart(path, contentType, body) +} + // ExecuteCommand executes a parsed command // Returns benchmark result map for commands that support it (e.g., ping_server with iterations > 1) func (c *RAGFlowClient) ExecuteCommand(cmd *Command) (ResponseIf, error) { @@ -288,14 +295,10 @@ func (c *RAGFlowClient) ExecuteUserCommand(cmd *Command) (ResponseIf, error) { case "remove_chunks": return c.RemoveChunks(cmd) // ContextEngine commands - case "context_list": - return c.ContextList(cmd) - case "context_cat": - return c.ContextCat(cmd) - case "context_search": - return c.ContextSearch(cmd) case "ce_ls": return c.CEList(cmd) + case "ce_cat": + return c.CECat(cmd) case "ce_search": return c.CESearch(cmd) // TODO: Implement other commands diff --git a/internal/cli/context_command.go b/internal/cli/context_command.go deleted file mode 100644 index 353601e033..0000000000 --- a/internal/cli/context_command.go +++ /dev/null @@ -1,135 +0,0 @@ -// -// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -package cli - -import ( - "fmt" -) - -func (c *RAGFlowClient) ContextList(cmd *Command) (ResponseIf, error) { - if c.HTTPClient.APIToken == "" && c.HTTPClient.LoginToken == "" { - return nil, fmt.Errorf("API token not set. Please login first") - } - if c.ServerType != "user" { - return nil, fmt.Errorf("this command is only allowed in USER mode") - } - - var path string - var ok bool - if cmd.Params["path"] != nil { - path, ok = cmd.Params["path"].(string) - if !ok { - return nil, fmt.Errorf("fail to convert 'path' to string") - } - } - - if path == "" { - path = "." - } - - var parameter string - if cmd.Params["parameter"] != nil { - parameter, ok = cmd.Params["parameter"].(string) - if !ok { - return nil, fmt.Errorf("fail to convert 'parameter' to string") - } - } - - if parameter == "" { - fmt.Printf("ls %s\n", path) - } else { - fmt.Printf("ls %s -%s\n", path, parameter) - } - - // Convert to response - var response ContextListResponse - response.OutputFormat = c.OutputFormat - response.Code = 0 - response.Data = nil - - return &response, nil -} - -func (c *RAGFlowClient) ContextCat(cmd *Command) (ResponseIf, error) { - if c.HTTPClient.APIToken == "" && c.HTTPClient.LoginToken == "" { - return nil, fmt.Errorf("API token not set. 
Please login first") - } - if c.ServerType != "user" { - return nil, fmt.Errorf("this command is only allowed in USER mode") - } - - path, ok := cmd.Params["filename"].(string) - if !ok { - return nil, fmt.Errorf("fail to convert 'filename' to string") - } - - fmt.Printf("cat %s\n", path) - - // Convert to response - var response ContextListResponse - response.OutputFormat = c.OutputFormat - response.Code = 0 - response.Data = nil - - return &response, nil -} - -func (c *RAGFlowClient) ContextSearch(cmd *Command) (ResponseIf, error) { - if c.HTTPClient.APIToken == "" && c.HTTPClient.LoginToken == "" { - return nil, fmt.Errorf("API token not set. Please login first") - } - if c.ServerType != "user" { - return nil, fmt.Errorf("this command is only allowed in USER mode") - } - - path, ok := cmd.Params["path"].(string) - if !ok { - return nil, fmt.Errorf("fail to convert 'path' to string") - } - - query, ok := cmd.Params["query"].(string) - if !ok { - return nil, fmt.Errorf("fail to convert 'parameter' to float64") - } - - number := 10 - if cmd.Params["number"] != nil { - number, ok = cmd.Params["number"].(int) - if !ok { - return nil, fmt.Errorf("fail to convert 'number' to int") - } - } - - //threshold := 0.0 - //if cmd.Params["threshold"] != nil { - // threshold, ok = cmd.Params["threshold"].(float64) - // if !ok { - // return nil, fmt.Errorf("fail to convert 'threshold' to float64") - // } - //} - - fmt.Printf("search query: %s, path: %s, number: %d\n", query, path, number) - - // Convert to response - var response ContextSearchResponse - response.OutputFormat = c.OutputFormat - response.Code = 0 - response.Total = 0 - response.Data = nil - - return &response, nil -} diff --git a/internal/cli/context_parser.go b/internal/cli/context_parser.go index 4492109cb4..324ec39c07 100644 --- a/internal/cli/context_parser.go +++ b/internal/cli/context_parser.go @@ -24,7 +24,7 @@ import ( func (p *Parser) parseContextListCommand() (*Command, error) { p.nextToken() // consume 
LS - cmd := NewCommand("context_list") + cmd := NewCommand("ce_ls") if p.curToken.Type == TokenEOF { cmd.Params["path"] = "." @@ -70,7 +70,7 @@ func (p *Parser) parseContextCatCommand() (*Command, error) { return nil, fmt.Errorf("expect a filename") } - cmd := NewCommand("context_cat") + cmd := NewCommand("ce_cat") if p.curToken.Type == TokenIdentifier { for p.curToken.Type != TokenEOF { if p.curToken.Type != TokenIdentifier { @@ -114,7 +114,7 @@ func (p *Parser) parseContextCatCommand() (*Command, error) { func (p *Parser) parseContextSearchCommand() (*Command, error) { p.nextToken() // consume SEARCH - cmd := NewCommand("context_search") + cmd := NewCommand("ce_search") for p.curToken.Type != TokenEOF { if p.curToken.Type == TokenDash { diff --git a/internal/cli/contextengine/README.md b/internal/cli/contextengine/README.md deleted file mode 100644 index 26548823aa..0000000000 --- a/internal/cli/contextengine/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# ContextFS - Context Engine File System - -ContextFS is a context engine interface for RAGFlow, providing users with a Unix-like file system interface to manage datasets, tools, skills, and memories. - -## Directory Structure - -``` -user_id/ -├── datasets/ -│ └── my_dataset/ -│ └── ... -├── tools/ -│ ├── registry.json -│ └── tool_name/ -│ ├── DOC.md -│ └── ... -├── skills/ -│ ├── registry.json -│ └── skill_name/ -│ ├── SKILL.md -│ └── ... 
-└── memories/ - └── memory_id/ - ├── sessions/ - │ ├── messages/ - │ ├── summaries/ - │ │ └── session_id/ - │ │ └── summary-{datetime}.md - │ └── tools/ - │ └── session_id/ - │ └── {tool_name}.md # User level of memory on Tools usage - ├── users/ - │ ├── profile.md - │ ├── preferences/ - │ └── entities/ - └── agents/ - └── agent_space/ - ├── tools/ - │ └── {tool_name}.md # Agent level of memory on Tools usage - └── skills/ - └── {skill_name}.md # Agent level of memory on Skills usage -``` - - -## Supported Commands - -- `ls [path]` - List directory contents -- `cat ` - Display file contents(only for text files) -- `search ` - Search content diff --git a/internal/cli/filesystem/README.md b/internal/cli/filesystem/README.md new file mode 100644 index 0000000000..a4cbbac32e --- /dev/null +++ b/internal/cli/filesystem/README.md @@ -0,0 +1,195 @@ +# ContextEngine Filesystem + +The ContextEngine Filesystem gives RAGFlow users a Unix-like file system interface for managing datasets, tools, skills, and memories. + +## Directory Structure + +``` +user_id/ +├── datasets/ +│ └── my_dataset/ +│ └── ... +├── tools/ +│ ├── registry.json +│ └── tool_name/ +│ ├── DOC.md +│ └── ... +├── skills/ +│ └── skill_name/ +│ └── version/ +│ ├── SKILL.md +│ └── ... 
+└── memories/ + └── memory_id/ + ├── sessions/ + │ ├── messages/ + │ ├── summaries/ + │ │ └── session_id/ + │ │ └── summary-{datetime}.md + │ └── tools/ + │ └── session_id/ + │ └── {tool_name}.md # User level of memory on Tools usage + ├── users/ + │ ├── profile.md + │ ├── preferences/ + │ └── entities/ + └── agents/ + └── agent_space/ + ├── tools/ + │ └── {tool_name}.md # Agent level of memory on Tools usage + └── skills/ + └── {skill_name}.md # Agent level of memory on Skills usage +``` + + +## Supported Commands + +- `ls [path]` - List directory contents +- `cat <path>` - Display file contents (only for text files) +- `search <query> [path]` - Search content +- `install-skill <space-id> <source> [options]` - Install a skill from a local or remote source +- `uninstall-skill <space-id> <skill-name>` - Uninstall a skill + +### Skill Management Commands + +#### install-skill + +Install a skill from any supported source into a RAGFlow skills space. + +**Usage:** +```bash +install-skill <space-id> <source> [options] +``` + +**Arguments:** +- `<space-id>` - Target skills space ID (required) +- `<source>` - Skill source reference (required) + +**Supported Sources:** + +| Source Type | Format | Example | +|------------|--------|---------| +| **Local** | `./path` or `/absolute/path` | `./my-skill`, `/home/user/skills/awesome` | +| **GitHub** | `github.com/owner/repo/path` | `github.com/openai/skills/skill-creator` | +| **ClawHub** | `clawhub://owner/skill-name` or `clawhub.ai/owner/skill-name` | `clawhub://pskoett/self-improving-agent` | +| **skills.sh** | `skill://skill-name` or `skills.sh/skill/name` | `skill://kubernetes` | + +**Options:** +- `-v, --version <version>` - Specify skill version (default: from SKILL.md or 1.0.0) +- `-n, --name <name>` - Override skill name (default: from SKILL.md) +- `-f, --force` - Force reinstall if skill exists (deletes existing first and updates index) +- `--skip-verify` - Skip security verification (use with caution) +- `-h, --help` - Show help message + +**Security Scanning:** + +By default, all skills are scanned for potential security threats: +- **Data 
exfiltration**: Environment variable access, secret leakage, `.ssh` access +- **Prompt injection**: DAN mode, instruction override attempts, role hijacking +- **Destructive commands**: `rm -rf /`, `mkfs`, disk overwrite operations +- **Persistence mechanisms**: Cron jobs, shell RC modification, SSH backdoors +- **Network threats**: Reverse shells, tunneling services, exfiltration endpoints +- **Obfuscation**: Base64 piped to shell, `eval()` usage, encoded execution + +**Trust Levels:** +- `builtin` - Official RAGFlow skills (always allowed) +- `trusted` - `openai/skills`, `anthropics/skills`, `microsoft/skills`, `google/skills` (caution allowed) +- `community` - All other sources (findings blocked unless `--force`) + +**Examples:** +```bash +# Install from local path +install-skill my-space ./my-local-skill + +# Install from GitHub +install-skill my-space github.com/openai/skills/skill-creator + +# Install from ClawHub +install-skill my-space clawhub://user/web-search + +# Install from Skills.sh +install-skill my-space skills.sh/xixu-me/skills/readme-i18n + +# Force reinstall (delete existing and reinstall, update index) +install-skill my-space ./my-skill --force + +# Force install with custom name, skip security check +install-skill my-space clawhub://unknown-skill --force --name my-skill --skip-verify + +# Install specific version +install-skill my-space skill://kubernetes --version 2.1.0 +``` + +#### uninstall-skill + +Remove a skill from RAGFlow and delete its search index. + +**Usage:** +```bash +uninstall-skill <space_id> <skill_name> +``` + +**Arguments:** +- `<space_id>` - Skills space ID (required) +- `<skill_name>` - Name of the skill to uninstall (required) + +**Examples:** +```bash +uninstall-skill my-space my-skill +``` + +#### Deprecated Commands + +- `add-skill` - Deprecated, use `install-skill` instead +- `delete-skill` - Deprecated, use `uninstall-skill` instead + +## File Structure Requirements + +### Skill Directory + +A valid skill directory must contain: +- `SKILL.md` - Required. 
Skill metadata and instructions in YAML frontmatter format + +Optional files: +- Additional documentation (`.md`, `.mdx`) +- Code files (`.py`, `.js`, `.ts`, etc.) +- Configuration files (`.json`, `.yaml`, `.toml`) + +### SKILL.md Frontmatter + +```yaml +--- +name: my-skill +description: A brief description of what this skill does +version: 1.0.0 +author: Your Name +tags: + - category1 + - category2 +--- +``` + +## Security Architecture + +The skill management system implements defense-in-depth security: + +1. **Source Validation**: All remote sources use HTTPS and verify SSL certificates +2. **Quarantine**: Downloaded skills are isolated before installation +3. **Static Analysis**: Regex-based scanning for 100+ threat patterns across 6 categories: + - Exfiltration: Environment variable access, secret leakage + - Injection: Prompt injection, jailbreak attempts + - Destructive: Dangerous filesystem operations + - Persistence: Backdoors, startup file modification + - Network: Reverse shells, unauthorized tunneling + - Obfuscation: Encoded execution, download-and-run +4. **Trust Tiers**: Different security policies based on source reputation +5. **User Confirmation**: High-risk installations require explicit `--force` +6. **Audit Logging**: All installations are logged with scan results + +## Validation Rules + +- Total size must not exceed 50MB +- Individual files must not exceed 5MB +- Only text files are allowed (no binaries) +- Skill name must be lowercase alphanumeric with hyphens/underscores +- Hidden files and directories are ignored diff --git a/internal/cli/contextengine/provider.go b/internal/cli/filesystem/base.go similarity index 99% rename from internal/cli/contextengine/provider.go rename to internal/cli/filesystem/base.go index 605a39b890..25ace4d7e7 100644 --- a/internal/cli/contextengine/provider.go +++ b/internal/cli/filesystem/base.go @@ -14,7 +14,7 @@ // limitations under the License. 
// -package contextengine +package filesystem import ( stdctx "context" diff --git a/internal/cli/contextengine/dataset_provider.go b/internal/cli/filesystem/dataset.go similarity index 99% rename from internal/cli/contextengine/dataset_provider.go rename to internal/cli/filesystem/dataset.go index daf3e41e4a..27ba475c35 100644 --- a/internal/cli/contextengine/dataset_provider.go +++ b/internal/cli/filesystem/dataset.go @@ -14,9 +14,10 @@ // limitations under the License. // -package contextengine +package filesystem import ( + "io" stdctx "context" "encoding/json" "fmt" @@ -36,6 +37,7 @@ type HTTPResponse struct { // HTTPClientInterface defines the interface needed from HTTPClient type HTTPClientInterface interface { Request(method, path string, useAPIBase bool, authKind string, headers map[string]string, jsonBody map[string]interface{}) (*HTTPResponse, error) + UploadMultipart(path string, contentType string, body io.Reader) error } // DatasetProvider handles datasets and their documents @@ -508,7 +510,7 @@ func (p *DatasetProvider) listDocuments(ctx stdctx.Context, datasetName string, } var apiResp struct { - Code int `json:"code"` + Code int `json:"code"` Data struct { Docs []map[string]interface{} `json:"docs"` } `json:"data"` diff --git a/internal/cli/contextengine/engine.go b/internal/cli/filesystem/engine.go similarity index 94% rename from internal/cli/contextengine/engine.go rename to internal/cli/filesystem/engine.go index 9f34aa9203..7afaf6df91 100644 --- a/internal/cli/contextengine/engine.go +++ b/internal/cli/filesystem/engine.go @@ -14,7 +14,7 @@ // limitations under the License. 
// -package contextengine +package filesystem import ( stdctx "context" @@ -23,13 +23,13 @@ import ( "time" ) -// Engine is the core of the Context Engine +// Engine is the core of the Virtual Filesystem // It manages providers and routes commands to the appropriate provider type Engine struct { providers []Provider } -// NewEngine creates a new Context Engine +// NewEngine creates a new Virtual Filesystem Engine func NewEngine() *Engine { return &Engine{ providers: make([]Provider, 0), @@ -136,6 +136,8 @@ func (e *Engine) List(ctx stdctx.Context, path string, opts *ListOptions) (*Resu // 2. Top-level folders from files provider (file_manager) func (e *Engine) listRoot(ctx stdctx.Context, opts *ListOptions) (*Result, error) { nodes := make([]*Node, 0) + // Track names to avoid duplicates + seen := make(map[string]bool) // Add built-in providers first (like datasets) for _, p := range e.providers { @@ -152,6 +154,7 @@ func (e *Engine) listRoot(ctx stdctx.Context, opts *ListOptions) (*Result, error "description": p.Description(), }, }) + seen[p.Name()] = true } // Add top-level folders from files provider (file_manager) @@ -161,6 +164,11 @@ func (e *Engine) listRoot(ctx stdctx.Context, opts *ListOptions) (*Result, error for _, node := range filesResult.Nodes { // Only add folders (directories), not files if node.Type == NodeTypeDirectory { + // Skip if already added by a provider + if seen[node.Name] { + continue + } + seen[node.Name] = true // Ensure path doesn't have /files/ prefix for display node.Path = strings.TrimPrefix(node.Path, "files/") node.Path = strings.TrimPrefix(node.Path, "/") @@ -186,6 +194,16 @@ func (e *Engine) getFileProvider() Provider { return nil } +// GetProvider returns a provider by name +func (e *Engine) GetProvider(name string) Provider { + for _, p := range e.providers { + if p.Name() == name { + return p + } + } + return nil +} + // Search searches for nodes matching the query func (e *Engine) Search(ctx stdctx.Context, path string, opts 
*SearchOptions) (*Result, error) { provider, subPath, err := e.resolveProvider(path) diff --git a/internal/cli/contextengine/file_provider.go b/internal/cli/filesystem/file.go similarity index 87% rename from internal/cli/contextengine/file_provider.go rename to internal/cli/filesystem/file.go index b813cbac58..6863637920 100644 --- a/internal/cli/contextengine/file_provider.go +++ b/internal/cli/filesystem/file.go @@ -14,7 +14,7 @@ // limitations under the License. // -package contextengine +package filesystem import ( stdctx "context" @@ -542,6 +542,92 @@ func (p *FileProvider) downloadFile(ctx stdctx.Context, fileID string) ([]byte, return resp.Body, nil } +// DeleteFile deletes a file or folder by its ID +func (p *FileProvider) DeleteFile(ctx stdctx.Context, fileID string) error { + // Use JSON body format expected by Python backend: {"ids": ["file_id"]} + payload := map[string]interface{}{ + "ids": []string{fileID}, + } + resp, err := p.httpClient.Request("DELETE", "/files", true, "api", nil, payload) + if err != nil { + return fmt.Errorf("delete request failed: %w", err) + } + + // Handle empty response (e.g., 204 No Content) + if len(resp.Body) == 0 { + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + return nil + } + return fmt.Errorf("delete failed with status code: %d", resp.StatusCode) + } + + var apiResp struct { + Code int `json:"code"` + Data interface{} `json:"data"` + Message string `json:"message"` + } + + if err := json.Unmarshal(resp.Body, &apiResp); err != nil { + return fmt.Errorf("failed to parse response: %w", err) + } + + if apiResp.Code != 0 { + return fmt.Errorf("delete failed: %s", apiResp.Message) + } + + return nil +} + +// DeleteFolderByPath deletes a folder by its path (e.g., "skills/hub11/skill-name") +func (p *FileProvider) DeleteFolderByPath(ctx stdctx.Context, folderPath string) error { + parts := SplitPath(folderPath) + if len(parts) == 0 { + return fmt.Errorf("empty folder path") + } + + // Find the folder ID by traversing 
the path + var folderID string + currentPath := "" + + for i, part := range parts { + if i == 0 { + // First part - find in root + id, err := p.getFolderIDByName(ctx, part) + if err != nil { + return fmt.Errorf("folder not found: %s", part) + } + folderID = id + currentPath = part + } else { + // Subsequent parts - find in parent folder + result, err := p.listFilesByParentID(ctx, folderID, currentPath, nil) + if err != nil { + return fmt.Errorf("failed to list folder contents: %w", err) + } + + found := false + for _, node := range result.Nodes { + if node.Name == part && node.Type == NodeTypeDirectory { + folderID = getString(node.Metadata["id"]) + if folderID == "" { + return fmt.Errorf("folder ID not found for: %s", part) + } + currentPath = currentPath + "/" + part + found = true + break + } + } + + if !found { + return fmt.Errorf("folder not found: %s in %s", part, currentPath) + } + } + } + + // Delete the folder + return p.DeleteFile(ctx, folderID) +} + // ==================== Conversion Functions ==================== // fileToNode converts a file map to a Node diff --git a/internal/cli/filesystem/skill.go b/internal/cli/filesystem/skill.go new file mode 100644 index 0000000000..bb66b2af71 --- /dev/null +++ b/internal/cli/filesystem/skill.go @@ -0,0 +1,2154 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package filesystem + +import ( + "bytes" + stdctx "context" + "encoding/json" + "fmt" + "mime/multipart" + "net/url" + "os" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/google/uuid" + "go.uber.org/zap" + "gopkg.in/yaml.v3" + + "ragflow/internal/logger" +) + +// SkillProvider handles skill operations using /skills API +// Path structure: +// - skills/ -> List all hubs +// - skills/{space_id}/ -> List skills in space +// - skills/{space_id}/{skill_name}/ -> List versions of skill +// - skills/{space_id}/{skill_name}/{version}/ -> Get skill version info +// +// Note: Uses Go backend API (useAPIBase=true): +// - GET /skills/hubs -> List all hubs +// - POST /skills/search -> Search skills +// - POST /skills/index -> Index skills +// - DELETE /skills/index/{skill_id} -> Delete skill index + +// ============================================================================ +// Constants +// ============================================================================ + +const ( + MaxSkillTotalSize = 50 * 1024 * 1024 // 50MB + MaxSkillFileSize = 5 * 1024 * 1024 // 5MB per file + DefaultSpaceID = "default" +) + +// Text file extensions allowed in skills +var textFileExtensions = map[string]bool{ + "md": true, "mdx": true, "txt": true, "json": true, "json5": true, + "yaml": true, "yml": true, "toml": true, "js": true, "cjs": true, "mjs": true, + "ts": true, "tsx": true, "jsx": true, "py": true, "sh": true, "rb": true, + "go": true, "rs": true, "swift": true, "kt": true, "java": true, "cs": true, + "cpp": true, "c": true, "h": true, "hpp": true, "sql": true, "csv": true, + "ini": true, "cfg": true, "env": true, "xml": true, "html": true, + "css": true, "scss": true, "sass": true, "svg": true, +} + +// Default ignore patterns +var defaultIgnorePatterns = []string{ + ".git/", ".svn/", ".hg/", "node_modules/", "__MACOSX/", + ".DS_Store", "._*", "*.log", "*.tmp", "*.temp", "*.swp", "*.swo", "*~", + ".env", ".env.*", ".vscode/", ".idea/", "Thumbs.db", 
"desktop.ini", + ".skill-meta.json", +} + +// ============================================================================ +// Types +// ============================================================================ + +// SkillMetadata represents the metadata from SKILL.md frontmatter +type SkillMetadata struct { + Name string `yaml:"name"` + Description string `yaml:"description"` + Version string `yaml:"version"` + Author string `yaml:"author"` + Tags []string `yaml:"tags"` + Tools interface{} `yaml:"tools"` +} + +// SkillValidationResult represents the result of skill validation +type SkillValidationResult struct { + Valid bool + Name string + Description string + Version string + Tags []string + Error string + Details string +} + +// SkillFile represents a file in the skill directory +type SkillFile struct { + Path string + Content []byte + Size int64 +} + +// SkillConflictError represents a conflict error +type SkillConflictError struct { + Type string // "name" or "version" + Name string + Version string +} + +func (e *SkillConflictError) Error() string { + if e.Type == "version" { + return fmt.Sprintf("version conflict: version '%s' already exists for skill '%s'", e.Version, e.Name) + } + return fmt.Sprintf("name conflict: skill '%s' already exists", e.Name) +} + +// ============================================================================ +// SkillProvider +// ============================================================================ + +type SkillProvider struct { + BaseProvider + httpClient HTTPClientInterface +} + +// NewSkillProvider creates a new SkillProvider +func NewSkillProvider(httpClient HTTPClientInterface) *SkillProvider { + return &SkillProvider{ + BaseProvider: BaseProvider{ + name: "skills", + description: "Skills provider for skill management and search", + rootPath: "skills", + }, + httpClient: httpClient, + } +} + +// Supports returns true if this provider can handle the given path +func (p *SkillProvider) Supports(path string) bool { + 
normalized := normalizePath(path) + return normalized == "skills" || strings.HasPrefix(normalized, "skills/") +} + +// isUUID checks if a string is a valid UUID +func isUUID(s string) bool { + _, err := uuid.Parse(s) + return err == nil +} + +// List lists nodes at the given path +// Path structure: skills/ or skills/{space_id}/ or skills/{space_id}/{skill_name}/... +func (p *SkillProvider) List(ctx stdctx.Context, subPath string, opts *ListOptions) (*Result, error) { + if subPath == "" { + // List all hubs + return p.listSpaces(ctx, opts) + } + + parts := SplitPath(subPath) + + switch len(parts) { + case 1: + // skills/{space_id} - list skills in space + return p.listSkillsInSpace(ctx, parts[0], opts) + case 2: + // skills/{space_id}/{skill_name} - list versions of skill + return p.listSkillVersions(ctx, parts[0], parts[1], opts) + default: + // skills/{space_id}/{skill_name}/{version}/... - skill content + return p.listSkillContent(ctx, parts[0], parts[1], parts[2], parts[3:], opts) + } +} + +// Search searches for skills matching the query +func (p *SkillProvider) Search(ctx stdctx.Context, subPath string, opts *SearchOptions) (*Result, error) { + if opts == nil || opts.Query == "" { + return nil, fmt.Errorf("search query is required") + } + + // Parse space from path + spaceName := "" + parts := SplitPath(subPath) + if len(parts) > 0 { + spaceName = parts[0] + } + + // Space ID can be either a name or UUID + // If it's not "default" and doesn't look like a UUID, try to convert it + spaceID := spaceName + if spaceID != "" && spaceID != "default" && !isUUID(spaceID) { + spaceUUID, err := p.getSpaceUUIDByName(ctx, spaceID) + if err == nil { + spaceID = spaceUUID + } + // If lookup fails, use the original spaceID as-is (it might already be a UUID) + } + + // Build search payload + page := 1 + pageSize := 10 + if opts.Limit > 0 { + pageSize = opts.Limit + } + if opts.Offset > 0 { + page = (opts.Offset / pageSize) + 1 + } + payload := map[string]interface{}{ + 
"query": opts.Query, + "space_id": spaceID, + "page": page, + "page_size": pageSize, + } + + // Call skill search API + resp, err := p.httpClient.Request("POST", "/skills/search", true, "auto", nil, payload) + if err != nil { + return nil, fmt.Errorf("search request failed: %w", err) + } + + var result struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Skills []struct { + SkillID string `json:"skill_id"` + Name string `json:"name"` + Description string `json:"description"` + Tags []string `json:"tags"` + Score float64 `json:"score"` + BM25Score float64 `json:"bm25_score,omitempty"` + VectorScore float64 `json:"vector_score,omitempty"` + CreateTime int64 `json:"create_time,omitempty"` + } `json:"skills"` + Total int `json:"total"` + } `json:"data"` + } + + if err := json.Unmarshal(resp.Body, &result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + if result.Code != 0 { + return nil, fmt.Errorf("search failed: %s", result.Msg) + } + + // Convert to Result format + nodes := make([]*Node, 0, len(result.Data.Skills)) + for _, skill := range result.Data.Skills { + var createdAt time.Time + if skill.CreateTime > 0 { + createdAt = time.UnixMilli(skill.CreateTime) + } + nodes = append(nodes, &Node{ + Name: skill.Name, + Type: NodeTypeDirectory, + Path: fmt.Sprintf("skills/%s/%s", spaceName, skill.Name), + CreatedAt: createdAt, + UpdatedAt: createdAt, + Metadata: map[string]interface{}{ + "skill_id": skill.SkillID, + "score": skill.Score, + "bm25_score": skill.BM25Score, + "vector_score": skill.VectorScore, + "tags": skill.Tags, + "description": skill.Description, + }, + }) + } + + return &Result{ + Nodes: nodes, + Total: result.Data.Total, + }, nil +} + +// searchSkillsFromFileSystem performs a simple name-based search via file system +// when the search index is unavailable or empty. 
+func (p *SkillProvider) searchSkillsFromFileSystem(ctx stdctx.Context, spaceName string, opts *SearchOptions) (*Result, error) { + listOpts := &ListOptions{ + Limit: opts.Limit, + Offset: opts.Offset, + } + result, err := p.listSkillsInSpaceFromFileSystem(ctx, spaceName, listOpts) + if err != nil { + return nil, err + } + + queryLower := strings.ToLower(opts.Query) + var matched []*Node + for _, node := range result.Nodes { + if strings.Contains(strings.ToLower(node.Name), queryLower) { + matched = append(matched, node) + } + } + + return &Result{ + Nodes: matched, + Total: len(matched), + }, nil +} + +// Cat retrieves the content of a skill file at the given path +// Path structure: skills/{space_id}/{skill_name}/{version}/.../{file_path} +func (p *SkillProvider) Cat(ctx stdctx.Context, path string) ([]byte, error) { + parts := SplitPath(path) + if len(parts) < 4 { + return nil, fmt.Errorf("invalid file path: %s (expected: skills/{space}/{skill}/{version}/.../{file})", path) + } + + spaceID := parts[0] + skillName := parts[1] + version := parts[2] + _ = JoinPath(parts[3:]...) 
// file path within version folder (used for nested directories) + + // Get the skill folder ID (search API or file system fallback) + skillFolderID, err := p.getSkillFolderID(ctx, spaceID, skillName) + if err != nil { + return nil, fmt.Errorf("skill '%s' not found in space '%s': %w", skillName, spaceID, err) + } + + // Find the version folder + filesResp, err := p.httpClient.Request("GET", fmt.Sprintf("/files?parent_id=%s", skillFolderID), true, "auto", nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to list versions: %w", err) + } + + var filesResult struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Files []struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + } `json:"files"` + } `json:"data"` + } + + if err := json.Unmarshal(filesResp.Body, &filesResult); err != nil { + return nil, fmt.Errorf("failed to parse files response: %w", err) + } + + if filesResult.Code != 0 { + return nil, fmt.Errorf("failed to list files: %s", filesResult.Msg) + } + + // Find the version folder + var versionFolderID string + for _, file := range filesResult.Data.Files { + if file.Name == version && file.Type == "folder" { + versionFolderID = file.ID + break + } + } + + if versionFolderID == "" { + return nil, fmt.Errorf("version '%s' not found for skill '%s'", version, skillName) + } + + // Step 4: Navigate to the file through the path + currentFolderID := versionFolderID + pathParts := parts[3:] + + // If there's a directory path before the file, navigate through it + for i := 0; i < len(pathParts)-1; i++ { + subResp, err := p.httpClient.Request("GET", fmt.Sprintf("/files?parent_id=%s", currentFolderID), true, "auto", nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to navigate path: %w", err) + } + + var subResult struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Files []struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` 
+ } `json:"files"` + } `json:"data"` + } + + if err := json.Unmarshal(subResp.Body, &subResult); err != nil { + return nil, fmt.Errorf("failed to parse navigation response: %w", err) + } + + if subResult.Code != 0 { + return nil, fmt.Errorf("navigation failed: %s", subResult.Msg) + } + + found := false + for _, file := range subResult.Data.Files { + if file.Name == pathParts[i] { + if file.Type != "folder" { + return nil, fmt.Errorf("'%s' is not a directory", pathParts[i]) + } + currentFolderID = file.ID + found = true + break + } + } + + if !found { + return nil, fmt.Errorf("directory not found: %s", pathParts[i]) + } + } + + // Step 5: Find the file in the current directory + fileName := pathParts[len(pathParts)-1] + finalResp, err := p.httpClient.Request("GET", fmt.Sprintf("/files?parent_id=%s", currentFolderID), true, "auto", nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to list directory: %w", err) + } + + var finalResult struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Files []struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + } `json:"files"` + } `json:"data"` + } + + if err := json.Unmarshal(finalResp.Body, &finalResult); err != nil { + return nil, fmt.Errorf("failed to parse final response: %w", err) + } + + if finalResult.Code != 0 { + return nil, fmt.Errorf("failed to list files: %s", finalResult.Msg) + } + + // Find the file + var fileID string + for _, file := range finalResult.Data.Files { + if file.Name == fileName { + fileID = file.ID + break + } + } + + if fileID == "" { + return nil, fmt.Errorf("file '%s' not found", fileName) + } + + // Step 6: Download the file content + // First get file info to get the download URL + contentResp, err := p.httpClient.Request("GET", fmt.Sprintf("/files/%s", fileID), true, "auto", nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to get file info: %w", err) + } + + // For now, 
return a placeholder - actual file download may need storage access + // The file content is stored in the storage backend + return contentResp.Body, nil +} + +// listHubs lists all skills spaces +func (p *SkillProvider) listSpaces(ctx stdctx.Context, opts *ListOptions) (*Result, error) { + resp, err := p.httpClient.Request("GET", "/skills/spaces", true, "auto", nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to list hubs: %w", err) + } + + var result struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Spaces []struct { + ID string `json:"id"` + Name string `json:"name"` + Description string `json:"description"` + } `json:"spaces"` + } `json:"data"` + } + + if err := json.Unmarshal(resp.Body, &result); err != nil { + return nil, fmt.Errorf("failed to parse hubs response: %w", err) + } + + if result.Code != 0 { + return nil, fmt.Errorf("failed to list hubs: %s", result.Msg) + } + + nodes := make([]*Node, 0, len(result.Data.Spaces)) + for _, space := range result.Data.Spaces { + nodes = append(nodes, &Node{ + Name: space.Name, + Type: NodeTypeDirectory, + Path: fmt.Sprintf("skills/%s", space.Name), + Metadata: map[string]interface{}{ + "id": space.ID, + "description": space.Description, + }, + }) + } + + return &Result{ + Nodes: nodes, + Total: len(nodes), + }, nil +} + +// listSkillsInSpace lists skills in a specific space +// First tries search API (supports pagination & sorting), falls back to file system if search returns empty +func (p *SkillProvider) listSkillsInSpace(ctx stdctx.Context, spaceName string, opts *ListOptions) (*Result, error) { + // Get space UUID for search API + spaceUUID, err := p.getSpaceUUIDByName(ctx, spaceName) + if err != nil { + return nil, fmt.Errorf("space '%s' not found: %w", spaceName, err) + } + + // Set default limit to 10 if not specified + limit := opts.Limit + if limit <= 0 { + limit = 10 + } + + // Try search API first (supports pagination, sorting, and large collections) + payload := 
map[string]interface{}{ + "query": "", // Empty query = list all (match_all) + "space_id": spaceUUID, + "page": 1, + "page_size": limit, + "sort_by": opts.SortBy, + "sort_order": opts.SortOrder, + } + + logger.Debug("Listing skills via search API", zap.String("space", spaceName), zap.String("spaceUUID", spaceUUID), zap.Int("limit", limit)) + + resp, err := p.httpClient.Request("POST", "/skills/search", true, "auto", nil, payload) + if err == nil { + var result struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Skills []struct { + SkillID string `json:"skill_id"` + Name string `json:"name"` + Description string `json:"description"` + Tags []string `json:"tags"` + Score float64 `json:"score"` + CreateTime int64 `json:"create_time,omitempty"` + UpdateTime int64 `json:"update_time,omitempty"` + } `json:"skills"` + Total int64 `json:"total"` + } `json:"data"` + } + + if err := json.Unmarshal(resp.Body, &result); err == nil && result.Code == 0 { + logger.Debug("Search API response", zap.Int("skills_count", len(result.Data.Skills)), zap.Int64("total", result.Data.Total)) + // If search returned results, use them + if len(result.Data.Skills) > 0 { + nodes := make([]*Node, 0, len(result.Data.Skills)) + for _, skill := range result.Data.Skills { + updatedAt := time.UnixMilli(skill.UpdateTime) + if skill.UpdateTime == 0 { + updatedAt = time.UnixMilli(skill.CreateTime) + } + nodes = append(nodes, &Node{ + Name: skill.Name, + Type: NodeTypeDirectory, + Path: fmt.Sprintf("skills/%s/%s", spaceName, skill.Name), + UpdatedAt: updatedAt, + Metadata: map[string]interface{}{ + "id": skill.SkillID, + "tags": skill.Tags, + "score": skill.Score, + "description": skill.Description, + }, + }) + } + logger.Info("Listed skills via SEARCH", zap.String("space", spaceName), zap.Int("count", len(nodes)), zap.Int64("total", result.Data.Total)) + return &Result{ + Nodes: nodes, + Total: int(result.Data.Total), + HasMore: int(result.Data.Total) > limit, + NextOffset: 
limit, + }, nil + } + // Search returned empty result, fall through to file system + logger.Debug("Search returned empty result, falling back to file system") + } else { + logger.Debug("Search API error", zap.Error(err), zap.Int("code", result.Code), zap.String("msg", result.Msg)) + } + } else { + logger.Debug("Search request failed", zap.Error(err)) + } + + // Fall back to file system listing (for skills not yet indexed) + logger.Info("Listing skills via FILE SYSTEM (search unavailable)", zap.String("space", spaceName)) + return p.listSkillsInSpaceFromFileSystem(ctx, spaceName, opts) +} + +// listSkillsInSpaceFromFileSystem lists skills from file system (fallback when search returns empty) +func (p *SkillProvider) listSkillsInSpaceFromFileSystem(ctx stdctx.Context, spaceName string, opts *ListOptions) (*Result, error) { + // Get the skills space folder ID from file system + skillsFolderID, err := p.getSkillsFolderID(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get skills folder: %w", err) + } + logger.Debug("Got skills folder ID", zap.String("skillsFolderID", skillsFolderID)) + + // Find the space folder + spaceFolderID, err := p.findFolderID(ctx, skillsFolderID, spaceName) + if err != nil { + return nil, fmt.Errorf("failed to find space folder: %w", err) + } + logger.Debug("Got space folder ID", zap.String("spaceName", spaceName), zap.String("spaceFolderID", spaceFolderID)) + + // List all subfolders in the space folder (each subfolder is a skill) + skillsResp, err := p.httpClient.Request("GET", fmt.Sprintf("/files?parent_id=%s", spaceFolderID), true, "auto", nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to list skills: %w", err) + } + + var skillsResult struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Files []struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + UpdateTime int64 `json:"update_time"` + } `json:"files"` + } `json:"data"` + } + + if err := 
json.Unmarshal(skillsResp.Body, &skillsResult); err != nil { + return nil, fmt.Errorf("failed to parse skills response: %w", err) + } + + if skillsResult.Code != 0 { + return nil, fmt.Errorf("failed to list skills: %s", skillsResult.Msg) + } + logger.Debug("File system list response", zap.Int("files_count", len(skillsResult.Data.Files))) + + // Convert folders to nodes + nodes := make([]*Node, 0) + for _, file := range skillsResult.Data.Files { + // Only include folders (skill directories) + if file.Type == "folder" { + nodes = append(nodes, &Node{ + Name: file.Name, + Type: NodeTypeDirectory, + Path: fmt.Sprintf("skills/%s/%s", spaceName, file.Name), + UpdatedAt: time.UnixMilli(file.UpdateTime), + Metadata: map[string]interface{}{ + "id": file.ID, + }, + }) + } + } + + // Apply limit + limit := opts.Limit + if limit <= 0 { + limit = 10 + } + total := len(nodes) + if len(nodes) > limit { + nodes = nodes[:limit] + } + + logger.Info("Listed skills via FILE SYSTEM", zap.String("space", spaceName), zap.Int("count", len(nodes)), zap.Int("total", total)) + + return &Result{ + Nodes: nodes, + Total: total, + HasMore: total > limit, + NextOffset: limit, + }, nil +} + +// getSkillsFolderID gets the ID of the 'skills' folder +func (p *SkillProvider) getSkillsFolderID(ctx stdctx.Context) (string, error) { + resp, err := p.httpClient.Request("GET", "/files", true, "auto", nil, nil) + if err != nil { + return "", fmt.Errorf("failed to list root folders: %w", err) + } + + var result struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Files []struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + } `json:"files"` + } `json:"data"` + } + + if err := json.Unmarshal(resp.Body, &result); err != nil { + return "", fmt.Errorf("failed to parse response: %w", err) + } + + if result.Code != 0 { + return "", fmt.Errorf("failed to list folders: %s", result.Msg) + } + + for _, file := range result.Data.Files { + if file.Name 
== "skills" && file.Type == "folder" { + return file.ID, nil + } + } + + return "", fmt.Errorf("skills folder not found") +} + +// findFolderID finds a folder by name under a parent folder +func (p *SkillProvider) findFolderID(ctx stdctx.Context, parentID, folderName string) (string, error) { + resp, err := p.httpClient.Request("GET", fmt.Sprintf("/files?parent_id=%s", parentID), true, "auto", nil, nil) + if err != nil { + return "", fmt.Errorf("failed to list folders: %w", err) + } + + var result struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Files []struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + } `json:"files"` + } `json:"data"` + } + + if err := json.Unmarshal(resp.Body, &result); err != nil { + return "", fmt.Errorf("failed to parse response: %w", err) + } + + if result.Code != 0 { + return "", fmt.Errorf("failed to list folders: %s", result.Msg) + } + + for _, file := range result.Data.Files { + if file.Name == folderName && file.Type == "folder" { + return file.ID, nil + } + } + + return "", fmt.Errorf("folder '%s' not found", folderName) +} + +// getSkillFolderID gets the folder ID of a skill in a space. +// First tries the search API (which may have cached folder_id from indexing), +// then falls back to direct file system traversal. 
+func (p *SkillProvider) getSkillFolderID(ctx stdctx.Context, spaceID, skillName string) (string, error) { + // Try search API first + spaceUUID, err := p.getSpaceUUIDByName(ctx, spaceID) + if err == nil { + payload := map[string]interface{}{ + "query": skillName, + "space_id": spaceUUID, + "page": 1, + "page_size": 10, + } + resp, err := p.httpClient.Request("POST", "/skills/search", true, "auto", nil, payload) + if err == nil { + var searchResult struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Skills []struct { + SkillID string `json:"skill_id"` + FolderID string `json:"folder_id"` + Name string `json:"name"` + } `json:"skills"` + } `json:"data"` + } + if err := json.Unmarshal(resp.Body, &searchResult); err == nil && searchResult.Code == 0 { + for _, skill := range searchResult.Data.Skills { + if skill.Name == skillName { + return skill.FolderID, nil + } + } + } + } + } + + // Fallback: traverse file system directly + skillsFolderID, err := p.getSkillsFolderID(ctx) + if err != nil { + return "", err + } + spaceFolderID, err := p.findFolderID(ctx, skillsFolderID, spaceID) + if err != nil { + return "", err + } + return p.findFolderID(ctx, spaceFolderID, skillName) +} + +// listSkillVersions lists versions of a skill +func (p *SkillProvider) listSkillVersions(ctx stdctx.Context, spaceID, skillName string, opts *ListOptions) (*Result, error) { + skillFolderID, err := p.getSkillFolderID(ctx, spaceID, skillName) + if err != nil { + return nil, fmt.Errorf("skill '%s' not found in space '%s'", skillName, spaceID) + } + + // List the skill folder to get versions (subdirectories) + filesResp, err := p.httpClient.Request("GET", fmt.Sprintf("/files?parent_id=%s", skillFolderID), true, "auto", nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to list versions: %w", err) + } + + var filesResult struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Files []struct { + ID string `json:"id"` + Name string 
`json:"name"` + Type string `json:"type"` + UpdateTime int64 `json:"update_time"` + } `json:"files"` + } `json:"data"` + } + + if err := json.Unmarshal(filesResp.Body, &filesResult); err != nil { + return nil, fmt.Errorf("failed to parse files response: %w", err) + } + + if filesResult.Code != 0 { + return nil, fmt.Errorf("failed to list files: %s", filesResult.Msg) + } + + // Convert version folders to nodes + nodes := make([]*Node, 0) + for _, file := range filesResult.Data.Files { + // Only include folders (version directories) + if file.Type == "folder" { + nodes = append(nodes, &Node{ + Name: file.Name, + Type: NodeTypeDirectory, + Path: fmt.Sprintf("skills/%s/%s/%s", spaceID, skillName, file.Name), + UpdatedAt: time.UnixMilli(file.UpdateTime), + Metadata: map[string]interface{}{ + "id": file.ID, + }, + }) + } + } + + return &Result{ + Nodes: nodes, + Total: len(nodes), + }, nil +} + +// listSkillContent lists content of a specific skill version +func (p *SkillProvider) listSkillContent(ctx stdctx.Context, spaceID, skillName, version string, extraParts []string, opts *ListOptions) (*Result, error) { + // Skill content is stored in file system under skills/{space}/{skill}/{version}/ + // We need to traverse the file system to find the skill folder and list its contents + + // Get the skill folder ID (search API or file system fallback) + skillFolderID, err := p.getSkillFolderID(ctx, spaceID, skillName) + if err != nil { + return nil, fmt.Errorf("skill '%s' not found in space '%s'", skillName, spaceID) + } + + // List the version folder under the skill folder + filesResp, err := p.httpClient.Request("GET", fmt.Sprintf("/files?parent_id=%s", skillFolderID), true, "auto", nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to list skill versions: %w", err) + } + + var filesResult struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Files []struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` 
+ Size int64 `json:"size"` + UpdateTime int64 `json:"update_time"` + } `json:"files"` + } `json:"data"` + } + + if err := json.Unmarshal(filesResp.Body, &filesResult); err != nil { + return nil, fmt.Errorf("failed to parse files response: %w", err) + } + + if filesResult.Code != 0 { + return nil, fmt.Errorf("failed to list files: %s", filesResult.Msg) + } + + // Find the version folder + var versionFolderID string + for _, file := range filesResult.Data.Files { + if file.Name == version && file.Type == "folder" { + versionFolderID = file.ID + break + } + } + + if versionFolderID == "" { + return nil, fmt.Errorf("version '%s' not found for skill '%s'", version, skillName) + } + + // Step 4: If there are extra parts, navigate deeper + currentFolderID := versionFolderID + currentPath := fmt.Sprintf("skills/%s/%s/%s", spaceID, skillName, version) + + // Check if the last part is a file (for ls on a specific file) + var lastFile *struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Size int64 `json:"size"` + UpdateTime int64 `json:"update_time"` + } + + for i, part := range extraParts { + isLastPart := (i == len(extraParts)-1) + + // List current folder to find the next part + subResp, err := p.httpClient.Request("GET", fmt.Sprintf("/files?parent_id=%s", currentFolderID), true, "auto", nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to navigate path: %w", err) + } + + var subResult struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Files []struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Size int64 `json:"size"` + UpdateTime int64 `json:"update_time"` + } `json:"files"` + } `json:"data"` + } + + if err := json.Unmarshal(subResp.Body, &subResult); err != nil { + return nil, fmt.Errorf("failed to parse navigation response: %w", err) + } + + if subResult.Code != 0 { + return nil, fmt.Errorf("navigation failed: %s", subResult.Msg) + } + + found := 
false + for _, file := range subResult.Data.Files { + if file.Name == part { + if file.Type != "folder" { + // This is a file + if isLastPart { + // If it's the last part, remember the file for listing + lastFile = &file + found = true + break + } + // Not the last part - cannot navigate into a file + return nil, fmt.Errorf("'%s' is not a directory", part) + } + currentFolderID = file.ID + currentPath = currentPath + "/" + part + found = true + break + } + } + + if !found { + return nil, fmt.Errorf("path not found: %s", part) + } + + // If we found a file as the last part, return it + if lastFile != nil { + return &Result{ + Nodes: []*Node{{ + Name: lastFile.Name, + Type: NodeTypeFile, + Path: currentPath + "/" + lastFile.Name, + Metadata: map[string]interface{}{ + "id": lastFile.ID, + "size": lastFile.Size, + "update_time": lastFile.UpdateTime, + }, + }}, + Total: 1, + }, nil + } + } + + // Step 5: List the final folder contents + finalResp, err := p.httpClient.Request("GET", fmt.Sprintf("/files?parent_id=%s", currentFolderID), true, "auto", nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to list folder contents: %w", err) + } + + var finalResult struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Files []struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Size int64 `json:"size"` + UpdateTime int64 `json:"update_time"` + } `json:"files"` + Total int `json:"total"` + } `json:"data"` + } + + if err := json.Unmarshal(finalResp.Body, &finalResult); err != nil { + return nil, fmt.Errorf("failed to parse final response: %w", err) + } + + if finalResult.Code != 0 { + return nil, fmt.Errorf("failed to list contents: %s", finalResult.Msg) + } + + // Convert to nodes + nodes := make([]*Node, 0, len(finalResult.Data.Files)) + for _, file := range finalResult.Data.Files { + nodeType := NodeTypeFile + if file.Type == "folder" { + nodeType = NodeTypeDirectory + } + + nodes = append(nodes, &Node{ 
+ Name: file.Name, + Type: nodeType, + Path: currentPath + "/" + file.Name, + Size: file.Size, + UpdatedAt: time.UnixMilli(file.UpdateTime), + Metadata: map[string]interface{}{ + "id": file.ID, + }, + }) + } + + return &Result{ + Nodes: nodes, + Total: len(nodes), + }, nil +} + +// getSpaceUUIDByName gets space UUID by its name +func (p *SkillProvider) getSpaceUUIDByName(ctx stdctx.Context, spaceName string) (string, error) { + resp, err := p.httpClient.Request("GET", "/skills/spaces", true, "auto", nil, nil) + if err != nil { + return "", fmt.Errorf("failed to list hubs: %w", err) + } + + var result struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + Spaces []struct { + ID string `json:"id"` + Name string `json:"name"` + } `json:"spaces"` + } `json:"data"` + } + + if err := json.Unmarshal(resp.Body, &result); err != nil { + return "", fmt.Errorf("failed to parse hubs response: %w", err) + } + + if result.Code != 0 { + return "", fmt.Errorf("failed to list hubs: %s", result.Msg) + } + + for _, space := range result.Data.Spaces { + if space.Name == spaceName { + return space.ID, nil + } + } + + return "", fmt.Errorf("space with name '%s' not found", spaceName) +} + +// DeleteSkill deletes a skill and its index +func (p *SkillProvider) DeleteSkill(ctx stdctx.Context, spaceID, skillName string) error { + // Get space UUID + spaceUUID, err := p.getSpaceUUIDByName(ctx, spaceID) + if err != nil { + return err + } + + // Call delete skill index API + // API format: DELETE /skills/index?skill_id={skill_name}&space_id={space_id} + resp, err := p.httpClient.Request("DELETE", + fmt.Sprintf("/skills/index?skill_id=%s&space_id=%s", + url.QueryEscape(skillName), + url.QueryEscape(spaceUUID)), + true, "auto", nil, nil) + if err != nil { + return fmt.Errorf("delete index request failed: %w", err) + } + + var result struct { + Code int `json:"code"` + Msg string `json:"message"` + } + + if err := json.Unmarshal(resp.Body, &result); err != nil { + 
return fmt.Errorf("failed to parse response: %w", err) + } + + if result.Code != 0 { + if result.Msg != "" { + return fmt.Errorf("delete failed: %s", result.Msg) + } + return fmt.Errorf("delete failed with code: %d", result.Code) + } + + return nil +} + +// IndexSkill indexes a skill for search +func (p *SkillProvider) IndexSkill(ctx stdctx.Context, spaceID string, skillInfo map[string]interface{}) error { + // Get space UUID + spaceUUID, err := p.getSpaceUUIDByName(ctx, spaceID) + if err != nil { + return err + } + + // Get default embedding model + embdID, _ := p.getDefaultEmbdID(ctx, spaceUUID) + + // Build index request + payload := map[string]interface{}{ + "skills": []interface{}{skillInfo}, + "space_id": spaceUUID, + "embd_id": embdID, + } + + // Call index API + resp, err := p.httpClient.Request("POST", "/skills/index", true, "auto", nil, payload) + if err != nil { + return fmt.Errorf("index request failed: %w", err) + } + + var result struct { + Code int `json:"code"` + Msg string `json:"msg"` + Data struct { + IndexedCount int `json:"indexed_count"` + } `json:"data"` + } + + if err := json.Unmarshal(resp.Body, &result); err != nil { + return fmt.Errorf("failed to parse index response: %w", err) + } + + if result.Code != 0 { + return fmt.Errorf("index failed: %s", result.Msg) + } + + return nil +} + +// getDefaultEmbdID gets the default embedding model ID from skill search config +func (p *SkillProvider) getDefaultEmbdID(ctx stdctx.Context, spaceID string) (string, error) { + resp, err := p.httpClient.Request("GET", + fmt.Sprintf("/skills/config?embd_id=&space_id=%s", url.QueryEscape(spaceID)), + true, "web", nil, nil) + if err != nil { + return "", nil + } + + var result struct { + Code int `json:"code"` + Msg string `json:"message"` + Data struct { + EmbdID string `json:"embd_id"` + } `json:"data"` + } + + if err := json.Unmarshal(resp.Body, &result); err != nil { + return "", nil + } + + if result.Code != 0 { + return "", nil + } + + return 
result.Data.EmbdID, nil +} + +// ============================================================================ +// Skill Upload Functions +// ============================================================================ + +// UploadSkill uploads a skill directory to the server +// nameOverride: user-specified skill name (overrides SKILL.md metadata) +func (p *SkillProvider) UploadSkill(ctx stdctx.Context, skillPath string, versionOverride string, spaceID string, fileProvider Provider, nameOverride string) error { + spaceID = normalizeSpaceID(spaceID) + + // 1. Validate the skill directory + result, files, err := ValidateSkillDirectory(skillPath, versionOverride, nameOverride) + if err != nil { + return fmt.Errorf("validation error: %w", err) + } + if !result.Valid { + return fmt.Errorf("validation failed: %s", GetValidationErrorMessage(result)) + } + + // Get skill name from validation result (SKILL.md metadata or user-specified) + // Fallback to directory name if not specified + skillName := result.Name + if skillName == "" { + skillName = filepath.Base(skillPath) + skillName = normalizeSkillName(skillName) + } + + // Use provided version or default + version := result.Version + if version == "" { + version = "1.0.0" + } + + // 2. Ensure skills space exists + spaceFolderID, err := p.ensureSkillsSpaceFolder(ctx, spaceID, fileProvider) + if err != nil { + return fmt.Errorf("failed to ensure skills space: %w", err) + } + + // 3. Get or create skill folder + skillFolderID, err := p.getOrCreateSkillFolder(ctx, spaceID, spaceFolderID, skillName, fileProvider) + if err != nil { + return err + } + + // 4. Check if version already exists + exists, err := p.versionExists(ctx, spaceID, skillName, version, fileProvider) + if err != nil { + return fmt.Errorf("failed to check version: %w", err) + } + if exists { + return &SkillConflictError{Type: "version", Name: skillName, Version: version} + } + + // 5. 
Create version folder + versionFolderID, err := p.createFolder(ctx, skillFolderID, version) + if err != nil { + return fmt.Errorf("failed to create version folder: %w", err) + } + + // 6. Upload all files + for _, file := range files { + sanitized := sanitizeRelPath(file.Path) + if sanitized == "" || isMacJunkPath(sanitized) || shouldIgnore(sanitized, defaultIgnorePatterns) { + continue + } + + err = p.uploadFile(ctx, file, versionFolderID) + if err != nil { + return fmt.Errorf("failed to upload file %s: %w", file.Path, err) + } + } + + // 7. Index the skill for search + if err := p.indexSkillFromUpload(ctx, result, files, spaceID, skillFolderID); err != nil { + return fmt.Errorf("failed to index skill: %w", err) + } + + return nil +} + +// ensureSkillsSpaceFolder ensures the 'skills/' folder exists +func (p *SkillProvider) ensureSkillsSpaceFolder(ctx stdctx.Context, spaceID string, fileProvider Provider) (string, error) { + skillsFolderID, err := p.ensureSkillsFolder(ctx, fileProvider) + if err != nil { + return "", err + } + + result, err := fileProvider.List(ctx, "skills", nil) + if err != nil { + return "", err + } + + for _, node := range result.Nodes { + if node.Type == NodeTypeDirectory && node.Name == spaceID { + return GetString(node.Metadata["id"]), nil + } + } + + return p.createFolder(ctx, skillsFolderID, spaceID) +} + +// ensureSkillsFolder ensures the 'skills' folder exists +func (p *SkillProvider) ensureSkillsFolder(ctx stdctx.Context, fileProvider Provider) (string, error) { + result, err := fileProvider.List(ctx, "", nil) + if err != nil { + return "", err + } + + for _, node := range result.Nodes { + if node.Type == NodeTypeDirectory && node.Name == "skills" { + return GetString(node.Metadata["id"]), nil + } + } + + return p.createFolder(ctx, "", "skills") +} + +// getOrCreateSkillFolder gets existing skill folder or creates new one +func (p *SkillProvider) getOrCreateSkillFolder(ctx stdctx.Context, spaceID, parentID, skillName string, 
fileProvider Provider) (string, error) { + result, err := fileProvider.List(ctx, fmt.Sprintf("skills/%s", spaceID), nil) + if err != nil { + return "", err + } + + for _, node := range result.Nodes { + if node.Type == NodeTypeDirectory && node.Name == skillName { + return GetString(node.Metadata["id"]), nil + } + } + + return p.createFolder(ctx, parentID, skillName) +} + +// versionExists checks if a version already exists +func (p *SkillProvider) versionExists(ctx stdctx.Context, spaceID, skillName, version string, fileProvider Provider) (bool, error) { + result, err := fileProvider.List(ctx, fmt.Sprintf("skills/%s/%s", spaceID, skillName), nil) + if err != nil { + return false, err + } + + for _, node := range result.Nodes { + if node.Type == NodeTypeDirectory && node.Name == version { + return true, nil + } + } + return false, nil +} + +// createFolder creates a new folder and returns its ID +func (p *SkillProvider) createFolder(ctx stdctx.Context, parentID, name string) (string, error) { + payload := map[string]interface{}{ + "name": name, + "type": "folder", + } + if parentID != "" { + payload["parent_id"] = parentID + } + + resp, err := p.httpClient.Request("POST", "/files", true, "auto", nil, payload) + if err != nil { + return "", err + } + + var result struct { + Code int `json:"code"` + Data struct { + ID string `json:"id"` + } `json:"data"` + } + + if err := json.Unmarshal(resp.Body, &result); err != nil { + return "", err + } + + if result.Code != 0 { + return "", fmt.Errorf("server returned error code: %d", result.Code) + } + + return result.Data.ID, nil +} + +// uploadFile uploads a single file using multipart form +func (p *SkillProvider) uploadFile(ctx stdctx.Context, file *SkillFile, parentID string) error { + var buf bytes.Buffer + writer := multipart.NewWriter(&buf) + + if parentID != "" { + writer.WriteField("parent_id", parentID) + } + + part, err := writer.CreateFormFile("file", file.Path) + if err != nil { + return err + } + if _, err := 
part.Write(file.Content); err != nil { + return err + } + writer.Close() + + return p.httpClient.UploadMultipart("/files", writer.FormDataContentType(), &buf) +} + +// indexSkillFromUpload indexes the skill after upload +func (p *SkillProvider) indexSkillFromUpload(ctx stdctx.Context, result *SkillValidationResult, files []*SkillFile, spaceID string, skillFolderID string) error { + var contentBuilder strings.Builder + for _, file := range files { + if !isTextFile(file.Path, "") { + continue + } + if len(file.Content) > MaxSkillFileSize { + continue + } + sanitized := sanitizeRelPath(file.Path) + if sanitized == "" || isMacJunkPath(sanitized) || shouldIgnore(sanitized, defaultIgnorePatterns) { + continue + } + contentBuilder.WriteString(fmt.Sprintf("\n=== %s ===\n", file.Path)) + contentBuilder.Write(file.Content) + } + content := contentBuilder.String() + + // Use skill name as ID (without version suffix) + // This ensures all versions of the same skill share the same index document + skillID := result.Name + + skillInfo := map[string]interface{}{ + "id": skillID, + "folder_id": skillFolderID, + "name": result.Name, + "description": result.Description, + "tags": result.Tags, + "content": content, + "version": result.Version, + } + + return p.IndexSkill(ctx, spaceID, skillInfo) +} + +// ============================================================================ +// Validation Functions +// ============================================================================ + +// ValidateSkillDirectory validates a skill directory +// nameOverride: user-specified skill name (overrides SKILL.md metadata) +func ValidateSkillDirectory(skillPath string, versionOverride string, nameOverride string) (*SkillValidationResult, []*SkillFile, error) { + info, err := os.Stat(skillPath) + if err != nil { + return nil, nil, fmt.Errorf("cannot access directory %s: %w", skillPath, err) + } + if !info.IsDir() { + return nil, nil, fmt.Errorf("%s is not a directory", skillPath) + } + + files, 
err := readSkillFiles(skillPath) + if err != nil { + return nil, nil, err + } + + if len(files) == 0 { + return &SkillValidationResult{Valid: false, Error: "no_files"}, nil, nil + } + + var totalSize int64 + for _, f := range files { + totalSize += f.Size + } + if totalSize > MaxSkillTotalSize { + return &SkillValidationResult{Valid: false, Error: "total_size_exceeded"}, nil, nil + } + + var validFiles []*SkillFile + for _, f := range files { + if f.Size > MaxSkillFileSize { + return &SkillValidationResult{ + Valid: false, + Error: "file_too_large", + Details: f.Path, + }, nil, nil + } + + sanitized := sanitizeRelPath(f.Path) + if sanitized == "" { + return &SkillValidationResult{Valid: false, Error: "invalid_path"}, nil, nil + } + + if isMacJunkPath(sanitized) || shouldIgnore(sanitized, defaultIgnorePatterns) { + continue + } + + validFiles = append(validFiles, f) + } + + if len(validFiles) == 0 { + return &SkillValidationResult{Valid: false, Error: "no_valid_files"}, nil, nil + } + + var skillMdFile *SkillFile + for _, f := range validFiles { + normalized := strings.ToLower(f.Path) + if normalized == "skill.md" || strings.HasSuffix(normalized, "/skill.md") { + skillMdFile = f + break + } + } + + if skillMdFile == nil { + return &SkillValidationResult{Valid: false, Error: "missing_skill_md"}, nil, nil + } + + metadata, err := parseFrontmatter(string(skillMdFile.Content)) + if err != nil { + return &SkillValidationResult{ + Valid: false, + Error: "invalid_frontmatter", + Details: err.Error(), + }, nil, nil + } + + if metadata.Name == "" { + return &SkillValidationResult{Valid: false, Error: "missing_name"}, nil, nil + } + + if !isValidSkillName(metadata.Name) { + return &SkillValidationResult{ + Valid: false, + Error: "invalid_name_format", + Details: metadata.Name, + }, nil, nil + } + + version := versionOverride + if version == "" { + version = metadata.Version + } + // Set default version if not provided + if version == "" { + version = "1.0.0" + } + + if 
!isValidSemver(version) { + return &SkillValidationResult{ + Valid: false, + Error: "invalid_version", + Details: version, + }, nil, nil + } + + for _, f := range validFiles { + if !isTextFile(f.Path, "") { + return &SkillValidationResult{ + Valid: false, + Error: "invalid_file_type", + Details: f.Path, + }, nil, nil + } + } + + // Use user-specified name if provided, otherwise use metadata.Name from SKILL.md + skillName := metadata.Name + if nameOverride != "" { + skillName = nameOverride + } + + return &SkillValidationResult{ + Valid: true, + Name: skillName, + Description: metadata.Description, + Version: version, + Tags: metadata.Tags, + }, validFiles, nil +} + +// readSkillFiles recursively reads all files in the skill directory +func readSkillFiles(skillPath string) ([]*SkillFile, error) { + var files []*SkillFile + + err := filepath.Walk(skillPath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if !info.IsDir() { + relPath, err := filepath.Rel(skillPath, path) + if err != nil { + return err + } + + relPath = filepath.ToSlash(relPath) + + content, err := os.ReadFile(path) + if err != nil { + return fmt.Errorf("failed to read file %s: %w", path, err) + } + + files = append(files, &SkillFile{ + Path: relPath, + Content: content, + Size: info.Size(), + }) + } + + return nil + }) + + return files, err +} + +// parseFrontmatter extracts YAML frontmatter from markdown content +func parseFrontmatter(content string) (*SkillMetadata, error) { + lines := strings.Split(content, "\n") + + if len(lines) == 0 || strings.TrimSpace(lines[0]) != "---" { + return nil, fmt.Errorf("missing frontmatter start") + } + + var endIndex int + found := false + for i := 1; i < len(lines); i++ { + if strings.TrimSpace(lines[i]) == "---" { + endIndex = i + found = true + break + } + } + + if !found { + return nil, fmt.Errorf("missing frontmatter end") + } + + frontmatter := strings.Join(lines[1:endIndex], "\n") + var metadata SkillMetadata + 
if err := yaml.Unmarshal([]byte(frontmatter), &metadata); err != nil { + return nil, fmt.Errorf("failed to parse frontmatter: %w", err) + } + + return &metadata, nil +} + +// isValidSkillName checks if skill name follows slug format +func isValidSkillName(name string) bool { + matched, _ := regexp.MatchString(`^[a-z0-9][a-z0-9_-]*$`, name) + return matched +} + +// isValidSemver checks basic semver format +func isValidSemver(version string) bool { + matched, _ := regexp.MatchString(`^\d+\.\d+\.\d+`, version) + return matched +} + +// isTextFile checks if file is text-based +func isTextFile(filePath, contentType string) bool { + if contentType != "" { + normalized := strings.ToLower(strings.TrimSpace(strings.Split(contentType, ";")[0])) + if strings.HasPrefix(normalized, "text/") { + return true + } + textContentTypes := map[string]bool{ + "application/json": true, "application/xml": true, "application/yaml": true, + "application/x-yaml": true, "application/toml": true, "application/javascript": true, + "application/typescript": true, "application/markdown": true, "image/svg+xml": true, + } + if textContentTypes[normalized] { + return true + } + } + + ext := strings.ToLower(filepath.Ext(filePath)) + if ext != "" { + ext = ext[1:] + } + return textFileExtensions[ext] +} + +// sanitizeRelPath sanitizes relative path +func sanitizeRelPath(path string) string { + normalized := regexp.MustCompile(`^\./+`).ReplaceAllString(path, "") + normalized = strings.TrimLeft(normalized, "/") + + if normalized == "" || strings.HasSuffix(normalized, "/") { + return "" + } + if strings.Contains(normalized, "..") || strings.Contains(normalized, "\\") { + return "" + } + return normalized +} + +// isMacJunkPath checks if path is Mac junk file +func isMacJunkPath(path string) bool { + normalized := strings.ToLower(path) + if normalized == ".ds_store" || strings.HasSuffix(normalized, "/.ds_store") { + return true + } + if strings.HasPrefix(normalized, "__macosx/") || normalized == 
"__macosx" { + return true + } + if strings.HasPrefix(normalized, "._") || strings.Contains(normalized, "/._") { + return true + } + return false +} + +// shouldIgnore checks if path should be ignored +func shouldIgnore(filePath string, patterns []string) bool { + normalizedPath := strings.ToLower(filePath) + for _, pattern := range patterns { + trimmedPattern := strings.TrimSpace(pattern) + if trimmedPattern == "" || strings.HasPrefix(trimmedPattern, "#") { + continue + } + if matchPattern(normalizedPath, strings.ToLower(trimmedPattern)) { + return true + } + } + return false +} + +// matchPattern matches path against ignore pattern +func matchPattern(filePath, pattern string) bool { + if strings.HasSuffix(pattern, "/") { + dirPattern := strings.TrimSuffix(pattern, "/") + return strings.HasPrefix(filePath, dirPattern+"/") || filePath == dirPattern + } + + if filePath == pattern { + return true + } + + regex := globToRegex(pattern) + matched, _ := regexp.MatchString(regex, filePath) + return matched +} + +// globToRegex converts glob pattern to regex +func globToRegex(pattern string) string { + var regex strings.Builder + regex.WriteString("^") + + for i := 0; i < len(pattern); i++ { + c := pattern[i] + + switch c { + case '*': + if i+1 < len(pattern) && pattern[i+1] == '*' { + regex.WriteString(".*") + i++ + } else { + regex.WriteString("[^/]*") + } + case '?': + regex.WriteString("[^/]") + case '.': + regex.WriteString("\\.") + case '\\', '/', '$', '^', '+', '(', ')', '[', ']', '{', '}': + regex.WriteString("\\") + regex.WriteByte(c) + default: + regex.WriteByte(c) + } + } + + regex.WriteString("$") + return regex.String() +} + +// normalizeSpaceID normalizes space ID +func normalizeSpaceID(spaceID string) string { + spaceID = strings.TrimSpace(spaceID) + if spaceID == "" { + return DefaultSpaceID + } + return spaceID +} + +// normalizeSkillName normalizes skill name +func normalizeSkillName(name string) string { + name = strings.ToLower(name) + name = 
strings.ReplaceAll(name, " ", "-") + name = strings.ReplaceAll(name, "_", "-") + re := regexp.MustCompile(`[^a-z0-9-]+`) + name = re.ReplaceAllString(name, "-") + re = regexp.MustCompile(`-+`) + name = re.ReplaceAllString(name, "-") + name = strings.Trim(name, "-") + return name +} + +// GetValidationErrorMessage returns human-readable error message +func GetValidationErrorMessage(result *SkillValidationResult) string { + switch result.Error { + case "no_files": + return "No files found in the skill directory" + case "total_size_exceeded": + return fmt.Sprintf("Total size exceeds limit of %d MB", MaxSkillTotalSize/(1024*1024)) + case "file_too_large": + return fmt.Sprintf("File too large: %s (max %d MB per file)", result.Details, MaxSkillFileSize/(1024*1024)) + case "invalid_path": + return "Invalid file path detected" + case "missing_skill_md": + return "SKILL.md not found in the skill directory" + case "invalid_frontmatter": + if result.Details != "" { + return fmt.Sprintf("Invalid SKILL.md frontmatter: %s", result.Details) + } + return "Invalid SKILL.md frontmatter format" + case "missing_name": + return "SKILL.md missing required field: name" + case "invalid_name_format": + return fmt.Sprintf("Invalid skill name format: %s (must be lowercase, alphanumeric with hyphens/underscores)", result.Details) + case "invalid_version": + return fmt.Sprintf("Invalid version format: %s (must be semver like 1.0.0)", result.Details) + case "invalid_file_type": + return fmt.Sprintf("Invalid file type: %s (only text files allowed)", result.Details) + case "no_valid_files": + return "No valid files found after filtering" + default: + return fmt.Sprintf("Validation failed: %s", result.Error) + } +} + +// GetString safely extracts a string value from interface{} +func GetString(v interface{}) string { + if v == nil { + return "" + } + switch s := v.(type) { + case string: + return s + default: + return fmt.Sprintf("%v", v) + } +} + +// 
============================================================================ +// Skill Uploader +// ============================================================================ + +// SkillUploader handles uploading skills to the server +type SkillUploader struct { + client HTTPClientInterface + fileProvider *FileProvider + skillProvider Provider + force bool // Force mode: overwrite existing versions +} + +// NewSkillUploader creates a new uploader +func NewSkillUploader(client HTTPClientInterface, fileProvider *FileProvider) *SkillUploader { + return &SkillUploader{ + client: client, + fileProvider: fileProvider, + } +} + +// SetSkillProvider sets the skill provider +func (u *SkillUploader) SetSkillProvider(provider Provider) { + u.skillProvider = provider +} + +// SetForce sets the force mode (overwrite existing versions) +func (u *SkillUploader) SetForce(force bool) { + u.force = force +} + +// parseSpaceFromPath extracts space ID from a path like "skills/space1" or "skills" +// Returns "default" for "skills" (no space specified) +func parseSpaceFromPath(path string) string { + path = strings.TrimSpace(path) + if path == "" || path == "skills" { + return DefaultSpaceID + } + // Handle paths like "skills/space1" or "hub1" + if strings.HasPrefix(path, "skills/") { + path = strings.TrimPrefix(path, "skills/") + } + if path == "" { + return DefaultSpaceID + } + return normalizeSpaceID(path) +} + +// UploadSkill uploads a skill directory to the server +// nameOverride: user-specified skill name (overrides SKILL.md metadata) +func (u *SkillUploader) UploadSkill(ctx stdctx.Context, skillPath string, versionOverride string, hubPath string, nameOverride string) error { + // Parse space from path + spaceID := parseSpaceFromPath(hubPath) + + // 1. 
Validate the skill directory + fmt.Printf("Validating skill at %s...\n", skillPath) + result, files, err := ValidateSkillDirectory(skillPath, versionOverride, nameOverride) + if err != nil { + return fmt.Errorf("validation error: %w", err) + } + if !result.Valid { + return fmt.Errorf("validation failed: %s", GetValidationErrorMessage(result)) + } + + // Get skill name from validation result (SKILL.md metadata or user-specified) + // Fallback to directory name if not specified + skillName := result.Name + if skillName == "" { + skillName = filepath.Base(skillPath) + skillName = normalizeSkillName(skillName) + } + + // Use provided version or default + version := result.Version + if version == "" { + version = "1.0.0" + } + + fmt.Printf("✓ Skill '%s' (v%s) is valid\n", skillName, version) + + // 2. Ensure skills space exists + fmt.Printf("Checking skills space '%s'...\n", spaceID) + spaceFolderID, err := u.ensureSkillsSpaceFolder(ctx, spaceID) + if err != nil { + return fmt.Errorf("failed to ensure skills space: %w", err) + } + + // 3. Get or create skill folder + fmt.Printf("Checking skill '%s'...\n", skillName) + skillFolderID, err := u.getOrCreateSkillFolder(ctx, spaceID, spaceFolderID, skillName) + if err != nil { + return err + } + + // 4. 
Check if version already exists + fmt.Printf("Checking version '%s'...\n", version) + exists, err := u.versionExists(ctx, spaceID, skillName, version) + if err != nil { + return fmt.Errorf("failed to check version: %w", err) + } + if exists { + if u.force { + // Force mode: delete existing version folder + fmt.Printf("Force mode: removing existing version '%s'...\n", version) + versionPath := fmt.Sprintf("skills/%s/%s/%s", spaceID, skillName, version) + if err := u.deleteVersionFolder(ctx, versionPath); err != nil { + return fmt.Errorf("failed to remove existing version: %w", err) + } + fmt.Printf("✓ Existing version '%s' removed\n", version) + } else { + return &SkillConflictError{Type: "version", Name: skillName, Version: version} + } + } + + // 5. Create version folder + fmt.Println("Creating version folder...") + versionFolderID, err := u.createFolder(ctx, skillFolderID, version) + if err != nil { + return fmt.Errorf("failed to create version folder: %w", err) + } + + // 6. Upload all files + fmt.Printf("Uploading %d files...\n", len(files)) + for _, file := range files { + sanitized := sanitizeRelPath(file.Path) + if sanitized == "" || isMacJunkPath(sanitized) || shouldIgnore(sanitized, defaultIgnorePatterns) { + continue + } + + err = u.uploadFile(ctx, file, versionFolderID) + if err != nil { + return fmt.Errorf("failed to upload file %s: %w", file.Path, err) + } + } + + fmt.Printf("✓ Successfully uploaded skill '%s' version %s\n", skillName, version) + + // 7. 
Index the skill for search + fmt.Println("Indexing skill for search...") + if err := u.indexSkill(ctx, result, files, spaceID, skillFolderID); err != nil { + fmt.Printf("⚠ Warning: Failed to index skill for search: %v\n", err) + } else { + fmt.Println("✓ Skill indexed successfully") + } + + return nil +} + +// ensureSkillsSpaceFolder ensures the 'skills/' folder exists +func (u *SkillUploader) ensureSkillsSpaceFolder(ctx stdctx.Context, spaceID string) (string, error) { + skillsFolderID, err := u.ensureSkillsFolder(ctx) + if err != nil { + return "", err + } + + result, err := u.fileProvider.List(ctx, "skills", nil) + if err != nil { + return "", err + } + + for _, node := range result.Nodes { + if node.Type == NodeTypeDirectory && node.Name == spaceID { + return GetString(node.Metadata["id"]), nil + } + } + + return u.createFolder(ctx, skillsFolderID, spaceID) +} + +// ensureSkillsFolder ensures the 'skills' folder exists +func (u *SkillUploader) ensureSkillsFolder(ctx stdctx.Context) (string, error) { + result, err := u.fileProvider.List(ctx, "", nil) + if err != nil { + return "", err + } + + for _, node := range result.Nodes { + if node.Type == NodeTypeDirectory && node.Name == "skills" { + return GetString(node.Metadata["id"]), nil + } + } + + return u.createFolder(ctx, "", "skills") +} + +// getOrCreateSkillFolder gets existing skill folder or creates new one +func (u *SkillUploader) getOrCreateSkillFolder(ctx stdctx.Context, spaceID, parentID, skillName string) (string, error) { + result, err := u.fileProvider.List(ctx, fmt.Sprintf("skills/%s", spaceID), nil) + if err != nil { + return "", err + } + + for _, node := range result.Nodes { + if node.Type == NodeTypeDirectory && node.Name == skillName { + return GetString(node.Metadata["id"]), nil + } + } + + return u.createFolder(ctx, parentID, skillName) +} + +// versionExists checks if a version already exists +func (u *SkillUploader) versionExists(ctx stdctx.Context, spaceID, skillName, version string) 
(bool, error) { + result, err := u.fileProvider.List(ctx, fmt.Sprintf("skills/%s/%s", spaceID, skillName), nil) + if err != nil { + return false, err + } + + for _, node := range result.Nodes { + if node.Type == NodeTypeDirectory && node.Name == version { + return true, nil + } + } + return false, nil +} + +// deleteVersionFolder deletes a version folder by path +func (u *SkillUploader) deleteVersionFolder(ctx stdctx.Context, versionPath string) error { + return u.fileProvider.DeleteFolderByPath(ctx, versionPath) +} + +// createFolder creates a new folder and returns its ID +func (u *SkillUploader) createFolder(ctx stdctx.Context, parentID, name string) (string, error) { + payload := map[string]interface{}{ + "name": name, + "type": "folder", + } + if parentID != "" { + payload["parent_id"] = parentID + } + + resp, err := u.client.Request("POST", "/files", true, "auto", nil, payload) + if err != nil { + return "", err + } + + var result struct { + Code int `json:"code"` + Data struct { + ID string `json:"id"` + } `json:"data"` + } + + if err := json.Unmarshal(resp.Body, &result); err != nil { + return "", err + } + + if result.Code != 0 { + return "", fmt.Errorf("server returned error code: %d", result.Code) + } + + return result.Data.ID, nil +} + +// uploadFile uploads a single file using multipart form +func (u *SkillUploader) uploadFile(ctx stdctx.Context, file *SkillFile, parentID string) error { + var buf bytes.Buffer + writer := multipart.NewWriter(&buf) + + if parentID != "" { + writer.WriteField("parent_id", parentID) + } + + part, err := writer.CreateFormFile("file", file.Path) + if err != nil { + return err + } + if _, err := part.Write(file.Content); err != nil { + return err + } + writer.Close() + + return u.client.UploadMultipart("/files", writer.FormDataContentType(), &buf) +} + +// indexSkill indexes the skill for search +func (u *SkillUploader) indexSkill(ctx stdctx.Context, result *SkillValidationResult, files []*SkillFile, spaceID, skillFolderID 
string) error { + if u.skillProvider == nil { + return fmt.Errorf("skill provider not available") + } + + skillProvider, ok := u.skillProvider.(*SkillProvider) + if !ok { + return fmt.Errorf("invalid skill provider type") + } + + var contentBuilder strings.Builder + for _, file := range files { + if !isTextFile(file.Path, "") { + continue + } + if len(file.Content) > MaxSkillFileSize { + continue + } + sanitized := sanitizeRelPath(file.Path) + if sanitized == "" || isMacJunkPath(sanitized) || shouldIgnore(sanitized, defaultIgnorePatterns) { + continue + } + contentBuilder.WriteString(fmt.Sprintf("\n=== %s ===\n", file.Path)) + contentBuilder.Write(file.Content) + } + content := contentBuilder.String() + + // Use skill name as ID (without version suffix) + // This ensures all versions of the same skill share the same index document + skillID := result.Name + + skillInfo := map[string]interface{}{ + "id": skillID, + "folder_id": skillFolderID, + "name": result.Name, + "description": result.Description, + "tags": result.Tags, + "content": content, + "version": result.Version, + } + + return skillProvider.IndexSkill(ctx, spaceID, skillInfo) +} diff --git a/internal/cli/filesystem/skill_hub/security/guard.go b/internal/cli/filesystem/skill_hub/security/guard.go new file mode 100644 index 0000000000..a57f4a2397 --- /dev/null +++ b/internal/cli/filesystem/skill_hub/security/guard.go @@ -0,0 +1,164 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +package security + +import ( + "fmt" + "strings" +) + +// Guard provides security policy enforcement +type Guard struct { + trustedRepos map[string]bool + policy map[string][3]string +} + +// NewGuard creates a new security guard +func NewGuard() *Guard { + return &Guard{ + trustedRepos: TrustedRepos, + policy: InstallPolicy, + } +} + +// extractCanonicalRepo extracts the canonical owner/repo from an identifier +// Supports formats: "owner/repo", "github.com/owner/repo/path", "owner/repo/path" +func extractCanonicalRepo(identifier string) string { + // Normalize the identifier + identifier = strings.TrimSpace(identifier) + identifier = strings.ToLower(identifier) + + // Remove protocol prefix if present + if idx := strings.Index(identifier, "://"); idx != -1 { + identifier = identifier[idx+3:] + } + + // Remove github.com prefix if present + if strings.HasPrefix(identifier, "github.com/") { + identifier = strings.TrimPrefix(identifier, "github.com/") + } + + // Split into parts + parts := strings.Split(identifier, "/") + if len(parts) < 2 { + return "" + } + + // Extract owner and repo (first two components) + owner := strings.TrimSpace(parts[0]) + repo := strings.TrimSpace(parts[1]) + + if owner == "" || repo == "" { + return "" + } + + return owner + "/" + repo +} + +// ResolveTrustLevel determines the trust level based on source and identifier +func (g *Guard) ResolveTrustLevel(source, identifier string) string { + // Official/builtin source + if source == "official" || source == "builtin" { + return "builtin" + } + + // Extract canonical repo key and check against trusted repositories + canonicalRepo := extractCanonicalRepo(identifier) + if canonicalRepo != "" && g.trustedRepos[canonicalRepo] { + return "trusted" + } + + // Default to community + return "community" +} + +// ShouldAllowInstall determines if installation should be allowed based on 
scan results +// Returns (allowed bool, reason string) +func (g *Guard) ShouldAllowInstall(result *ScanResult, force bool) (bool, string) { + policy, ok := g.policy[result.TrustLevel] + if !ok { + policy = g.policy["community"] + } + + vi, ok := VerdictIndex[result.Verdict] + if !ok { + vi = 2 // dangerous + } + + decision := policy[vi] + + switch decision { + case "allow": + return true, fmt.Sprintf("Allowed (%s source, %s verdict)", result.TrustLevel, result.Verdict) + case "ask": + return false, fmt.Sprintf("Requires confirmation (%s source + %s verdict, %d findings)", + result.TrustLevel, result.Verdict, len(result.Findings)) + case "block": + if force { + return true, fmt.Sprintf("Force-installed despite %s verdict (%d findings)", + result.Verdict, len(result.Findings)) + } + return false, fmt.Sprintf("Blocked (%s source + %s verdict, %d findings). Use --force to override.", + result.TrustLevel, result.Verdict, len(result.Findings)) + } + + return false, "Unknown policy decision" +} + +// FormatScanReport formats a scan result for display +func (g *Guard) FormatScanReport(result *ScanResult) string { + var sb strings.Builder + + sb.WriteString("╔════════════════════════════════════════════════════════════════╗\n") + sb.WriteString(fmt.Sprintf("║ Security Scan Report: %-40s ║\n", result.SkillName)) + sb.WriteString("╚════════════════════════════════════════════════════════════════╝\n") + sb.WriteString(fmt.Sprintf("Source: %s\n", result.Source)) + sb.WriteString(fmt.Sprintf("Trust Level: %s\n", result.TrustLevel)) + sb.WriteString(fmt.Sprintf("Verdict: %s\n", result.Verdict)) + sb.WriteString(fmt.Sprintf("Findings: %d\n", len(result.Findings))) + + if len(result.Findings) > 0 { + sb.WriteString("\n─── Findings ───\n") + + // Group by severity + severityOrder := []string{"critical", "high", "medium", "low"} + for _, sev := range severityOrder { + for _, f := range result.Findings { + if f.Severity == sev { + sb.WriteString(fmt.Sprintf("\n[%s] %s\n", 
strings.ToUpper(sev), f.PatternID)) + sb.WriteString(fmt.Sprintf(" Category: %s\n", f.Category)) + sb.WriteString(fmt.Sprintf(" File: %s:%d\n", f.File, f.Line)) + sb.WriteString(fmt.Sprintf(" Match: %s\n", f.Match)) + sb.WriteString(fmt.Sprintf(" Description: %s\n", f.Description)) + } + } + } + } + + sb.WriteString("\n") + return sb.String() +} + +// AddTrustedRepo adds a repository to the trusted list +func (g *Guard) AddTrustedRepo(repo string) { + g.trustedRepos[repo] = true +} + +// IsTrustedRepo checks if a repository is trusted +func (g *Guard) IsTrustedRepo(repo string) bool { + return g.trustedRepos[repo] +} diff --git a/internal/cli/filesystem/skill_hub/security/patterns.go b/internal/cli/filesystem/skill_hub/security/patterns.go new file mode 100644 index 0000000000..54e585f375 --- /dev/null +++ b/internal/cli/filesystem/skill_hub/security/patterns.go @@ -0,0 +1,284 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// ThreatPattern represents a security threat detection pattern.
// Inspired by hermes-agent's skills_guard.py
type ThreatPattern struct {
	Pattern     string // Regular expression pattern (Go regexp syntax)
	PatternID   string // Unique identifier for this pattern
	Severity    string // critical | high | medium | low
	Category    string // exfiltration | injection | destructive | persistence | network | obfuscation | supply_chain
	Description string // Human-readable description
}

// ThreatPatterns contains all security threat detection rules.
// Every Pattern must be a valid Go regular expression; a pattern that does not
// compile can never produce a finding.
var ThreatPatterns = []ThreatPattern{
	// ========== Data Exfiltration ==========
	{
		Pattern:     `curl\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)`,
		PatternID:   "env_exfil_curl",
		Severity:    "critical",
		Category:    "exfiltration",
		Description: "curl command interpolating secret environment variable",
	},
	{
		Pattern:     `wget\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)`,
		PatternID:   "env_exfil_wget",
		Severity:    "critical",
		Category:    "exfiltration",
		Description: "wget command interpolating secret environment variable",
	},
	{
		Pattern:     `\$HOME/\.ssh|\~/\.ssh`,
		PatternID:   "ssh_dir_access",
		Severity:    "high",
		Category:    "exfiltration",
		Description: "references user SSH directory",
	},
	{
		Pattern:     `os\.environ\b`,
		PatternID:   "python_os_environ",
		Severity:    "high",
		Category:    "exfiltration",
		Description: "accesses os.environ (potential env dump)",
	},
	{
		Pattern:     `printenv|env\s*\|`,
		PatternID:   "dump_all_env",
		Severity:    "high",
		Category:    "exfiltration",
		Description: "dumps all environment variables",
	},

	// ========== Prompt Injection ==========
	{
		Pattern:     `(?i)ignore\s+(?:\w+\s+)*(previous|all|above|prior)\s+instructions`,
		PatternID:   "prompt_injection_ignore",
		Severity:    "critical",
		Category:    "injection",
		Description: "prompt injection: ignore previous instructions",
	},
	{
		Pattern:     `(?i)\bDAN\s+mode\b|Do\s+Anything\s+Now`,
		PatternID:   "jailbreak_dan",
		Severity:    "critical",
		Category:    "injection",
		Description: "DAN (Do Anything Now) jailbreak attempt",
	},
	{
		Pattern:     `(?i)you\s+are\s+(?:\w+\s+)*now\s+`,
		PatternID:   "role_hijack",
		Severity:    "high",
		Category:    "injection",
		Description: "attempts to override the agent's role",
	},
	{
		Pattern:     `(?i)system\s+prompt\s+override`,
		PatternID:   "sys_prompt_override",
		Severity:    "critical",
		Category:    "injection",
		Description: "attempts to override the system prompt",
	},
	{
		Pattern:     `(?i)disregard\s+(?:\w+\s+)*(your|all|any)\s+(?:\w+\s+)*(instructions|rules|guidelines)`,
		PatternID:   "disregard_rules",
		Severity:    "critical",
		Category:    "injection",
		Description: "instructs agent to disregard its rules",
	},

	// ========== Destructive Operations ==========
	{
		Pattern:     `rm\s+-rf\s+/`,
		PatternID:   "destructive_root_rm",
		Severity:    "critical",
		Category:    "destructive",
		Description: "recursive delete from root",
	},
	{
		Pattern:     `rm\s+(-[^\s]*)?r.*\$HOME|\brmdir\s+.*\$HOME`,
		PatternID:   "destructive_home_rm",
		Severity:    "critical",
		Category:    "destructive",
		Description: "recursive delete targeting home directory",
	},
	{
		Pattern:     `\bmkfs\b`,
		PatternID:   "format_filesystem",
		Severity:    "critical",
		Category:    "destructive",
		Description: "formats a filesystem",
	},
	{
		Pattern:     `\bdd\s+.*if=.*of=/dev/`,
		PatternID:   "disk_overwrite",
		Severity:    "critical",
		Category:    "destructive",
		Description: "raw disk write operation",
	},
	{
		Pattern:     `shutil\.rmtree\s*\(\s*["\'/]`,
		PatternID:   "python_rmtree",
		Severity:    "high",
		Category:    "destructive",
		Description: "Python rmtree on absolute or root-relative path",
	},
	{
		// Matches "rm -r…"/"rm -…r… " or "rm --recursive " followed by a "$"
		// (variable-expanded target). The previous form had an unbalanced ")"
		// and failed to compile, so the rule never fired.
		Pattern:     `rm\s+(-[a-zA-Z]*r[a-zA-Z]*\s+|--recursive\s+).*\$`,
		PatternID:   "rm_recursive_dangerous",
		Severity:    "high",
		Category:    "destructive",
		Description: "recursive rm with suspicious target",
	},

	// ========== Persistence ==========
	{
		Pattern:     `\bcrontab\b`,
		PatternID:   "persistence_cron",
		Severity:    "medium",
		Category:    "persistence",
		Description: "modifies cron jobs",
	},
	{
		Pattern:     `\.(bashrc|zshrc|profile|bash_profile|bash_login|zprofile|zlogin)\b`,
		PatternID:   "shell_rc_mod",
		Severity:    "medium",
		Category:    "persistence",
		Description: "references shell startup file",
	},
	{
		Pattern:     `authorized_keys`,
		PatternID:   "ssh_backdoor",
		Severity:    "critical",
		Category:    "persistence",
		Description: "modifies SSH authorized keys",
	},
	{
		Pattern:     `AGENTS\.md|CLAUDE\.md|\.cursorrules|\.clinerules`,
		PatternID:   "agent_config_mod",
		Severity:    "critical",
		Category:    "persistence",
		Description: "references agent config files (could persist malicious instructions)",
	},
	{
		Pattern:     `\.ssh/config`,
		PatternID:   "ssh_config_mod",
		Severity:    "high",
		Category:    "persistence",
		Description: "modifies SSH configuration",
	},

	// ========== Network Threats ==========
	{
		Pattern:     `\bnc\s+-[lp]|ncat\s+-[lp]|\bsocat\b`,
		PatternID:   "reverse_shell",
		Severity:    "critical",
		Category:    "network",
		Description: "potential reverse shell listener",
	},
	{
		Pattern:     `/bin/(ba)?sh\s+-i\s+.*>/dev/tcp/`,
		PatternID:   "bash_reverse_shell",
		Severity:    "critical",
		Category:    "network",
		Description: "bash interactive reverse shell via /dev/tcp",
	},
	{
		Pattern:     `\bngrok\b|\blocaltunnel\b|\bserveo\b|\bcloudflared\b`,
		PatternID:   "tunnel_service",
		Severity:    "high",
		Category:    "network",
		Description: "uses tunneling service for external access",
	},
	{
		Pattern:     `webhook\.site|requestbin\.com|pipedream\.net|hookbin\.com`,
		PatternID:   "exfil_service",
		Severity:    "high",
		Category:    "network",
		Description: "references known data exfiltration/webhook testing service",
	},
	{
		Pattern:     `python\s+-c\s+.*socket.*subprocess`,
		PatternID:   "python_reverse_shell",
		Severity:    "critical",
		Category:    "network",
		Description: "Python reverse shell pattern",
	},

	// ========== Obfuscation ==========
	{
		Pattern:     `base64\s+(-d|--decode)\s*\|`,
		PatternID:   "base64_decode_pipe",
		Severity:    "high",
		Category:    "obfuscation",
		Description: "base64 decodes and pipes to execution",
	},
	{
		Pattern:     `\beval\s*\(\s*["\']`,
		PatternID:   "eval_string",
		Severity:    "high",
		Category:    "obfuscation",
		Description: "eval() with string argument",
	},
	{
		Pattern:     `echo\s+[^\n]*\|\s*(bash|sh|python|perl|ruby|node)`,
		PatternID:   "echo_pipe_exec",
		Severity:    "critical",
		Category:    "obfuscation",
		Description: "echo piped to interpreter for execution",
	},
	{
		Pattern:     `curl\s+[^\n]*\|\s*(ba)?sh`,
		PatternID:   "curl_pipe_shell",
		Severity:    "critical",
		Category:    "supply_chain",
		Description: "curl piped to shell (download-and-execute)",
	},
	{
		Pattern:     `\bexec\s*\(\s*(base64|decode|unescape)`,
		PatternID:   "exec_encoded",
		Severity:    "high",
		Category:    "obfuscation",
		Description: "executes encoded content",
	},
}

// TrustedRepos contains the list of trusted repositories, keyed by lower-case
// "owner/repo". These repos have a higher trust level.
var TrustedRepos = map[string]bool{
	"openai/skills":     true,
	"anthropics/skills": true,
	"microsoft/skills":  true,
	"google/skills":     true,
}

// InstallPolicy defines the installation policy for each trust level.
// Format: [safe, caution, dangerous] -> action (indexed through VerdictIndex).
// Actions: allow, block, ask
var InstallPolicy = map[string][3]string{
	"builtin":   {"allow", "allow", "allow"}, // Official skills: always allow
	"trusted":   {"allow", "allow", "block"}, // Trusted repos: caution allowed, dangerous blocked
	"community": {"allow", "block", "block"}, // Community: only safe allowed
}

// VerdictIndex maps a verdict to its position in an InstallPolicy entry.
var VerdictIndex = map[string]int{
	"safe":      0,
	"caution":   1,
	"dangerous": 2,
}
b/internal/cli/filesystem/skill_hub/security/scanner.go @@ -0,0 +1,150 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package security + +import ( + "regexp" + "strings" +) + +// Finding represents a security issue found during scanning +type Finding struct { + PatternID string // Rule ID + Severity string // critical | high | medium | low + Category string // exfiltration | injection | destructive | persistence | network | obfuscation + File string // File path where found + Line int // Line number + Match string // The matched text + Description string // Human-readable description +} + +// ScanResult represents the result of a security scan +type ScanResult struct { + SkillName string + Source string + TrustLevel string // builtin | trusted | community + Verdict string // safe | caution | dangerous + Findings []Finding +} + +// Scanner performs security scans on skill content +type Scanner struct { + patterns []ThreatPattern +} + +// NewScanner creates a new security scanner +func NewScanner() *Scanner { + return &Scanner{ + patterns: ThreatPatterns, + } +} + +// ScanSkill scans skill files for security threats +func (s *Scanner) ScanSkill(skillName, source, trustLevel string, files map[string][]byte) *ScanResult { + var allFindings []Finding + + for filename, content := range files { + findings := s.scanFile(filename, string(content)) + allFindings = append(allFindings, 
findings...) + } + + verdict := s.determineVerdict(allFindings) + + return &ScanResult{ + SkillName: skillName, + Source: source, + TrustLevel: trustLevel, + Verdict: verdict, + Findings: allFindings, + } +} + +// scanFile scans a single file for threats +func (s *Scanner) scanFile(filename, content string) []Finding { + var findings []Finding + lines := strings.Split(content, "\n") + + for _, pattern := range s.patterns { + re, err := regexp.Compile("(?i:" + pattern.Pattern + ")") + if err != nil { + continue + } + + for i, line := range lines { + if matches := re.FindString(line); matches != "" { + findings = append(findings, Finding{ + PatternID: pattern.PatternID, + Severity: pattern.Severity, + Category: pattern.Category, + File: filename, + Line: i + 1, + Match: strings.TrimSpace(matches), + Description: pattern.Description, + }) + } + } + } + + return findings +} + +// determineVerdict determines the overall verdict based on findings +func (s *Scanner) determineVerdict(findings []Finding) string { + if len(findings) == 0 { + return "safe" + } + + hasCritical := false + hasHigh := false + + for _, f := range findings { + if f.Severity == "critical" { + hasCritical = true + } else if f.Severity == "high" { + hasHigh = true + } + } + + if hasCritical { + return "dangerous" + } + if hasHigh { + return "caution" + } + return "caution" +} + +// HasCriticalChecks if any finding is critical severity +func (r *ScanResult) HasCritical() bool { + for _, f := range r.Findings { + if f.Severity == "critical" { + return true + } + } + return false +} + +// CountBySeverity counts findings by severity level +func (r *ScanResult) CountBySeverity(severity string) int { + count := 0 + for _, f := range r.Findings { + if f.Severity == severity { + count++ + } + } + return count +} diff --git a/internal/cli/filesystem/skill_hub/source/clawhub.go b/internal/cli/filesystem/skill_hub/source/clawhub.go new file mode 100644 index 0000000000..8ec933408f --- /dev/null +++ 
// progressLogger writes short, marker-prefixed status lines to standard output
// for the user.
type progressLogger struct {
	enabled bool // consulted by log only; error and success always print
}

// printProgressLine prints prefix, then the formatted message, then a newline.
func printProgressLine(prefix, format string, args []interface{}) {
	fmt.Printf(prefix+format+"\n", args...)
}

// log reports an intermediate step; it prints nothing when the logger is disabled.
func (l *progressLogger) log(format string, args ...interface{}) {
	if !l.enabled {
		return
	}
	printProgressLine(" → ", format, args)
}

// error reports a failure. It prints regardless of the enabled flag.
func (l *progressLogger) error(format string, args ...interface{}) {
	printProgressLine(" ✗ ", format, args)
}

// success reports a completed step. It prints regardless of the enabled flag.
func (l *progressLogger) success(format string, args ...interface{}) {
	printProgressLine(" ✓ ", format, args)
}

// clawHubBaseURL is the root of the ClawHub REST API (v1).
const clawHubBaseURL = "https://clawhub.ai/api/v1"

// ClawHubSource handles ClawHub registry skills
// Reference implementation: hermes-agent/tools/skills_hub.py ClawHubSource
// All skills are treated as community trust — ClawHavoc incident showed
// their vetting is insufficient (341 malicious skills found Feb 2026).
+type ClawHubSource struct { + client HTTPClientInterface + logger progressLogger +} + +// NewClawHubSource creates a new ClawHub source adapter +func NewClawHubSource(client HTTPClientInterface) *ClawHubSource { + return &ClawHubSource{client: client, logger: progressLogger{enabled: true}} +} + +// SourceID returns the source identifier +func (s *ClawHubSource) SourceID() string { + return "clawhub" +} + +// TrustLevel returns the trust level for ClawHub +func (s *ClawHubSource) TrustLevel(identifier string) string { + // ClawHub has community verification + return "community" +} + +// Search searches for skills on ClawHub matching the query +func (s *ClawHubSource) Search(query string, limit int) ([]*SkillMetadata, error) { + if limit <= 0 { + limit = 10 + } + + // Try direct slug match first for exact queries + if query != "" && len(query) >= 2 { + meta, err := s.exactSlugMeta(query) + if err == nil && meta != nil { + return []*SkillMetadata{meta}, nil + } + } + + // Use the lightweight listing API + url := fmt.Sprintf("%s/skills", clawHubBaseURL) + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + + q := req.URL.Query() + if query != "" { + q.Add("search", query) + } + q.Add("limit", strconv.Itoa(limit)) + req.URL.RawQuery = q.Encode() + + resp, err := s.client.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to search ClawHub: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("ClawHub API returned %d", resp.StatusCode) + } + + var data struct { + Items []struct { + Slug string `json:"slug"` + DisplayName string `json:"displayName"` + Name string `json:"name"` + Summary string `json:"summary"` + Description string `json:"description"` + Tags interface{} `json:"tags"` + } `json:"items"` + } + if err := json.NewDecoder(resp.Body).Decode(&data); err != nil { + return nil, err + } + + results := make([]*SkillMetadata, 0, len(data.Items)) + for _, item := range 
data.Items { + slug := item.Slug + if slug == "" { + continue + } + displayName := item.DisplayName + if displayName == "" { + displayName = item.Name + } + if displayName == "" { + displayName = slug + } + summary := item.Summary + if summary == "" { + summary = item.Description + } + + results = append(results, &SkillMetadata{ + Name: displayName, + Description: summary, + Version: "", + Author: "", + Tags: normalizeTags(item.Tags), + }) + } + + // Apply search scoring and filtering + results = s.finalizeSearchResults(query, results, limit) + return results, nil +} + +// Fetch retrieves a skill from ClawHub +// Downloads the skill as a ZIP bundle and extracts text files +// Supports identifier with version: "slug@version" or just "slug" (uses latest) +func (s *ClawHubSource) Fetch(identifier string) (*SkillBundle, error) { + slug, specifiedVersion := extractSlugAndVersion(identifier) + s.logger.log("Looking up skill '%s' on ClawHub...", slug) + + // Fetch skill metadata + skillData, err := s.getSkillData(slug) + if err != nil { + s.logger.error("Cannot find skill '%s' on ClawHub: %v", slug, err) + return nil, fmt.Errorf("skill '%s' not found on ClawHub: %w", slug, err) + } + s.logger.success("Found skill: %s", skillData.DisplayName) + + // Determine version to download + var version string + if specifiedVersion != "" { + version = specifiedVersion + s.logger.log("Using specified version: %s", version) + } else { + // Resolve the latest version + s.logger.log("Resolving latest version...") + version, err = s.resolveLatestVersion(slug, skillData) + if err != nil { + s.logger.error("Cannot determine version for '%s': %v", slug, err) + return nil, fmt.Errorf("could not resolve latest version for %s: %w", slug, err) + } + if version == "" { + s.logger.error("No versions available for skill '%s'", slug) + return nil, fmt.Errorf("no version found for skill %s", slug) + } + s.logger.success("Latest version: %s", version) + } + + // Try to get files from version metadata 
endpoint first (avoids rate-limited /download) + var files map[string][]byte + s.logger.log("Fetching skill files (version %s)...", version) + versionData, err := s.getVersionData(slug, version) + if err == nil { + files = s.extractFiles(versionData) + if len(files) > 0 { + s.logger.success("Fetched %d files from metadata", len(files)) + } + } + + // Fallback to ZIP download if metadata method didn't return files + if len(files) == 0 { + s.logger.log("Trying ZIP download...") + // Add delay before download to avoid rate limit + time.Sleep(3 * time.Second) + zipFiles, err2 := s.downloadZip(slug, version) + if err2 != nil { + s.logger.error("Failed to download skill bundle: %v", err2) + return nil, fmt.Errorf("failed to download skill '%s': %w", slug, err2) + } + files = zipFiles + s.logger.success("Downloaded %d files via ZIP", len(files)) + } + + // Validate: must have SKILL.md + if _, ok := files["SKILL.md"]; !ok { + s.logger.error("Downloaded bundle is missing SKILL.md (required file)") + return nil, fmt.Errorf("SKILL.md not found in skill %s (version %s)", slug, version) + } + + return &SkillBundle{ + Name: slug, + Files: files, + Source: "clawhub", + Identifier: slug, + TrustLevel: s.TrustLevel(identifier), + Metadata: &SkillMetadata{ + Name: skillData.DisplayName, + Description: skillData.Summary, + Version: version, + }, + }, nil +} + +// Inspect retrieves metadata from ClawHub without downloading full content +func (s *ClawHubSource) Inspect(identifier string) (*SkillMetadata, error) { + slug := extractSlug(identifier) + + skillData, err := s.getSkillData(slug) + if err != nil { + return nil, err + } + + return &SkillMetadata{ + Name: skillData.DisplayName, + Description: skillData.Summary, + Version: "", + Author: "", + Tags: normalizeTags(skillData.Tags), + }, nil +} + +// getSkillData fetches skill metadata from ClawHub API with retry logic +func (s *ClawHubSource) getSkillData(slug string) (*clawHubSkillData, error) { + url := fmt.Sprintf("%s/skills/%s", 
clawHubBaseURL, slug) + + body, err := s.doRequestWithRetry("GET", url, nil) + if err != nil { + return nil, err + } + + // ClawHub API may return nested structure: {"skill": {...}, "latestVersion": ...} + var rawData map[string]interface{} + if err := json.Unmarshal(body, &rawData); err != nil { + return nil, err + } + + return coerceSkillPayload(rawData), nil +} + +// getVersionData fetches version-specific metadata with retry logic +func (s *ClawHubSource) getVersionData(slug, version string) (map[string]interface{}, error) { + url := fmt.Sprintf("%s/skills/%s/versions/%s", clawHubBaseURL, slug, version) + + body, err := s.doRequestWithRetry("GET", url, nil) + if err != nil { + return nil, err + } + + var data map[string]interface{} + if err := json.Unmarshal(body, &data); err != nil { + return nil, err + } + return data, nil +} + +// resolveLatestVersion extracts the latest version from skill data with retry logic +func (s *ClawHubSource) resolveLatestVersion(slug string, skillData *clawHubSkillData) (string, error) { + // Try latestVersion field first + if skillData.LatestVersion != "" { + return skillData.LatestVersion, nil + } + + // Try tags.latest + if skillData.TagsLatest != "" { + return skillData.TagsLatest, nil + } + + // Fallback: fetch versions list and take first + url := fmt.Sprintf("%s/skills/%s/versions", clawHubBaseURL, slug) + + body, err := s.doRequestWithRetry("GET", url, nil) + if err != nil { + return "", err + } + + var versions []struct { + Version string `json:"version"` + } + if err := json.Unmarshal(body, &versions); err != nil { + return "", err + } + + if len(versions) > 0 && versions[0].Version != "" { + return versions[0].Version, nil + } + + return "", nil +} + +// downloadZip downloads skill as ZIP bundle and extracts text files +func (s *ClawHubSource) downloadZip(slug, version string) (map[string][]byte, error) { + // Use the correct endpoint with slug parameter (matching hermes-agent) + url := 
fmt.Sprintf("%s/download?slug=%s&version=%s", clawHubBaseURL, slug, version) + s.logger.log("Downloading ZIP from: %s", url) + + body, err := s.doRequestWithRetry("GET", url, nil) + if err != nil { + return nil, fmt.Errorf("download failed: %w", err) + } + + s.logger.log("Downloaded %d bytes, extracting files...", len(body)) + + // Extract ZIP + zipReader, err := zip.NewReader(bytes.NewReader(body), int64(len(body))) + if err != nil { + s.logger.error("Downloaded file is not a valid ZIP archive: %v", err) + return nil, fmt.Errorf("invalid ZIP file: %w", err) + } + + files := make(map[string][]byte) + skippedCount := 0 + for _, file := range zipReader.File { + if file.FileInfo().IsDir() { + continue + } + + // Validate path for safety + name := file.Name + if !isSafePath(name) { + skippedCount++ + continue + } + + // Skip large files (>500KB) + if file.UncompressedSize64 > 500_000 { + skippedCount++ + s.logger.log("Skipping large file: %s (%.1f MB)", name, float64(file.UncompressedSize64)/1024/1024) + continue + } + + // Read file content + rc, err := file.Open() + if err != nil { + skippedCount++ + continue + } + content, err := io.ReadAll(rc) + rc.Close() + if err != nil { + skippedCount++ + continue + } + + // Only include text files (check for null bytes indicating binary) + if isTextContent(content) { + files[name] = content + } else { + skippedCount++ + s.logger.log("Skipping binary file: %s", name) + } + } + + if skippedCount > 0 { + s.logger.log("Skipped %d files (unsafe paths, large files, or binary content)", skippedCount) + } + + if len(files) == 0 { + s.logger.error("No valid files found in the ZIP archive") + return nil, fmt.Errorf("no valid files extracted from ZIP") + } + + return files, nil +} + +// extractFiles extracts files from version data structure +func (s *ClawHubSource) extractFiles(versionData map[string]interface{}) map[string][]byte { + files := make(map[string][]byte) + + // Check for nested version -> files structure + if nested, ok := 
versionData["version"].(map[string]interface{}); ok { + versionData = nested + } + + fileList, ok := versionData["files"] + if !ok { + return files + } + + // Handle map structure: {"filename": "content"} + if fileMap, ok := fileList.(map[string]interface{}); ok { + for name, content := range fileMap { + if s, ok := content.(string); ok && isSafePath(name) { + files[name] = []byte(s) + } + } + return files + } + + // Handle array structure with file metadata + if fileArray, ok := fileList.([]interface{}); ok { + for _, item := range fileArray { + fileMeta, ok := item.(map[string]interface{}) + if !ok { + continue + } + + name := "" + if n, ok := fileMeta["path"].(string); ok && n != "" { + name = n + } else if n, ok := fileMeta["name"].(string); ok && n != "" { + name = n + } + if name == "" || !isSafePath(name) { + continue + } + + // Try inline content first + if content, ok := fileMeta["content"].(string); ok { + files[name] = []byte(content) + continue + } + + // Try rawUrl/downloadUrl + var url string + if u, ok := fileMeta["rawUrl"].(string); ok && u != "" { + url = u + } else if u, ok := fileMeta["downloadUrl"].(string); ok && u != "" { + url = u + } else if u, ok := fileMeta["url"].(string); ok && u != "" { + url = u + } + + if url != "" && strings.HasPrefix(url, "http") { + content, err := s.fetchText(url) + if err == nil { + files[name] = []byte(content) + } + } + } + } + + return files +} + +// fetchText fetches text content from URL +func (s *ClawHubSource) fetchText(url string) (string, error) { + resp, err := s.client.Get(url) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("HTTP %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + return string(body), nil +} + +// doRequestWithRetry performs HTTP request with retry logic for 429 rate limiting +func (s *ClawHubSource) doRequestWithRetry(method, url string, body 
[]byte) ([]byte, error) { + maxRetries := 5 + var lastErr error + isDownload := strings.Contains(url, "/download") + + for attempt := 0; attempt < maxRetries; attempt++ { + // Initial delay for download requests to avoid triggering rate limit + if attempt == 0 && isDownload { + s.logger.log("Adding initial delay for download request...") + time.Sleep(5 * time.Second) + } + + var bodyReader io.Reader + if body != nil { + bodyReader = bytes.NewReader(body) + } + + req, err := http.NewRequest(method, url, bodyReader) + if err != nil { + lastErr = fmt.Errorf("failed to create request: %w", err) + s.logger.error("Request setup failed: %v", lastErr) + continue + } + + // Simple headers like hermes-agent + req.Header.Set("User-Agent", "RAGFlow-CLI/1.0") + req.Header.Set("Accept", "application/json") + + resp, err := s.client.Do(req) + if err != nil { + lastErr = err + if attempt < maxRetries-1 { + s.logger.error("Request failed (attempt %d/%d): %v", attempt+1, maxRetries, err) + } + continue + } + + // Read response body immediately + respBody, err := io.ReadAll(resp.Body) + resp.Body.Close() + + if err != nil { + lastErr = fmt.Errorf("failed to read response: %w", err) + if attempt < maxRetries-1 { + s.logger.error("Response read failed (attempt %d/%d): %v", attempt+1, maxRetries, err) + } + continue + } + + // Handle rate limiting - ClawHub has strict limits, wait 30-60s to reset window + if resp.StatusCode == http.StatusTooManyRequests { + retryAfter := resp.Header.Get("Retry-After") + waitSeconds := 30 // Default: wait 30 seconds + if retryAfter != "" { + if seconds, err := strconv.Atoi(retryAfter); err == nil && seconds > 0 { + waitSeconds = seconds + } + } + // Ensure minimum 30s wait to reset rate limit window + if waitSeconds < 30 { + waitSeconds = 30 + } + // Cap at 60 seconds + if waitSeconds > 60 { + waitSeconds = 60 + } + s.logger.log("Rate limited by ClawHub, waiting %d seconds...", waitSeconds) + time.Sleep(time.Duration(waitSeconds) * time.Second) + lastErr 
= fmt.Errorf("rate limited (429)") + continue + } + + if resp.StatusCode == http.StatusNotFound { + lastErr = fmt.Errorf("skill not found (HTTP 404)") + s.logger.error("%v", lastErr) + return nil, lastErr // Don't retry 404 + } + + if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden { + lastErr = fmt.Errorf("access denied (HTTP %d) - check your credentials", resp.StatusCode) + s.logger.error("%v", lastErr) + return nil, lastErr // Don't retry auth errors + } + + if resp.StatusCode != http.StatusOK { + lastErr = fmt.Errorf("ClawHub API returned HTTP %d", resp.StatusCode) + if attempt < maxRetries-1 { + s.logger.error("Server error (attempt %d/%d): HTTP %d", attempt+1, maxRetries, resp.StatusCode) + } + continue + } + + return respBody, nil + } + + // Provide helpful error message based on the last error + var userMsg string + if lastErr != nil { + errStr := lastErr.Error() + switch { + case strings.Contains(errStr, "connection refused"): + userMsg = "Cannot connect to ClawHub - the service may be down or your network is blocking the connection" + case strings.Contains(errStr, "timeout") || strings.Contains(errStr, "deadline exceeded"): + userMsg = "Connection to ClawHub timed out - your network may be slow or the service is unresponsive" + case strings.Contains(errStr, "no such host") || strings.Contains(errStr, "DNS"): + userMsg = "Cannot resolve ClawHub hostname - check your internet connection or DNS settings" + case strings.Contains(errStr, "certificate"): + userMsg = "SSL certificate error - your system may have outdated certificates or someone is intercepting the connection" + default: + userMsg = fmt.Sprintf("Network error after %d attempts: %v", maxRetries, lastErr) + } + } else { + userMsg = fmt.Sprintf("Failed after %d attempts - unknown error", maxRetries) + } + + return nil, fmt.Errorf("%s", userMsg) +} + +// exactSlugMeta tries to find skill by exact slug match +func (s *ClawHubSource) exactSlugMeta(query string) 
(*SkillMetadata, error) { + slug := extractSlug(query) + queryTermList := extractQueryTerms(query) + + candidates := []string{} + + // If slug looks valid, add it + if slug != "" && regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9._-]*$`).MatchString(slug) { + candidates = append(candidates, slug) + } + + // Generate variations from query terms + if len(queryTermList) > 0 { + baseSlug := strings.Join(queryTermList, "-") + if len(queryTermList) >= 2 { + candidates = append(candidates, + baseSlug+"-agent", + baseSlug+"-skill", + baseSlug+"-tool", + baseSlug+"-assistant", + baseSlug+"-playbook", + baseSlug, + ) + } else { + candidates = append(candidates, baseSlug) + } + } + + seen := make(map[string]bool) + for _, candidate := range candidates { + if seen[candidate] { + continue + } + seen[candidate] = true + + meta, err := s.Inspect(candidate) + if err == nil && meta != nil && meta.Name != "" { + return meta, nil + } + } + + return nil, fmt.Errorf("no exact match found") +} + +// finalizeSearchResults applies scoring and filtering to search results +func (s *ClawHubSource) finalizeSearchResults(query string, results []*SkillMetadata, limit int) []*SkillMetadata { + if query == "" { + deduped := dedupeResults(results) + if len(deduped) > limit { + return deduped[:limit] + } + return deduped + } + + // Score and filter + filtered := make([]*SkillMetadata, 0) + for _, meta := range results { + if s.searchScore(query, meta) > 0 { + filtered = append(filtered, meta) + } + } + + // Sort by score + sort.Slice(filtered, func(i, j int) bool { + scoreI := s.searchScore(query, filtered[i]) + scoreJ := s.searchScore(query, filtered[j]) + if scoreI != scoreJ { + return scoreI > scoreJ + } + if filtered[i].Name != filtered[j].Name { + return strings.ToLower(filtered[i].Name) < strings.ToLower(filtered[j].Name) + } + return strings.ToLower(filtered[i].Description) < strings.ToLower(filtered[j].Description) + }) + + deduped := dedupeResults(filtered) + if len(deduped) > limit { + return 
deduped[:limit] + } + return deduped +} + +// searchScore calculates relevance score for a skill against query +func (s *ClawHubSource) searchScore(query string, meta *SkillMetadata) int { + queryNorm := strings.ToLower(strings.TrimSpace(query)) + if queryNorm == "" { + return 1 + } + + nameLower := strings.ToLower(meta.Name) + descLower := strings.ToLower(meta.Description) + + queryTermList := extractQueryTerms(queryNorm) + nameTermList := extractQueryTerms(nameLower) + + score := 0 + + // Exact matches (high scores) + if queryNorm == nameLower { + score += 130 + } + if strings.ReplaceAll(nameLower, " ", "-") == queryNorm { + score += 120 + } + if strings.HasPrefix(nameLower, queryNorm) { + score += 90 + } + + // Query terms match name terms + if len(queryTermList) > 0 && len(nameTermList) >= len(queryTermList) { + match := true + for i, term := range queryTermList { + if i >= len(nameTermList) || nameTermList[i] != term { + match = false + break + } + } + if match { + score += 65 + } + } + + // Substring matches + if strings.Contains(nameLower, queryNorm) { + score += 35 + } + if strings.Contains(descLower, queryNorm) { + score += 10 + } + + // Individual term matches + for _, term := range queryTermList { + if strings.Contains(nameLower, term) { + score += 12 + } + if strings.Contains(descLower, term) { + score += 3 + } + } + + return score +} + +// Helper types and functions + +// clawHubSkillData represents ClawHub skill API response +type clawHubSkillData struct { + Slug string `json:"slug"` + DisplayName string `json:"displayName"` + Name string `json:"name"` + Summary string `json:"summary"` + Description string `json:"description"` + Tags interface{} `json:"tags"` + LatestVersion string `json:"latestVersion"` + TagsLatest string `json:"tags_latest"` // Extracted from tags dict +} + +// coerceSkillPayload handles nested ClawHub API response structures +// ClawHub API may return: {"skill": {...}, "latestVersion": ...} or flat structure +func 
coerceSkillPayload(data map[string]interface{}) *clawHubSkillData { + result := &clawHubSkillData{} + + // Check for nested skill structure + nested, hasNested := data["skill"].(map[string]interface{}) + if hasNested { + // Merge nested skill data + for k, v := range nested { + data[k] = v + } + // Keep latestVersion from outer if present + if lv, ok := data["latestVersion"].(string); ok && lv != "" { + result.LatestVersion = lv + } + } + + // Extract fields + if v, ok := data["slug"].(string); ok { + result.Slug = v + } + if v, ok := data["displayName"].(string); ok { + result.DisplayName = v + } + if v, ok := data["name"].(string); ok && result.DisplayName == "" { + result.DisplayName = v + } + if v, ok := data["summary"].(string); ok { + result.Summary = v + } + if v, ok := data["description"].(string); ok && result.Summary == "" { + result.Summary = v + } + if v, ok := data["tags"]; ok { + result.Tags = v + // Extract latest from tags dict + if tagMap, ok := v.(map[string]interface{}); ok { + if latest, ok := tagMap["latest"].(string); ok { + result.TagsLatest = latest + } + } + } + + return result +} + +// extractSlug extracts the skill slug from identifier +func extractSlug(identifier string) string { + parts := strings.Split(identifier, "/") + return parts[len(parts)-1] +} + +// extractSlugAndVersion extracts the skill slug and optional version from identifier +// Supports formats: "slug", "slug@version", "owner/slug", "owner/slug@version" +func extractSlugAndVersion(identifier string) (slug, version string) { + // First get the last part (handles owner/slug format) + parts := strings.Split(identifier, "/") + lastPart := parts[len(parts)-1] + + // Check for version separator @ + if idx := strings.LastIndex(lastPart, "@"); idx > 0 { + return lastPart[:idx], lastPart[idx+1:] + } + + return lastPart, "" +} + +// normalizeTags normalizes tags from various formats +func normalizeTags(tags interface{}) []string { + result := []string{} + + switch v := tags.(type) 
{ + case []interface{}: + for _, t := range v { + if s, ok := t.(string); ok && s != "" && s != "latest" { + result = append(result, s) + } + } + case []string: + for _, s := range v { + if s != "" && s != "latest" { + result = append(result, s) + } + } + case map[string]interface{}: + for k := range v { + if k != "" && k != "latest" { + result = append(result, k) + } + } + } + + return result +} + +// dedupeResults removes duplicate skills by name, keeping first occurrence +func dedupeResults(results []*SkillMetadata) []*SkillMetadata { + seen := make(map[string]bool) + unique := []*SkillMetadata{} + for _, r := range results { + key := strings.ToLower(r.Name) + if !seen[key] { + seen[key] = true + unique = append(unique, r) + } + } + return unique +} + +// extractQueryTerms splits query into normalized terms +func extractQueryTerms(query string) []string { + re := regexp.MustCompile(`[^a-z0-9]+`) + parts := re.Split(strings.ToLower(query), -1) + result := []string{} + for _, p := range parts { + if p != "" { + result = append(result, p) + } + } + return result +} + +// isSafePath validates that a path is safe (no directory traversal) +func isSafePath(path string) bool { + // Clean the path + clean := filepath.Clean(path) + + // Check for absolute paths + if filepath.IsAbs(clean) { + return false + } + + // Check for parent directory references + parts := strings.Split(clean, string(filepath.Separator)) + for _, part := range parts { + if part == ".." 
{ + return false + } + } + + return true +} + +// isTextContent checks if content appears to be text (not binary) +func isTextContent(data []byte) bool { + // Check for null bytes (indicates binary) + for _, b := range data { + if b == 0 { + return false + } + } + return true +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} diff --git a/internal/cli/filesystem/skill_hub/source/github.go b/internal/cli/filesystem/skill_hub/source/github.go new file mode 100644 index 0000000000..c2e2e3060e --- /dev/null +++ b/internal/cli/filesystem/skill_hub/source/github.go @@ -0,0 +1,260 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package source + +import ( + "encoding/base64" + "encoding/json" + "fmt" + "io" + "net/http" + "path" + "strings" +) + +// GitHubSource handles GitHub repository skills +type GitHubSource struct { + client HTTPClientInterface +} + +// NewGitHubSource creates a new GitHub source adapter +func NewGitHubSource(client HTTPClientInterface) *GitHubSource { + return &GitHubSource{client: client} +} + +// SourceID returns the source identifier +func (s *GitHubSource) SourceID() string { + return "github" +} + +// TrustLevel returns the trust level based on repository +func (s *GitHubSource) TrustLevel(identifier string) string { + owner, repo, _, err := parseGitHubURL(identifier) + if err != nil { + return "community" + } + if isTrustedGitHubRepo(owner, repo) { + return "trusted" + } + return "community" +} + +// Fetch retrieves a skill from GitHub +func (s *GitHubSource) Fetch(identifier string) (*SkillBundle, error) { + owner, repo, pathStr, err := parseGitHubURL(identifier) + if err != nil { + return nil, err + } + + // Default to repo root if no path specified + if pathStr == "" { + pathStr = "." 
+ } + + // Try to get SKILL.md first to determine skill name + skillName := repo + meta := &SkillMetadata{Version: "1.0.0"} + + skillMdContent, err := s.fetchFileContent(owner, repo, path.Join(pathStr, "SKILL.md")) + if err == nil { + parsedMeta, parseErr := parseSkillFrontmatter(skillMdContent) + if parseErr == nil { + meta = parsedMeta + if meta.Name != "" { + skillName = meta.Name + } + } + // If parsing fails, use default meta and skillName + } + + // Fetch all files in the directory + files, err := s.fetchDirectoryContents(owner, repo, pathStr) + if err != nil { + return nil, fmt.Errorf("failed to fetch directory contents: %w", err) + } + + return &SkillBundle{ + Name: skillName, + Files: files, + Source: "github", + Identifier: identifier, + TrustLevel: s.TrustLevel(identifier), + Metadata: meta, + }, nil +} + +// Inspect retrieves metadata from GitHub +func (s *GitHubSource) Inspect(identifier string) (*SkillMetadata, error) { + owner, repo, pathStr, err := parseGitHubURL(identifier) + if err != nil { + return nil, err + } + + skillMdPath := path.Join(pathStr, "SKILL.md") + content, err := s.fetchFileContent(owner, repo, skillMdPath) + if err != nil { + // Return basic metadata if SKILL.md not found + return &SkillMetadata{ + Name: repo, + Description: fmt.Sprintf("Skill from %s/%s", owner, repo), + Version: "1.0.0", + }, nil + } + + meta, err := parseSkillFrontmatter(content) + if err != nil { + return nil, fmt.Errorf("invalid SKILL.md frontmatter in %s: %w", identifier, err) + } + return meta, nil +} + +// fetchFileContent fetches a single file from GitHub +func (s *GitHubSource) fetchFileContent(owner, repo, filePath string) (string, error) { + var url string + if filePath == "" || filePath == "." 
{ + url = fmt.Sprintf("https://api.github.com/repos/%s/%s/contents", owner, repo) + } else { + url = fmt.Sprintf("https://api.github.com/repos/%s/%s/contents/%s", owner, repo, filePath) + } + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return "", err + } + req.Header.Set("Accept", "application/vnd.github.v3+json") + req.Header.Set("User-Agent", "ragflow-cli") + + resp, err := s.client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("GitHub API returned %d", resp.StatusCode) + } + + var result struct { + Content string `json:"content"` + Encoding string `json:"encoding"` + } + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return "", err + } + + if result.Encoding == "base64" { + decoded, err := base64.StdEncoding.DecodeString(result.Content) + if err != nil { + return "", err + } + return string(decoded), nil + } + + return result.Content, nil +} + +// fetchDirectoryContents recursively fetches directory contents from GitHub +func (s *GitHubSource) fetchDirectoryContents(owner, repo, dirPath string) (map[string][]byte, error) { + var url string + if dirPath == "" || dirPath == "." 
{ + url = fmt.Sprintf("https://api.github.com/repos/%s/%s/contents", owner, repo) + } else { + url = fmt.Sprintf("https://api.github.com/repos/%s/%s/contents/%s", owner, repo, dirPath) + } + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + req.Header.Set("Accept", "application/vnd.github.v3+json") + req.Header.Set("User-Agent", "ragflow-cli") + + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("GitHub API returned %d", resp.StatusCode) + } + + var items []struct { + Name string `json:"name"` + Path string `json:"path"` + Type string `json:"type"` + DownloadURL string `json:"download_url"` + } + if err := json.NewDecoder(resp.Body).Decode(&items); err != nil { + return nil, err + } + + files := make(map[string][]byte) + for _, item := range items { + // Skip hidden files and common ignore patterns + if strings.HasPrefix(item.Name, ".") { + continue + } + if item.Name == "node_modules" || item.Name == "__pycache__" { + continue + } + + if item.Type == "file" { + // Calculate relative path + relPath := item.Path + if dirPath != "" && dirPath != "." { + relPath = strings.TrimPrefix(item.Path, dirPath+"/") + } + + content, err := s.downloadFile(item.DownloadURL) + if err != nil { + continue // Skip files we can't download + } + files[relPath] = content + } else if item.Type == "dir" { + // Recursively fetch subdirectory + subFiles, err := s.fetchDirectoryContents(owner, repo, item.Path) + if err != nil { + continue + } + for subPath, content := range subFiles { + relPath := subPath + if dirPath != "" && dirPath != "." 
{ + relPath = strings.TrimPrefix(subPath, dirPath+"/") + } + files[relPath] = content + } + } + } + + return files, nil +} + +// downloadFile downloads a file from the given URL +func (s *GitHubSource) downloadFile(url string) ([]byte, error) { + resp, err := s.client.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d", resp.StatusCode) + } + + return io.ReadAll(resp.Body) +} diff --git a/internal/cli/filesystem/skill_hub/source/interface.go b/internal/cli/filesystem/skill_hub/source/interface.go new file mode 100644 index 0000000000..8cc8617ecf --- /dev/null +++ b/internal/cli/filesystem/skill_hub/source/interface.go @@ -0,0 +1,177 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package source + +import ( + "fmt" + "net/url" + "os" + "path/filepath" + "strings" +) + +// SkillSource is the interface for skill sources +type SkillSource interface { + // SourceID returns the source identifier (local, github, clawhub, skillssh) + SourceID() string + + // Fetch downloads and returns the skill bundle + Fetch(identifier string) (*SkillBundle, error) + + // Inspect retrieves metadata without downloading full content + Inspect(identifier string) (*SkillMetadata, error) + + // TrustLevel returns the trust level for this source (builtin/trusted/community) + TrustLevel(identifier string) string +} + +// SourceResolver resolves source references to appropriate adapters +type SourceResolver struct { + sources map[string]SkillSource +} + +// NewSourceResolver creates a new source resolver +func NewSourceResolver(client HTTPClientInterface) *SourceResolver { + return &SourceResolver{ + sources: map[string]SkillSource{ + "local": NewLocalSource(), + "github": NewGitHubSource(client), + "clawhub": NewClawHubSource(client), + "skillssh": NewSkillsShSource(client), + }, + } +} + +// Resolve parses a source reference and returns the appropriate source adapter +// Supported formats: +// - ./path, /absolute/path -> local +// - github.com/owner/repo/path -> github +// - clawhub://owner/skill-name, clawhub.ai/owner/skill-name -> clawhub +// - skill://skill-name, skills.sh/skill/name -> skillssh +func (r *SourceResolver) Resolve(ref string) (SkillSource, string, error) { + ref = strings.TrimSpace(ref) + if ref == "" { + return nil, "", fmt.Errorf("empty source reference") + } + + // Check for URI schemes + if strings.HasPrefix(ref, "clawhub://") { + identifier := strings.TrimPrefix(ref, "clawhub://") + return r.sources["clawhub"], identifier, nil + } + if strings.HasPrefix(ref, "skill://") { + identifier := strings.TrimPrefix(ref, "skill://") + return r.sources["skillssh"], identifier, nil + } + + // Check for local path (starts with ./ or / or ~) + if 
strings.HasPrefix(ref, "./") || strings.HasPrefix(ref, "/") || strings.HasPrefix(ref, "~/") { + // Expand ~ to home directory + if strings.HasPrefix(ref, "~/") { + home, err := getHomeDir() + if err != nil { + return nil, "", fmt.Errorf("cannot resolve home directory: %w", err) + } + ref = filepath.Join(home, ref[2:]) + } + return r.sources["local"], ref, nil + } + + // Check for github.com domain + if strings.HasPrefix(ref, "github.com/") || strings.HasPrefix(ref, "https://github.com/") { + identifier := strings.TrimPrefix(ref, "https://") + return r.sources["github"], identifier, nil + } + + // Check for clawhub.ai domain + if strings.HasPrefix(ref, "clawhub.ai/") || strings.HasPrefix(ref, "https://clawhub.ai/") { + identifier := strings.TrimPrefix(ref, "https://") + identifier = strings.TrimPrefix(identifier, "clawhub.ai/") + return r.sources["clawhub"], identifier, nil + } + + // Check for skills.sh domain + if strings.HasPrefix(ref, "skills.sh/") || strings.HasPrefix(ref, "https://skills.sh/") { + identifier := strings.TrimPrefix(ref, "https://") + identifier = strings.TrimPrefix(identifier, "skills.sh/") + return r.sources["skillssh"], identifier, nil + } + + // Default: treat as local path if it exists, otherwise error + return r.sources["local"], ref, nil +} + +// getHomeDir returns the user's home directory +func getHomeDir() (string, error) { + home := os.Getenv("HOME") + if home == "" { + home = os.Getenv("USERPROFILE") + } + if home == "" { + return "", fmt.Errorf("cannot determine home directory") + } + return home, nil +} + +// parseGitHubURL parses a GitHub URL and returns owner, repo, and path +func parseGitHubURL(urlStr string) (owner, repo, path string, err error) { + // Remove protocol prefix if present + urlStr = strings.TrimPrefix(urlStr, "https://") + urlStr = strings.TrimPrefix(urlStr, "http://") + + // Remove github.com/ prefix + urlStr = strings.TrimPrefix(urlStr, "github.com/") + + parts := strings.Split(urlStr, "/") + if len(parts) < 2 { 
+ return "", "", "", fmt.Errorf("invalid GitHub URL format") + } + + owner = parts[0] + repo = parts[1] + if len(parts) > 2 { + path = strings.Join(parts[2:], "/") + } + + return owner, repo, path, nil +} + +// extractSkillNameFromPath extracts the skill name from a path +func extractSkillNameFromPath(path string) string { + base := filepath.Base(path) + // Remove common suffixes + base = strings.TrimSuffix(base, ".git") + return base +} + +// isTrustedGitHubRepo checks if a GitHub repo is trusted +func isTrustedGitHubRepo(owner, repo string) bool { + fullName := owner + "/" + repo + trusted := map[string]bool{ + "openai/skills": true, + "anthropics/skills": true, + "microsoft/skills": true, + "google/skills": true, + } + return trusted[fullName] +} + +// Helper to check if URL is valid +func isValidURL(str string) bool { + u, err := url.Parse(str) + return err == nil && (u.Scheme == "http" || u.Scheme == "https") && u.Host != "" +} diff --git a/internal/cli/filesystem/skill_hub/source/local.go b/internal/cli/filesystem/skill_hub/source/local.go new file mode 100644 index 0000000000..e8d8aae489 --- /dev/null +++ b/internal/cli/filesystem/skill_hub/source/local.go @@ -0,0 +1,206 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package source + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "gopkg.in/yaml.v3" +) + +// LocalSource handles local filesystem skills +type LocalSource struct{} + +// NewLocalSource creates a new local source adapter +func NewLocalSource() *LocalSource { + return &LocalSource{} +} + +// SourceID returns the source identifier +func (s *LocalSource) SourceID() string { + return "local" +} + +// TrustLevel returns the trust level for local sources +func (s *LocalSource) TrustLevel(identifier string) string { + return "community" // Local skills default to community trust level +} + +// Fetch retrieves a skill from the local filesystem +func (s *LocalSource) Fetch(identifier string) (*SkillBundle, error) { + // Validate path exists + info, err := os.Stat(identifier) + if err != nil { + return nil, fmt.Errorf("cannot access path %s: %w", identifier, err) + } + if !info.IsDir() { + return nil, fmt.Errorf("%s is not a directory", identifier) + } + + // Read SKILL.md + skillMdPath := filepath.Join(identifier, "SKILL.md") + content, err := os.ReadFile(skillMdPath) + if err != nil { + return nil, fmt.Errorf("SKILL.md not found in %s: %w", identifier, err) + } + + // Parse frontmatter + meta, err := parseSkillFrontmatter(string(content)) + if err != nil { + return nil, fmt.Errorf("invalid SKILL.md frontmatter in %s: %w", identifier, err) + } + skillName := meta.Name + if skillName == "" { + skillName = filepath.Base(identifier) + } + + // Collect all files + files := make(map[string][]byte) + ignorePatterns := []string{ + ".git/", ".svn/", ".hg/", "node_modules/", "__MACOSX/", + ".DS_Store", "._*", "*.log", "*.tmp", "*.temp", "*.swp", "*.swo", "*~", + ".env", ".env.*", ".vscode/", ".idea/", "Thumbs.db", "desktop.ini", + } + + err = filepath.Walk(identifier, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + + // Skip non-regular files (symlinks, devices, pipes, etc.) 
+ if !info.Mode().IsRegular() { + return nil + } + + relPath, err := filepath.Rel(identifier, path) + if err != nil { + return err + } + + // Check ignore patterns + for _, pattern := range ignorePatterns { + if matched, _ := filepath.Match(pattern, relPath); matched { + return nil + } + if strings.Contains(relPath, pattern) { + return nil + } + } + + // Only include text files based on extension + if !isTextFile(path) { + return nil + } + + data, err := os.ReadFile(path) + if err != nil { + return err + } + files[relPath] = data + return nil + }) + if err != nil { + return nil, err + } + + return &SkillBundle{ + Name: skillName, + Files: files, + Source: "local", + Identifier: identifier, + TrustLevel: s.TrustLevel(identifier), + Metadata: meta, + }, nil +} + +// Inspect retrieves metadata without reading all files +func (s *LocalSource) Inspect(identifier string) (*SkillMetadata, error) { + info, err := os.Stat(identifier) + if err != nil { + return nil, err + } + if !info.IsDir() { + return nil, fmt.Errorf("not a directory") + } + + skillMdPath := filepath.Join(identifier, "SKILL.md") + content, err := os.ReadFile(skillMdPath) + if err != nil { + return nil, err + } + + meta, err := parseSkillFrontmatter(string(content)) + if err != nil { + return nil, fmt.Errorf("invalid SKILL.md frontmatter in %s: %w", identifier, err) + } + if meta.Name == "" { + meta.Name = filepath.Base(identifier) + } + + return meta, nil +} + +// parseSkillFrontmatter extracts YAML frontmatter from SKILL.md content +// Returns an error if frontmatter delimiters are missing or YAML is invalid +func parseSkillFrontmatter(content string) (*SkillMetadata, error) { + meta := &SkillMetadata{} + + // Look for YAML frontmatter + content = strings.TrimSpace(content) + if !strings.HasPrefix(content, "---") { + return nil, fmt.Errorf("missing opening frontmatter delimiter '---'") + } + + // Find end of frontmatter + endIdx := strings.Index(content[3:], "---") + if endIdx == -1 { + return nil, 
fmt.Errorf("missing closing frontmatter delimiter '---'") + } + + frontmatter := content[3 : endIdx+3] + if err := yaml.Unmarshal([]byte(frontmatter), meta); err != nil { + return nil, fmt.Errorf("invalid YAML frontmatter: %w", err) + } + + return meta, nil +} + +// isTextFile checks if a file is a text file based on extension +func isTextFile(filename string) bool { + ext := strings.ToLower(filepath.Ext(filename)) + if ext != "" && ext[0] == '.' { + ext = ext[1:] + } + + textExts := map[string]bool{ + "md": true, "mdx": true, "txt": true, "json": true, "json5": true, + "yaml": true, "yml": true, "toml": true, "js": true, "cjs": true, "mjs": true, + "ts": true, "tsx": true, "jsx": true, "py": true, "sh": true, "rb": true, + "go": true, "rs": true, "swift": true, "kt": true, "java": true, "cs": true, + "cpp": true, "c": true, "h": true, "hpp": true, "sql": true, "csv": true, + "ini": true, "cfg": true, "env": true, "xml": true, "html": true, + "css": true, "scss": true, "sass": true, "svg": true, + } + + return textExts[ext] +} diff --git a/internal/cli/filesystem/skill_hub/source/skillssh.go b/internal/cli/filesystem/skill_hub/source/skillssh.go new file mode 100644 index 0000000000..14feead8c0 --- /dev/null +++ b/internal/cli/filesystem/skill_hub/source/skillssh.go @@ -0,0 +1,574 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package source + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "regexp" + "strings" +) + +const ( + skillsShBaseURL = "https://skills.sh" +) + +var ( + // Regex patterns for parsing skills.sh detail page + skillsShInstallCmdRe = regexp.MustCompile(`(?i)npx\s+skills\s+add\s+(?Phttps?://github\.com/[^\s<]+|[^\s<]+)(?:\s+--skill\s+(?P[^\s<]+))?`) + skillsShPageH1Re = regexp.MustCompile(`(?i)]*>(?P.*?)</h1>`) + skillsShProseH1Re = regexp.MustCompile(`(?i)<div[^>]*class=["'][^"']*prose[^"']*["'][^>]*>.*?<h1[^>]*>(?P<title>.*?)</h1>`) + skillsShProsePRe = regexp.MustCompile(`(?i)<div[^>]*class=["'][^"']*prose[^"']*["'][^>]*>.*?<p[^>]*>(?P<body>.*?)</p>`) + skillsShWeeklyRe = regexp.MustCompile(`Weekly Installs.*?children\\":\\"(?P<count>[0-9.,Kk]+)\\"`) +) + +// SkillsShDetail holds parsed information from skills.sh detail page +type SkillsShDetail struct { + Repo string `json:"repo"` + InstallSkill string `json:"install_skill"` + PageTitle string `json:"page_title"` + BodyTitle string `json:"body_title"` + BodySummary string `json:"body_summary"` + WeeklyInstalls string `json:"weekly_installs"` + InstallCommand string `json:"install_command"` + RepoURL string `json:"repo_url"` + DetailURL string `json:"detail_url"` +} + +// SkillsShSource handles skills.sh registry skills +type SkillsShSource struct { + client HTTPClientInterface + github *GitHubSource +} + +// NewSkillsShSource creates a new skills.sh source adapter +func NewSkillsShSource(client HTTPClientInterface) *SkillsShSource { + return &SkillsShSource{ + client: client, + github: NewGitHubSource(client), + } +} + +// SourceID returns the source identifier +func (s *SkillsShSource) SourceID() string { + return "skills-sh" +} + +// TrustLevel returns the trust level for skills.sh +func (s *SkillsShSource) TrustLevel(identifier string) string { + canonical := s.normalizeIdentifier(identifier) + // Delegate to github trust level based on the repo + for _, candidate := range 
s.candidateIdentifiers(canonical) { + if level := s.github.TrustLevel(candidate); level != "community" { + return level + } + } + return "community" +} + +// Fetch retrieves a skill from skills.sh +func (s *SkillsShSource) Fetch(identifier string) (*SkillBundle, error) { + canonical := s.normalizeIdentifier(identifier) + + // Fetch detail page from skills.sh + detail, err := s.fetchDetailPage(canonical) + if err != nil { + // Continue without detail info + detail = nil + } + + // Try candidate identifiers + for _, candidate := range s.candidateIdentifiers(canonical) { + bundle, err := s.github.Fetch(candidate) + if err == nil && bundle != nil { + // Validate SKILL.md exists + if _, ok := bundle.Files["SKILL.md"]; !ok { + continue + } + // Update bundle with skills.sh info + bundle.Source = "skills-sh" + bundle.Identifier = s.wrapIdentifier(canonical) + bundle.TrustLevel = s.TrustLevel(identifier) + if detail != nil { + bundle.Metadata = s.mergeDetailMetadata(bundle.Metadata, detail, canonical) + } + return bundle, nil + } + } + + // Try to discover identifier + resolved, err := s.discoverIdentifier(canonical, detail) + if err == nil && resolved != "" { + bundle, err := s.github.Fetch(resolved) + if err == nil && bundle != nil { + // Validate SKILL.md exists + if _, ok := bundle.Files["SKILL.md"]; !ok { + return nil, fmt.Errorf("skill missing required SKILL.md file") + } + bundle.Source = "skills-sh" + bundle.Identifier = s.wrapIdentifier(canonical) + bundle.TrustLevel = s.TrustLevel(identifier) + if detail != nil { + bundle.Metadata = s.mergeDetailMetadata(bundle.Metadata, detail, canonical) + } + return bundle, nil + } + } + + return nil, fmt.Errorf("skill not found: %s", identifier) +} + +// Inspect retrieves metadata from skills.sh +func (s *SkillsShSource) Inspect(identifier string) (*SkillMetadata, error) { + canonical := s.normalizeIdentifier(identifier) + + // Fetch detail page + detail, err := s.fetchDetailPage(canonical) + if err != nil { + detail = nil + 
} + + // Try to get metadata from github + meta, err := s.resolveGitHubMeta(canonical, detail) + if err != nil { + return nil, err + } + + // Update with skills.sh info + meta = s.finalizeInspectMeta(meta, canonical, detail) + return meta, nil +} + +// normalizeIdentifier removes skills.sh prefixes +func (s *SkillsShSource) normalizeIdentifier(identifier string) string { + prefixes := []string{ + "skills-sh/", + "skills.sh/", + "skils-sh/", + "skils.sh/", + } + for _, prefix := range prefixes { + if strings.HasPrefix(identifier, prefix) { + return identifier[len(prefix):] + } + } + return identifier +} + +// wrapIdentifier adds skills-sh prefix +func (s *SkillsShSource) wrapIdentifier(identifier string) string { + return "skills-sh/" + identifier +} + +// candidateIdentifiers generates possible GitHub paths for a skill +func (s *SkillsShSource) candidateIdentifiers(identifier string) []string { + parts := strings.SplitN(identifier, "/", 3) + if len(parts) < 3 { + return []string{identifier} + } + + repo := parts[0] + "/" + parts[1] + skillPath := strings.TrimPrefix(parts[2], "/") + + candidates := []string{ + fmt.Sprintf("github.com/%s/%s", repo, skillPath), + fmt.Sprintf("github.com/%s/skills/%s", repo, skillPath), + fmt.Sprintf("github.com/%s/.agents/skills/%s", repo, skillPath), + fmt.Sprintf("github.com/%s/.claude/skills/%s", repo, skillPath), + } + + // Deduplicate + seen := make(map[string]bool) + result := []string{} + for _, c := range candidates { + if !seen[c] { + seen[c] = true + result = append(result, c) + } + } + return result +} + +// fetchDetailPage fetches and parses skills.sh detail page +func (s *SkillsShSource) fetchDetailPage(identifier string) (*SkillsShDetail, error) { + url := fmt.Sprintf("%s/%s", skillsShBaseURL, identifier) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + + resp, err := s.client.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to fetch detail page: %w", err) + } + defer 
resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("skills.sh returned %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + return s.parseDetailPage(identifier, string(body)), nil +} + +// parseDetailPage extracts information from skills.sh HTML +func (s *SkillsShSource) parseDetailPage(identifier, html string) *SkillsShDetail { + parts := strings.SplitN(identifier, "/", 3) + if len(parts) < 3 { + return nil + } + + defaultRepo := parts[0] + "/" + parts[1] + skillToken := parts[2] + repo := defaultRepo + installSkill := skillToken + + // Extract install command + installCmd := "" + if match := skillsShInstallCmdRe.FindStringSubmatch(html); match != nil { + installCmd = strings.TrimSpace(match[0]) + repoValue := strings.TrimSpace(s.extractGroup(skillsShInstallCmdRe, match, "repo")) + skillValue := strings.TrimSpace(s.extractGroup(skillsShInstallCmdRe, match, "skill")) + if skillValue != "" { + installSkill = skillValue + } + if extracted := s.extractRepoSlug(repoValue); extracted != "" { + repo = extracted + } + } + + return &SkillsShDetail{ + Repo: repo, + InstallSkill: installSkill, + PageTitle: s.extractFirstMatch(skillsShPageH1Re, html), + BodyTitle: s.extractFirstMatch(skillsShProseH1Re, html), + BodySummary: s.extractFirstMatch(skillsShProsePRe, html), + WeeklyInstalls: s.extractWeeklyInstalls(html), + InstallCommand: installCmd, + RepoURL: fmt.Sprintf("https://github.com/%s", repo), + DetailURL: fmt.Sprintf("%s/%s", skillsShBaseURL, identifier), + } +} + +// discoverIdentifier tries to find the skill in non-standard locations +func (s *SkillsShSource) discoverIdentifier(identifier string, detail *SkillsShDetail) (string, error) { + parts := strings.SplitN(identifier, "/", 3) + if len(parts) < 3 { + return "", fmt.Errorf("invalid identifier format") + } + + defaultRepo := parts[0] + "/" + parts[1] + repo := defaultRepo + if detail != nil && detail.Repo != "" { + repo = 
detail.Repo + } + + skillToken := parts[2] + tokens := []string{skillToken} + if detail != nil { + tokens = append(tokens, detail.InstallSkill, detail.PageTitle, detail.BodyTitle) + } + + // Try standard skill paths + basePaths := []string{"skills/", ".agents/skills/", ".claude/skills/"} + for _, basePath := range basePaths { + candidate := fmt.Sprintf("github.com/%s/%s%s", repo, basePath, skillToken) + meta, err := s.github.Inspect(candidate) + if err == nil && meta != nil { + return candidate, nil + } + } + + // Try tree lookup for nested skills + treeResult, err := s.findSkillInRepoTree(repo, skillToken) + if err == nil && treeResult != "" { + return treeResult, nil + } + + // Scan repo root directories + rootURL := fmt.Sprintf("https://api.github.com/repos/%s/contents/", repo) + req, err := http.NewRequest("GET", rootURL, nil) + if err != nil { + return "", err + } + req.Header.Set("Accept", "application/vnd.github.v3+json") + req.Header.Set("User-Agent", "ragflow-cli") + + resp, err := s.client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("github API returned %d", resp.StatusCode) + } + + var entries []struct { + Name string `json:"name"` + Type string `json:"type"` + } + if err := json.NewDecoder(resp.Body).Decode(&entries); err != nil { + return "", err + } + + for _, entry := range entries { + if entry.Type != "dir" { + continue + } + if strings.HasPrefix(entry.Name, ".") || strings.HasPrefix(entry.Name, "_") { + continue + } + if entry.Name == "skills" || entry.Name == ".agents" || entry.Name == ".claude" { + continue // Already tried + } + + // Try direct match + directID := fmt.Sprintf("github.com/%s/%s/%s", repo, entry.Name, skillToken) + meta, err := s.github.Inspect(directID) + if err == nil && meta != nil { + return directID, nil + } + } + + return "", fmt.Errorf("skill not found in repo") +} + +// findSkillInRepoTree searches for skill in repo tree +func (s 
*SkillsShSource) findSkillInRepoTree(repo, skillToken string) (string, error) { + // Get repo tree + url := fmt.Sprintf("https://api.github.com/repos/%s/git/trees/HEAD?recursive=1", repo) + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return "", err + } + req.Header.Set("Accept", "application/vnd.github.v3+json") + req.Header.Set("User-Agent", "ragflow-cli") + + resp, err := s.client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("github API returned %d", resp.StatusCode) + } + + var result struct { + Tree []struct { + Path string `json:"path"` + Type string `json:"type"` + } `json:"tree"` + } + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return "", err + } + + // Look for skill directories matching the token + for _, item := range result.Tree { + if item.Type != "tree" { + continue + } + parts := strings.Split(item.Path, "/") + if len(parts) == 0 { + continue + } + dirName := parts[len(parts)-1] + if s.matchesSkillToken(dirName, skillToken) { + return fmt.Sprintf("github.com/%s/%s", repo, item.Path), nil + } + } + + return "", fmt.Errorf("skill not found in tree") +} + +// matchesSkillToken checks if a directory name matches skill token +func (s *SkillsShSource) matchesSkillToken(dirName, skillToken string) bool { + variants := s.tokenVariants(dirName) + tokenVariants := s.tokenVariants(skillToken) + for v := range tokenVariants { + if variants[v] { + return true + } + } + return false +} + +// tokenVariants generates normalized token variants +func (s *SkillsShSource) tokenVariants(value string) map[string]bool { + variants := make(map[string]bool) + if value == "" { + return variants + } + + value = strings.ToLower(strings.TrimSpace(value)) + if value == "" { + return variants + } + + // Base name (last path component) + parts := strings.Split(value, "/") + base := parts[len(parts)-1] + + // Clean variant + clean := 
strings.TrimPrefix(base, "@") + + variants[value] = true + variants[strings.ReplaceAll(value, "_", "-")] = true + variants[strings.ReplaceAll(value, "/", "-")] = true + variants[base] = true + variants[strings.ReplaceAll(base, "_", "-")] = true + variants[clean] = true + variants[strings.ReplaceAll(clean, "_", "-")] = true + + return variants +} + +// resolveGitHubMeta tries to get metadata from GitHub +func (s *SkillsShSource) resolveGitHubMeta(identifier string, detail *SkillsShDetail) (*SkillMetadata, error) { + for _, candidate := range s.candidateIdentifiers(identifier) { + meta, err := s.github.Inspect(candidate) + if err == nil && meta != nil { + return meta, nil + } + } + + resolved, err := s.discoverIdentifier(identifier, detail) + if err == nil && resolved != "" { + return s.github.Inspect(resolved) + } + + return nil, fmt.Errorf("skill metadata not found") +} + +// finalizeInspectMeta updates metadata with skills.sh info +func (s *SkillsShSource) finalizeInspectMeta(meta *SkillMetadata, canonical string, detail *SkillsShDetail) *SkillMetadata { + if meta == nil { + meta = &SkillMetadata{} + } + + meta = &SkillMetadata{ + Name: meta.Name, + Description: meta.Description, + Version: meta.Version, + Author: meta.Author, + Tags: meta.Tags, + Tools: meta.Tools, + } + + // Use body summary as description if available + if detail != nil && detail.BodySummary != "" { + meta.Description = s.stripHTML(detail.BodySummary) + } else if detail != nil && detail.WeeklyInstalls != "" && meta.Description != "" { + meta.Description = fmt.Sprintf("%s · %s weekly installs on skills.sh", meta.Description, detail.WeeklyInstalls) + } + + return meta +} + +// mergeDetailMetadata merges skills.sh detail into bundle metadata +func (s *SkillsShSource) mergeDetailMetadata(meta *SkillMetadata, detail *SkillsShDetail, canonical string) *SkillMetadata { + if meta == nil { + meta = &SkillMetadata{} + } + + // Create new metadata to avoid modifying the original + merged := 
&SkillMetadata{ + Name: meta.Name, + Description: meta.Description, + Version: meta.Version, + Author: meta.Author, + Tags: meta.Tags, + Tools: meta.Tools, + } + + if detail.BodySummary != "" { + merged.Description = s.stripHTML(detail.BodySummary) + } + + return merged +} + +// extractFirstMatch extracts first matching group from regex +func (s *SkillsShSource) extractFirstMatch(re *regexp.Regexp, text string) string { + match := re.FindStringSubmatch(text) + if match == nil { + return "" + } + for i, name := range re.SubexpNames() { + if i > 0 && i < len(match) && name != "" { + return s.stripHTML(strings.TrimSpace(match[i])) + } + } + return "" +} + +// extractGroup extracts a named group from regex match +// The regex must be passed to map group names to capture indices +func (s *SkillsShSource) extractGroup(re *regexp.Regexp, match []string, name string) string { + if re == nil || match == nil || name == "" { + return "" + } + + for i, groupName := range re.SubexpNames() { + if i >= 0 && i < len(match) && groupName == name { + return match[i] + } + } + return "" +} + +// extractWeeklyInstalls extracts weekly install count +func (s *SkillsShSource) extractWeeklyInstalls(html string) string { + match := skillsShWeeklyRe.FindStringSubmatch(html) + if match == nil { + return "" + } + for i, name := range skillsShWeeklyRe.SubexpNames() { + if i > 0 && i < len(match) && name == "count" { + return match[i] + } + } + return "" +} + +// extractRepoSlug extracts owner/repo from URL or string +func (s *SkillsShSource) extractRepoSlug(value string) string { + value = strings.TrimSpace(value) + value = strings.TrimPrefix(value, "https://github.com/") + value = strings.Trim(value, "/") + parts := strings.Split(value, "/") + if len(parts) >= 2 { + return parts[0] + "/" + parts[1] + } + return "" +} + +// stripHTML removes HTML tags +func (s *SkillsShSource) stripHTML(value string) string { + // Simple HTML tag removal + re := regexp.MustCompile(`<[^>]+>`) + return 
strings.TrimSpace(re.ReplaceAllString(value, "")) +} diff --git a/internal/cli/filesystem/skill_hub/source/types.go b/internal/cli/filesystem/skill_hub/source/types.go new file mode 100644 index 0000000000..5590a26509 --- /dev/null +++ b/internal/cli/filesystem/skill_hub/source/types.go @@ -0,0 +1,47 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package source + +import "net/http" + +// HTTPClientInterface defines the interface for HTTP operations +// This is duplicated here to avoid circular imports +type HTTPClientInterface interface { + Do(req *http.Request) (*http.Response, error) + Get(url string) (*http.Response, error) +} + +// SkillMetadata represents the metadata from SKILL.md frontmatter +// This is duplicated here to avoid circular imports +type SkillMetadata struct { + Name string `yaml:"name"` + Description string `yaml:"description"` + Version string `yaml:"version"` + Author string `yaml:"author"` + Tags []string `yaml:"tags"` + Tools interface{} `yaml:"tools"` +} + +// SkillBundle represents a downloaded skill package +type SkillBundle struct { + Name string + Files map[string][]byte + Source string + Identifier string + TrustLevel string + Metadata *SkillMetadata +} diff --git a/internal/cli/filesystem/skill_install.go b/internal/cli/filesystem/skill_install.go new file mode 100644 index 0000000000..b80cf3ff13 --- /dev/null +++ 
b/internal/cli/filesystem/skill_install.go @@ -0,0 +1,437 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package filesystem + +import ( + stdctx "context" + "fmt" + "net/http" + "net/http/cookiejar" + "net/url" + "os" + "path/filepath" + "strings" + "time" + + "golang.org/x/net/http2" + "golang.org/x/net/publicsuffix" + + "ragflow/internal/cli/filesystem/skill_hub/security" + "ragflow/internal/cli/filesystem/skill_hub/source" +) + +// InstallSkillArgs holds the parsed arguments for install-skill command +type InstallSkillArgs struct { + SpaceID string // Target skills space ID + SourceRef string // Source reference (path or identifier) + Version string // Skill version + SkillName string // Optional: override skill name + Force bool // Force reinstall + SkipVerify bool // Skip security verification + ShowHelp bool +} + +// SkillInstallCommand handles the install-skill command +type SkillInstallCommand struct { + client HTTPClientInterface + fileProvider *FileProvider + skillProvider Provider + scanner *security.Scanner + guard *security.Guard + sourceResolver *source.SourceResolver +} + +// sourceHTTPClientAdapter adapts filesystem.HTTPClientInterface to source.HTTPClientInterface +// This allows us to use the existing HTTP client infrastructure with the source package +type sourceHTTPClientAdapter struct { + client HTTPClientInterface + httpClient *http.Client +} + +func (a 
*sourceHTTPClientAdapter) Do(req *http.Request) (*http.Response, error) { + // Use standard http.Client for direct requests (e.g., GitHub API) + // This bypasses the RAGFlow API client which adds its own base URL + return a.httpClient.Do(req) +} + +func (a *sourceHTTPClientAdapter) Get(url string) (*http.Response, error) { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + return a.Do(req) +} + +// NewInstallSkillCommand creates a new install-skill command handler +func NewInstallSkillCommand(client HTTPClientInterface, fileProvider *FileProvider, skillProvider Provider) *SkillInstallCommand { + // Log proxy settings + if httpProxy := os.Getenv("http_proxy"); httpProxy != "" { + fmt.Printf("Using HTTP proxy: %s\n", httpProxy) + } + if httpsProxy := os.Getenv("https_proxy"); httpsProxy != "" { + fmt.Printf("Using HTTPS proxy: %s\n", httpsProxy) + } + + // Create transport with HTTP/2 support and connection reuse + transport := &http.Transport{ + Proxy: http.ProxyFromEnvironment, + // Enable connection pooling + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + // Enable keep-alive + DisableKeepAlives: false, + ForceAttemptHTTP2: true, + } + // Enable HTTP/2 + http2.ConfigureTransport(transport) + + // Check what proxy will be used + testURL, _ := url.Parse("https://github.com") + if proxy, err := transport.Proxy(&http.Request{URL: testURL}); err == nil && proxy != nil { + fmt.Printf("Proxy enabled for GitHub: %s\n", proxy.String()) + } else if err != nil { + fmt.Printf("Warning: proxy detection error: %v\n", err) + } + + // Create cookie jar for session persistence + jar, err := cookiejar.New(&cookiejar.Options{ + PublicSuffixList: publicsuffix.List, + }) + if err != nil { + fmt.Printf("Warning: failed to create cookie jar: %v\n", err) + jar = nil + } + + // Wrap client with adapter - use standard http.Client with timeout for direct external requests + adaptedClient := &sourceHTTPClientAdapter{ 
+ client: client, + httpClient: &http.Client{ + Timeout: 60 * time.Second, + Transport: transport, + Jar: jar, + }, + } + + return &SkillInstallCommand{ + client: client, + fileProvider: fileProvider, + skillProvider: skillProvider, + scanner: security.NewScanner(), + guard: security.NewGuard(), + sourceResolver: source.NewSourceResolver(adaptedClient), + } +} + +// Execute runs the install-skill command +func (c *SkillInstallCommand) Execute(args []string) error { + parsedArgs, err := c.parseArgs(args) + if err != nil { + return err + } + + if parsedArgs.ShowHelp { + c.PrintHelp() + return nil + } + + ctx := stdctx.Background() + + // 1. Resolve source + fmt.Printf("Resolving source reference: %s\n", parsedArgs.SourceRef) + src, identifier, err := c.sourceResolver.Resolve(parsedArgs.SourceRef) + if err != nil { + return fmt.Errorf("invalid source reference: %w", err) + } + + // 2. Fetch skill bundle + // If version specified, append to identifier for sources that support it + fetchIdentifier := identifier + if parsedArgs.Version != "" { + fetchIdentifier = fmt.Sprintf("%s@%s", identifier, parsedArgs.Version) + fmt.Printf("Fetching skill from %s (version %s)...\n", src.SourceID(), parsedArgs.Version) + } else { + fmt.Printf("Fetching skill from %s...\n", src.SourceID()) + } + bundle, err := src.Fetch(fetchIdentifier) + if err != nil { + return fmt.Errorf("failed to fetch skill: %w", err) + } + fmt.Printf("Found skill '%s' (v%s) with %d files\n", + bundle.Name, bundle.Metadata.Version, len(bundle.Files)) + + // Override skill name if specified + if parsedArgs.SkillName != "" { + bundle.Name = parsedArgs.SkillName + } + + // 3. Check if skill already exists + exists, err := c.skillExists(ctx, parsedArgs.SpaceID, bundle.Name) + if err != nil { + return fmt.Errorf("failed to check existing skill: %w", err) + } + + if exists && !parsedArgs.Force { + return fmt.Errorf("skill '%s' already exists in space '%s'. 
Use --force to reinstall", bundle.Name, parsedArgs.SpaceID) + } + + // 4. Security scan (unless skipped) + if !parsedArgs.SkipVerify { + fmt.Println("Running security scan...") + trustLevel := src.TrustLevel(identifier) + scanResult := c.scanner.ScanSkill(bundle.Name, src.SourceID(), trustLevel, bundle.Files) + + allowed, reason := c.guard.ShouldAllowInstall(scanResult, parsedArgs.Force) + if !allowed { + fmt.Println(c.guard.FormatScanReport(scanResult)) + return fmt.Errorf("installation blocked: %s", reason) + } + + fmt.Println(c.guard.FormatScanReport(scanResult)) + fmt.Printf("✓ Security check passed: %s\n\n", reason) + } + + // 5. Force mode: delete existing skill first + if parsedArgs.Force && exists { + fmt.Printf("Force mode: removing existing skill '%s'...\n", bundle.Name) + if err := c.uninstallSkill(ctx, parsedArgs.SpaceID, bundle.Name); err != nil { + return fmt.Errorf("failed to remove existing skill: %w", err) + } + fmt.Println() + } + + // 6. Install skill + fmt.Printf("Installing skill '%s' to space '%s'...\n", bundle.Name, parsedArgs.SpaceID) + if err := c.installSkill(ctx, parsedArgs.SpaceID, bundle, parsedArgs.Force); err != nil { + return fmt.Errorf("failed to install skill: %w", err) + } + + // 7. 
Update index + fmt.Printf("Updating search index for skill '%s'...\n", bundle.Name) + if err := c.updateIndex(ctx, parsedArgs.SpaceID, bundle.Name); err != nil { + fmt.Printf("⚠ Warning: failed to update index: %v\n", err) + } + + fmt.Printf("✓ Successfully installed skill '%s' (version: %s)\n", bundle.Name, bundle.Metadata.Version) + return nil +} + +// uninstallSkill removes an existing skill (for --force mode) +func (c *SkillInstallCommand) uninstallSkill(ctx stdctx.Context, spaceID, skillName string) error { + var indexErr, folderErr error + + // Delete index + if skillProv, ok := c.skillProvider.(*SkillProvider); ok { + if err := skillProv.DeleteSkill(ctx, spaceID, skillName); err != nil { + indexErr = fmt.Errorf("failed to delete search index: %w", err) + fmt.Printf("⚠ Warning: %v\n", indexErr) + } else { + fmt.Printf("✓ Search index deleted\n") + } + } + + // Delete folder + if c.fileProvider != nil { + folderPath := fmt.Sprintf("skills/%s/%s", spaceID, skillName) + if err := c.fileProvider.DeleteFolderByPath(ctx, folderPath); err != nil { + folderErr = fmt.Errorf("failed to delete skill folder: %w", err) + fmt.Printf("⚠ Warning: %v\n", folderErr) + } else { + fmt.Printf("✓ Skill folder deleted\n") + } + } + + // Return error if both failed + if indexErr != nil && folderErr != nil { + return fmt.Errorf("failed to uninstall: index (%v), folder (%v)", indexErr, folderErr) + } + + return nil +} + +// installSkill installs a skill bundle using existing SkillUploader +func (c *SkillInstallCommand) installSkill(ctx stdctx.Context, spaceID string, bundle *source.SkillBundle, force bool) error { + // Create a temporary directory to hold the skill files + tempDir, err := os.MkdirTemp("", "skill-install-*") + if err != nil { + return fmt.Errorf("failed to create temp directory: %w", err) + } + defer os.RemoveAll(tempDir) + + // Write files to temp directory + skillDir := filepath.Join(tempDir, bundle.Name) + if err := os.MkdirAll(skillDir, 0755); err != nil { + return 
fmt.Errorf("failed to create skill directory: %w", err) + } + + for relPath, content := range bundle.Files { + filePath := filepath.Join(skillDir, relPath) + dir := filepath.Dir(filePath) + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("failed to create directory %s: %w", dir, err) + } + if err := os.WriteFile(filePath, content, 0644); err != nil { + return fmt.Errorf("failed to write file %s: %w", relPath, err) + } + } + + // Use existing SkillUploader to upload the skill + uploader := NewSkillUploader(c.client, c.fileProvider) + uploader.SetSkillProvider(c.skillProvider) + uploader.SetForce(force) + + version := bundle.Metadata.Version + if version == "" { + version = "1.0.0" + } + + return uploader.UploadSkill(ctx, skillDir, version, fmt.Sprintf("skills/%s", spaceID), bundle.Name) +} + +// skillExists checks if a skill already exists +func (c *SkillInstallCommand) skillExists(ctx stdctx.Context, spaceID, skillName string) (bool, error) { + folderPath := fmt.Sprintf("skills/%s/%s", spaceID, skillName) + _, err := c.fileProvider.List(ctx, folderPath, nil) + if err != nil { + // If error, likely doesn't exist + return false, nil + } + return true, nil +} + +// updateIndex updates the search index for a skill +// Note: Indexing is now handled by SkillUploader during upload +func (c *SkillInstallCommand) updateIndex(ctx stdctx.Context, spaceID, skillName string) error { + // Indexing is automatically performed by SkillUploader.UploadSkill + // This method is kept for potential future use + return nil +} + +// parseArgs parses command arguments +func (c *SkillInstallCommand) parseArgs(args []string) (*InstallSkillArgs, error) { + result := &InstallSkillArgs{} + + var nonFlagArgs []string + for i := 0; i < len(args); i++ { + arg := args[i] + + switch arg { + case "-h", "--help": + result.ShowHelp = true + return result, nil + case "-v", "--version": + if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") { + result.Version = args[i+1] + i++ + } 
else { + return nil, fmt.Errorf("version flag requires a value") + } + case "-n", "--name": + if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") { + result.SkillName = args[i+1] + i++ + } else { + return nil, fmt.Errorf("name flag requires a value") + } + case "-f", "--force": + result.Force = true + case "--skip-verify": + result.SkipVerify = true + default: + if !strings.HasPrefix(arg, "-") { + nonFlagArgs = append(nonFlagArgs, arg) + } + } + } + + // Parse space and source ref + if len(nonFlagArgs) < 1 { + return nil, fmt.Errorf("space ID is required") + } + if len(nonFlagArgs) < 2 { + return nil, fmt.Errorf("source reference is required (local path or remote identifier)") + } + + result.SpaceID = nonFlagArgs[0] + result.SourceRef = nonFlagArgs[1] + + return result, nil +} + +// PrintHelp prints the help message +func (c *SkillInstallCommand) PrintHelp() { + fmt.Println(`Usage: install-skill <space> <source> [options] + +Install a skill from multiple sources into a RAGFlow space. + +Arguments: + <space> Target skills space ID (required) + <source> Skill source reference (required): + - Local: ./path/to/skill or /absolute/path + - GitHub: github.com/owner/repo/path/to/skill + - ClawHub: clawhub://owner/skill-name or clawhub.ai/owner/skill-name + - skills.sh: skill://skill-name or skills.sh/skill/name + +Options: + -v, --version string Specify skill version (default: from SKILL.md or 1.0.0) + -n, --name string Override skill name (default: from SKILL.md) + -f, --force Force reinstall if skill exists (deletes existing first) + --skip-verify Skip security verification (use with caution) + -h, --help Show this help message + +Security: + By default, all skills are scanned for potential security threats before + installation. The scan checks for: + - Data exfiltration patterns (curl $SECRET, .ssh access, etc.) + - Prompt injection attempts (DAN mode, ignore instructions, etc.) + - Destructive commands (rm -rf /, mkfs, etc.) 
+ - Persistence mechanisms (cron, .bashrc, authorized_keys, etc.) + - Network threats (reverse shells, tunneling, etc.) + - Obfuscation (base64 | bash, eval(), etc.) + + Trust levels: + - builtin: Official RAGFlow skills (always allowed) + - trusted: openai/skills, anthropics/skills (caution allowed) + - community: All other sources (findings blocked unless --force) + +Examples: + # Install from local path + install-skill my-space ./my-local-skill + + # Install from GitHub + install-skill my-space github.com/openai/skills/skill-creator + + # Force reinstall (delete existing and reinstall) + install-skill my-space ./my-skill --force + + # Force install with custom name, skip security check + install-skill my-space claw://unknown-skill --force --name my-skill --skip-verify + + # Install specific version + install-skill my-space skill://kubernetes --version 2.1.0 + +Note: 'add-skill' command is deprecated. Use 'install-skill' instead.`) +} + +// getDir extracts directory from file path +func getDir(path string) string { + idx := strings.LastIndex(path, "/") + if idx == -1 { + return "" + } + return path[:idx] +} diff --git a/internal/cli/filesystem/skill_uninstall.go b/internal/cli/filesystem/skill_uninstall.go new file mode 100644 index 0000000000..ec2f7ea9cc --- /dev/null +++ b/internal/cli/filesystem/skill_uninstall.go @@ -0,0 +1,166 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package filesystem + +import ( + stdctx "context" + "fmt" + "strings" +) + +// UninstallSkillArgs holds the parsed arguments for uninstall-skill command +type UninstallSkillArgs struct { + SkillName string + SpaceID string + ShowHelp bool +} + +// SkillUninstallCommand handles the uninstall-skill command +type SkillUninstallCommand struct { + client HTTPClientInterface + skillProvider Provider + fileProvider *FileProvider +} + +// NewUninstallSkillCommand creates a new uninstall-skill command handler +func NewUninstallSkillCommand(client HTTPClientInterface, skillProvider Provider, fileProvider *FileProvider) *SkillUninstallCommand { + return &SkillUninstallCommand{ + client: client, + skillProvider: skillProvider, + fileProvider: fileProvider, + } +} + +// Execute runs the uninstall-skill command +func (c *SkillUninstallCommand) Execute(args []string) error { + parsedArgs, err := c.parseArgs(args) + if err != nil { + return err + } + + if parsedArgs.ShowHelp { + c.PrintHelp() + return nil + } + + return c.uninstallSkill(stdctx.Background(), parsedArgs.SpaceID, parsedArgs.SkillName) +} + +// uninstallSkill deletes a skill and its index +func (c *SkillUninstallCommand) uninstallSkill(ctx stdctx.Context, spaceID, skillName string) error { + if c.skillProvider == nil { + return fmt.Errorf("skill provider not available") + } + + fmt.Printf("Uninstalling skill '%s' from space '%s'...\n\n", skillName, spaceID) + + var indexErr, folderErr error + + // 1. Delete search index + skillProvider, ok := c.skillProvider.(*SkillProvider) + if ok { + fmt.Printf("Deleting search index for skill '%s'...\n", skillName) + if err := skillProvider.DeleteSkill(ctx, spaceID, skillName); err != nil { + indexErr = fmt.Errorf("failed to delete search index: %w", err) + fmt.Printf("⚠ %v\n", indexErr) + } else { + fmt.Printf("✓ Search index deleted\n") + } + } + + // 2. 
Delete file system folder + if c.fileProvider != nil { + fmt.Printf("Deleting skill folder '%s/%s'...\n", spaceID, skillName) + folderPath := fmt.Sprintf("skills/%s/%s", spaceID, skillName) + if err := c.fileProvider.DeleteFolderByPath(ctx, folderPath); err != nil { + folderErr = fmt.Errorf("failed to delete skill folder: %w", err) + fmt.Printf("⚠ %v\n", folderErr) + } else { + fmt.Printf("✓ Skill folder deleted\n") + } + } + + // 3. Report results + fmt.Println() + + if indexErr != nil && folderErr != nil { + return fmt.Errorf("failed to completely uninstall skill '%s': index deletion failed (%v), folder deletion failed (%v)", + skillName, indexErr, folderErr) + } + if indexErr != nil { + return fmt.Errorf("failed to uninstall skill '%s': %w", skillName, indexErr) + } + if folderErr != nil { + return fmt.Errorf("failed to uninstall skill '%s': %w", skillName, folderErr) + } + + fmt.Printf("✓ Successfully uninstalled skill '%s'\n", skillName) + return nil +} + +// parseArgs parses command arguments +func (c *SkillUninstallCommand) parseArgs(args []string) (*UninstallSkillArgs, error) { + result := &UninstallSkillArgs{} + + var nonFlagArgs []string + for i := 0; i < len(args); i++ { + arg := args[i] + + switch arg { + case "-h", "--help": + result.ShowHelp = true + return result, nil + default: + if !strings.HasPrefix(arg, "-") { + nonFlagArgs = append(nonFlagArgs, arg) + } + } + } + + // Parse space and skill name + if len(nonFlagArgs) < 1 { + return nil, fmt.Errorf("space ID is required") + } + if len(nonFlagArgs) < 2 { + return nil, fmt.Errorf("skill name is required") + } + + result.SpaceID = nonFlagArgs[0] + result.SkillName = nonFlagArgs[1] + + return result, nil +} + +// PrintHelp prints the help message +func (c *SkillUninstallCommand) PrintHelp() { + fmt.Println(`Usage: uninstall-skill <space> <skill-name> + +Remove a skill from RAGFlow and delete its search index. 
+ +Arguments: + <space> Skills space ID (required) + <skill-name> Name of the skill to uninstall (required) + +Options: + -h, --help Show this help message + +Examples: + uninstall-skill my-space my-skill + uninstall-skill production document-analyzer + +Note: 'delete-skill' command is deprecated. Use 'uninstall-skill' instead.`) +} diff --git a/internal/cli/contextengine/types.go b/internal/cli/filesystem/types.go similarity index 96% rename from internal/cli/contextengine/types.go rename to internal/cli/filesystem/types.go index b017774228..8845ff8629 100644 --- a/internal/cli/contextengine/types.go +++ b/internal/cli/filesystem/types.go @@ -14,11 +14,11 @@ // limitations under the License. // -package contextengine +package filesystem import "time" -// NodeType represents the type of a node in the context filesystem +// NodeType represents the type of a node in the virtual filesystem type NodeType string const ( @@ -52,7 +52,7 @@ const ( CommandCat CommandType = "cat" ) -// Command represents a context engine command +// Command represents a filesystem command type Command struct { Type CommandType `json:"type"` Path string `json:"path"` diff --git a/internal/cli/contextengine/utils.go b/internal/cli/filesystem/utils.go similarity index 99% rename from internal/cli/contextengine/utils.go rename to internal/cli/filesystem/utils.go index ca9b7ca986..6b0199be71 100644 --- a/internal/cli/contextengine/utils.go +++ b/internal/cli/filesystem/utils.go @@ -14,7 +14,7 @@ // limitations under the License. 
// -package contextengine +package filesystem import ( "encoding/json" diff --git a/internal/cli/http_client.go b/internal/cli/http_client.go index 6dc1a8846b..fd84d80ce3 100644 --- a/internal/cli/http_client.go +++ b/internal/cli/http_client.go @@ -336,6 +336,50 @@ func (c *HTTPClient) RequestJSON(method, path string, useAPIBase bool, authKind return resp.JSON() } +// UploadMultipart uploads data using multipart/form-data +func (c *HTTPClient) UploadMultipart(path string, contentType string, body io.Reader) error { + url := c.BuildURL(path, true) + + req, err := http.NewRequest("POST", url, body) + if err != nil { + return err + } + + // Set headers + req.Header.Set("Content-Type", contentType) + if c.APIToken != "" { + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", c.APIToken)) + } else if c.LoginToken != "" { + req.Header.Set("Authorization", c.LoginToken) + } + + resp, err := c.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + if resp.StatusCode != 200 { + return fmt.Errorf("upload failed: HTTP %d - %s", resp.StatusCode, string(respBody)) + } + + // Check response code + var result struct { + Code int `json:"code"` + Message string `json:"message"` + } + if err := json.Unmarshal(respBody, &result); err == nil && result.Code != 0 { + return fmt.Errorf("upload failed: %s", result.Message) + } + + return nil +} + // RequestStream makes an HTTP request for SSE streaming and returns the response body reader func (c *HTTPClient) RequestStream(method, path string, useAPIBase bool, authKind string, headers map[string]string, jsonBody map[string]interface{}) (io.ReadCloser, error) { url := c.BuildURL(path, useAPIBase) diff --git a/internal/cli/lexer.go b/internal/cli/lexer.go index c8ffb1bffd..8e682df547 100644 --- a/internal/cli/lexer.go +++ b/internal/cli/lexer.go @@ -108,7 +108,7 @@ func (l *Lexer) NextToken() Token { tok.Type = TokenEOF tok.Value = 
"" default: - if isLetter(l.ch) { + if isLetter(l.ch) || l.ch == '_' { ident := l.readIdentifier() return l.lookupIdent(ident) } else if isDigit(l.ch) { diff --git a/internal/cli/parser.go b/internal/cli/parser.go index 254893ef75..92908f2ea9 100644 --- a/internal/cli/parser.go +++ b/internal/cli/parser.go @@ -57,9 +57,10 @@ func (p *Parser) Parse(adminCommand bool) (*Command, error) { } // Check for ContextEngine commands (ls, cat, search) - //if p.curToken.Type == TokenIdentifier && isCECommand(p.curToken.Value) { - // return p.parseCECommand() - //} + // Note: These are now handled in parseUserCommand to support both SQL-style and CE-style syntax + // if p.curToken.Type == TokenIdentifier && isCECommand(p.curToken.Value) { + // return p.parseCECommand() + // } return p.parseCommand(adminCommand) } @@ -199,9 +200,9 @@ func (p *Parser) parseUserCommand() (*Command, error) { case TokenCheck: return p.parseCheckCommand() case TokenLS: - return p.parseContextListCommand() + return p.parseCEListCommand() case TokenCat: - return p.parseContextCatCommand() + return p.parseCECatCommand() case TokenUse: return p.parseUseCommand() case TokenUpdate: @@ -248,7 +249,7 @@ func isKeyword(tokenType int) bool { return tokenType >= TokenLogin && tokenType <= TokenTag } -// isCECommand checks if the given string is a ContextEngine command +// isCECommand checks if the given string is a Filesystem command func isCECommand(s string) bool { upper := strings.ToUpper(s) switch upper { @@ -304,6 +305,8 @@ func (p *Parser) parseCECommand() (*Command, error) { switch cmdName { case "LS", "LIST": return p.parseCEListCommand() + case "CAT": + return p.parseCECatCommand() case "SEARCH": return p.parseCESearchCommand() default: @@ -327,8 +330,49 @@ func (p *Parser) parseCEListCommand() (*Command, error) { if p.curToken.Type == TokenQuotedString { path = strings.Trim(path, "\"'") } - cmd.Params["path"] = path p.nextToken() + + // Handle path components separated by slashes (e.g., "skills/hub1") 
+ for p.curToken.Type == TokenSlash { + p.nextToken() // consume slash + if p.curToken.Type == TokenIdentifier || p.curToken.Type == TokenDatasets || + p.curToken.Type == TokenAgents || p.curToken.Type == TokenChats { + path = path + "/" + p.curToken.Value + p.nextToken() + } else if p.curToken.Type == TokenNumber { + // Handle version numbers like 1.0.0 (parsed as number . number . number) + // OR filenames starting with numbers like 3_list_compressors.pdf + numberPart := p.curToken.Value + p.nextToken() + // Continue reading .number parts (version number format) + if p.curToken.Type == TokenIllegal && p.curToken.Value == "." { + versionPart := numberPart + for p.curToken.Type == TokenIllegal && p.curToken.Value == "." { + p.nextToken() // consume . + if p.curToken.Type == TokenNumber { + versionPart = versionPart + "." + p.curToken.Value + p.nextToken() + } else { + break + } + } + path = path + "/" + versionPart + } else if p.curToken.Type == TokenIdentifier { + // Filename starting with number: 3_list_compressors.pdf + path = path + "/" + numberPart + p.curToken.Value + p.nextToken() + } else { + // Just a number + path = path + "/" + numberPart + } + } else { + // Trailing slash, just append it + path = path + "/" + break + } + } + + cmd.Params["path"] = path } else { // Default to "datasets" root cmd.Params["path"] = "datasets" @@ -342,6 +386,76 @@ func (p *Parser) parseCEListCommand() (*Command, error) { return cmd, nil } +// parseCECatCommand parses the cat command +// Syntax: cat <path> +func (p *Parser) parseCECatCommand() (*Command, error) { + p.nextToken() // consume CAT + + cmd := NewCommand("ce_cat") + + if p.curToken.Type != TokenIdentifier && p.curToken.Type != TokenQuotedString { + return nil, fmt.Errorf("expected path after CAT") + } + + path := p.curToken.Value + if p.curToken.Type == TokenQuotedString { + path = strings.Trim(path, "\"'") + } + p.nextToken() + + // Handle path components separated by slashes (e.g., "skills/hub1/skill/README.md") 
+ for p.curToken.Type == TokenSlash { + p.nextToken() // consume slash + if p.curToken.Type == TokenIdentifier || p.curToken.Type == TokenAgents || + p.curToken.Type == TokenChats || p.curToken.Type == TokenDatasets { + path = path + "/" + p.curToken.Value + p.nextToken() + } else if p.curToken.Type == TokenNumber { + // Handle version numbers like 1.0.0 (parsed as number . number . number) + // OR filenames starting with numbers like 3_list_compressors.pdf + numberPart := p.curToken.Value + p.nextToken() + // Continue reading .number parts (version number format) + if p.curToken.Type == TokenIllegal && p.curToken.Value == "." { + versionPart := numberPart + for p.curToken.Type == TokenIllegal && p.curToken.Value == "." { + p.nextToken() // consume . + if p.curToken.Type == TokenNumber { + versionPart = versionPart + "." + p.curToken.Value + p.nextToken() + } else { + break + } + } + path = path + "/" + versionPart + } else if p.curToken.Type == TokenIdentifier { + // Filename starting with number: 3_list_compressors.pdf + path = path + "/" + numberPart + p.curToken.Value + p.nextToken() + } else { + // Just a number + path = path + "/" + numberPart + } + } else if p.curToken.Type == TokenQuotedString { + path = path + "/" + strings.Trim(p.curToken.Value, "\"'") + p.nextToken() + } else { + // Trailing slash, just append it + path = path + "/" + break + } + } + + cmd.Params["path"] = path + + // Optional semicolon + if p.curToken.Type == TokenSemicolon { + p.nextToken() + } + + return cmd, nil +} + // parseCESearchCommand parses the search command // Syntax: search <query> or search <query> in <path> func (p *Parser) parseCESearchCommand() (*Command, error) { @@ -372,8 +486,52 @@ func (p *Parser) parseCESearchCommand() (*Command, error) { if p.curToken.Type == TokenQuotedString { path = strings.Trim(path, "\"'") } - cmd.Params["path"] = path p.nextToken() + + // Handle path components separated by slashes (e.g., "skills/hub1") + for p.curToken.Type == TokenSlash { 
+ p.nextToken() // consume slash + if p.curToken.Type == TokenIdentifier || p.curToken.Type == TokenAgents || + p.curToken.Type == TokenChats || p.curToken.Type == TokenDatasets { + path = path + "/" + p.curToken.Value + p.nextToken() + } else if p.curToken.Type == TokenNumber { + // Handle version numbers like 1.0.0 (parsed as number . number . number) + // OR filenames starting with numbers like 3_list_compressors.pdf + numberPart := p.curToken.Value + p.nextToken() + // Continue reading .number parts (version number format) + if p.curToken.Type == TokenIllegal && p.curToken.Value == "." { + versionPart := numberPart + for p.curToken.Type == TokenIllegal && p.curToken.Value == "." { + p.nextToken() // consume . + if p.curToken.Type == TokenNumber { + versionPart = versionPart + "." + p.curToken.Value + p.nextToken() + } else { + break + } + } + path = path + "/" + versionPart + } else if p.curToken.Type == TokenIdentifier { + // Filename starting with number: 3_list_compressors.pdf + path = path + "/" + numberPart + p.curToken.Value + p.nextToken() + } else { + // Just a number + path = path + "/" + numberPart + } + } else if p.curToken.Type == TokenQuotedString { + path = path + "/" + strings.Trim(p.curToken.Value, "\"'") + p.nextToken() + } else { + // Trailing slash, just append it + path = path + "/" + break + } + } + + cmd.Params["path"] = path } else { cmd.Params["path"] = "." 
} diff --git a/internal/cli/response.go b/internal/cli/response.go index 90dd0dbba4..4331a76adb 100644 --- a/internal/cli/response.go +++ b/internal/cli/response.go @@ -322,3 +322,26 @@ func (r *ContextSearchResponse) PrintOut() { fmt.Printf("%d, %s\n", r.Code, r.Message) } } + +// ContextCatResponse represents the response for cat command +type ContextCatResponse struct { + Code int `json:"code"` + Content string `json:"content"` + Message string `json:"message"` + Duration float64 + OutputFormat OutputFormat +} + +func (r *ContextCatResponse) Type() string { return "ce_cat" } +func (r *ContextCatResponse) TimeCost() float64 { return r.Duration } +func (r *ContextCatResponse) SetOutputFormat(format OutputFormat) { r.OutputFormat = format } +func (r *ContextCatResponse) PrintOut() { + if r.Code == 0 { + fmt.Println(r.Content) + } else { + fmt.Println("ERROR") + fmt.Printf("%d, %s\n", r.Code, r.Message) + } +} + + diff --git a/internal/cli/types.go b/internal/cli/types.go index 12822f4a64..b6032cd11c 100644 --- a/internal/cli/types.go +++ b/internal/cli/types.go @@ -109,6 +109,7 @@ const ( TokenVector TokenSize TokenName // For ALTER PROVIDER <name> NAME <new_name> + TokenPool TokenBalance TokenInstance TokenInstances @@ -152,6 +153,7 @@ const ( TokenQuotedString TokenInteger TokenFloat + TokenNumber = TokenInteger // Alias for integer tokens in path parsing (e.g., version numbers like 1.0.0) // Special TokenSemicolon diff --git a/internal/cli/user_command.go b/internal/cli/user_command.go index c78a102960..bdf3250033 100644 --- a/internal/cli/user_command.go +++ b/internal/cli/user_command.go @@ -22,7 +22,7 @@ import ( "encoding/json" "fmt" "os" - ce "ragflow/internal/cli/contextengine" + ce "ragflow/internal/cli/filesystem" "strings" "time" ) @@ -1818,6 +1818,36 @@ func (c *RAGFlowClient) AddCustomModel(cmd *Command) (ResponseIf, error) { // Context related commands +// CECat handles the cat command - shows content using Context Engine +func (c *RAGFlowClient) 
CECat(cmd *Command) (ResponseIf, error) { + if c.HTTPClient.APIToken == "" && c.HTTPClient.LoginToken == "" { + return nil, fmt.Errorf("API token not set. Please login first") + } + if c.ServerType != "user" { + return nil, fmt.Errorf("this command is only allowed in USER mode") + } + + path, ok := cmd.Params["path"].(string) + if !ok { + return nil, fmt.Errorf("fail to convert 'path' to string") + } + + // Execute cat command through Filesystem Engine + ctx := context.Background() + content, err := c.ContextEngine.Cat(ctx, path) + if err != nil { + return nil, err + } + + // Convert to response + var response ContextCatResponse + response.OutputFormat = c.OutputFormat + response.Code = 0 + response.Content = string(content) + + return &response, nil +} + // CEList handles the ls command - lists nodes using Context Engine func (c *RAGFlowClient) CEList(cmd *Command) (ResponseIf, error) { // Get path from command params, default to "datasets" @@ -1838,7 +1868,7 @@ func (c *RAGFlowClient) CEList(cmd *Command) (ResponseIf, error) { opts.Offset = offset } - // Execute list command through Context Engine + // Execute list command through Filesystem Engine ctx := context.Background() result, err := c.ContextEngine.List(ctx, path, opts) if err != nil { @@ -1877,7 +1907,7 @@ func (c *RAGFlowClient) CESearch(cmd *Command) (ResponseIf, error) { opts.Recursive = recursive } - // Execute search command through Context Engine + // Execute search command through Filesystem Engine ctx := context.Background() result, err := c.ContextEngine.Search(ctx, path, opts) if err != nil { diff --git a/internal/cli/user_parser.go b/internal/cli/user_parser.go index 43317fe6ec..5496cc3aa1 100644 --- a/internal/cli/user_parser.go +++ b/internal/cli/user_parser.go @@ -2164,7 +2164,7 @@ func (p *Parser) parseSearchCommand() (*Command, error) { return cmd, nil } - cmd := NewCommand("context_search") + cmd := NewCommand("ce_search") cmd.Params["query"] = question diff --git 
a/internal/dao/database.go b/internal/dao/database.go index 429d2f5be1..cb33929d43 100644 --- a/internal/dao/database.go +++ b/internal/dao/database.go @@ -147,6 +147,7 @@ func InitDB() error { &entity.EvaluationResult{}, &entity.TimeRecord{}, &entity.License{}, + &entity.SkillSearchConfig{}, &entity.TenantModelInstance{}, &entity.TenantModel{}, &entity.TenantModelGroupMapping{}, diff --git a/internal/dao/file.go b/internal/dao/file.go index 347c04f6ea..e09a75fa56 100644 --- a/internal/dao/file.go +++ b/internal/dao/file.go @@ -199,6 +199,11 @@ func (dao *FileDAO) Create(file *entity.File) error { return DB.Create(file).Error } +// UpdateByID updates a file by ID +func (dao *FileDAO) UpdateByID(id string, updates map[string]interface{}) error { + return DB.Model(&entity.File{}).Where("id = ?", id).Updates(updates).Error +} + // DeleteByTenantID deletes all files by tenant ID (hard delete) func (dao *FileDAO) DeleteByTenantID(tenantID string) (int64, error) { result := DB.Unscoped().Where("tenant_id = ?", tenantID).Delete(&entity.File{}) @@ -308,11 +313,6 @@ func (dao *FileDAO) Query(name string, parentID string) []*entity.File { return files } -// UpdateByID updates file by ID with the given fields -func (dao *FileDAO) UpdateByID(id string, updates map[string]interface{}) error { - return DB.Model(&entity.File{}).Where("id = ?", id).Updates(updates).Error -} - // Delete deletes a file by ID (hard delete) func (dao *FileDAO) Delete(id string) error { return DB.Unscoped().Where("id = ?", id).Delete(&entity.File{}).Error diff --git a/internal/dao/migration.go b/internal/dao/migration.go index 2c0f4884bf..d7f227a5a9 100644 --- a/internal/dao/migration.go +++ b/internal/dao/migration.go @@ -18,6 +18,7 @@ package dao import ( "fmt" + "ragflow/internal/entity" "ragflow/internal/logger" "strings" @@ -51,6 +52,16 @@ func RunMigrations(db *gorm.DB) error { return fmt.Errorf("failed to modify column types: %w", err) } + // Create skill search tables + if err := 
migrateSkillSearchTables(db); err != nil { + return fmt.Errorf("failed to migrate skill search tables: %w", err) + } + + // Create skill space tables + if err := migrateSkillSpaceTables(db); err != nil { + return fmt.Errorf("failed to migrate skill space tables: %w", err) + } + logger.Info("All manual migrations completed successfully") return nil } @@ -313,3 +324,153 @@ func addColumnIfNotExists(db *gorm.DB, tableName, columnName, columnDef string) sql := fmt.Sprintf("ALTER TABLE %s ADD COLUMN %s %s", tableName, columnName, columnDef) return db.Exec(sql).Error } + +// migrateSkillSearchTables creates skill search related tables +func migrateSkillSearchTables(db *gorm.DB) error { + // Create skill_search_configs table only + if !db.Migrator().HasTable("skill_search_configs") { + logger.Info("Creating skill_search_configs table...") + sql := ` + CREATE TABLE IF NOT EXISTS skill_search_configs ( + id VARCHAR(32) PRIMARY KEY, + tenant_id VARCHAR(32) NOT NULL, + space_id VARCHAR(128) NOT NULL DEFAULT 'default', + embd_id VARCHAR(128) NOT NULL, + vector_similarity_weight FLOAT DEFAULT 0.3, + similarity_threshold FLOAT DEFAULT 0.2, + field_config JSON, + rerank_id VARCHAR(128), + tenant_rerank_id BIGINT, + top_k BIGINT DEFAULT 10, + index_version VARCHAR(32) DEFAULT '1.0.0', + status VARCHAR(1) DEFAULT '1', + create_time BIGINT, + update_time DATETIME, + INDEX idx_tenant_id (tenant_id), + INDEX idx_space_id (space_id), + UNIQUE INDEX idx_tenant_space_embd (tenant_id, space_id, embd_id) + ) + ` + if err := db.Exec(sql).Error; err != nil { + logger.Warn("Failed to create skill_search_configs table with MySQL dialect, trying generic", zap.Error(err)) + if err := db.AutoMigrate(&entity.SkillSearchConfig{}); err != nil { + return err + } + // AutoMigrate doesn't create unique indexes, so create them explicitly + logger.Info("Creating unique indexes for skill_search_configs...") + if err := db.Exec(`ALTER TABLE skill_search_configs ADD UNIQUE INDEX idx_tenant_space_embd 
(tenant_id, space_id, embd_id)`).Error; err != nil { + return fmt.Errorf("failed to create unique index idx_tenant_space_embd: %w", err) + } + } + } else { + // Add space_id for existing installations. + if err := addColumnIfNotExists(db, "skill_search_configs", "space_id", "VARCHAR(128) NOT NULL DEFAULT 'default'"); err != nil { + return fmt.Errorf("failed to add space_id column to skill_search_configs: %w", err) + } + + // Drop legacy unique index (tenant_id, embd_id) to allow per-space configs. + var legacyIndexExists int64 + db.Raw(`SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS + WHERE TABLE_NAME = 'skill_search_configs' AND INDEX_NAME = 'idx_tenant_embd'`).Scan(&legacyIndexExists) + if legacyIndexExists > 0 { + logger.Info("Dropping legacy unique index idx_tenant_embd from skill_search_configs...") + if err := db.Exec(`ALTER TABLE skill_search_configs DROP INDEX idx_tenant_embd`).Error; err != nil { + return fmt.Errorf("failed to drop legacy unique index idx_tenant_embd: %w", err) + } + } + + // Table exists, check if unique index exists + var indexExists int64 + db.Raw(`SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS + WHERE TABLE_NAME = 'skill_search_configs' AND INDEX_NAME = 'idx_tenant_space_embd'`).Scan(&indexExists) + if indexExists == 0 { + logger.Info("Adding unique index idx_tenant_space_embd to skill_search_configs...") + if err := db.Exec(`ALTER TABLE skill_search_configs + ADD UNIQUE INDEX idx_tenant_space_embd (tenant_id, space_id, embd_id)`).Error; err != nil { + return fmt.Errorf("failed to add unique index idx_tenant_space_embd: %w", err) + } + } + } + + return nil +} + +// migrateSkillSpaceTables creates skill space related tables +func migrateSkillSpaceTables(db *gorm.DB) error { + if !db.Migrator().HasTable("skill_spaces") { + logger.Info("Creating skill_spaces table...") + sql := ` + CREATE TABLE IF NOT EXISTS skill_spaces ( + id VARCHAR(32) PRIMARY KEY, + tenant_id VARCHAR(32) NOT NULL, + name VARCHAR(128) NOT NULL, + folder_id 
VARCHAR(32) NOT NULL, + description TEXT, + embd_id VARCHAR(128), + rerank_id VARCHAR(128), + top_k INT DEFAULT 10, + status VARCHAR(1) DEFAULT '1', + create_time BIGINT, + update_time DATETIME, + INDEX idx_tenant_id (tenant_id), + UNIQUE INDEX idx_tenant_name_status (tenant_id, name, status) + ) + ` + if err := db.Exec(sql).Error; err != nil { + logger.Warn("Failed to create skill_spaces table with MySQL dialect, trying generic", zap.Error(err)) + // Try with AutoMigrate as fallback + if err := db.AutoMigrate(&entity.SkillSpace{}); err != nil { + return err + } + // AutoMigrate doesn't create unique indexes, so create them explicitly + logger.Info("Creating unique indexes for skill_spaces...") + if err := db.Exec(`ALTER TABLE skill_spaces ADD UNIQUE INDEX idx_tenant_name_status (tenant_id, name, status)`).Error; err != nil { + return fmt.Errorf("failed to create unique index idx_tenant_name_status: %w", err) + } + } + } else { + // Migrate existing table: add status column first, then update index + if err := addColumnIfNotExists(db, "skill_spaces", "status", "VARCHAR(1) NOT NULL DEFAULT '1'"); err != nil { + return fmt.Errorf("failed to add status column to skill_spaces: %w", err) + } + // Migrate index after status column exists + if err := migrateSkillSpaceIndex(db); err != nil { + return fmt.Errorf("failed to migrate skill_space index: %w", err) + } + } + + return nil +} + +// migrateSkillSpaceIndex migrates the unique index to include status +func migrateSkillSpaceIndex(db *gorm.DB) error { + // Check if old index exists and drop it + var oldIndexExists int64 + db.Raw(` + SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS + WHERE TABLE_NAME = 'skill_spaces' AND INDEX_NAME = 'idx_tenant_name' + `).Scan(&oldIndexExists) + + if oldIndexExists > 0 { + logger.Info("Dropping old idx_tenant_name index from skill_spaces...") + if err := db.Exec(`DROP INDEX idx_tenant_name ON skill_spaces`).Error; err != nil { + return fmt.Errorf("failed to drop old index 
idx_tenant_name: %w", err) + } + } + + // Check if new index exists + var newIndexExists int64 + db.Raw(` + SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS + WHERE TABLE_NAME = 'skill_spaces' AND INDEX_NAME = 'idx_tenant_name_status' + `).Scan(&newIndexExists) + + if newIndexExists == 0 { + logger.Info("Creating new idx_tenant_name_status index on skill_spaces...") + if err := db.Exec(`CREATE UNIQUE INDEX idx_tenant_name_status ON skill_spaces(tenant_id, name, status)`).Error; err != nil { + return fmt.Errorf("failed to create unique index idx_tenant_name_status: %w", err) + } + } + + return nil +} diff --git a/internal/dao/skill_search_config.go b/internal/dao/skill_search_config.go new file mode 100644 index 0000000000..6c19964bc2 --- /dev/null +++ b/internal/dao/skill_search_config.go @@ -0,0 +1,196 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package dao + +import ( + "ragflow/internal/entity" + "strings" + "time" + + "github.com/google/uuid" +) + +// SkillSearchConfigDAO data access object for skill search config +type SkillSearchConfigDAO struct{} + +const defaultSkillSpaceID = "default" + +func normalizeSpaceID(spaceID string) string { + spaceID = strings.TrimSpace(spaceID) + if spaceID == "" { + return defaultSkillSpaceID + } + return spaceID +} + +// NewSkillSearchConfigDAO creates a new SkillSearchConfigDAO +func NewSkillSearchConfigDAO() *SkillSearchConfigDAO { + return &SkillSearchConfigDAO{} +} + +// Create creates a new skill search config +func (dao *SkillSearchConfigDAO) Create(config *entity.SkillSearchConfig) error { + return DB.Create(config).Error +} + +// GetByID retrieves a skill search config by ID +func (dao *SkillSearchConfigDAO) GetByID(id string) (*entity.SkillSearchConfig, error) { + var config entity.SkillSearchConfig + err := DB.Where("id = ? AND status = ?", id, "1").First(&config).Error + if err != nil { + return nil, err + } + return &config, nil +} + +// GetByTenantID retrieves a skill search config by tenant ID +func (dao *SkillSearchConfigDAO) GetByTenantID(tenantID, spaceID string) (*entity.SkillSearchConfig, error) { + var config entity.SkillSearchConfig + err := DB.Where("tenant_id = ? AND space_id = ? AND status = ?", tenantID, normalizeSpaceID(spaceID), "1").First(&config).Error + if err != nil { + return nil, err + } + return &config, nil +} + +// GetLatestByTenantID retrieves the latest skill search config by tenant ID (ordered by update_time desc) +// Prioritizes configs with non-empty embd_id to return user-saved configs over auto-created ones +func (dao *SkillSearchConfigDAO) GetLatestByTenantID(tenantID, spaceID string) (*entity.SkillSearchConfig, error) { + var config entity.SkillSearchConfig + // First try to get the latest config with non-empty embd_id (user-saved config) + err := DB.Where("tenant_id = ? AND space_id = ? AND status = ? 
AND embd_id != ?", tenantID, normalizeSpaceID(spaceID), "1", "").Order("update_time desc").First(&config).Error + if err == nil { + return &config, nil + } + // If no user-saved config found, get any config + err = DB.Where("tenant_id = ? AND space_id = ? AND status = ?", tenantID, normalizeSpaceID(spaceID), "1").Order("update_time desc").First(&config).Error + if err != nil { + return nil, err + } + return &config, nil +} + +// GetByTenantAndEmbdID retrieves a skill search config by tenant ID and embedding ID +func (dao *SkillSearchConfigDAO) GetByTenantAndEmbdID(tenantID, spaceID, embdID string) (*entity.SkillSearchConfig, error) { + var config entity.SkillSearchConfig + err := DB.Where("tenant_id = ? AND space_id = ? AND embd_id = ? AND status = ?", tenantID, normalizeSpaceID(spaceID), embdID, "1").First(&config).Error + if err != nil { + return nil, err + } + return &config, nil +} + +// GetOrCreate retrieves existing config or creates default one +func (dao *SkillSearchConfigDAO) GetOrCreate(tenantID, spaceID, embdID string) (*entity.SkillSearchConfig, error) { + spaceID = normalizeSpaceID(spaceID) + config, err := dao.GetByTenantAndEmbdID(tenantID, spaceID, embdID) + if err == nil { + return config, nil + } + + // Create default config + return dao.CreateWithTenantSpace(tenantID, spaceID, embdID) +} + +// CreateWithTenantSpace creates a new config for tenant+space +func (dao *SkillSearchConfigDAO) CreateWithTenantSpace(tenantID, spaceID, embdID string) (*entity.SkillSearchConfig, error) { + spaceID = normalizeSpaceID(spaceID) + timestamp := time.Now().UnixMilli() + defaultFieldConfig := entity.DefaultFieldConfig() + fieldConfigMap := entity.JSONMap{ + "name": map[string]interface{}{ + "enabled": defaultFieldConfig.Name.Enabled, + "weight": defaultFieldConfig.Name.Weight, + }, + "tags": map[string]interface{}{ + "enabled": defaultFieldConfig.Tags.Enabled, + "weight": defaultFieldConfig.Tags.Weight, + }, + "description": map[string]interface{}{ + "enabled": 
defaultFieldConfig.Description.Enabled, + "weight": defaultFieldConfig.Description.Weight, + }, + "content": map[string]interface{}{ + "enabled": defaultFieldConfig.Content.Enabled, + "weight": defaultFieldConfig.Content.Weight, + }, + } + + defaultConfig := &entity.SkillSearchConfig{ + ID: generateID(), + TenantID: tenantID, + SpaceID: spaceID, + EmbdID: embdID, + VectorSimilarityWeight: 0.3, + SimilarityThreshold: 0.2, + FieldConfig: fieldConfigMap, + TopK: 10, + Status: "1", + CreateTime: &timestamp, + } + + if err := dao.Create(defaultConfig); err != nil { + return nil, err + } + return defaultConfig, nil +} + +// DeleteAllByTenantSpace deletes all configs for a tenant+space (for cleanup before creating new one) +func (dao *SkillSearchConfigDAO) DeleteAllByTenantSpace(tenantID, spaceID string) error { + spaceID = normalizeSpaceID(spaceID) + return DB.Model(&entity.SkillSearchConfig{}). + Where("tenant_id = ? AND space_id = ?", tenantID, spaceID). + Update("status", "0").Error +} + +// DeleteAllByTenantSpaceExceptID deletes all active configs for a tenant+space except the specified ID +func (dao *SkillSearchConfigDAO) DeleteAllByTenantSpaceExceptID(tenantID, spaceID, exceptID string) error { + spaceID = normalizeSpaceID(spaceID) + return DB.Model(&entity.SkillSearchConfig{}). + Where("tenant_id = ? AND space_id = ? AND id != ? AND status = ?", tenantID, spaceID, exceptID, "1"). + Update("status", "0").Error +} + +// Update updates a skill search config with the given updates map +func (dao *SkillSearchConfigDAO) Update(id string, updates map[string]interface{}) error { + updates["update_time"] = time.Now() + return DB.Model(&entity.SkillSearchConfig{}).Where("id = ? 
AND status = ?", id, "1").Updates(updates).Error +} + +// UpdateByTenantID updates config by tenant ID +func (dao *SkillSearchConfigDAO) UpdateByTenantID(tenantID, spaceID string, updates map[string]interface{}) error { + updates["update_time"] = time.Now() + result := DB.Model(&entity.SkillSearchConfig{}).Where("tenant_id = ? AND space_id = ? AND status = ?", tenantID, normalizeSpaceID(spaceID), "1").Updates(updates) + return result.Error +} + +// UpdateByTenantAndEmbdID updates config by tenant ID and embedding ID +func (dao *SkillSearchConfigDAO) UpdateByTenantAndEmbdID(tenantID, spaceID, embdID string, updates map[string]interface{}) error { + updates["update_time"] = time.Now() + result := DB.Model(&entity.SkillSearchConfig{}).Where("tenant_id = ? AND space_id = ? AND embd_id = ? AND status = ?", tenantID, normalizeSpaceID(spaceID), embdID, "1").Updates(updates) + return result.Error +} + +// Delete deletes a skill search config by ID (soft delete) +func (dao *SkillSearchConfigDAO) Delete(id string) error { + return DB.Model(&entity.SkillSearchConfig{}).Where("id = ?", id).Update("status", "0").Error +} + +// generateID generates a unique ID +func generateID() string { + return strings.ReplaceAll(uuid.New().String(), "-", "")[:32] +} diff --git a/internal/dao/skill_space.go b/internal/dao/skill_space.go new file mode 100644 index 0000000000..2c0596f8a3 --- /dev/null +++ b/internal/dao/skill_space.go @@ -0,0 +1,141 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +package dao + +import ( + "ragflow/internal/entity" + "strings" + "time" + + "github.com/google/uuid" +) + +// SkillSpaceDAO data access object for skills space +type SkillSpaceDAO struct{} + +// NewSkillSpaceDAO creates a new SkillSpaceDAO +func NewSkillSpaceDAO() *SkillSpaceDAO { + return &SkillSpaceDAO{} +} + +// Create creates a new skills space +func (dao *SkillSpaceDAO) Create(space *entity.SkillSpace) error { + return DB.Create(space).Error +} + +// GetByID retrieves a skills space by ID (active only) +func (dao *SkillSpaceDAO) GetByID(id string) (*entity.SkillSpace, error) { + var space entity.SkillSpace + err := DB.Where("id = ? AND status = ?", id, entity.SpaceStatusActive).First(&space).Error + if err != nil { + return nil, err + } + return &space, nil +} + +// GetByTenantID retrieves all skills spaces by tenant ID (active only) +func (dao *SkillSpaceDAO) GetByTenantID(tenantID string) ([]*entity.SkillSpace, error) { + var spaces []*entity.SkillSpace + err := DB.Where("tenant_id = ? AND status = ?", tenantID, entity.SpaceStatusActive).Order("create_time DESC").Find(&spaces).Error + return spaces, err +} + +// GetByTenantAndName retrieves a skills space by tenant ID and name (active only) +func (dao *SkillSpaceDAO) GetByTenantAndName(tenantID, name string) (*entity.SkillSpace, error) { + var space entity.SkillSpace + err := DB.Where("tenant_id = ? AND name = ? AND status = ?", tenantID, name, entity.SpaceStatusActive).First(&space).Error + if err != nil { + return nil, err + } + return &space, nil +} + +// GetByTenantAndNameAnyStatus retrieves a skills space by tenant ID and name regardless of status +func (dao *SkillSpaceDAO) GetByTenantAndNameAnyStatus(tenantID, name string) (*entity.SkillSpace, error) { + var space entity.SkillSpace + err := DB.Where("tenant_id = ? 
AND name = ?", tenantID, name).First(&space).Error + if err != nil { + return nil, err + } + return &space, nil +} + +// GetByIDAnyStatus retrieves a skills space by ID regardless of status +func (dao *SkillSpaceDAO) GetByIDAnyStatus(id string) (*entity.SkillSpace, error) { + var space entity.SkillSpace + err := DB.Where("id = ?", id).First(&space).Error + if err != nil { + return nil, err + } + return &space, nil +} + +// GetByFolderID retrieves a skills space by folder ID (active only) +func (dao *SkillSpaceDAO) GetByFolderID(folderID string) (*entity.SkillSpace, error) { + var space entity.SkillSpace + err := DB.Where("folder_id = ? AND status = ?", folderID, entity.SpaceStatusActive).First(&space).Error + if err != nil { + return nil, err + } + return &space, nil +} + +// Update updates a skills space +func (dao *SkillSpaceDAO) Update(space *entity.SkillSpace) error { + return DB.Save(space).Error +} + +// UpdateByID updates skills space by ID +func (dao *SkillSpaceDAO) UpdateByID(id string, updates map[string]interface{}) error { + updates["update_time"] = time.Now() + return DB.Model(&entity.SkillSpace{}).Where("id = ?", id).Updates(updates).Error +} + +// Delete deletes a skills space by ID (soft delete) +func (dao *SkillSpaceDAO) Delete(id string) error { + return DB.Model(&entity.SkillSpace{}).Where("id = ?", id).Update("status", entity.SpaceStatusDeleted).Error +} + +// CASStatus performs a compare-and-swap on the space status atomically +// Returns true if the update was applied, false if the current status didn't match expected +func (dao *SkillSpaceDAO) CASStatus(id string, expectedStatus, newStatus string) (bool, error) { + result := DB.Model(&entity.SkillSpace{}). + Where("id = ? AND status = ?", id, expectedStatus). 
+ Update("status", newStatus) + if result.Error != nil { + return false, result.Error + } + return result.RowsAffected > 0, nil +} + +// DeletePermanentByName permanently deletes a skills space by tenant ID and name +// This is used to clean up previously deleted spaces (only deletes status='0' deleted spaces, NOT deleting spaces) +func (dao *SkillSpaceDAO) DeletePermanentByName(tenantID, name string) error { + return DB.Unscoped().Where("tenant_id = ? AND name = ? AND status = ?", tenantID, name, entity.SpaceStatusDeleted).Delete(&entity.SkillSpace{}).Error +} + +// CountByTenant counts skills spaces by tenant ID +func (dao *SkillSpaceDAO) CountByTenant(tenantID string) (int64, error) { + var count int64 + err := DB.Model(&entity.SkillSpace{}).Where("tenant_id = ? AND status = ?", tenantID, entity.SpaceStatusActive).Count(&count).Error + return count, err +} + +// generateSpaceID generates a unique ID +func generateSpaceID() string { + return strings.ReplaceAll(uuid.New().String(), "-", "")[:32] +} diff --git a/internal/engine/elasticsearch/client.go b/internal/engine/elasticsearch/client.go index bd10fa1673..b5680f065f 100644 --- a/internal/engine/elasticsearch/client.go +++ b/internal/engine/elasticsearch/client.go @@ -36,10 +36,16 @@ type elasticsearchEngine struct { // NewEngine creates an Elasticsearch engine func NewEngine(cfg interface{}) (*elasticsearchEngine, error) { + if cfg == nil { + return nil, fmt.Errorf("elasticsearch config is nil, please check your configuration file for 'doc_engine.es' settings") + } esConfig, ok := cfg.(*server.ElasticsearchConfig) if !ok { return nil, fmt.Errorf("invalid Elasticsearch config type, expected *config.ElasticsearchConfig") } + if esConfig == nil { + return nil, fmt.Errorf("elasticsearch config is nil, please check your configuration file for 'doc_engine.es' settings") + } // Create ES client client, err := elasticsearch.NewClient(elasticsearch.Config{ @@ -78,8 +84,8 @@ func NewEngine(cfg interface{}) 
(*elasticsearchEngine, error) { return engine, nil } -// Type returns the engine type -func (e *elasticsearchEngine) Type() string { +// GetType returns the engine type +func (e *elasticsearchEngine) GetType() string { return "elasticsearch" } @@ -243,3 +249,39 @@ func convertBytes(bytes int64) string { } return fmt.Sprintf("%d b", bytes) } + +// extractErrorReason extracts the error reason from Elasticsearch error response +// It tries to find the most specific error message in the response +func extractErrorReason(bodyBytes []byte) string { + var errResp map[string]interface{} + if err := json.Unmarshal(bodyBytes, &errResp); err != nil { + return "" + } + + // Try to get error from root_cause + if errorObj, ok := errResp["error"].(map[string]interface{}); ok { + if rootCauses, ok := errorObj["root_cause"].([]interface{}); ok && len(rootCauses) > 0 { + if rootCause, ok := rootCauses[0].(map[string]interface{}); ok { + if reason, ok := rootCause["reason"].(string); ok && reason != "" { + return reason + } + } + } + // Fallback to main error reason + if reason, ok := errorObj["reason"].(string); ok && reason != "" { + return reason + } + // Try failed_shards + if failedShards, ok := errorObj["failed_shards"].([]interface{}); ok && len(failedShards) > 0 { + if shard, ok := failedShards[0].(map[string]interface{}); ok { + if reason, ok := shard["reason"].(map[string]interface{}); ok { + if r, ok := reason["reason"].(string); ok && r != "" { + return r + } + } + } + } + } + + return "" +} diff --git a/internal/engine/elasticsearch/document.go b/internal/engine/elasticsearch/document.go new file mode 100644 index 0000000000..a79be0dd4c --- /dev/null +++ b/internal/engine/elasticsearch/document.go @@ -0,0 +1,259 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package elasticsearch + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + + "github.com/elastic/go-elasticsearch/v8/esapi" +) + +// IndexDocument indexes a single document +func (e *elasticsearchEngine) IndexDocument(ctx context.Context, indexName, docID string, doc interface{}) error { + if indexName == "" { + return fmt.Errorf("index name cannot be empty") + } + if docID == "" { + return fmt.Errorf("document id cannot be empty") + } + if doc == nil { + return fmt.Errorf("document cannot be nil") + } + + // Serialize document + data, err := json.Marshal(doc) + if err != nil { + return fmt.Errorf("failed to marshal document: %w", err) + } + + // Index document + req := esapi.IndexRequest{ + Index: indexName, + DocumentID: docID, + Body: bytes.NewReader(data), + Refresh: "true", + } + + res, err := req.Do(ctx, e.client) + if err != nil { + return fmt.Errorf("failed to index document: %w", err) + } + defer res.Body.Close() + + if res.IsError() { + body, _ := io.ReadAll(res.Body) + reason := extractErrorReason(body) + if reason != "" { + return fmt.Errorf("elasticsearch error: %s", reason) + } + return fmt.Errorf("elasticsearch returned error: %s, body: %s", res.Status(), string(body)) + } + + return nil +} + +// BulkIndex indexes documents in bulk +func (e *elasticsearchEngine) BulkIndex(ctx context.Context, indexName string, docs []interface{}) (interface{}, error) { + if indexName == "" { + return nil, fmt.Errorf("index name cannot be empty") + } + if len(docs) == 0 { + return nil, fmt.Errorf("documents cannot be empty") + } 
+ + // Build bulk request + var buf bytes.Buffer + for _, doc := range docs { + docMap, ok := doc.(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("document must be map[string]interface{}") + } + + docID, hasID := docMap["_id"] + if !hasID { + return nil, fmt.Errorf("document missing _id field") + } + + // Delete _id field to avoid duplication + delete(docMap, "_id") + + // Add index operation + meta := map[string]interface{}{ + "_index": indexName, + "_id": docID, + } + metaData, _ := json.Marshal(meta) + docData, _ := json.Marshal(docMap) + + buf.Write(metaData) + buf.WriteByte('\n') + buf.Write(docData) + buf.WriteByte('\n') + } + + // Execute bulk request + req := esapi.BulkRequest{ + Body: &buf, + Refresh: "true", + } + + res, err := req.Do(ctx, e.client) + if err != nil { + return nil, fmt.Errorf("bulk index failed: %w", err) + } + defer res.Body.Close() + + if res.IsError() { + body, _ := io.ReadAll(res.Body) + reason := extractErrorReason(body) + if reason != "" { + return nil, fmt.Errorf("elasticsearch error: %s", reason) + } + return nil, fmt.Errorf("elasticsearch returned error: %s", res.Status()) + } + + // Parse response + var result map[string]interface{} + if err := json.NewDecoder(res.Body).Decode(&result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + // Check for errors + if errors, ok := result["errors"].(bool); ok && errors { + // Get error details + if items, ok := result["items"].([]interface{}); ok && len(items) > 0 { + for _, item := range items { + if itemMap, ok := item.(map[string]interface{}); ok { + for _, op := range itemMap { + if opMap, ok := op.(map[string]interface{}); ok { + if errInfo, ok := opMap["error"].(map[string]interface{}); ok { + if reason, ok := errInfo["reason"].(string); ok { + return nil, fmt.Errorf("bulk index error: %s", reason) + } + } + } + } + } + } + } + return nil, fmt.Errorf("bulk index has errors") + } + + response := &BulkResponse{ + Took: 
int64(result["took"].(float64)), + Errors: result["errors"].(bool), + Indexed: len(docs), + } + + return response, nil +} + +// BulkResponse bulk operation response +type BulkResponse struct { + Took int64 + Errors bool + Indexed int +} + +// GetDocument gets a document +func (e *elasticsearchEngine) GetDocument(ctx context.Context, indexName, docID string) (interface{}, error) { + if indexName == "" { + return nil, fmt.Errorf("index name cannot be empty") + } + if docID == "" { + return nil, fmt.Errorf("document id cannot be empty") + } + + // Get document + req := esapi.GetRequest{ + Index: indexName, + DocumentID: docID, + } + + res, err := req.Do(ctx, e.client) + if err != nil { + return nil, fmt.Errorf("failed to get document: %w", err) + } + defer res.Body.Close() + + if res.StatusCode == 404 { + return nil, fmt.Errorf("document not found") + } + + if res.IsError() { + body, _ := io.ReadAll(res.Body) + reason := extractErrorReason(body) + if reason != "" { + return nil, fmt.Errorf("elasticsearch error: %s", reason) + } + return nil, fmt.Errorf("elasticsearch returned error: %s", res.Status()) + } + + // Parse response + var result map[string]interface{} + if err := json.NewDecoder(res.Body).Decode(&result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + if found, ok := result["found"].(bool); !ok || !found { + return nil, fmt.Errorf("document not found") + } + + return result["_source"], nil +} + +// DeleteDocument deletes a document +func (e *elasticsearchEngine) DeleteDocument(ctx context.Context, indexName, docID string) error { + if indexName == "" { + return fmt.Errorf("index name cannot be empty") + } + if docID == "" { + return fmt.Errorf("document id cannot be empty") + } + + // Delete document + req := esapi.DeleteRequest{ + Index: indexName, + DocumentID: docID, + Refresh: "true", + } + + res, err := req.Do(ctx, e.client) + if err != nil { + return fmt.Errorf("failed to delete document: %w", err) + } + defer 
res.Body.Close() + + if res.StatusCode == 404 { + return fmt.Errorf("document not found") + } + + if res.IsError() { + body, _ := io.ReadAll(res.Body) + reason := extractErrorReason(body) + if reason != "" { + return fmt.Errorf("elasticsearch error: %s", reason) + } + return fmt.Errorf("elasticsearch returned error: %s, body: %s", res.Status(), string(body)) + } + + return nil +} diff --git a/internal/engine/elasticsearch/index.go b/internal/engine/elasticsearch/index.go index b0190697d1..7e601acae3 100644 --- a/internal/engine/elasticsearch/index.go +++ b/internal/engine/elasticsearch/index.go @@ -22,19 +22,13 @@ import ( "encoding/json" "fmt" "io" + "os" "github.com/elastic/go-elasticsearch/v8/esapi" ) // CreateDataset creates an index func (e *elasticsearchEngine) CreateDataset(ctx context.Context, indexName, datasetID string, vectorSize int, parserID string) error { - // Elasticsearch doesn't support vector_size or parser_id in the same way - // Build mapping for ES (if needed) - // TODO - mapping := map[string]interface{}{ - "dataset_id": datasetID, - } - if indexName == "" { return fmt.Errorf("index name cannot be empty") } @@ -48,6 +42,25 @@ func (e *elasticsearchEngine) CreateDataset(ctx context.Context, indexName, data return fmt.Errorf("index '%s' already exists", indexName) } + // Load mapping based on index type + var mapping map[string]interface{} + if datasetID == "skill" { + // Load skill-specific mapping + skillMapping, err := loadSkillMapping() + if err != nil { + return fmt.Errorf("failed to load skill mapping: %w", err) + } + mapping = skillMapping + } else { + // Default mapping for dataset + mapping = map[string]interface{}{ + "settings": map[string]interface{}{ + "number_of_shards": 1, + "number_of_replicas": 0, + }, + } + } + // Prepare request body var body io.Reader if mapping != nil { @@ -71,7 +84,12 @@ func (e *elasticsearchEngine) CreateDataset(ctx context.Context, indexName, data defer res.Body.Close() if res.IsError() { - return 
fmt.Errorf("elasticsearch returned error: %s", res.Status()) + bodyBytes, _ := io.ReadAll(res.Body) + reason := extractErrorReason(bodyBytes) + if reason != "" { + return fmt.Errorf("elasticsearch error: %s", reason) + } + return fmt.Errorf("elasticsearch returned error: %s, body: %s", res.Status(), string(bodyBytes)) } // Parse response @@ -88,6 +106,157 @@ func (e *elasticsearchEngine) CreateDataset(ctx context.Context, indexName, data return nil } +// loadSkillMapping loads the skill index mapping from config file +func loadSkillMapping() (map[string]interface{}, error) { + // Try multiple possible locations for the mapping file + possiblePaths := []string{ + "conf/skill_es_mapping.json", + "../conf/skill_es_mapping.json", + "/app/conf/skill_es_mapping.json", + } + + var data []byte + var err error + for _, path := range possiblePaths { + data, err = os.ReadFile(path) + if err == nil { + break + } + } + + if err != nil { + // Fallback to default skill mapping if file not found + return getDefaultSkillMapping(), nil + } + + var mapping map[string]interface{} + if err := json.Unmarshal(data, &mapping); err != nil { + return nil, fmt.Errorf("failed to parse skill mapping: %w", err) + } + + return mapping, nil +} + +// getDefaultSkillMapping returns the default skill index mapping +func getDefaultSkillMapping() map[string]interface{} { + return map[string]interface{}{ + "settings": map[string]interface{}{ + "index": map[string]interface{}{ + "number_of_shards": 1, + "number_of_replicas": 0, + "refresh_interval": "1000ms", + }, + }, + "mappings": map[string]interface{}{ + "dynamic": false, + "properties": map[string]interface{}{ + "skill_id": map[string]interface{}{ + "type": "keyword", + "store": true, + }, + "name": map[string]interface{}{ + "type": "text", + "index": false, + "store": true, + }, + "name_tks": map[string]interface{}{ + "type": "text", + "analyzer": "whitespace", + "store": true, + }, + "tags": map[string]interface{}{ + "type": "text", + "index": 
false, + "store": true, + }, + "tags_tks": map[string]interface{}{ + "type": "text", + "analyzer": "whitespace", + "store": true, + }, + "description": map[string]interface{}{ + "type": "text", + "index": false, + "store": true, + }, + "description_tks": map[string]interface{}{ + "type": "text", + "analyzer": "whitespace", + "store": true, + }, + "content": map[string]interface{}{ + "type": "text", + "index": false, + "store": true, + }, + "content_tks": map[string]interface{}{ + "type": "text", + "analyzer": "whitespace", + "store": true, + }, + "q_3072_vec": map[string]interface{}{ + "type": "dense_vector", + "dims": 3072, + "index": true, + "similarity": "cosine", + }, + "q_2560_vec": map[string]interface{}{ + "type": "dense_vector", + "dims": 2560, + "index": true, + "similarity": "cosine", + }, + "q_1536_vec": map[string]interface{}{ + "type": "dense_vector", + "dims": 1536, + "index": true, + "similarity": "cosine", + }, + "q_1024_vec": map[string]interface{}{ + "type": "dense_vector", + "dims": 1024, + "index": true, + "similarity": "cosine", + }, + "q_768_vec": map[string]interface{}{ + "type": "dense_vector", + "dims": 768, + "index": true, + "similarity": "cosine", + }, + "q_512_vec": map[string]interface{}{ + "type": "dense_vector", + "dims": 512, + "index": true, + "similarity": "cosine", + }, + "q_256_vec": map[string]interface{}{ + "type": "dense_vector", + "dims": 256, + "index": true, + "similarity": "cosine", + }, + "version": map[string]interface{}{ + "type": "keyword", + "store": true, + }, + "status": map[string]interface{}{ + "type": "keyword", + "store": true, + }, + "create_time": map[string]interface{}{ + "type": "long", + "store": true, + }, + "update_time": map[string]interface{}{ + "type": "long", + "store": true, + }, + }, + }, + } +} + // DropTable deletes an index func (e *elasticsearchEngine) DropTable(ctx context.Context, indexName string) error { if indexName == "" { @@ -115,6 +284,11 @@ func (e *elasticsearchEngine) DropTable(ctx 
context.Context, indexName string) e defer res.Body.Close() if res.IsError() { + bodyBytes, _ := io.ReadAll(res.Body) + reason := extractErrorReason(bodyBytes) + if reason != "" { + return fmt.Errorf("elasticsearch error: %s", reason) + } return fmt.Errorf("elasticsearch returned error: %s", res.Status()) } @@ -143,6 +317,11 @@ func (e *elasticsearchEngine) TableExists(ctx context.Context, indexName string) return false, nil } + bodyBytes, _ := io.ReadAll(res.Body) + reason := extractErrorReason(bodyBytes) + if reason != "" { + return false, fmt.Errorf("elasticsearch error: %s", reason) + } return false, fmt.Errorf("elasticsearch returned error: %s", res.Status()) } diff --git a/internal/engine/elasticsearch/search.go b/internal/engine/elasticsearch/search.go index 1f3935b069..507618be64 100644 --- a/internal/engine/elasticsearch/search.go +++ b/internal/engine/elasticsearch/search.go @@ -22,6 +22,7 @@ import ( "encoding/json" "fmt" "io" + "strings" "github.com/elastic/go-elasticsearch/v8/esapi" "go.uber.org/zap" @@ -63,17 +64,28 @@ func (e *elasticsearchEngine) searchUnified(ctx context.Context, req *types.Sear limit = 30 // default ES size } - // Build filter clauses (default: available=1, meaning available_int >= 1) - // Reference: rag/utils/es_conn.py L60-L78 - filterClauses := buildFilterClauses(req.KbIDs, 1) + // Check if this is a skill index + isSkillIndex := len(req.IndexNames) > 0 && strings.HasPrefix(req.IndexNames[0], "skill_") + + // Build filter clauses + var filterClauses []map[string]interface{} + if isSkillIndex { + filterClauses = buildSkillFilterClauses() + } else { + filterClauses = buildFilterClauses(req.KbIDs, 1) + } + + // Add filters from req.Filter + if req.Filter != nil && len(req.Filter) > 0 { + filterClauses = append(filterClauses, buildFilterFromMap(req.Filter)...) 
+ } // Build search query body queryBody := make(map[string]interface{}) // Determine search type from MatchExprs var matchText string - var matchDense interface{} - var textWeight float64 = 1.0 + var matchDense *types.MatchDenseExpr var hasVectorMatch bool for _, expr := range req.MatchExprs { @@ -83,59 +95,82 @@ func (e *elasticsearchEngine) searchUnified(ctx context.Context, req *types.Sear switch e := expr.(type) { case string: matchText = e + case *types.MatchTextExpr: + matchText = e.MatchingText case *types.MatchDenseExpr: hasVectorMatch = true matchDense = e - textWeight = 0.3 // default, should be passed via SimilarityThreshold } } var vectorFieldName string - if !hasVectorMatch { + if !hasVectorMatch || matchDense == nil { // Keyword-only search - queryBody["query"] = buildESKeywordQuery(matchText, filterClauses, 1.0) + if isSkillIndex { + queryBody["query"] = buildSkillKeywordQuery(matchText, filterClauses, 1.0) + } else { + queryBody["query"] = buildESKeywordQuery(matchText, filterClauses, 1.0) + } } else { // Hybrid search: keyword + vector - // Calculate text weight (use SimilarityThreshold as text weight if provided) + textWeight := 0.7 // default: vector weight = 0.3 + vectorWeight := 0.3 + if matchDense.ExtraOptions != nil { + if vw, ok := matchDense.ExtraOptions["text_weight"].(float64); ok { + textWeight = vw + } + if vw, ok := matchDense.ExtraOptions["vector_weight"].(float64); ok { + vectorWeight = vw + } + } // Build boolean query for text match and filters - boolQuery := buildESKeywordQuery(matchText, filterClauses, 1.0) + var boolQuery map[string]interface{} + if isSkillIndex { + boolQuery = buildSkillKeywordQuery(matchText, filterClauses, 1.0) + } else { + boolQuery = buildESKeywordQuery(matchText, filterClauses, 1.0) + } // Add boost to the bool query (as in Python code) if boolMap, ok := boolQuery["bool"].(map[string]interface{}); ok { boolMap["boost"] = textWeight } + // Build kNN query - var vectorData []float64 - if md, ok := 
matchDense.(*types.MatchDenseExpr); ok { - vectorData = md.EmbeddingData - vectorFieldName = md.VectorColumnName - k := md.TopN - if k <= 0 { - k = req.Limit - } - if k <= 0 { - k = 1024 - } - numCandidates := k * 2 - - knnQuery := map[string]interface{}{ - "field": vectorFieldName, - "query_vector": vectorData, - "k": k, - "num_candidates": numCandidates, - "filter": boolQuery, - "similarity": 0.0, - } - - queryBody["knn"] = knnQuery - queryBody["query"] = boolQuery + vectorData := matchDense.EmbeddingData + vectorFieldName = matchDense.VectorColumnName + k := matchDense.TopN + if k <= 0 { + k = req.Limit } + if k <= 0 { + k = 1024 + } + numCandidates := k * 2 + + similarity := 0.0 + if matchDense.ExtraOptions != nil { + if sim, ok := matchDense.ExtraOptions["similarity"].(float64); ok { + similarity = sim + } + } + + knnQuery := map[string]interface{}{ + "field": vectorFieldName, + "query_vector": vectorData, + "k": k, + "num_candidates": numCandidates, + "similarity": similarity, + "boost": vectorWeight, + } + + queryBody["knn"] = knnQuery + queryBody["query"] = boolQuery // Add vector column to Source fields (matching Python ES: src.append(f"q_{len(q_vec)}_vec")) // Only modify Source if it was explicitly set by the caller if vectorFieldName != "" && len(req.SelectFields) > 0 { sourceFields := req.SelectFields - // Check if vector column already in source found := false for _, f := range sourceFields { if f == vectorFieldName { @@ -153,6 +188,14 @@ func (e *elasticsearchEngine) searchUnified(ctx context.Context, req *types.Sear queryBody["size"] = limit queryBody["from"] = offset + // Add sorting if specified + if req.OrderBy != nil { + sort := parseOrderByExpr(req.OrderBy) + if len(sort) > 0 { + queryBody["sort"] = sort + } + } + // Serialize query var buf bytes.Buffer if err := json.NewEncoder(&buf).Encode(queryBody); err != nil { @@ -228,7 +271,7 @@ func calculatePagination(page, size, topK int) (int, int) { return offset, RERANK_LIMIT } -// 
buildFilterClauses builds ES filter clauses from kb_ids, doc_ids and available_int +// buildFilterClauses builds ES filter clauses from kb_ids and available_int // Reference: rag/utils/es_conn.py L60-L78 // When available=0: available_int < 1 // When available!=0: NOT (available_int < 1) @@ -272,22 +315,96 @@ func buildFilterClauses(kbIDs []string, available int) []map[string]interface{} return filters } +// buildSkillFilterClauses builds ES filter clauses for skill index +// Skill index uses 'status' field instead of 'available_int' +func buildSkillFilterClauses() []map[string]interface{} { + // Filter for active skills (status = "1") + return []map[string]interface{}{ + { + "term": map[string]interface{}{ + "status": "1", + }, + }, + } +} + +// buildFilterFromMap converts a generic filter map to ES filter clauses +func buildFilterFromMap(filter map[string]interface{}) []map[string]interface{} { + var filters []map[string]interface{} + for field, value := range filter { + switch v := value.(type) { + case []string: + filters = append(filters, map[string]interface{}{ + "terms": map[string]interface{}{field: v}, + }) + case []interface{}: + filters = append(filters, map[string]interface{}{ + "terms": map[string]interface{}{field: v}, + }) + default: + filters = append(filters, map[string]interface{}{ + "term": map[string]interface{}{field: v}, + }) + } + } + return filters +} + // buildESKeywordQuery builds keyword-only search query for ES // Uses query_string if matchText is in query_string format, otherwise uses multi_match // boost is applied to the text match clause (query_string or multi_match) func buildESKeywordQuery(matchText string, filterClauses []map[string]interface{}, boost float64) map[string]interface{} { var mustClause map[string]interface{} - // Use query_string for complex queries - queryString := map[string]interface{}{ - "query": matchText, - "fields": []string{"title_tks^10", "title_sm_tks^5", "important_kwd^30", "important_tks^20", 
"question_tks^20", "content_ltks^2", "content_sm_ltks"}, - "type": "best_fields", - "minimum_should_match": "30%", - "boost": boost, + // Handle wildcard query (match all) + if matchText == "*" || matchText == "" { + mustClause = map[string]interface{}{ + "match_all": map[string]interface{}{}, + } + } else { + // Use query_string for complex queries + queryString := map[string]interface{}{ + "query": matchText, + "fields": []string{"title_tks^10", "title_sm_tks^5", "important_kwd^30", "important_tks^20", "question_tks^20", "content_ltks^2", "content_sm_ltks"}, + "type": "best_fields", + "minimum_should_match": "30%", + "boost": boost, + } + mustClause = map[string]interface{}{ + "query_string": queryString, + } } - mustClause = map[string]interface{}{ - "query_string": queryString, + + return map[string]interface{}{ + "bool": map[string]interface{}{ + "must": mustClause, + "filter": filterClauses, + }, + } +} + +// buildSkillKeywordQuery builds keyword-only search query for skill index +// Skill index uses different field names: name_tks, tags_tks, description_tks, content_tks +func buildSkillKeywordQuery(matchText string, filterClauses []map[string]interface{}, boost float64) map[string]interface{} { + var mustClause map[string]interface{} + + // Handle wildcard query (match all) + if matchText == "*" || matchText == "" { + mustClause = map[string]interface{}{ + "match_all": map[string]interface{}{}, + } + } else { + // Use query_string for complex queries with skill-specific fields + queryString := map[string]interface{}{ + "query": matchText, + "fields": []string{"name_tks^10", "tags_tks^5", "description_tks^3", "content_tks^1"}, + "type": "best_fields", + "minimum_should_match": "30%", + "boost": boost, + } + mustClause = map[string]interface{}{ + "query_string": queryString, + } } return map[string]interface{}{ @@ -306,18 +423,40 @@ func convertESResponse(esResp *SearchResponse, vectorFieldName string) []map[str chunks := make([]map[string]interface{}, 
len(esResp.Hits.Hits)) for i, hit := range esResp.Hits.Hits { - - //// vectorField is list of float64, which need to be converted to float32 - chunks[i] = hit.Source chunks[i]["_score"] = hit.Score chunks[i]["_id"] = hit.ID - //vectorField := hit.Source[vectorFieldName] - //chunks[i][vectorFieldName] = utility.Float64ToFloat32(vectorField) } return chunks } +// parseOrderByExpr parses the OrderBy expression into ES sort format +func parseOrderByExpr(orderBy *types.OrderByExpr) []map[string]interface{} { + if orderBy == nil || len(orderBy.Fields) == 0 { + return nil + } + + var result []map[string]interface{} + for _, field := range orderBy.Fields { + direction := "asc" + if field.Type == types.SortDesc { + direction = "desc" + } + + if field.Field == "_score" || field.Field == "score" { + result = append(result, map[string]interface{}{ + "_score": direction, + }) + } else { + result = append(result, map[string]interface{}{ + field.Field: direction, + }) + } + } + + return result +} + // Helper query builder functions (legacy) // BuildMatchTextQuery builds a text match query diff --git a/internal/engine/engine.go b/internal/engine/engine.go index 149f96ed00..19112d0dd4 100644 --- a/internal/engine/engine.go +++ b/internal/engine/engine.go @@ -53,6 +53,11 @@ type DocEngine interface { DropTable(ctx context.Context, indexName string) error TableExists(ctx context.Context, indexName string) (bool, error) + // Document operations (used by skill indexing) + IndexDocument(ctx context.Context, indexName, docID string, doc interface{}) error + DeleteDocument(ctx context.Context, indexName, docID string) error + BulkIndex(ctx context.Context, indexName string, docs []interface{}) (interface{}, error) + // Utility functions for search result processing GetFields(chunks []map[string]interface{}, fields []string) map[string]map[string]interface{} GetAggregation(chunks []map[string]interface{}, fieldName string) []map[string]interface{} @@ -62,6 +67,9 @@ type DocEngine interface 
{ // Health check Ping(ctx context.Context) error Close() error + + // GetType returns the engine type + GetType() string } // Type returns the engine type (helper method for runtime type checking) diff --git a/internal/engine/infinity/client.go b/internal/engine/infinity/client.go index f3281d24ed..9b30602fb4 100644 --- a/internal/engine/infinity/client.go +++ b/internal/engine/infinity/client.go @@ -129,10 +129,16 @@ type infinityEngine struct { // NewEngine creates an Infinity engine func NewEngine(cfg interface{}) (*infinityEngine, error) { + if cfg == nil { + return nil, fmt.Errorf("infinity config is nil, please check your configuration file for 'doc_engine.infinity' settings") + } infConfig, ok := cfg.(*server.InfinityConfig) if !ok { return nil, fmt.Errorf("invalid infinity config type, expected *config.InfinityConfig") } + if infConfig == nil { + return nil, fmt.Errorf("infinity config is nil, please check your configuration file for 'doc_engine.infinity' settings") + } client, err := NewInfinityClient(infConfig) if err != nil { @@ -168,8 +174,8 @@ func NewEngine(cfg interface{}) (*infinityEngine, error) { return engine, nil } -// Type returns the engine type -func (e *infinityEngine) Type() string { +// GetType returns the engine type +func (e *infinityEngine) GetType() string { return "infinity" } diff --git a/internal/engine/infinity/common.go b/internal/engine/infinity/common.go index 663d50c744..199bd0b3c0 100644 --- a/internal/engine/infinity/common.go +++ b/internal/engine/infinity/common.go @@ -312,3 +312,27 @@ func buildFilterFromCondition(condition map[string]interface{}, tableColumns map } return strings.Join(conditions, " AND ") } + +// columnExists checks if a column exists in the table +func (e *infinityEngine) columnExists(table *infinity.Table, columnName string) (bool, error) { + colsResp, err := table.ShowColumns() + if err != nil { + return false, err + } + + result, ok := colsResp.(*infinity.QueryResult) + if !ok { + return false, 
fmt.Errorf("unexpected response type: %T", colsResp) + } + + // ShowColumns returns a result set where Data contains arrays of column values + if nameArr, ok := result.Data["name"]; ok { + for i := 0; i < len(nameArr); i++ { + colName, _ := nameArr[i].(string) + if colName == columnName { + return true, nil + } + } + } + return false, nil +} diff --git a/internal/engine/infinity/dataset.go b/internal/engine/infinity/dataset.go index 2043c6145e..b8814783ad 100644 --- a/internal/engine/infinity/dataset.go +++ b/internal/engine/infinity/dataset.go @@ -36,14 +36,28 @@ import ( // CreateDataset creates a table in Infinity // indexName is the table name prefix (e.g., "ragflow_<tenant_id>") // The full table name is built as "{indexName}_{datasetID}" +// For skill index (datasetID="skill"), tableName is just indexName and uses skill_infinity_mapping.json func (e *infinityEngine) CreateDataset(ctx context.Context, indexName, datasetID string, vectorSize int, parserID string) error { vecSize := vectorSize - // Build full table name: {indexName}_{datasetID} - tableName := fmt.Sprintf("%s_%s", indexName, datasetID) + // Determine table name and mapping file based on index type + var tableName string + var mappingFile string + + if datasetID == "skill" { + // Skill index: table name is just indexName (e.g., "skill_abc123_def456") + tableName = indexName + mappingFile = "skill_infinity_mapping.json" + logger.Info("Creating skill index table", zap.String("tableName", tableName), zap.String("mappingFile", mappingFile)) + } else { + // Regular document index: table name is {indexName}_{datasetID} + tableName = fmt.Sprintf("%s_%s", indexName, datasetID) + mappingFile = e.mappingFileName + logger.Info("Creating regular index table", zap.String("tableName", tableName), zap.String("mappingFile", mappingFile)) + } // Use configured schema - fpMapping := filepath.Join(utility.GetProjectRoot(), "conf", e.mappingFileName) + fpMapping := filepath.Join(utility.GetProjectRoot(), "conf", 
mappingFile) schemaData, err := os.ReadFile(fpMapping) if err != nil { @@ -61,54 +75,90 @@ func (e *infinityEngine) CreateDataset(ctx context.Context, indexName, datasetID return fmt.Errorf("Failed to get database: %w", err) } + // Determine vector column name + vectorColName := fmt.Sprintf("q_%d_vec", vecSize) + // Check if table already exists exists, err := e.TableExists(ctx, tableName) if err != nil { return fmt.Errorf("Failed to check if table exists: %w", err) } + + var table *infinity.Table if exists { - return fmt.Errorf("table '%s' already exists", tableName) - } - - // Build column definitions (preserving JSON order) - var columns infinity.TableSchema - for _, fieldName := range schema.Keys { - fieldInfo := schema.Fields[fieldName] - col := infinity.ColumnDefinition{ - Name: fieldName, - DataType: fieldInfo.Type, - Default: fieldInfo.Default, - // Comment: fieldInfo.Comment, + // Table exists, open it and check if vector column needs to be added + logger.Info("Table already exists, checking for vector column", zap.String("tableName", tableName)) + table, err = db.GetTable(tableName) + if err != nil { + return fmt.Errorf("Failed to open existing table %s: %w", tableName, err) } - columns = append(columns, &col) - } - // Add vector column - vectorColName := fmt.Sprintf("q_%d_vec", vecSize) - columns = append(columns, &infinity.ColumnDefinition{ - Name: vectorColName, - DataType: fmt.Sprintf("vector,%d,float", vecSize), - }) + // Check if vector column exists (for embedding model changes) + colExists, err := e.columnExists(table, vectorColName) + if err != nil { + logger.Warn("Failed to check column existence", zap.String("column", vectorColName), zap.Error(err)) + } - // Add chunk_data column for table parser - if parserID == "table" { + // Add new vector column if it doesn't exist (handles embedding model change) + if !colExists { + logger.Info("Adding new vector column for embedding model change", zap.String("column", vectorColName), zap.Int("size", 
vecSize)) + addColSchema := infinity.TableSchema{ + &infinity.ColumnDefinition{ + Name: vectorColName, + DataType: fmt.Sprintf("vector,%d,float", vecSize), + }, + } + if _, err := table.AddColumns(addColSchema); err != nil { + logger.Error("Failed to add vector column "+vectorColName, err) + return fmt.Errorf("Failed to add vector column %s: %w", vectorColName, err) + } + logger.Info("Successfully added vector column", zap.String("column", vectorColName)) + } + } else { + // Table doesn't exist, create it with vector column in the initial schema + logger.Info(fmt.Sprintf("Creating table with vector column: %s with dimension %d", vectorColName, vecSize)) + + // Build column definitions (preserving JSON order) + var columns infinity.TableSchema + for _, fieldName := range schema.Keys { + fieldInfo := schema.Fields[fieldName] + col := infinity.ColumnDefinition{ + Name: fieldName, + DataType: fieldInfo.Type, + Default: fieldInfo.Default, + // Comment: fieldInfo.Comment, + } + columns = append(columns, &col) + } + + // Add vector column columns = append(columns, &infinity.ColumnDefinition{ - Name: "chunk_data", - DataType: "json", - Default: "{}", + Name: vectorColName, + DataType: fmt.Sprintf("vector,%d,float", vecSize), }) + + // Add chunk_data column for table parser + if parserID == "table" { + columns = append(columns, &infinity.ColumnDefinition{ + Name: "chunk_data", + DataType: "json", + Default: "{}", + }) + } + + // Create table + table, err = db.CreateTable(tableName, columns, infinity.ConflictTypeIgnore) + if err != nil { + return fmt.Errorf("Failed to create table: %w", err) + } + logger.Debug("Infinity created table", zap.String("tableName", tableName)) } - // Create table - table, err := db.CreateTable(tableName, columns, infinity.ConflictTypeIgnore) - if err != nil { - return fmt.Errorf("Failed to create table: %w", err) - } - logger.Debug("Infinity created table", zap.String("tableName", tableName)) - - // Create HNSW index on vector column + // Create 
HNSW index on vector column with unique name based on vector size + // Use unique index name to avoid conflict when embedding model changes + vectorIndexName := fmt.Sprintf("q_%d_vec_idx", vecSize) _, err = table.CreateIndex( - "q_vec_idx", + vectorIndexName, infinity.NewIndexInfo(vectorColName, infinity.IndexTypeHnsw, map[string]string{ "M": "16", "ef_construction": "50", @@ -119,8 +169,9 @@ func (e *infinityEngine) CreateDataset(ctx context.Context, indexName, datasetID "", ) if err != nil { - return fmt.Errorf("Failed to create HNSW index: %w", err) + return fmt.Errorf("Failed to create HNSW index %s: %w", vectorIndexName, err) } + logger.Info("Created vector index", zap.String("indexName", vectorIndexName), zap.String("column", vectorColName)) // Create full-text indexes for varchar fields with analyzers for _, fieldName := range schema.Keys { diff --git a/internal/engine/infinity/document.go b/internal/engine/infinity/document.go new file mode 100644 index 0000000000..0bc2b2dbc3 --- /dev/null +++ b/internal/engine/infinity/document.go @@ -0,0 +1,239 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package infinity + +import ( + "context" + "fmt" + "strings" + + "go.uber.org/zap" + "ragflow/internal/logger" +) + +// IndexDocument indexes a single document +// For skill index (tableName starts with "skill_"), uses InsertSkill +// For regular document index, returns not implemented error +func (e *infinityEngine) IndexDocument(ctx context.Context, tableName, docID string, doc interface{}) error { + // Check if this is a skill index + if strings.HasPrefix(tableName, "skill_") { + return e.InsertSkill(ctx, tableName, docID, doc) + } + return fmt.Errorf("infinity insert not implemented for regular documents: waiting for official Go SDK") +} + +// InsertSkill inserts a skill document into skill index +// Auto-creates the table if it doesn't exist +func (e *infinityEngine) InsertSkill(ctx context.Context, tableName, docID string, doc interface{}) error { + db, err := e.client.conn.GetDatabase(e.client.dbName) + if err != nil { + return fmt.Errorf("failed to get database: %w", err) + } + + table, err := db.GetTable(tableName) + if err != nil { + // Table doesn't exist, try to create it + errMsg := strings.ToLower(err.Error()) + if !strings.Contains(errMsg, "not found") && !strings.Contains(errMsg, "doesn't exist") { + return fmt.Errorf("failed to get table %s: %w", tableName, err) + } + + // Cannot auto-create skill table without knowing the vector dimension + // The table should be created by SkillIndexerService.EnsureIndex before calling this + return fmt.Errorf("skill table %s does not exist, please ensure index is initialized first", tableName) + } + + // Transform doc to map + docMap, ok := doc.(map[string]interface{}) + if !ok { + return fmt.Errorf("invalid doc type, expected map[string]interface{}") + } + + // Prepare insert data + insertDoc := make(map[string]interface{}) + for k, v := range docMap { + insertDoc[k] = v + } + // Ensure skill_id is set (schema uses skill_id, not id) + insertDoc["skill_id"] = docID + + // Delete existing document with 
same skill_id + // Escape single quotes to prevent filter injection + docIDEscaped := strings.ReplaceAll(docID, "'", "''") + filter := fmt.Sprintf("skill_id = '%s'", docIDEscaped) + delResp, delErr := table.Delete(filter) + if delErr != nil { + logger.Warn(fmt.Sprintf("Failed to delete existing skill document: %v", delErr)) + } else if delResp.DeletedRows > 0 { + logger.Debug(fmt.Sprintf("Deleted %d existing skill document(s)", delResp.DeletedRows)) + } + + // Insert the document + _, err = table.Insert([]map[string]interface{}{insertDoc}) + if err != nil { + return fmt.Errorf("failed to insert skill document into %s: %w", tableName, err) + } + return nil +} + +// BulkIndex indexes documents in bulk +// For skill index (tableName starts with "skill_"), uses BulkInsertSkill +// For regular document index, returns not implemented error +func (e *infinityEngine) BulkIndex(ctx context.Context, tableName string, docs []interface{}) (interface{}, error) { + // Check if this is a skill index + if strings.HasPrefix(tableName, "skill_") { + inserted, err := e.BulkInsertSkill(ctx, tableName, docs) + return &BulkResponse{Inserted: inserted}, err + } + return nil, fmt.Errorf("infinity bulk insert not implemented for regular documents: waiting for official Go SDK") +} + +// BulkInsertSkill inserts multiple skill documents in bulk with upsert semantics. +// For each document, deletes existing rows with the same skill_id before inserting, +// matching the behavior of InsertSkill. Creates shallow copies of input maps to +// avoid mutating caller data. 
+func (e *infinityEngine) BulkInsertSkill(ctx context.Context, tableName string, docs []interface{}) (int, error) { + db, err := e.client.conn.GetDatabase(e.client.dbName) + if err != nil { + return 0, fmt.Errorf("failed to get database: %w", err) + } + + table, err := db.GetTable(tableName) + if err != nil { + return 0, fmt.Errorf("failed to get table %s: %w", tableName, err) + } + + // Collect skill_ids for upsert and create shallow copies of docs + skillIDs := make([]string, 0, len(docs)) + insertDocs := make([]map[string]interface{}, 0, len(docs)) + + for _, doc := range docs { + docMap, ok := doc.(map[string]interface{}) + if !ok { + logger.Warn("Invalid doc type in bulk insert, expected map[string]interface{}") + continue + } + + // Create shallow copy to avoid mutating caller's map + insertDoc := make(map[string]interface{}) + for k, v := range docMap { + insertDoc[k] = v + } + + // Ensure skill_id is set if id or skill_id exists in doc + var skillID string + if id, hasID := docMap["id"]; hasID { + skillID = fmt.Sprintf("%v", id) + insertDoc["skill_id"] = skillID + } else if sid, hasSkillID := docMap["skill_id"]; hasSkillID { + skillID = fmt.Sprintf("%v", sid) + } + + if skillID != "" { + skillIDs = append(skillIDs, skillID) + } + insertDocs = append(insertDocs, insertDoc) + } + + if len(insertDocs) == 0 { + logger.Warn("No valid documents to bulk insert", zap.String("tableName", tableName)) + return 0, nil + } + + // Upsert: delete existing documents with same skill_ids before inserting + for _, skillID := range skillIDs { + // Escape single quotes to prevent filter injection + docIDEscaped := strings.ReplaceAll(skillID, "'", "''") + filter := fmt.Sprintf("skill_id = '%s'", docIDEscaped) + delResp, delErr := table.Delete(filter) + if delErr != nil { + logger.Warn("Failed to delete existing skill document before bulk insert", + zap.String("tableName", tableName), + zap.String("skill_id", skillID), + zap.Error(delErr)) + } else if delResp.DeletedRows > 0 { + 
logger.Debug("Deleted existing skill document before bulk insert", + zap.String("tableName", tableName), + zap.String("skill_id", skillID), + zap.Int64("deletedRows", delResp.DeletedRows)) + } + } + + // Insert the documents + _, err = table.Insert(insertDocs) + if err != nil { + return 0, fmt.Errorf("failed to bulk insert skill documents: %w", err) + } + + logger.Debug("Bulk upserted skill documents", + zap.String("tableName", tableName), + zap.Int("count", len(insertDocs)), + zap.Int("skillIDs", len(skillIDs))) + return len(insertDocs), nil +} + +// BulkResponse bulk operation response +type BulkResponse struct { + Inserted int +} + +// GetDocument gets a document +func (e *infinityEngine) GetDocument(ctx context.Context, tableName, docID string) (interface{}, error) { + return nil, fmt.Errorf("infinity get document not implemented: waiting for official Go SDK") +} + +// DeleteDocument deletes a document by ID +func (e *infinityEngine) DeleteDocument(ctx context.Context, tableName, docID string) error { + if tableName == "" { + return fmt.Errorf("table name cannot be empty") + } + if docID == "" { + return fmt.Errorf("document id cannot be empty") + } + + db, err := e.client.conn.GetDatabase(e.client.dbName) + if err != nil { + return fmt.Errorf("failed to get database: %w", err) + } + + table, err := db.GetTable(tableName) + if err != nil { + return fmt.Errorf("failed to get table: %w", err) + } + + // Use filter to delete document by ID + // Skill index uses 'skill_id', regular indices use 'id' + idField := "id" + if strings.HasPrefix(tableName, "skill_") { + idField = "skill_id" + } + // Escape single quotes to prevent filter injection + docIDEscaped := strings.ReplaceAll(docID, "'", "''") + filter := fmt.Sprintf("%s = '%s'", idField, docIDEscaped) + resp, err := table.Delete(filter) + if err != nil { + return fmt.Errorf("failed to delete document: %w", err) + } + + logger.Debug("Deleted document from Infinity", + zap.String("tableName", tableName), + 
zap.String("docID", docID), + zap.String("idField", idField), + zap.Int64("deletedRows", resp.DeletedRows)) + + return nil +} diff --git a/internal/engine/infinity/search.go b/internal/engine/infinity/search.go index e82ba35223..52d780c777 100644 --- a/internal/engine/infinity/search.go +++ b/internal/engine/infinity/search.go @@ -40,7 +40,7 @@ import ( // It supports three matching types: MatchTextExpr (full-text), MatchDenseExpr (vector), and FusionExpr (combined). // If no match expressions are provided, Search relies solely on filter (e.g., doc_id, available_int) to find results. func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) (*types.SearchResult, error) { - logger.Info("Search in Infinity started", zap.Any("indexNames", req.IndexNames)) + logger.Debug("Search in Infinity started", zap.Any("indexNames", req.IndexNames)) if logger.IsDebugEnabled() { // Format match expressions for logging var matchExprsStr string @@ -88,16 +88,27 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( } isMetadataTable := false + isSkillIndex := false for _, idx := range req.IndexNames { if strings.HasPrefix(idx, "ragflow_doc_meta_") { isMetadataTable = true break } + if strings.HasPrefix(idx, "skill_") { + isSkillIndex = true + break + } } var outputColumns []string if isMetadataTable { outputColumns = []string{"id", "kb_id", "meta_fields"} + } else if isSkillIndex { + outputColumns = []string{ + "skill_id", "space_id", "folder_id", "name", "tags", "description", "content", + "version", "status", "create_time", "update_time", + } + outputColumns = convertSelectFields(outputColumns, true) } else { outputColumns = []string{ "id", "doc_id", "kb_id", "content_ltks", "content_with_weight", @@ -119,12 +130,24 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( continue } switch e := expr.(type) { + case string: + if e != "" { + hasTextMatch = true + matchText = &types.MatchTextExpr{ + 
MatchingText: e, + TopN: pageSize, + } + } case *types.MatchTextExpr: - hasTextMatch = true - matchText = e + if e.MatchingText != "" { + hasTextMatch = true + matchText = e + } case *types.MatchDenseExpr: - hasVectorMatch = true - matchDense = e + if len(e.EmbeddingData) > 0 { + hasVectorMatch = true + matchDense = e + } } } } @@ -132,14 +155,21 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( if hasTextMatch || hasVectorMatch { if hasTextMatch { outputColumns = append(outputColumns, "score()") - } else if hasVectorMatch { + } + // similarity() is only allowed by Infinity when there is ONLY MATCH VECTOR. + // When both text and vector matches exist (hybrid search with Fusion), + // only score() is valid — Fusion produces a unified SCORE column. + if hasVectorMatch && !hasTextMatch { outputColumns = append(outputColumns, "similarity()") } - if !slices.Contains(outputColumns, common.PAGERANK_FLD) { - outputColumns = append(outputColumns, common.PAGERANK_FLD) - } - if !slices.Contains(outputColumns, common.TAG_FLD) { - outputColumns = append(outputColumns, common.TAG_FLD) + // Skill index does not have pagerank_fea and tag_feas columns + if !isSkillIndex { + if !slices.Contains(outputColumns, common.PAGERANK_FLD) { + outputColumns = append(outputColumns, common.PAGERANK_FLD) + } + if !slices.Contains(outputColumns, common.TAG_FLD) { + outputColumns = append(outputColumns, common.TAG_FLD) + } } } @@ -147,7 +177,7 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( outputColumns = append(outputColumns, "row_id()") } - outputColumns = convertSelectFields(outputColumns) + outputColumns = convertSelectFields(outputColumns, isSkillIndex) if hasVectorMatch && matchDense != nil && matchDense.VectorColumnName != "" { outputColumns = append(outputColumns, matchDense.VectorColumnName) } @@ -167,11 +197,21 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( if req.Filter != nil { 
if availInt, ok := req.Filter["available_int"]; ok { filterParts = append(filterParts, fmt.Sprintf("available_int=%v", availInt)) + } else if status, ok := req.Filter["status"]; ok { + filterParts = append(filterParts, fmt.Sprintf("status='%s'", status)) + } else { + if isSkillIndex { + filterParts = append(filterParts, "status='1'") + } else { + filterParts = append(filterParts, "available_int=1") + } + } + } else { + if isSkillIndex { + filterParts = append(filterParts, "status='1'") } else { filterParts = append(filterParts, "available_int=1") } - } else { - filterParts = append(filterParts, "available_int=1") } } @@ -257,6 +297,13 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( var textFields []string if matchText != nil && len(matchText.Fields) > 0 { textFields = matchText.Fields + } else if isSkillIndex { + textFields = []string{ + "name^10", + "tags^5", + "description^3", + "content^1", + } } else { textFields = []string{ "title_tks^10", @@ -339,12 +386,16 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( vectorTopN = int(matchDense.TopN) } - denseFilterStr := filterStr - if denseFilterStr == "" { + denseFilterStr := filterStr + if denseFilterStr == "" { + if isSkillIndex { + denseFilterStr = "status='1'" + } else { denseFilterStr = "available_int=1" } + } - if hasTextMatch { + if hasTextMatch && fusionExpr == nil { fieldsStr := strings.Join(convertedFields, ",") filterFulltext := fmt.Sprintf("filter_fulltext('%s', '%s')", fieldsStr, questionText) denseFilterStr = fmt.Sprintf("(%s) AND %s", denseFilterStr, filterFulltext) @@ -354,13 +405,11 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( "filter": denseFilterStr, } - logger.Debug(fmt.Sprintf( - "MatchDenseExpr:\n"+ - " field=%s\n"+ - " topn=%d\n"+ - " extra_options=%v", - fieldName, vectorTopN, extraOptions, - )) + logger.Debug("MatchDense for hybrid search", + zap.String("fieldName", fieldName), + 
zap.String("distanceType", distanceType), + zap.Int("topN", vectorTopN), + zap.Bool("hasFusion", fusionExpr != nil)) table = table.MatchDense(fieldName, vectorData, dataType, distanceType, vectorTopN, extraOptions) } @@ -380,13 +429,11 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( fusionParams[k] = v } } - logger.Debug(fmt.Sprintf( - "FusionExpr:\n"+ - " method=%s\n"+ - " topn=%d\n"+ - " fusion_params=%v", - fusionMethod, fusionTopK, fusionParams, - )) + + logger.Debug("Applying Fusion for hybrid search", + zap.String("method", fusionMethod), + zap.Int("topN", fusionTopK), + zap.Any("params", fusionParams)) table = table.Fusion(fusionMethod, fusionTopK, fusionParams) } @@ -422,6 +469,12 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( // Execute query df, err := table.ToDataFrame() if err != nil { + logger.Warn("Infinity query failed", + zap.String("tableName", tableName), + zap.Bool("hasTextMatch", hasTextMatch), + zap.Bool("hasVectorMatch", hasVectorMatch), + zap.Bool("hasFusion", fusionExpr != nil), + zap.Error(err)) continue } @@ -437,7 +490,19 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( } // Apply field name mapping and row_id handling - GetFields(chunks, nil) + // Skill index uses different schema + // so we skip the document-specific field mappings + if !isSkillIndex { + GetFields(chunks, nil) + } else { + // For skill index, only handle ROW_ID -> row_id() mapping + for _, chunk := range chunks { + if val, ok := chunk["ROW_ID"]; ok { + chunk["row_id()"] = val + delete(chunk, "ROW_ID") + } + } + } // Parse total_hits_count from ExtraInfo var tableTotal int64 @@ -462,12 +527,19 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( if hasTextMatch || hasVectorMatch { scoreColumn := "" - if hasTextMatch { + if hasTextMatch && hasVectorMatch { + scoreColumn = "SCORE" + } else if hasTextMatch { scoreColumn = "SCORE" } else 
if hasVectorMatch { scoreColumn = "SIMILARITY" } - allResults = calculateScores(allResults, scoreColumn) + pagerankField := common.PAGERANK_FLD + if isSkillIndex { + pagerankField = "" // Skill index has no pagerank field + } + + allResults = calculateScores(allResults, scoreColumn, pagerankField) allResults = sortByScore(allResults, len(allResults)) } @@ -475,7 +547,7 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( allResults = allResults[:pageSize] } - logger.Info("Search in Infinity completed", zap.Any("indexNames", req.IndexNames), zap.Int("returnedRows", len(allResults)), zap.Int64("totalHits", totalHits)) + logger.Debug("Search in Infinity completed", zap.Int("returnedRows", len(allResults)), zap.Int64("totalHits", totalHits)) return &types.SearchResult{ Chunks: allResults, @@ -483,9 +555,9 @@ func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) ( }, nil } -// convertSelectFields converts RAG field names to Infinity column names for SELECT (output_columns). 
-// Example: docnm_kwd → docnm, content_ltks → content -func convertSelectFields(output []string) []string { +// convertSelectFields converts field names to Infinity format +// isSkillIndex indicates if this is a skill index (uses skill_id instead of id) +func convertSelectFields(output []string, isSkillIndex ...bool) []string { fieldMapping := map[string]string{ "docnm_kwd": "docnm", "title_tks": "docnm", @@ -501,6 +573,11 @@ func convertSelectFields(output []string) []string { "authors_sm_tks": "authors", } + skillIndex := false + if len(isSkillIndex) > 0 { + skillIndex = isSkillIndex[0] + } + needEmptyCount := false for i, field := range output { if field == "important_kwd" { @@ -522,15 +599,20 @@ func convertSelectFields(output []string) []string { } // Add id and empty count if needed + // For skill index, use skill_id instead of id hasID := false + idField := "id" + if skillIndex { + idField = "skill_id" + } for _, f := range result { - if f == "id" { + if f == idField { hasID = true break } } if !hasID { - result = append([]string{"id"}, result...) + result = append([]string{idField}, result...) } if needEmptyCount { @@ -540,8 +622,10 @@ func convertSelectFields(output []string) []string { return result } -// convertMatchingField converts RAG field names to Infinity full-text index names for MATCH expressions. 
-// Example: docnm_kwd → docnm@ft_docnm_rag_coarse, content_ltks → content@ft_content_rag_coarse +// convertMatchingField converts field names for matching +// For regular document indices: maps _tks/_kwd fields to column@index_name format +// For skill indices: maps raw field names to column@index_name format +// Infinity requires column@index_name when a column has multiple full-text indexes func convertMatchingField(fieldWeightStr string) string { // Split on ^ to get field name parts := strings.Split(fieldWeightStr, "^") @@ -562,6 +646,11 @@ func convertMatchingField(fieldWeightStr string) string { "authors_tks": "authors@ft_authors_rag_coarse", "authors_sm_tks": "authors@ft_authors_rag_fine", "tag_kwd": "tag_kwd@ft_tag_kwd_whitespace__", + // Skill index fields + "name": "name@ft_name_rag_coarse", + "tags": "tags@ft_tags_rag_coarse", + "description": "description@ft_description_rag_coarse", + "content": "content@ft_content_rag_coarse", } if newField, ok := fieldMapping[field]; ok { @@ -728,8 +817,8 @@ func equivalentConditionToStr(condition map[string]interface{}) string { return strings.Join(cond, " AND ") } -// calculateScores calculates _score = score_column + pagerank_fld -func calculateScores(chunks []map[string]interface{}, scoreColumn string) []map[string]interface{} { +// calculateScores calculates _score = score_column + pagerank +func calculateScores(chunks []map[string]interface{}, scoreColumn, pagerankField string) []map[string]interface{} { for i := range chunks { score := 0.0 if scoreVal, ok := chunks[i][scoreColumn]; ok { @@ -737,9 +826,11 @@ func calculateScores(chunks []map[string]interface{}, scoreColumn string) []map[ score += f } } - if prVal, ok := chunks[i][common.PAGERANK_FLD]; ok { - if f, ok := utility.ToFloat64(prVal); ok { - score += f + if pagerankField != "" { + if prVal, ok := chunks[i][pagerankField]; ok { + if f, ok := utility.ToFloat64(prVal); ok { + score += f + } } } chunks[i]["_score"] = score @@ -1008,4 +1099,4 @@ func (e 
*infinityEngine) GetHighlight(chunks []map[string]interface{}, keywords } return result -} +} \ No newline at end of file diff --git a/internal/entity/models/types.go b/internal/entity/models/types.go index c12f37c5f0..d27468cccb 100644 --- a/internal/entity/models/types.go +++ b/internal/entity/models/types.go @@ -75,14 +75,16 @@ type EmbeddingModel struct { ModelDriver ModelDriver ModelName *string APIConfig *APIConfig + MaxTokens int // Max input tokens for the embedding model, used for text truncation } // NewEmbeddingModel creates a new EmbeddingModel -func NewEmbeddingModel(driver ModelDriver, modelName *string, apiConfig *APIConfig) *EmbeddingModel { +func NewEmbeddingModel(driver ModelDriver, modelName *string, apiConfig *APIConfig, maxTokens int) *EmbeddingModel { return &EmbeddingModel{ ModelDriver: driver, ModelName: modelName, APIConfig: apiConfig, + MaxTokens: maxTokens, } } diff --git a/internal/entity/skill_search.go b/internal/entity/skill_search.go new file mode 100644 index 0000000000..3a31dfb486 --- /dev/null +++ b/internal/entity/skill_search.go @@ -0,0 +1,112 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +package entity + +import "time" + +// FieldWeight represents the weight configuration for a field: whether it is enabled and its weight +type FieldWeight struct { + Enabled bool `json:"enabled"` + Weight float64 `json:"weight"` +} + +// FieldConfig represents the field configuration for skill indexing +type FieldConfig struct { + Name FieldWeight `json:"name"` + Tags FieldWeight `json:"tags"` + Description FieldWeight `json:"description"` + Content FieldWeight `json:"content"` +} + +// DefaultFieldConfig returns the default field configuration: name 3.0, tags 2.0 and description 1.0 enabled; content 0.5 disabled +func DefaultFieldConfig() FieldConfig { + return FieldConfig{ + Name: FieldWeight{Enabled: true, Weight: 3.0}, + Tags: FieldWeight{Enabled: true, Weight: 2.0}, + Description: FieldWeight{Enabled: true, Weight: 1.0}, + Content: FieldWeight{Enabled: false, Weight: 0.5}, + } +} + +// SkillSearchConfig represents the search configuration for skills +type SkillSearchConfig struct { + ID string `gorm:"column:id;primaryKey;size:32" json:"id"` + TenantID string `gorm:"column:tenant_id;size:32;not null;index" json:"tenant_id"` + SpaceID string `gorm:"column:space_id;size:128;not null;default:'default';index" json:"space_id"` + EmbdID string `gorm:"column:embd_id;size:128;not null" json:"embd_id"` + Status string `gorm:"column:status;size:1;default:1" json:"status"` + VectorSimilarityWeight float64 `gorm:"column:vector_similarity_weight;default:0.3" json:"vector_similarity_weight"` + SimilarityThreshold float64 `gorm:"column:similarity_threshold;default:0.2" json:"similarity_threshold"` + FieldConfig JSONMap `gorm:"column:field_config;type:json" json:"field_config"` + RerankID *string `gorm:"column:rerank_id;size:128" json:"rerank_id,omitempty"` + TenantRerankID *int64 `gorm:"column:tenant_rerank_id" json:"tenant_rerank_id,omitempty"` + TopK int64 `gorm:"column:top_k;default:10" json:"top_k"` + IndexVersion string `gorm:"column:index_version;size:32;default:'1.0.0'" json:"index_version"` + CreateTime *int64 `gorm:"column:create_time" json:"create_time,omitempty"` +
UpdateTime *time.Time `gorm:"column:update_time" json:"update_time,omitempty"` +} + +// TableName returns the table name for SkillSearchConfig model +func (SkillSearchConfig) TableName() string { + return "skill_search_configs" +} + +// ToMap converts SkillSearchConfig to a map for JSON response; nil optional fields are omitted, pointer fields are dereferenced and update_time is formatted as "2006-01-02 15:04:05" +func (s *SkillSearchConfig) ToMap() map[string]interface{} { + result := map[string]interface{}{ + "id": s.ID, + "tenant_id": s.TenantID, + "space_id": s.SpaceID, + "embd_id": s.EmbdID, + "vector_similarity_weight": s.VectorSimilarityWeight, + "similarity_threshold": s.SimilarityThreshold, + "field_config": s.FieldConfig, + "top_k": s.TopK, + "index_version": s.IndexVersion, + "status": s.Status, + } + + if s.RerankID != nil { + result["rerank_id"] = *s.RerankID + } + if s.TenantRerankID != nil { + result["tenant_rerank_id"] = *s.TenantRerankID + } + if s.CreateTime != nil { + result["create_time"] = *s.CreateTime + } + if s.UpdateTime != nil { + result["update_time"] = s.UpdateTime.Format("2006-01-02 15:04:05") + } + + return result +} + +// SkillSearchResult represents a skill search result +type SkillSearchResult struct { + SkillID string `json:"skill_id"` + FolderID string `json:"folder_id"` // File system folder ID for retrieving files + Name string `json:"name"` + Description string `json:"description"` + Tags []string `json:"tags"` + Score float64 `json:"score"` + BM25Score float64 `json:"bm25_score,omitempty"` + VectorScore float64 `json:"vector_score,omitempty"` + IndexVersion string `json:"index_version,omitempty"` + CreateTime int64 `json:"create_time,omitempty"` + Version string `json:"version,omitempty"` // Skill version from index +} diff --git a/internal/entity/skill_space.go b/internal/entity/skill_space.go new file mode 100644 index 0000000000..0e90a39817 --- /dev/null +++ b/internal/entity/skill_space.go @@ -0,0 +1,90 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package entity + +import "time" + +// Space status constants +const ( + SpaceStatusActive = "1" // Normal active space + SpaceStatusDeleted = "0" // Soft-deleted space + SpaceStatusDeleting = "2" // Space is being asynchronously deleted +) + +// SkillSpace represents a skills space (library) that contains skills +type SkillSpace struct { + ID string `gorm:"column:id;primaryKey;size:32" json:"id"` + TenantID string `gorm:"column:tenant_id;size:32;not null;index" json:"tenant_id"` + Name string `gorm:"column:name;size:128;not null" json:"name"` + FolderID string `gorm:"column:folder_id;size:32;not null" json:"folder_id"` + Description string `gorm:"column:description;type:text" json:"description"` + EmbdID string `gorm:"column:embd_id;size:128" json:"embd_id"` + RerankID string `gorm:"column:rerank_id;size:128" json:"rerank_id"` + TopK int `gorm:"column:top_k;default:10" json:"top_k"` + Status string `gorm:"column:status;size:1;default:1" json:"status"` + CreateTime *int64 `gorm:"column:create_time" json:"create_time,omitempty"` + UpdateTime *time.Time `gorm:"column:update_time" json:"update_time,omitempty"` +} + +// TableName returns the table name for SkillSpace model +func (SkillSpace) TableName() string { + return "skill_spaces" +} + +// StatusDescription returns a human-readable status string ("active", "deleted", "deleting", or "unknown" for any other value) +func (s *SkillSpace) StatusDescription() string { + switch s.Status { + case SpaceStatusActive: + return
"active" + case SpaceStatusDeleted: + return "deleted" + case SpaceStatusDeleting: + return "deleting" + default: + return "unknown" + } +} + +// ToMap converts SkillSpace to a map for JSON response; status is emitted as StatusDescription(), empty or nil optional fields are omitted and create_time is dereferenced +func (s *SkillSpace) ToMap() map[string]interface{} { + result := map[string]interface{}{ + "id": s.ID, + "tenant_id": s.TenantID, + "name": s.Name, + "folder_id": s.FolderID, + "top_k": s.TopK, + "status": s.StatusDescription(), + } + + if s.Description != "" { + result["description"] = s.Description + } + if s.EmbdID != "" { + result["embd_id"] = s.EmbdID + } + if s.RerankID != "" { + result["rerank_id"] = s.RerankID + } + if s.CreateTime != nil { + result["create_time"] = *s.CreateTime + } + if s.UpdateTime != nil { + result["update_time"] = s.UpdateTime.Format("2006-01-02 15:04:05") + } + + return result +} diff --git a/internal/handler/skill_search.go b/internal/handler/skill_search.go new file mode 100644 index 0000000000..2e6b2a9583 --- /dev/null +++ b/internal/handler/skill_search.go @@ -0,0 +1,573 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +package handler + +import ( + "fmt" + "net/http" + "ragflow/internal/common" + "ragflow/internal/engine" + "ragflow/internal/logger" + "ragflow/internal/service" + + "github.com/gin-gonic/gin" + "go.uber.org/zap" +) + +// SkillSearchHandler handles skill search, skill indexing and skill space HTTP requests +type SkillSearchHandler struct { + searchService *service.SkillSearchService + indexerService *service.SkillIndexerService + spaceService *service.SkillSpaceService + docEngine engine.DocEngine +} + +// NewSkillSearchHandler creates a new skill search handler +func NewSkillSearchHandler(docEngine engine.DocEngine) *SkillSearchHandler { + return &SkillSearchHandler{ + searchService: service.NewSkillSearchService(), + indexerService: service.NewSkillIndexerService(), + spaceService: service.NewSkillSpaceService(), + docEngine: docEngine, + } +} + +// GetConfig handles the get skill search config request +// @Summary Get Skill Search Config +// @Description Get the search configuration for skills +// @Tags skill-search +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param embd_id query string true "Embedding Model ID" +// @Param space_id query string false "Skill Space ID" +// @Success 200 {object} map[string]interface{} +// @Router /v1/skills/config [get] +func (h *SkillSearchHandler) GetConfig(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + embdID := c.Query("embd_id") // may be empty: not validated here (IndexSkills and Reindex call GetConfig with "") + spaceID := c.Query("space_id") + + result, code, err := h.searchService.GetConfig(user.ID, spaceID, embdID) + if err != nil { + jsonError(c, code, err.Error()) + return + } + + jsonResponse(c, common.CodeSuccess, result, "success") +} + +// UpdateConfig handles the update skill search config request +// @Summary Update Skill Search Config +// @Description Update the search configuration for skills +// @Tags skill-search +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param
request body service.UpdateConfigRequest true "config info" +// @Success 200 {object} map[string]interface{} +// @Router /v1/skills/config [post] +func (h *SkillSearchHandler) UpdateConfig(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + var req service.UpdateConfigRequest + if err := c.ShouldBindJSON(&req); err != nil { + jsonError(c, common.CodeDataError, err.Error()) + return + } + + req.TenantID = user.ID // always taken from the authenticated user, replacing whatever was bound + + result, code, err := h.searchService.UpdateConfig(&req) + if err != nil { + jsonError(c, code, err.Error()) + return + } + + jsonResponse(c, common.CodeSuccess, result, "success") +} + +// Search handles the skill search request +// @Summary Search Skills +// @Description Search skills using configured search strategy +// @Tags skill-search +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param request body service.SearchRequest true "search query" +// @Success 200 {object} map[string]interface{} +// @Router /v1/skills/search [post] +func (h *SkillSearchHandler) Search(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + var req service.SearchRequest + if err := c.ShouldBindJSON(&req); err != nil { + jsonError(c, common.CodeDataError, err.Error()) + return + } + + req.TenantID = user.ID // always taken from the authenticated user, replacing whatever was bound + + result, code, err := h.searchService.Search(c.Request.Context(), &req, h.docEngine) + if err != nil { + jsonError(c, code, err.Error()) + return + } + + jsonResponse(c, common.CodeSuccess, result, "success") +} + +// IndexSkillsRequest represents the request to index skills +type IndexSkillsRequest struct { + Skills []service.SkillInfo `json:"skills" binding:"required"` + SpaceID string `json:"space_id"` + EmbdID string `json:"embd_id"` // Optional, will use config's embd_id if empty +} + +// IndexSkills handles the index skills request +//
@Summary Index Skills +// @Description Index skills for search. If embd_id is not provided, will use the one from skill search config. +// @Tags skill-search +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param request body IndexSkillsRequest true "skills to index" +// @Success 200 {object} map[string]interface{} +// @Router /v1/skills/index [post] +func (h *SkillSearchHandler) IndexSkills(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + var req IndexSkillsRequest + if err := c.ShouldBindJSON(&req); err != nil { + jsonError(c, common.CodeDataError, err.Error()) + return + } + + // If embd_id not provided, get from skill search config + embdID := req.EmbdID + if embdID == "" { + config, code, err := h.searchService.GetConfig(user.ID, req.SpaceID, "") + if err != nil { + jsonError(c, code, "failed to get skill search config: "+err.Error()) + return + } + val, ok := config["embd_id"].(string) + if !ok || val == "" { + jsonError(c, common.CodeDataError, "no embedding model configured in skill search config") + return + } + embdID = val + } + + // Ensure index exists before indexing. NOTE(review): EnsureIndex is only called when the engine type is "elasticsearch"; confirm other engines (e.g. Infinity) do not need it here + logger.Info("Ensuring skill index exists before indexing", + zap.String("tenantID", user.ID), + zap.String("spaceID", req.SpaceID), + zap.String("engineType", h.docEngine.GetType()), + zap.Int("skillCount", len(req.Skills))) + + if h.docEngine.GetType() == "elasticsearch" { + if err := h.indexerService.EnsureIndex(c.Request.Context(), user.ID, req.SpaceID, h.docEngine, embdID); err != nil { + jsonError(c, common.CodeOperatingError, err.Error()) + return + } + } + + if err := h.indexerService.BatchIndexSkills(c.Request.Context(), user.ID, req.SpaceID, req.Skills, h.docEngine, embdID); err != nil { + logger.Error(fmt.Sprintf("Failed to batch index skills: tenantID=%s, spaceID=%s, error=%v", user.ID, req.SpaceID, err), err) + jsonError(c,
common.CodeOperatingError, err.Error()) + return + } + + logger.Info("Successfully indexed skills", + zap.String("tenantID", user.ID), + zap.String("spaceID", req.SpaceID), + zap.Int("indexedCount", len(req.Skills))) + + jsonResponse(c, common.CodeSuccess, gin.H{ + "indexed_count": len(req.Skills), + }, "success") +} + +// ReindexRequest represents the request to reindex skills +type ReindexRequest struct { + SpaceID string `json:"space_id" binding:"required"` + EmbdID string `json:"embd_id"` // Optional, will use config's embd_id if empty +} + +// Reindex handles the reindex all skills request +// @Summary Reindex All Skills +// @Description Reindex all skills in the given skill space of the current tenant. If embd_id is not provided, will use the one from skill search config. +// @Tags skill-search +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param request body ReindexRequest true "reindex target (space_id required, embd_id optional)" +// @Success 200 {object} map[string]interface{} +// @Router /v1/skills/reindex [post] +func (h *SkillSearchHandler) Reindex(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + var req ReindexRequest + if err := c.ShouldBindJSON(&req); err != nil { + jsonError(c, common.CodeDataError, err.Error()) + return + } + + // If embd_id not provided, get from skill search config + embdID := req.EmbdID + if embdID == "" { + config, code, err := h.searchService.GetConfig(user.ID, req.SpaceID, "") + if err != nil { + jsonError(c, code, "failed to get skill search config: "+err.Error()) + return + } + val, ok := config["embd_id"].(string) + if !ok || val == "" { + jsonError(c, common.CodeDataError, "no embedding model configured in skill search config") + return + } + embdID = val + } + + result, err := h.indexerService.ReindexAll(c.Request.Context(), user.ID, req.SpaceID, h.docEngine, embdID) + if err != nil { + jsonError(c, common.CodeOperatingError, err.Error()) + return + } + +
jsonResponse(c, common.CodeSuccess, result, "success") +} + +// DeleteSkillIndex handles the delete skill index request +// @Summary Delete Skill Index +// @Description Delete a skill's search index +// @Tags skill-search +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param skill_id query string true "Skill ID (skill name)" +// @Param space_id query string true "Space ID" +// @Success 200 {object} map[string]interface{} +// @Router /v1/skills/index [delete] +func (h *SkillSearchHandler) DeleteSkillIndex(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + skillID := c.Query("skill_id") + spaceID := c.Query("space_id") // not validated here; forwarded to the indexer as-is + if skillID == "" { + jsonError(c, common.CodeDataError, "skill_id is required") + return + } + + err := h.indexerService.DeleteSkillIndex(c.Request.Context(), user.ID, spaceID, skillID, h.docEngine) + if err != nil { + jsonError(c, common.CodeOperatingError, "failed to delete skill index") + return + } + + jsonResponse(c, common.CodeSuccess, true, "success") +} + +// InitializeIndex handles the initialize skill search index request. NOTE(review): the @Router path below uses /skill/ (singular) unlike the other /skills/ routes; confirm where this handler is mounted +// @Summary Initialize Skill Search Index +// @Description Initialize the skill search index for a tenant +// @Tags skill-search +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param embd_id query string true "Embedding Model ID" +// @Param space_id query string false "Skill Space ID" +// @Success 200 {object} map[string]interface{} +// @Router /v1/skill/search/init [post] +func (h *SkillSearchHandler) InitializeIndex(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + embdID := c.Query("embd_id") + spaceID := c.Query("space_id") + if embdID == "" { + jsonError(c, common.CodeDataError, "embd_id is required") + return + } + + if err :=
h.indexerService.InitializeIndex(c.Request.Context(), user.ID, spaceID, h.docEngine, embdID); err != nil { + jsonError(c, common.CodeOperatingError, err.Error()) + return + } + + jsonResponse(c, common.CodeSuccess, gin.H{"initialized": true}, "success") +} + +// ==================== Skill Space Management ==================== + +// ListSpaces handles the list skill spaces request +// @Summary List Skill Spaces +// @Description List all skill spaces for the current tenant +// @Tags skill-space +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/skills/spaces [get] +func (h *SkillSearchHandler) ListSpaces(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + result, code, err := h.spaceService.ListSpaces(user.ID) + if err != nil { + jsonError(c, code, err.Error()) + return + } + + jsonResponse(c, common.CodeSuccess, result, "success") +} + +// CreateSpaceRequest represents the HTTP body to create a skill space; the tenant is filled in by CreateSpace from the authenticated user +type CreateSpaceRequest struct { + Name string `json:"name" binding:"required"` + Description string `json:"description"` + EmbdID string `json:"embd_id"` + RerankID string `json:"rerank_id"` +} + +// CreateSpace handles the create skill space request +// @Summary Create Skill Space +// @Description Create a new skill space with associated folder +// @Tags skill-space +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param request body CreateSpaceRequest true "space info" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/skills/spaces [post] +func (h *SkillSearchHandler) CreateSpace(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + var req CreateSpaceRequest + if err := c.ShouldBindJSON(&req); err != nil { + jsonError(c,
common.CodeDataError, err.Error()) + return + } + + result, code, err := h.spaceService.CreateSpace(&service.CreateSpaceRequest{ + TenantID: user.ID, + Name: req.Name, + Description: req.Description, + EmbdID: req.EmbdID, + RerankID: req.RerankID, + }) + if err != nil { + jsonError(c, code, err.Error()) + return + } + + jsonResponse(c, common.CodeSuccess, result, "success") +} + +// GetSpace handles the get skill space request +// @Summary Get Skill Space +// @Description Get a skill space by ID +// @Tags skill-space +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param space_id path string true "Space ID" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/skills/spaces/{space_id} [get] +func (h *SkillSearchHandler) GetSpace(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + spaceID := c.Param("space_id") + if spaceID == "" { + jsonError(c, common.CodeDataError, "space_id is required") + return + } + + result, code, err := h.spaceService.GetSpace(spaceID, user.ID) + if err != nil { + jsonError(c, code, err.Error()) + return + } + + jsonResponse(c, common.CodeSuccess, result, "success") +} + +// UpdateSpaceRequest represents the HTTP body to update a skill space; no field carries a binding requirement +type UpdateSpaceRequest struct { + Name string `json:"name"` + Description string `json:"description"` + EmbdID string `json:"embd_id"` + RerankID string `json:"rerank_id"` + TopK int `json:"top_k"` +} + +// UpdateSpace handles the update skill space request +// @Summary Update Skill Space +// @Description Update a skill space +// @Tags skill-space +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param space_id path string true "Space ID" +// @Param request body UpdateSpaceRequest true "space updates" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/skills/spaces/{space_id} [put] +func (h *SkillSearchHandler) UpdateSpace(c
*gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + spaceID := c.Param("space_id") + if spaceID == "" { + jsonError(c, common.CodeDataError, "space_id is required") + return + } + + var req UpdateSpaceRequest + if err := c.ShouldBindJSON(&req); err != nil { + jsonError(c, common.CodeDataError, err.Error()) + return + } + + result, code, err := h.spaceService.UpdateSpace(spaceID, user.ID, &service.UpdateSpaceRequest{ + Name: req.Name, + Description: req.Description, + EmbdID: req.EmbdID, + RerankID: req.RerankID, + TopK: req.TopK, + }) + if err != nil { + jsonError(c, code, err.Error()) + return + } + + jsonResponse(c, common.CodeSuccess, result, "success") +} + +// DeleteSpace handles the delete skill space request +// @Summary Delete Skill Space +// @Description Delete a skill space and its associated folder +// @Tags skill-space +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param space_id path string true "Space ID" +// @Success 202 {object} map[string]interface{} +// @Router /api/v1/skills/spaces/{space_id} [delete] +func (h *SkillSearchHandler) DeleteSpace(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + spaceID := c.Param("space_id") + if spaceID == "" { + jsonError(c, common.CodeDataError, "space_id is required") + return + } + + // Get Authorization header for Python API calls + authHeader := c.GetHeader("Authorization") + + code, err := h.spaceService.DeleteSpace(spaceID, user.ID, h.docEngine, authHeader) + if err != nil { + jsonError(c, code, err.Error()) + return + } + + // Return 202 Accepted since deletion is async; written with c.JSON directly rather than jsonResponse + c.JSON(http.StatusAccepted, gin.H{ + "code": 0, + "data": gin.H{"deleting": true, "space_id": spaceID}, + "message": "success", + }) +} + +// GetSpaceByFolder handles the get skill space by folder ID request
+// @Summary Get Skill Space by Folder +// @Description Get a skill space by its folder ID +// @Tags skill-space +// @Accept json +// @Produce json +// @Security ApiKeyAuth +// @Param folder_id query string true "Folder ID" +// @Success 200 {object} map[string]interface{} +// @Router /api/v1/skills/space/by-folder [get] +func (h *SkillSearchHandler) GetSpaceByFolder(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + folderID := c.Query("folder_id") + if folderID == "" { + jsonError(c, common.CodeDataError, "folder_id is required") + return + } + + result, code, err := h.spaceService.GetSpaceByFolderID(folderID, user.ID) + if err != nil { + jsonError(c, code, err.Error()) + return + } + + jsonResponse(c, common.CodeSuccess, result, "success") +} diff --git a/internal/router/router.go b/internal/router/router.go index 8c8d30dca2..46369ac093 100644 --- a/internal/router/router.go +++ b/internal/router/router.go @@ -38,6 +38,7 @@ type Router struct { searchHandler *handler.SearchHandler fileHandler *handler.FileHandler memoryHandler *handler.MemoryHandler + skillSearchHandler *handler.SkillSearchHandler providerHandler *handler.ProviderHandler } @@ -58,6 +59,7 @@ func NewRouter( searchHandler *handler.SearchHandler, fileHandler *handler.FileHandler, memoryHandler *handler.MemoryHandler, + skillSearchHandler *handler.SkillSearchHandler, providerHandler *handler.ProviderHandler, ) *Router { return &Router{ @@ -76,6 +78,7 @@ func NewRouter( searchHandler: searchHandler, fileHandler: fileHandler, memoryHandler: memoryHandler, + skillSearchHandler: skillSearchHandler, providerHandler: providerHandler, } } @@ -164,16 +167,38 @@ func (r *Router) Setup(engine *gin.Engine) { memory.GET("/:memory_id", r.memoryHandler.GetMemoryMessages) } - // TODO: Message routes - Implementation pending - depends on CanvasService, TaskService and embedding engine - // message :=
v1.Group("/messages") - // { - // message.POST("", r.memoryHandler.AddMessage) - // message.DELETE("/:memory_id/:message_id", r.memoryHandler.ForgetMessage) - // message.PUT("/:memory_id/:message_id", r.memoryHandler.UpdateMessage) - // message.GET("/search", r.memoryHandler.SearchMessage) - // message.GET("", r.memoryHandler.GetMessages) - // message.GET("/:memory_id/:message_id/content", r.memoryHandler.GetMessageContent) - // } + // TODO: Message routes - Implementation pending - depends on CanvasService, TaskService and embedding engine + // message := v1.Group("/messages") + // { + // message.POST("", r.memoryHandler.AddMessage) + // message.DELETE("/:memory_id/:message_id", r.memoryHandler.ForgetMessage) + // message.PUT("/:memory_id/:message_id", r.memoryHandler.UpdateMessage) + // message.GET("/search", r.memoryHandler.SearchMessage) + // message.GET("", r.memoryHandler.GetMessages) + // message.GET("/:memory_id/:message_id/content", r.memoryHandler.GetMessageContent) + // } + + // Skill search routes + skills := v1.Group("/skills") + { + // Skill Space management + skills.GET("/spaces", r.skillSearchHandler.ListSpaces) + skills.POST("/spaces", r.skillSearchHandler.CreateSpace) + skills.GET("/spaces/:space_id", r.skillSearchHandler.GetSpace) + skills.PUT("/spaces/:space_id", r.skillSearchHandler.UpdateSpace) + skills.DELETE("/spaces/:space_id", r.skillSearchHandler.DeleteSpace) + skills.GET("/space/by-folder", r.skillSearchHandler.GetSpaceByFolder) + + // Skill search config + skills.GET("/config", r.skillSearchHandler.GetConfig) + skills.POST("/config", r.skillSearchHandler.UpdateConfig) + + // Skill search and indexing + skills.POST("/search", r.skillSearchHandler.Search) + skills.POST("/index", r.skillSearchHandler.IndexSkills) + skills.DELETE("/index", r.skillSearchHandler.DeleteSkillIndex) + skills.POST("/reindex", r.skillSearchHandler.Reindex) + } chats := v1.Group("/chats") { @@ -181,16 +206,16 @@ func (r *Router) Setup(engine *gin.Engine) { 
chats.GET("/:chat_id", r.chatHandler.GetChat) } - searches := v1.Group("/searches") - { - searches.GET("", r.searchHandler.ListSearches) - searches.POST("", r.searchHandler.CreateSearch) - searches.GET("/:search_id", r.searchHandler.GetSearch) - searches.PUT("/:search_id", r.searchHandler.UpdateSearch) - searches.DELETE("/:search_id", r.searchHandler.DeleteSearch) - } + searches := v1.Group("/searches") + { + searches.GET("", r.searchHandler.ListSearches) + searches.POST("", r.searchHandler.CreateSearch) + searches.GET("/:search_id", r.searchHandler.GetSearch) + searches.PUT("/:search_id", r.searchHandler.UpdateSearch) + searches.DELETE("/:search_id", r.searchHandler.DeleteSearch) + } - file := v1.Group("/files") + file := v1.Group("/files") { file.POST("", r.fileHandler.UploadFile) file.GET("", r.fileHandler.ListFiles) diff --git a/internal/server/config.go b/internal/server/config.go index 0c2bd03f0f..d0a6ef03d3 100644 --- a/internal/server/config.go +++ b/internal/server/config.go @@ -606,20 +606,26 @@ func FromConfigFile(configPath string) error { } // Map doc_engine section to DocEngineConfig - if globalConfig != nil && globalConfig.DocEngine.Type == "" { - if v.IsSet("doc_engine") { - docEngineConfig := v.Sub("doc_engine") - if docEngineConfig != nil { - globalConfig.DocEngine.Type = EngineType(docEngineConfig.GetString("type")) + if globalConfig != nil { + // First, ensure engine type is set + if globalConfig.DocEngine.Type == "" { + if v.IsSet("doc_engine") { + docEngineConfig := v.Sub("doc_engine") + if docEngineConfig != nil { + globalConfig.DocEngine.Type = EngineType(docEngineConfig.GetString("type")) + } } } - // Also check legacy es section for backward compatibility + + // Map es section from top-level (service_conf.yaml format) if v.IsSet("es") { esConfig := v.Sub("es") if esConfig != nil { + // Set default engine type if not set if globalConfig.DocEngine.Type == "" { globalConfig.DocEngine.Type = EngineElasticsearch } + // Always populate ES config 
if es section exists if globalConfig.DocEngine.ES == nil { globalConfig.DocEngine.ES = &ElasticsearchConfig{ Hosts: esConfig.GetString("hosts"), @@ -629,17 +635,23 @@ func FromConfigFile(configPath string) error { } } } + + // Map infinity section from top-level (service_conf.yaml format) if v.IsSet("infinity") { infConfig := v.Sub("infinity") if infConfig != nil { + // Set default engine type if not set if globalConfig.DocEngine.Type == "" { globalConfig.DocEngine.Type = EngineInfinity } + // Always populate Infinity config if infinity section exists if globalConfig.DocEngine.Infinity == nil { globalConfig.DocEngine.Infinity = &InfinityConfig{ - URI: infConfig.GetString("uri"), - PostgresPort: infConfig.GetInt("postgres_port"), - DBName: infConfig.GetString("db_name"), + URI: infConfig.GetString("uri"), + PostgresPort: infConfig.GetInt("postgres_port"), + DBName: infConfig.GetString("db_name"), + MappingFileName: infConfig.GetString("mapping_file_name"), + DocMetaMappingFileName: infConfig.GetString("doc_meta_mapping_file_name"), } } } diff --git a/internal/service/model_service.go b/internal/service/model_service.go index 7b95b745c1..5916a1bdbe 100644 --- a/internal/service/model_service.go +++ b/internal/service/model_service.go @@ -925,16 +925,16 @@ func (m *ModelProviderService) GetModelByName(modelName string, tenantID string) // GetEmbeddingModel returns an EmbeddingModel wrapper for the given tenant func (m *ModelProviderService) GetEmbeddingModel(tenantID, compositeModelName string) (*modelModule.EmbeddingModel, error) { - driver, modelName, apiConfig, err := m.getModelConfig(tenantID, compositeModelName) + driver, modelName, apiConfig, maxTokens, err := m.getModelConfig(tenantID, compositeModelName) if err != nil { return nil, err } - return modelModule.NewEmbeddingModel(driver, &modelName, apiConfig), nil + return modelModule.NewEmbeddingModel(driver, &modelName, apiConfig, maxTokens), nil } // GetRerankModel returns a RerankModel wrapper for the given 
tenant func (m *ModelProviderService) GetRerankModel(tenantID, compositeModelName string) (*modelModule.RerankModel, error) { - driver, modelName, apiConfig, err := m.getModelConfig(tenantID, compositeModelName) + driver, modelName, apiConfig, _, err := m.getModelConfig(tenantID, compositeModelName) if err != nil { return nil, err } @@ -943,7 +943,7 @@ func (m *ModelProviderService) GetRerankModel(tenantID, compositeModelName strin // GetChatModel returns a ChatModel wrapper for the given tenant func (m *ModelProviderService) GetChatModel(tenantID, compositeModelName string) (*modelModule.ChatModel, error) { - driver, modelName, apiConfig, err := m.getModelConfig(tenantID, compositeModelName) + driver, modelName, apiConfig, _, err := m.getModelConfig(tenantID, compositeModelName) if err != nil { return nil, err } @@ -1024,20 +1024,20 @@ func (m *ModelProviderService) AddCustomModel(request *AddCustomModelRequest, us return common.CodeSuccess, nil } -// getModelConfig returns the model driver, model name, and API config for a model -func (m *ModelProviderService) getModelConfig(tenantID, compositeModelName string) (modelModule.ModelDriver, string, *modelModule.APIConfig, error) { +// getModelConfig returns the model driver, model name, API config, and max tokens for a model +func (m *ModelProviderService) getModelConfig(tenantID, compositeModelName string) (modelModule.ModelDriver, string, *modelModule.APIConfig, int, error) { modelName, instanceName, providerName, err := parseModelName(compositeModelName) if err != nil { - return nil, "", nil, err + return nil, "", nil, 0, err } // Check if provider exists provider, err := m.modelProviderDAO.GetByTenantIDAndProviderName(tenantID, providerName) if err != nil { - return nil, "", nil, err + return nil, "", nil, 0, err } if provider == nil { - return nil, "", nil, fmt.Errorf("provider %s not found", providerName) + return nil, "", nil, 0, fmt.Errorf("provider %s not found", providerName) } if instanceName == "" { @@ 
-1046,47 +1046,42 @@ func (m *ModelProviderService) getModelConfig(tenantID, compositeModelName strin instance, err := m.modelInstanceDAO.GetByProviderIDAndInstanceName(provider.ID, instanceName) if err != nil { - return nil, "", nil, err + return nil, "", nil, 0, err } if instance == nil { - return nil, "", nil, fmt.Errorf("instance %s not found for provider %s", instanceName, providerName) - } - - _, err = m.modelDAO.GetModelByProviderIDAndInstanceIDAndModelName(provider.ID, instance.ID, modelName) - if err != nil { - providerInfo := dao.GetModelProviderManager().FindProvider(providerName) - if providerInfo == nil { - return nil, "", nil, fmt.Errorf("provider %s not found", providerName) - } - - _, err = dao.GetModelProviderManager().GetModelByName(providerName, modelName) - if err != nil { - return nil, "", nil, fmt.Errorf("provider %s model %s not found", providerName, modelName) - } - - var extra map[string]string - err = json.Unmarshal([]byte(instance.Extra), &extra) - if err != nil { - return nil, "", nil, err - } - region := extra["region"] - - apiConfig := &modelModule.APIConfig{ApiKey: &instance.APIKey, Region: &region} - return providerInfo.ModelDriver, modelName, apiConfig, nil + return nil, "", nil, 0, fmt.Errorf("instance %s not found for provider %s", instanceName, providerName) } var extra map[string]string err = json.Unmarshal([]byte(instance.Extra), &extra) if err != nil { - return nil, "", nil, err + return nil, "", nil, 0, err } region := extra["region"] providerInfo := dao.GetModelProviderManager().FindProvider(providerName) if providerInfo == nil { - return nil, "", nil, fmt.Errorf("provider %s not found", providerName) + return nil, "", nil, 0, fmt.Errorf("provider %s not found", providerName) + } + + // Get model info to extract max_tokens + modelInfo, err := dao.GetModelProviderManager().GetModelByName(providerName, modelName) + maxTokens := 0 + if err == nil && modelInfo != nil { + maxTokens = modelInfo.MaxTokens + } + + _, err = 
m.modelDAO.GetModelByProviderIDAndInstanceIDAndModelName(provider.ID, instance.ID, modelName) + if err != nil { + _, err = dao.GetModelProviderManager().GetModelByName(providerName, modelName) + if err != nil { + return nil, "", nil, 0, fmt.Errorf("provider %s model %s not found", providerName, modelName) + } + + apiConfig := &modelModule.APIConfig{ApiKey: &instance.APIKey, Region: &region} + return providerInfo.ModelDriver, modelName, apiConfig, maxTokens, nil } apiConfig := &modelModule.APIConfig{ApiKey: &instance.APIKey, Region: &region} - return providerInfo.ModelDriver, modelName, apiConfig, nil + return providerInfo.ModelDriver, modelName, apiConfig, maxTokens, nil } diff --git a/internal/service/skill_indexer.go b/internal/service/skill_indexer.go new file mode 100644 index 0000000000..414d1ccb9c --- /dev/null +++ b/internal/service/skill_indexer.go @@ -0,0 +1,1036 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package service + +import ( + "context" + "encoding/json" + "fmt" + "path/filepath" + "ragflow/internal/dao" + "ragflow/internal/engine" + "ragflow/internal/entity" + "ragflow/internal/logger" + "ragflow/internal/storage" + "ragflow/internal/tokenizer" + "strings" + "time" + + "go.uber.org/zap" +) + +// SkillVersionInfo represents a skill version in the file system +type SkillVersionInfo struct { + SkillName string `json:"skill_name"` + Version string `json:"version"` + Description string `json:"description"` + Tags []string `json:"tags"` + Content string `json:"content"` +} + +// FileSystemClient defines the interface for accessing skill files +type FileSystemClient interface { + ListSkills(ctx context.Context, tenantID string) ([]SkillVersionInfo, error) + GetSkillContent(ctx context.Context, tenantID, skillName string) (*SkillVersionInfo, error) +} + +// defaultMaxLength is a safe default for embedding model max input length +const defaultMaxLength = 8191 + +// SkillIndexerService handles skill indexing operations +type SkillIndexerService struct { + configDAO *dao.SkillSearchConfigDAO + fileDAO *dao.FileDAO + spaceDAO *dao.SkillSpaceDAO + modelProvider *ModelProviderService +} + +// NewSkillIndexerService creates a new SkillIndexerService instance +func NewSkillIndexerService() *SkillIndexerService { + return &SkillIndexerService{ + configDAO: dao.NewSkillSearchConfigDAO(), + fileDAO: dao.NewFileDAO(), + spaceDAO: dao.NewSkillSpaceDAO(), + modelProvider: NewModelProviderService(), + } +} + +// isElasticsearch checks if the engine is Elasticsearch +func isElasticsearch(docEngine engine.DocEngine) bool { + return docEngine.GetType() == "elasticsearch" +} + +// IndexSkill indexes a single skill +// Uses skill_id as doc_id for direct mapping, with version control for incremental updates +// For ES: xxx fields store original content, xxx_tks fields store RAG-tokenized content (space-separated) +// For Infinity: only xxx fields with built-in rag-analyzer +func 
(s *SkillIndexerService) IndexSkill(ctx context.Context, tenantID, spaceID string, skill SkillInfo, docEngine engine.DocEngine, embdID string) error { + spaceID = normalizeSpaceID(spaceID) + + config, err := s.configDAO.GetOrCreate(tenantID, spaceID, embdID) + if err != nil { + return fmt.Errorf("failed to get config: %w", err) + } + + // Get field config + fieldConfig := entity.DefaultFieldConfig() + if config.FieldConfig != nil { + if fcJSON, err := json.Marshal(config.FieldConfig); err == nil { + json.Unmarshal(fcJSON, &fieldConfig) + } + } + + // Build vector text from enabled fields + vectorText := BuildVectorText(skill.Name, skill.Description, skill.Tags, skill.Content, fieldConfig) + + // Generate embedding (optional - continue on failure) + vector, err := s.generateEmbedding(ctx, vectorText, embdID, tenantID) + if err != nil { + logger.Warn(fmt.Sprintf("Failed to generate embedding for skill %s: %v. Continuing with text-only index.", skill.ID, err)) + } + + // Build document with RAG tokenization for ES + now := time.Now() + timestamp := now.UnixMilli() + + // Get embedding dimension by calling embedding API with test text + // This follows Python's approach: get dimension from actual embedding result + dimension, err := s.getEmbeddingDimension(ctx, tenantID, embdID) + if err != nil { + return fmt.Errorf("failed to get embedding dimension: %w", err) + } + vectorField := fmt.Sprintf("q_%d_vec", dimension) + + // Determine engine type + isES := isElasticsearch(docEngine) + + // Build base document + // Use skill.Version if available, otherwise use config.IndexVersion as fallback + skillVersion := skill.Version + if skillVersion == "" { + skillVersion = "1.0.0" + } + doc := map[string]interface{}{ + "skill_id": skill.ID, + "space_id": spaceID, + "folder_id": skill.FolderID, + "name": skill.Name, + "tags": strings.Join(skill.Tags, ", "), + "description": skill.Description, + "content": skill.Content, + "version": skillVersion, + "status": "1", + "create_time": 
timestamp, + "update_time": timestamp, + } + + // Add vector if available + if vector != nil { + doc[vectorField] = vector + } else if docEngine.GetType() == "infinity" { + // For Infinity: use zero vector as placeholder + doc[vectorField] = make([]float64, dimension) + } + + // For ES: add tokenized fields for BM25 search + // For Infinity: fields have built-in analyzer, no need for xxx_tks + if isES { + nameTokens, _ := tokenizer.Tokenize(skill.Name) + tagsText := strings.Join(skill.Tags, " ") + tagsTokens, _ := tokenizer.Tokenize(tagsText) + + doc["name_tks"] = nameTokens + doc["tags_tks"] = tagsTokens + + if fieldConfig.Description.Enabled { + descTokens, _ := tokenizer.Tokenize(skill.Description) + doc["description_tks"] = descTokens + } + if fieldConfig.Content.Enabled { + contentTokens, _ := tokenizer.Tokenize(skill.Content) + doc["content_tks"] = contentTokens + } + } + + indexName := getSkillIndexName(tenantID, spaceID) + + // For Infinity: ensure table exists with correct dimension BEFORE inserting + if docEngine.GetType() == "infinity" { + exists, _ := docEngine.TableExists(ctx, indexName) + if !exists { + logger.Info(fmt.Sprintf("Creating Infinity table with dimension %d", dimension)) + if err := s.createIndexWithDimension(ctx, tenantID, spaceID, docEngine, embdID, dimension); err != nil { + return fmt.Errorf("failed to create index with dimension %d: %w", dimension, err) + } + } + } + + // Delete old versions (both new format and old format with version suffix) + // This ensures only the latest version is indexed + logger.Debug(fmt.Sprintf("Deleting old versions of skill if exists: indexName=%s, skillName=%s", indexName, skill.Name)) + if err := s.DeleteSkillByName(ctx, tenantID, spaceID, skill.Name, docEngine); err != nil { + logger.Debug(fmt.Sprintf("No existing document to delete for skill %s (this is normal for new skills)", skill.Name)) + } + + // ES document ID cannot contain '/' - replace with '_' + docID := strings.ReplaceAll(skill.ID, "/", 
"_") + + logger.Info(fmt.Sprintf("Calling IndexDocument: indexName=%s, docID=%s, engineType=%s", indexName, docID, docEngine.GetType())) + if err := docEngine.IndexDocument(ctx, indexName, docID, doc); err != nil { + logger.Error(fmt.Sprintf("IndexDocument failed: indexName=%s, docID=%s", indexName, docID), err) + return fmt.Errorf("failed to index document: %w", err) + } + logger.Info(fmt.Sprintf("IndexDocument succeeded: indexName=%s, docID=%s", indexName, docID)) + + return nil +} + +// BatchIndexSkills indexes multiple skills in batch +// Optimized to use batch embedding API for better performance +func (s *SkillIndexerService) BatchIndexSkills(ctx context.Context, tenantID, spaceID string, skills []SkillInfo, docEngine engine.DocEngine, embdID string) error { + spaceID = normalizeSpaceID(spaceID) + if len(skills) == 0 { + return nil + } + + config, err := s.configDAO.GetOrCreate(tenantID, spaceID, embdID) + if err != nil { + return fmt.Errorf("failed to get config: %w", err) + } + + // Get field config + fieldConfig := entity.DefaultFieldConfig() + if config.FieldConfig != nil { + if fcJSON, err := json.Marshal(config.FieldConfig); err == nil { + json.Unmarshal(fcJSON, &fieldConfig) + } + } + + // Build vector texts for all skills + vectorTexts := make([]string, len(skills)) + for i, skill := range skills { + vectorTexts[i] = BuildVectorText(skill.Name, skill.Description, skill.Tags, skill.Content, fieldConfig) + } + + // Get embedding dimension FIRST by calling embedding API with test text + // This follows Python's approach: must get dimension before creating table + dimension, err := s.getEmbeddingDimension(ctx, tenantID, embdID) + if err != nil { + return fmt.Errorf("failed to get embedding dimension: %w", err) + } + logger.Info(fmt.Sprintf("Using embedding dimension: %d", dimension)) + vectorField := fmt.Sprintf("q_%d_vec", dimension) + + // Generate embeddings in batch + logger.Info(fmt.Sprintf("Generating embeddings for %d skills with embdID=%s", 
len(skills), embdID)) + vectors, err := s.generateEmbeddings(ctx, vectorTexts, embdID, tenantID) + if err != nil { + logger.Warn(fmt.Sprintf("Failed to generate embeddings: %v. Continuing with text-only index.", err)) + vectors = nil // Continue without vectors + } else { + logger.Info(fmt.Sprintf("Generated %d vectors", len(vectors))) + } + + // Ensure index exists with correct dimension + indexName := getSkillIndexName(tenantID, spaceID) + if docEngine.GetType() == "infinity" { + // For Infinity: must ensure table exists with correct dimension BEFORE inserting + logger.Info(fmt.Sprintf("Checking if index exists: %s", indexName)) + exists, err := docEngine.TableExists(ctx, indexName) + if err != nil { + logger.Warn(fmt.Sprintf("Error checking index existence: %v", err)) + } + logger.Info(fmt.Sprintf("Index exists: %v", exists)) + + if !exists { + // Only create if table doesn't exist + logger.Info(fmt.Sprintf("Creating index with actual dimension %d", dimension)) + if err := s.createIndexWithDimension(ctx, tenantID, spaceID, docEngine, embdID, dimension); err != nil { + return fmt.Errorf("failed to create index with dimension %d: %w", dimension, err) + } + logger.Info("Index created successfully") + } else { + logger.Info("Index already exists, skipping creation") + } + } else { + // For ES: just ensure index exists + if err := s.EnsureIndex(ctx, tenantID, spaceID, docEngine, embdID); err != nil { + return fmt.Errorf("failed to ensure index exists: %w", err) + } + } + + // Index all skills + now := time.Now() + timestamp := now.UnixMilli() + isES := isElasticsearch(docEngine) + + var indexErrors []string + for i, skill := range skills { + // Delete old versions (both new format and old format with version suffix) + // This ensures only the latest version is indexed + if err := s.DeleteSkillByName(ctx, tenantID, spaceID, skill.Name, docEngine); err != nil { + logger.Debug(fmt.Sprintf("No existing document to delete for skill %s (this is normal for new skills)", 
skill.Name)) + } + + // ES document ID cannot contain '/' - replace with '_' + docID := strings.ReplaceAll(skill.ID, "/", "_") + + // Use skill.Version if available, otherwise default to "1.0.0" + skillVersion := skill.Version + if skillVersion == "" { + skillVersion = "1.0.0" + } + + doc := map[string]interface{}{ + "skill_id": skill.ID, + "space_id": spaceID, + "folder_id": skill.FolderID, + "name": skill.Name, + "tags": strings.Join(skill.Tags, ", "), + "description": skill.Description, + "content": skill.Content, + "version": skillVersion, + "status": "1", + "create_time": timestamp, + "update_time": timestamp, + } + + // Add vector only if available + if vectors != nil && i < len(vectors) { + doc[vectorField] = vectors[i] + } else { + logger.Info(fmt.Sprintf("No vector for skill %s, creating text-only index", skill.ID)) + // For Infinity: use zero vector as placeholder (table schema requires vector column) + if docEngine.GetType() == "infinity" { + zeroVector := make([]float64, dimension) + doc[vectorField] = zeroVector + } + } + + // For ES: add tokenized fields for BM25 search + if isES { + nameTokens, _ := tokenizer.Tokenize(skill.Name) + tagsText := strings.Join(skill.Tags, " ") + tagsTokens, _ := tokenizer.Tokenize(tagsText) + + doc["name_tks"] = nameTokens + doc["tags_tks"] = tagsTokens + + if fieldConfig.Description.Enabled { + descTokens, _ := tokenizer.Tokenize(skill.Description) + doc["description_tks"] = descTokens + } + if fieldConfig.Content.Enabled { + contentTokens, _ := tokenizer.Tokenize(skill.Content) + doc["content_tks"] = contentTokens + } + } + + logger.Info("Batch: Calling IndexDocument", zap.String("indexName", indexName), zap.String("docID", docID), zap.Int("index", i)) + if err := docEngine.IndexDocument(ctx, indexName, docID, doc); err != nil { + logger.Error(fmt.Sprintf("Failed to index skill %s", skill.ID), err) + indexErrors = append(indexErrors, fmt.Sprintf("%s: %v", skill.ID, err)) + continue + } + } + + if len(indexErrors) > 0 { 
+ return fmt.Errorf("failed to index %d skill(s): %s", len(indexErrors), strings.Join(indexErrors, "; ")) + } + + return nil +} + +// DeleteSkillIndex deletes a skill's index by skill ID +// Returns nil if the document doesn't exist (idempotent delete) +func (s *SkillIndexerService) DeleteSkillIndex(ctx context.Context, tenantID, spaceID, skillID string, docEngine engine.DocEngine) error { + spaceID = normalizeSpaceID(spaceID) + indexName := getSkillIndexName(tenantID, spaceID) + // ES document ID cannot contain '/' - replace with '_' + docID := strings.ReplaceAll(skillID, "/", "_") + if err := docEngine.DeleteDocument(ctx, indexName, docID); err != nil { + // Check if it's a "not found" error - this is OK, document might not have been indexed + if strings.Contains(err.Error(), "not found") { + logger.Debug(fmt.Sprintf("Document %s not found in index %s, treating as already deleted", skillID, indexName)) + return nil + } + logger.Error(fmt.Sprintf("Failed to delete document %s from index %s", skillID, indexName), err) + return err + } + return nil +} + +// DeleteSkillByName deletes a skill's index by skill name +// Deletes all versions: both new format (skillname) and old format (skillname_x.x.x) +func (s *SkillIndexerService) DeleteSkillByName(ctx context.Context, tenantID, spaceID, skillName string, docEngine engine.DocEngine) error { + spaceID = normalizeSpaceID(spaceID) + indexName := getSkillIndexName(tenantID, spaceID) + + docID := strings.ReplaceAll(skillName, "/", "_") + if err := docEngine.DeleteDocument(ctx, indexName, docID); err != nil { + logger.Debug(fmt.Sprintf("Document %s not found in index %s", skillName, indexName)) + } + + return nil +} + +// UpdateSkillVersion updates a skill's index when version changes +// Deletes old version and indexes new version +func (s *SkillIndexerService) UpdateSkillVersion(ctx context.Context, tenantID, spaceID string, skill SkillInfo, docEngine engine.DocEngine, embdID string) error { + // Delete old version first 
(upsert behavior) + if err := s.DeleteSkillByName(ctx, tenantID, spaceID, skill.Name, docEngine); err != nil { + // Log but don't fail - the document might not exist + logger.Debug(fmt.Sprintf("No existing index to delete for skill %s", skill.Name)) + } + + // Index new version + return s.IndexSkill(ctx, tenantID, spaceID, skill, docEngine, embdID) +} + +// ReindexAll reindexes all skills for a tenant +// Increments semantic version, deletes old table, and reindexes all skills from file system +// For Infinity: if embedding model changed (different dimension), recreates the table +// Behavior: +// 1. Delete the existing table +// 2. Traverse all skill folders under the space +// 3. For each skill, get the latest version +// 4. Reindex all skills +func (s *SkillIndexerService) ReindexAll(ctx context.Context, tenantID, spaceID string, docEngine engine.DocEngine, embdID string) (map[string]interface{}, error) { + spaceID = normalizeSpaceID(spaceID) + // Get current config and increment semantic version + config, err := s.configDAO.GetOrCreate(tenantID, spaceID, embdID) + if err != nil { + return nil, fmt.Errorf("failed to get config: %w", err) + } + + // Increment semantic version (e.g., "1.0.0" -> "1.0.1" or "1.0.9" -> "1.1.0") + newVersion := incrementSemanticVersion(config.IndexVersion) + if err := s.configDAO.UpdateByTenantID(tenantID, spaceID, map[string]interface{}{ + "index_version": newVersion, + }); err != nil { + return nil, fmt.Errorf("failed to update version: %w", err) + } + + // Get new embedding dimension first (needed for index creation) + newDimension, err := s.getEmbeddingDimension(ctx, tenantID, embdID) + if err != nil { + return nil, fmt.Errorf("failed to get new embedding dimension: %w", err) + } + logger.Info(fmt.Sprintf("ReindexAll: new embedding dimension is %d", newDimension)) + + // Delete existing index and recreate with new dimension (for both ES and Infinity) + indexName := getSkillIndexName(tenantID, spaceID) + exists, _ := 
docEngine.TableExists(ctx, indexName) + if exists { + logger.Info(fmt.Sprintf("ReindexAll: deleting existing index %s", indexName)) + if err := docEngine.DropTable(ctx, indexName); err != nil { + logger.Warn(fmt.Sprintf("ReindexAll: failed to delete existing index: %v", err)) + } + } + + // Create new index with correct dimension + logger.Info(fmt.Sprintf("ReindexAll: creating new index %s with dimension %d", indexName, newDimension)) + if err := s.createIndexWithDimension(ctx, tenantID, spaceID, docEngine, embdID, newDimension); err != nil { + return nil, fmt.Errorf("failed to create index with dimension %d: %w", newDimension, err) + } + + // Get space info to find folder ID + space, err := s.spaceDAO.GetByID(spaceID) + if err != nil { + return nil, fmt.Errorf("failed to get space: %w", err) + } + if space.TenantID != tenantID { + return nil, fmt.Errorf("space not found") + } + + // Find the actual space folder ID by space name (consistent with frontend behavior) + // Frontend uses space name to find folder, not space.FolderID which may be outdated + spaceFolderID, err := s.getSpaceFolderIDByName(tenantID, space.Name) + if err != nil { + return nil, fmt.Errorf("failed to find space folder: %w", err) + } + logger.Info(fmt.Sprintf("ReindexAll: found space folder ID %s for space %s (stored FolderID was %s)", spaceFolderID, space.Name, space.FolderID)) + + // Traverse all skill folders under the space + skills, err := s.getSkillsFromFileSystem(ctx, tenantID, spaceFolderID, spaceID) + if err != nil { + return nil, fmt.Errorf("failed to get skills from file system: %w", err) + } + + logger.Info(fmt.Sprintf("ReindexAll: found %d skills to index", len(skills))) + + // Index all skills with new version using batch indexing for better performance + if len(skills) > 0 { + logger.Info(fmt.Sprintf("ReindexAll: batch indexing %d skills", len(skills))) + if err := s.BatchIndexSkills(ctx, tenantID, spaceID, skills, docEngine, embdID); err != nil { + logger.Error("ReindexAll: 
batch indexing failed", err) + return nil, fmt.Errorf("failed to batch index skills: %w", err) + } + } + + // Clean up old version documents + if err := s.cleanupOldVersions(ctx, tenantID, spaceID, newVersion, docEngine); err != nil { + logger.Error("Failed to cleanup old versions", err) + } + + result := map[string]interface{}{ + "indexed_count": len(skills), + "total_skills": len(skills), + "version": newVersion, + "failed_count": 0, + } + + return result, nil +} + +// getSkillsFromFileSystem traverses the space folder and gets all skills with their latest version +func (s *SkillIndexerService) getSkillsFromFileSystem(ctx context.Context, tenantID, spaceFolderID, spaceID string) ([]SkillInfo, error) { + var skills []SkillInfo + + // Get all skill folders under the space + skillFolders, err := s.fileDAO.ListByParentID(spaceFolderID) + if err != nil { + return nil, fmt.Errorf("failed to list skill folders: %w", err) + } + + logger.Info(fmt.Sprintf("getSkillsFromFileSystem: found %d skill folders in space %s", len(skillFolders), spaceID)) + + for _, skillFolder := range skillFolders { + if skillFolder.Type != "folder" { + continue + } + + // Get all versions of this skill + versions, err := s.fileDAO.ListByParentID(skillFolder.ID) + if err != nil { + logger.Warn(fmt.Sprintf("failed to list versions for skill %s: %v", skillFolder.Name, err)) + continue + } + + if len(versions) == 0 { + logger.Info(fmt.Sprintf("no versions found for skill %s", skillFolder.Name)) + continue + } + + // Find the latest version (highest semantic version) + latestVersion := s.findLatestVersion(versions) + if latestVersion == nil { + logger.Warn(fmt.Sprintf("no valid version found for skill %s", skillFolder.Name)) + continue + } + + // Get skill content from the latest version folder + skillInfo, err := s.getSkillContentFromFolder(ctx, tenantID, skillFolder, latestVersion, spaceID) + if err != nil { + logger.Warn(fmt.Sprintf("failed to get skill content for %s: %v", skillFolder.Name, err)) 
+ continue + } + + skills = append(skills, *skillInfo) + logger.Info(fmt.Sprintf("added skill %s version %s for indexing", skillFolder.Name, latestVersion.Name)) + } + + return skills, nil +} + +// findLatestVersion finds the latest semantic version from a list of version folders +func (s *SkillIndexerService) findLatestVersion(versions []*entity.File) *entity.File { + if len(versions) == 0 { + return nil + } + + var latest *entity.File + latestVersionNum := []int{-1, -1, -1} // major, minor, patch + + for _, v := range versions { + if v.Type != "folder" { + continue + } + + // Parse semantic version (e.g., "1.0.0") + parts := strings.Split(v.Name, ".") + if len(parts) != 3 { + // Not a valid semver, skip + continue + } + + var major, minor, patch int + fmt.Sscanf(parts[0], "%d", &major) + fmt.Sscanf(parts[1], "%d", &minor) + fmt.Sscanf(parts[2], "%d", &patch) + + // Compare versions + if major > latestVersionNum[0] || + (major == latestVersionNum[0] && minor > latestVersionNum[1]) || + (major == latestVersionNum[0] && minor == latestVersionNum[1] && patch > latestVersionNum[2]) { + latest = v + latestVersionNum = []int{major, minor, patch} + } + } + + return latest +} + +// getSkillContentFromFolder reads skill content from the version folder +func (s *SkillIndexerService) getSkillContentFromFolder(ctx context.Context, tenantID string, skillFolder, versionFolder *entity.File, spaceID string) (*SkillInfo, error) { + // Get all files in the version folder + files, err := s.fileDAO.ListByParentID(versionFolder.ID) + if err != nil { + return nil, fmt.Errorf("failed to list files in version folder: %w", err) + } + + var contentBuilder strings.Builder + var skillMdContent string + + for _, file := range files { + if file.Type == "folder" { + continue + } + + // Check if it's a text file + if !isTextFileForSkill(file.Name) { + continue + } + + // Get file content (this might need to be implemented based on your storage system) + fileContent, err := s.getFileContent(ctx, 
tenantID, file) + if err != nil { + logger.Warn(fmt.Sprintf("failed to get content for file %s: %v", file.Name, err)) + continue + } + + if len(fileContent) == 0 { + continue + } + + // Check if this is SKILL.md + if strings.ToLower(file.Name) == "skill.md" { + skillMdContent = string(fileContent) + } + + contentBuilder.WriteString(fmt.Sprintf("\n=== %s ===\n", file.Name)) + contentBuilder.Write(fileContent) + } + + // Parse SKILL.md for metadata + name, description, tags := s.parseSkillMetadata(skillMdContent, skillFolder.Name) + + // Use skill name as ID (without version suffix) + // This ensures all versions of the same skill share the same index document + skillID := name + if skillID == "" { + skillID = skillFolder.Name + } + + skillInfo := &SkillInfo{ + ID: skillID, + Name: name, + Description: description, + Tags: tags, + Content: contentBuilder.String(), + FolderID: skillFolder.ID, + } + + return skillInfo, nil +} + +// isTextFileForSkill checks if a file is a text file that should be indexed +func isTextFileForSkill(fileName string) bool { + ext := strings.ToLower(filepath.Ext(fileName)) + if ext != "" { + ext = ext[1:] // Remove leading dot + } + + textFileExtensions := map[string]bool{ + "md": true, "mdx": true, "txt": true, "json": true, "json5": true, + "yaml": true, "yml": true, "toml": true, "js": true, "cjs": true, "mjs": true, + "ts": true, "tsx": true, "jsx": true, "py": true, "sh": true, "rb": true, + "go": true, "rs": true, "swift": true, "kt": true, "java": true, "cs": true, + "cpp": true, "c": true, "h": true, "hpp": true, "sql": true, "csv": true, + "ini": true, "cfg": true, "env": true, "xml": true, "html": true, + "css": true, "scss": true, "sass": true, "svg": true, + } + + return textFileExtensions[ext] +} + +// getSpaceFolderIDByName finds the space folder ID by space name (consistent with frontend behavior) +// Frontend finds space folder by listing folders under skills folder and matching by name +func (s *SkillIndexerService) 
getSpaceFolderIDByName(tenantID, spaceName string) (string, error) { + // Get root folder + rootFolder, err := s.fileDAO.GetRootFolder(tenantID) + if err != nil { + return "", fmt.Errorf("failed to get root folder: %w", err) + } + + // Find skills folder under root + files, _, err := s.fileDAO.GetByPfID(tenantID, rootFolder.ID, 0, 0, "name", false, "") + if err != nil { + return "", fmt.Errorf("failed to list root folder contents: %w", err) + } + + var skillsFolderID string + for _, file := range files { + if file.Type == "folder" && file.Name == "skills" { + skillsFolderID = file.ID + break + } + } + + if skillsFolderID == "" { + return "", fmt.Errorf("skills folder not found for tenant %s", tenantID) + } + + // Find space folder by name under skills folder + spaceFolders, _, err := s.fileDAO.GetByPfID(tenantID, skillsFolderID, 0, 0, "name", false, "") + if err != nil { + return "", fmt.Errorf("failed to list skills folder contents: %w", err) + } + + for _, folder := range spaceFolders { + if folder.Type == "folder" && folder.Name == spaceName { + return folder.ID, nil + } + } + + return "", fmt.Errorf("space folder '%s' not found under skills folder", spaceName) +} + +// parseSkillMetadata parses SKILL.md content to extract metadata +func (s *SkillIndexerService) parseSkillMetadata(content, defaultName string) (name, description string, tags []string) { + name = defaultName + + if content == "" { + return name, "", nil + } + + // Parse YAML frontmatter + lines := strings.Split(content, "\n") + if len(lines) == 0 || strings.TrimSpace(lines[0]) != "---" { + return name, "", nil + } + + var endIndex int + found := false + for i := 1; i < len(lines); i++ { + if strings.TrimSpace(lines[i]) == "---" { + endIndex = i + found = true + break + } + } + + if !found { + return name, "", nil + } + + // Parse frontmatter lines + for i := 1; i < endIndex; i++ { + line := lines[i] + if strings.HasPrefix(line, "name:") { + name = strings.TrimSpace(strings.TrimPrefix(line, 
"name:")) + } else if strings.HasPrefix(line, "description:") { + description = strings.TrimSpace(strings.TrimPrefix(line, "description:")) + } else if strings.HasPrefix(line, "tags:") { + // Parse tags array + tagsLine := strings.TrimSpace(strings.TrimPrefix(line, "tags:")) + if strings.HasPrefix(tagsLine, "[") && strings.HasSuffix(tagsLine, "]") { + // Array format: [tag1, tag2] + tagsStr := strings.Trim(tagsLine, "[]") + tags = strings.Split(tagsStr, ",") + for i, tag := range tags { + tags[i] = strings.TrimSpace(tag) + } + } else if tagsLine != "" { + // Single tag or dash list + tags = []string{tagsLine} + } + } + } + + return name, description, tags +} + +// getFileContent retrieves the content of a file from storage +func (s *SkillIndexerService) getFileContent(ctx context.Context, tenantID string, file *entity.File) ([]byte, error) { + if file.Location == nil || *file.Location == "" { + return nil, fmt.Errorf("file location is empty") + } + + storageImpl := storage.GetStorageFactory().GetStorage() + if storageImpl == nil { + return nil, fmt.Errorf("storage not initialized") + } + + // Get file content from storage using parent folder ID as bucket (consistent with Python) + // Python: settings.STORAGE_IMPL.put(last_folder.id, location, blob) + // Go: should use file.ParentID as bucket, not tenantID + bucket := file.ParentID + if bucket == "" { + // Fallback to tenantID if ParentID is empty (should not happen) + bucket = tenantID + } + content, err := storageImpl.Get(bucket, *file.Location) + if err != nil { + return nil, fmt.Errorf("failed to get file from storage (bucket=%s, location=%s): %w", bucket, *file.Location, err) + } + + return content, nil +} + +// incrementSemanticVersion increments the patch version of a semantic version string +// Supports format: "major.minor.patch" (e.g., "1.0.0" -> "1.0.1") +// If version is empty or invalid, returns "1.0.0" +func incrementSemanticVersion(version string) string { + if version == "" { + return "1.0.0" + } + + 
parts := strings.Split(version, ".") + if len(parts) != 3 { + // Invalid format, reset to 1.0.0 + return "1.0.0" + } + + // Try to parse patch version + var major, minor, patch int + fmt.Sscanf(parts[0], "%d", &major) + fmt.Sscanf(parts[1], "%d", &minor) + fmt.Sscanf(parts[2], "%d", &patch) + + // Increment patch version + patch++ + if patch > 999 { + patch = 0 + minor++ + if minor > 999 { + minor = 0 + major++ + } + } + + return fmt.Sprintf("%d.%d.%d", major, minor, patch) +} + +// cleanupOldVersions removes documents with version less than current version +func (s *SkillIndexerService) cleanupOldVersions(ctx context.Context, tenantID, spaceID string, currentVersion string, docEngine engine.DocEngine) error { + // This is a placeholder - actual implementation would: + // 1. Search for documents where version < currentVersion (semantic version comparison) + // 2. Delete those documents + // The actual implementation depends on the search engine's query capabilities + // For now, we rely on the fact that skill_id is used as doc_id, + // so re-indexing the same skill_id will overwrite the document + return nil +} + +// InitializeIndex initializes the skill search index for a tenant +func (s *SkillIndexerService) InitializeIndex(ctx context.Context, tenantID, spaceID string, docEngine engine.DocEngine, embdID string) error { + // Check if index exists + indexName := getSkillIndexName(tenantID, spaceID) + + logger.Info("Checking skill index existence", zap.String("indexName", indexName), zap.String("tenantID", tenantID), zap.String("spaceID", spaceID)) + + exists, err := docEngine.TableExists(ctx, indexName) + if err != nil { + logger.Error("Failed to check index existence", err) + return fmt.Errorf("failed to check index existence: %w", err) + } + + if !exists { + logger.Info("Skill index does not exist, creating...", zap.String("indexName", indexName)) + return s.createIndex(ctx, tenantID, spaceID, docEngine, embdID) + } + + logger.Info("Skill search index already 
exists", zap.String("indexName", indexName)) + return nil +} + +// createIndex creates the skill index using mapping files +func (s *SkillIndexerService) createIndex(ctx context.Context, tenantID, spaceID string, docEngine engine.DocEngine, embdID string) error { + // Get embedding dimension by calling embedding API with test text + dimension, err := s.getEmbeddingDimension(ctx, tenantID, embdID) + if err != nil { + return fmt.Errorf("failed to get embedding dimension: %w", err) + } + return s.createIndexWithDimension(ctx, tenantID, spaceID, docEngine, embdID, dimension) +} + +// createIndexWithDimension creates the skill index with a specific vector dimension +func (s *SkillIndexerService) createIndexWithDimension(ctx context.Context, tenantID, spaceID string, docEngine engine.DocEngine, embdID string, dimension int) error { + indexName := getSkillIndexName(tenantID, spaceID) + + logger.Info(fmt.Sprintf("Creating skill index with dimension %d", dimension), + zap.String("indexName", indexName), + zap.String("spaceID", spaceID), + zap.Int("dimension", dimension), + zap.String("engineType", docEngine.GetType())) + + // For Infinity: check if table exists and needs recreation (dimension mismatch) + if docEngine.GetType() == "infinity" { + exists, err := docEngine.TableExists(ctx, indexName) + if err != nil { + logger.Warn(fmt.Sprintf("Error checking if index exists: %v", err)) + } + if exists { + logger.Info(fmt.Sprintf("Index exists, deleting for recreation with dimension %d", dimension), + zap.String("indexName", indexName)) + if err := docEngine.DropTable(ctx, indexName); err != nil { + logger.Warn(fmt.Sprintf("Failed to delete existing index: %v", err)) + } + } + } + + // Use the doc engine's CreateDataset method with skill-specific mapping + // The mapping file is loaded from conf/skill_es_mapping.json or conf/skill_infinity_mapping.json + err := docEngine.CreateDataset(ctx, indexName, "skill", dimension, "") + if err != nil { + logger.Error("Failed to create 
skill index", err) + return err + } + logger.Info("Successfully created skill index", zap.String("indexName", indexName)) + return nil +} + +// EnsureIndex ensures the skill index exists for a tenant +func (s *SkillIndexerService) EnsureIndex(ctx context.Context, tenantID, spaceID string, docEngine engine.DocEngine, embdID string) error { + return s.InitializeIndex(ctx, tenantID, spaceID, docEngine, embdID) +} + +// generateEmbedding generates embedding for text using the specified model +func (s *SkillIndexerService) generateEmbedding(ctx context.Context, text, embdID, tenantID string) ([]float64, error) { + if s.modelProvider == nil { + return nil, fmt.Errorf("model provider not set") + } + + if embdID == "" { + return nil, fmt.Errorf("embedding model ID not configured") + } + + embeddingModel, err := s.modelProvider.GetEmbeddingModel(tenantID, embdID) + if err != nil { + return nil, fmt.Errorf("failed to get embedding model: %w", err) + } + + // Truncate text to prevent exceeding model's max input length + maxLen := embeddingModel.MaxTokens + if maxLen <= 0 { + maxLen = defaultMaxLength + } + truncatedText := truncate(text, maxLen-10) + + vectors, err := embeddingModel.ModelDriver.Encode(embeddingModel.ModelName, []string{truncatedText}, embeddingModel.APIConfig, nil) + if err != nil { + return nil, fmt.Errorf("failed to encode text: %w", err) + } + if len(vectors) == 0 { + return nil, fmt.Errorf("embedding returned empty result") + } + + return vectors[0], nil +} + +// generateEmbeddings generates embeddings for multiple texts in batch +// This is more efficient than calling generateEmbedding individually +func (s *SkillIndexerService) generateEmbeddings(ctx context.Context, texts []string, embdID, tenantID string) ([][]float64, error) { + logger.Info(fmt.Sprintf("generateEmbeddings called: texts=%d, embdID=%s, tenantID=%s", len(texts), embdID, tenantID)) + + if s.modelProvider == nil { + return nil, fmt.Errorf("model provider not set") + } + + if embdID == "" 
// truncate returns at most maxLen characters (runes, not bytes) of text.
// A non-positive maxLen disables truncation and returns text unchanged.
// Mirrors Python's truncate helper in rag/llm/embedding_model.py.
func truncate(text string, maxLen int) string {
	if maxLen > 0 {
		if chars := []rune(text); len(chars) > maxLen {
			return string(chars[:maxLen])
		}
	}
	return text
}
if s.modelProvider == nil { + return 0, fmt.Errorf("model provider not set") + } + + if embdID == "" { + return 0, fmt.Errorf("embedding model ID not configured") + } + + embeddingModel, err := s.modelProvider.GetEmbeddingModel(tenantID, embdID) + if err != nil { + return 0, fmt.Errorf("failed to get embedding model: %w", err) + } + + // Use simple test text like Python does: embedding_model.encode(["ok"]) + testText := "ok" + vectors, err := embeddingModel.ModelDriver.Encode(embeddingModel.ModelName, []string{testText}, embeddingModel.APIConfig, nil) + if err != nil { + return 0, fmt.Errorf("failed to encode test text: %w", err) + } + + if len(vectors) == 0 || len(vectors[0]) == 0 { + return 0, fmt.Errorf("embedding returned empty vector") + } + + dimension := len(vectors[0]) + logger.Info(fmt.Sprintf("Got embedding dimension from API: %d", dimension)) + return dimension, nil +} diff --git a/internal/service/skill_search.go b/internal/service/skill_search.go new file mode 100644 index 0000000000..b96f076ca0 --- /dev/null +++ b/internal/service/skill_search.go @@ -0,0 +1,1039 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package service + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "ragflow/internal/common" + "ragflow/internal/dao" + "ragflow/internal/engine" + "ragflow/internal/engine/types" + "ragflow/internal/entity" + "ragflow/internal/logger" + "ragflow/internal/utility" + "strings" + + "github.com/google/uuid" + "go.uber.org/zap" +) + +// SkillSearchService handles business logic for skill search operations +type SkillSearchService struct { + configDAO *dao.SkillSearchConfigDAO + modelProvider *ModelProviderService +} + +// NewSkillSearchService creates a new SkillSearchService instance +func NewSkillSearchService() *SkillSearchService { + return &SkillSearchService{ + configDAO: dao.NewSkillSearchConfigDAO(), + modelProvider: NewModelProviderService(), + } +} + +// SetModelProvider sets the model provider for embedding generation +func (s *SkillSearchService) SetModelProvider(provider *ModelProviderService) { + s.modelProvider = provider +} + +// GetConfigRequest represents the request to get skill search config +type GetConfigRequest struct { + TenantID string `json:"tenant_id" binding:"required"` + SpaceID string `json:"space_id"` +} + +// GetConfig retrieves the search configuration for a tenant +func (s *SkillSearchService) GetConfig(tenantID, spaceID, embdID string) (map[string]interface{}, common.ErrorCode, error) { + spaceID = normalizeSpaceID(spaceID) + var config *entity.SkillSearchConfig + var err error + + if embdID == "" { + // If embd_id is not provided, get the latest config for the tenant + // Prioritize configs with non-empty embd_id (user-saved configs) + config, err = s.configDAO.GetLatestByTenantID(tenantID, spaceID) + if err != nil { + // No config found, return default config + config = &entity.SkillSearchConfig{ + TenantID: tenantID, + SpaceID: spaceID, + EmbdID: "", + VectorSimilarityWeight: 0.3, + SimilarityThreshold: 0.2, + FieldConfig: map[string]interface{}{ + "name": map[string]interface{}{"enabled": true, 
"weight": 3.0}, + "tags": map[string]interface{}{"enabled": true, "weight": 2.0}, + "description": map[string]interface{}{"enabled": true, "weight": 1.0}, + "content": map[string]interface{}{"enabled": false, "weight": 0.5}, + }, + TopK: 10, + } + } + } else { + config, err = s.configDAO.GetByTenantAndEmbdID(tenantID, spaceID, embdID) + if err != nil { + // Config not found, create default one + config, err = s.configDAO.GetOrCreate(tenantID, spaceID, embdID) + if err != nil { + return nil, common.CodeOperatingError, fmt.Errorf("failed to get or create config: %w", err) + } + } + } + + return config.ToMap(), common.CodeSuccess, nil +} + +// UpdateConfigRequest represents the request to update skill search config +type UpdateConfigRequest struct { + TenantID string `json:"tenant_id"` + SpaceID string `json:"space_id"` + EmbdID string `json:"embd_id" binding:"required"` + VectorSimilarityWeight float64 `json:"vector_similarity_weight"` + SimilarityThreshold float64 `json:"similarity_threshold"` + FieldConfig entity.FieldConfig `json:"field_config"` + RerankID string `json:"rerank_id"` + TopK int64 `json:"top_k"` +} + +// UpdateConfig updates the search configuration for a tenant +func (s *SkillSearchService) UpdateConfig(req *UpdateConfigRequest) (map[string]interface{}, common.ErrorCode, error) { + req.SpaceID = normalizeSpaceID(req.SpaceID) + // Validate vector_similarity_weight + if req.VectorSimilarityWeight < 0 || req.VectorSimilarityWeight > 1 { + return nil, common.CodeDataError, errors.New("vector_similarity_weight must be between 0 and 1") + } + + // Validate similarity_threshold + if req.SimilarityThreshold < 0 || req.SimilarityThreshold > 1 { + return nil, common.CodeDataError, errors.New("similarity_threshold must be between 0 and 1") + } + + // Validate top_k + if req.TopK <= 0 { + return nil, common.CodeDataError, errors.New("top_k must be positive") + } + + // Get or create config for this tenant+space (regardless of embd_id) + // Each tenant+space 
should have only ONE config, switching embd_id updates the existing config + config, err := s.configDAO.GetLatestByTenantID(req.TenantID, req.SpaceID) + if err != nil { + // No config exists, create a new one + config, err = s.configDAO.CreateWithTenantSpace(req.TenantID, req.SpaceID, req.EmbdID) + if err != nil { + return nil, common.CodeOperatingError, fmt.Errorf("failed to create config: %w", err) + } + } else { + // Config exists, clean up any other active records for this tenant+space + // to ensure only one active config per tenant+space + if err := s.configDAO.DeleteAllByTenantSpaceExceptID(req.TenantID, req.SpaceID, config.ID); err != nil { + logger.Warn("Failed to clean up duplicate configs", zap.Error(err)) + } + } + + fieldConfigMap := entity.JSONMap{ + "name": map[string]interface{}{ + "enabled": req.FieldConfig.Name.Enabled, + "weight": req.FieldConfig.Name.Weight, + }, + "tags": map[string]interface{}{ + "enabled": req.FieldConfig.Tags.Enabled, + "weight": req.FieldConfig.Tags.Weight, + }, + "description": map[string]interface{}{ + "enabled": req.FieldConfig.Description.Enabled, + "weight": req.FieldConfig.Description.Weight, + }, + "content": map[string]interface{}{ + "enabled": req.FieldConfig.Content.Enabled, + "weight": req.FieldConfig.Content.Weight, + }, + } + + updates := map[string]interface{}{ + "embd_id": req.EmbdID, // Always update embd_id to the new value + "vector_similarity_weight": req.VectorSimilarityWeight, + "similarity_threshold": req.SimilarityThreshold, + "field_config": fieldConfigMap, + "top_k": req.TopK, + } + + if req.RerankID != "" { + updates["rerank_id"] = req.RerankID + } + + // Update by config ID to ensure we update the correct record + if err := s.configDAO.Update(config.ID, updates); err != nil { + return nil, common.CodeOperatingError, fmt.Errorf("failed to update config: %w", err) + } + + // Refresh config + config, err = s.configDAO.GetByID(config.ID) + if err != nil { + return nil, common.CodeOperatingError, 
fmt.Errorf("failed to refresh config: %w", err) + } + + return config.ToMap(), common.CodeSuccess, nil +} + +// SearchRequest represents the skill search request +type SearchRequest struct { + TenantID string `json:"tenant_id"` // Set from user context, not from request body + SpaceID string `json:"space_id"` + Query string `json:"query"` // Empty query lists all skills (match_all) + Page int `json:"page"` + PageSize int `json:"page_size"` + SortBy string `json:"sort_by"` // Sort field: "name", "update_time", "create_time", "relevance" + SortOrder string `json:"sort_order"` // "asc" or "desc", default "desc" for time fields, "asc" for name +} + +// SearchResponse represents the skill search response +type SearchResponse struct { + Skills []entity.SkillSearchResult `json:"skills"` // Changed from "results" to match frontend + Total int64 `json:"total"` + Query string `json:"query"` + SearchType string `json:"search_type"` // "keyword", "vector", "hybrid" +} + +// Search performs skill search with the configured strategy +func (s *SkillSearchService) Search(ctx context.Context, req *SearchRequest, docEngine engine.DocEngine) (*SearchResponse, common.ErrorCode, error) { + req.SpaceID = normalizeSpaceID(req.SpaceID) + if req.Page <= 0 { + req.Page = 1 + } + if req.PageSize <= 0 { + req.PageSize = 10 + } + + // Check if index exists before searching + indexName := getSkillIndexName(req.TenantID, req.SpaceID) + logger.Debug("Searching skills", zap.String("indexName", indexName), zap.String("query", req.Query)) + + indexExists, err := docEngine.TableExists(ctx, indexName) + if err != nil { + logger.Error("Failed to check index existence", err) + return nil, common.CodeOperatingError, fmt.Errorf("failed to check index existence: %w", err) + } + logger.Debug("Index existence check", zap.String("indexName", indexName), zap.Bool("exists", indexExists)) + if !indexExists { + // Return empty result if index doesn't exist (no skills indexed yet) + // This allows listing skills 
via file system API as fallback + logger.Warn("Skill index does not exist, returning empty result", zap.String("indexName", indexName), zap.String("tenantID", req.TenantID), zap.String("spaceID", req.SpaceID)) + return &SearchResponse{ + Skills: []entity.SkillSearchResult{}, + Total: 0, + Query: req.Query, + SearchType: "keyword", + }, common.CodeSuccess, nil + } + + // Get config for search strategy + // Use GetLatestByTenantID to prioritize configs with non-empty embd_id + config, err := s.configDAO.GetLatestByTenantID(req.TenantID, req.SpaceID) + if err != nil { + // Use default config if not found + config = &entity.SkillSearchConfig{ + SpaceID: req.SpaceID, + VectorSimilarityWeight: 0.3, + SimilarityThreshold: 0.2, + FieldConfig: map[string]interface{}{ + "name": map[string]interface{}{"enabled": true, "weight": 3.0}, + "tags": map[string]interface{}{"enabled": true, "weight": 2.0}, + "description": map[string]interface{}{"enabled": true, "weight": 1.0}, + "content": map[string]interface{}{"enabled": false, "weight": 0.5}, + }, + TopK: 10, + } + } + + var results []entity.SkillSearchResult + searchType := "hybrid" + + // Check if embedding model is configured + hasEmbdConfig := config.EmbdID != "" + + switch { + case config.VectorSimilarityWeight == 0 || !hasEmbdConfig || req.Query == "": + // Pure keyword search (BM25) + // Also fallback to keyword search if no embedding model configured + // Or if query is empty (list all) + searchType = "keyword" + // For empty query (list all), pass threshold=0 to disable score filtering + threshold := config.SimilarityThreshold + if req.Query == "" { + threshold = 0 // Disable threshold for list all + } + results, err = s.keywordSearch(ctx, docEngine, indexName, req.Query, config, threshold, req.SortBy, req.SortOrder) + case config.VectorSimilarityWeight == 1 && req.Query != "": + // Pure vector search (skip if query is empty) + searchType = "vector" + results, err = s.vectorSearch(ctx, docEngine, indexName, req.Query, 
config, req.TenantID) + if err != nil { + logger.Warn("Vector search failed, falling back to keyword search", zap.Error(err)) + searchType = "keyword" + results, err = s.keywordSearch(ctx, docEngine, indexName, req.Query, config, config.SimilarityThreshold, req.SortBy, req.SortOrder) + } + default: + // Hybrid search (fallback to keyword if query is empty) + if req.Query == "" { + // Empty query: list all, disable threshold + results, err = s.keywordSearch(ctx, docEngine, indexName, req.Query, config, 0, req.SortBy, req.SortOrder) + } else { + results, err = s.hybridSearch(ctx, docEngine, indexName, req.Query, config, req.TenantID) + } + } + + if err != nil { + logger.Error("Skill search failed", err) + return nil, common.CodeOperatingError, fmt.Errorf("search failed: %w", err) + } + + // Apply pagination + total := int64(len(results)) + start := (req.Page - 1) * req.PageSize + end := start + req.PageSize + if start > int(total) { + start = int(total) + } + if end > int(total) { + end = int(total) + } + paginatedResults := results[start:end] + + return &SearchResponse{ + Skills: paginatedResults, + Total: total, + Query: req.Query, + SearchType: searchType, + }, common.CodeSuccess, nil +} + +// keywordSearch performs pure keyword search using BM25 +func (s *SkillSearchService) keywordSearch(ctx context.Context, docEngine engine.DocEngine, indexName, query string, config *entity.SkillSearchConfig, threshold float64, sortBy, sortOrder string) ([]entity.SkillSearchResult, error) { + // Build order_by for sorting + orderBy := buildOrderByExpr(sortBy, sortOrder, query == "") + + // Build MatchTextExpr for unified engine interface + // Note: MatchingText must be plain text, NOT ES query_string syntax. + // Infinity's MatchText expects plain text and tokenizes internally. + // ES's buildSkillKeywordQuery wraps it in a query_string query. 
+ // Field names: Infinity uses raw names (name, tags, etc.), + // ES uses _tks suffix handled internally by elasticsearch/search.go + matchExpr := &types.MatchTextExpr{ + MatchingText: query, + // Skill index uses single tokenizer (rag-coarse) per field, no _sm variants needed. + // Infinity: convertMatchingField maps these to column@index_name format + // (e.g., name→name@ft_name_rag_coarse) + // ES: buildSkillKeywordQuery uses its own field list internally + Fields: []string{ + "name^10", + "tags^5", + "description^3", + "content^1", + }, + TopN: 100, + } + + // Use unified search request with analyzed query + searchReq := &types.SearchRequest{ + IndexNames: []string{indexName}, + Offset: 0, + Limit: 100, + MatchExprs: []interface{}{matchExpr}, + OrderBy: orderBy, + } + + searchResult, err := docEngine.Search(ctx, searchReq) + if err != nil { + return nil, err + } + + // Convert chunks to SkillSearchResult + return s.convertChunksToResults(searchResult.Chunks, threshold), nil +} + +// vectorSearch performs pure vector search +func (s *SkillSearchService) vectorSearch(ctx context.Context, docEngine engine.DocEngine, indexName, query string, config *entity.SkillSearchConfig, tenantID string) ([]entity.SkillSearchResult, error) { + // Get embedding for query + vector, err := s.getEmbedding(ctx, query, config.EmbdID, tenantID) + if err != nil { + logger.Warn("Vector search: failed to get embedding, will fallback to keyword search", + zap.String("embdID", config.EmbdID), + zap.Error(err)) + return nil, fmt.Errorf("failed to get embedding: %w", err) + } + logger.Debug("Vector search: successfully got embedding", + zap.String("embdID", config.EmbdID), + zap.Int("dimension", len(vector))) + + // Analyze query for potential keyword filtering + matchExpr := &types.MatchTextExpr{ + MatchingText: query, + Fields: []string{ + "name^10", + "tags^5", + "description^3", + "content^1", + }, + TopN: int(config.TopK), + } + + // Build MatchDenseExpr for vector search + 
vectorColumnName := fmt.Sprintf("q_%d_vec", len(vector)) + matchDense := &types.MatchDenseExpr{ + VectorColumnName: vectorColumnName, + EmbeddingData: vector, + EmbeddingDataType: "float", + DistanceType: "cosine", + TopN: int(config.TopK), + ExtraOptions: map[string]interface{}{ + "similarity": config.SimilarityThreshold, + }, + } + + // Use unified search request + searchReq := &types.SearchRequest{ + IndexNames: []string{indexName}, + Offset: 0, + Limit: 100, + MatchExprs: []interface{}{matchExpr, matchDense}, + } + + searchResult, err := docEngine.Search(ctx, searchReq) + if err != nil { + logger.Warn("Vector search: search execution failed", + zap.String("indexName", indexName), + zap.Error(err)) + return nil, err + } + + results := s.convertChunksToResults(searchResult.Chunks, config.SimilarityThreshold) + logger.Debug("Vector search: completed", + zap.Int("totalChunks", len(searchResult.Chunks)), + zap.Int("filteredResults", len(results))) + + // If no results, return error to trigger fallback + if len(results) == 0 { + logger.Info("Vector search: no results found, will fallback to keyword search", + zap.String("indexName", indexName), + zap.String("query", query)) + return nil, fmt.Errorf("vector search returned no results") + } + + return results, nil +} + +// hybridSearch performs hybrid search combining BM25 and vector search +func (s *SkillSearchService) hybridSearch(ctx context.Context, docEngine engine.DocEngine, indexName, query string, config *entity.SkillSearchConfig, tenantID string) ([]entity.SkillSearchResult, error) { + // Analyze query first: tokenize and extract keywords + matchExpr := &types.MatchTextExpr{ + MatchingText: query, + Fields: []string{ + "name^10", + "tags^5", + "description^3", + "content^1", + }, + TopN: int(config.TopK), + } + + // Get embedding for query + vector, err := s.getEmbedding(ctx, query, config.EmbdID, tenantID) + if err != nil { + logger.Warn("Hybrid search: failed to get embedding, falling back to keyword 
search", + zap.String("embdID", config.EmbdID), + zap.Error(err)) + // Fallback to keyword search with analyzed query + return s.executeKeywordSearch(ctx, docEngine, indexName, query, matchExpr, config) + } + logger.Debug("Hybrid search: successfully got embedding", + zap.String("embdID", config.EmbdID), + zap.Int("dimension", len(vector))) + + // Build MatchDenseExpr for hybrid search + vectorColumnName := fmt.Sprintf("q_%d_vec", len(vector)) + matchDense := &types.MatchDenseExpr{ + VectorColumnName: vectorColumnName, + EmbeddingData: vector, + EmbeddingDataType: "float", + DistanceType: "cosine", + TopN: int(config.TopK), + ExtraOptions: map[string]interface{}{ + "similarity": config.SimilarityThreshold, + "text_weight": 1.0 - config.VectorSimilarityWeight, + }, + } + + // Build FusionExpr for hybrid search (required by Infinity to combine text + vector scores) + textWeight := 1.0 - config.VectorSimilarityWeight + vectorWeight := config.VectorSimilarityWeight + fusionExpr := &types.FusionExpr{ + Method: "weighted_sum", + TopN: int(config.TopK), + FusionParams: map[string]interface{}{"weights": fmt.Sprintf("%.2f,%.2f", textWeight, vectorWeight)}, + } + + // Use unified search request for hybrid search with analyzed query + searchReq := &types.SearchRequest{ + IndexNames: []string{indexName}, + Offset: 0, + Limit: 100, + MatchExprs: []interface{}{matchExpr, matchDense, fusionExpr}, + } + + searchResult, err := docEngine.Search(ctx, searchReq) + if err != nil { + logger.Warn("Hybrid search: search execution failed, falling back to keyword search", + zap.String("indexName", indexName), + zap.Error(err)) + return s.executeKeywordSearch(ctx, docEngine, indexName, query, matchExpr, config) + } + + results := s.convertChunksToResults(searchResult.Chunks, config.SimilarityThreshold) + logger.Debug("Hybrid search completed", + zap.Int("totalChunks", len(searchResult.Chunks)), + zap.Int("filteredResults", len(results))) + + // If no results, fallback to keyword search + if 
len(results) == 0 { + logger.Info("Hybrid search: no results found, falling back to keyword search", + zap.String("indexName", indexName), + zap.String("query", query)) + return s.executeKeywordSearch(ctx, docEngine, indexName, query, matchExpr, config) + } + + return results, nil +} + +// executeKeywordSearch executes a keyword search (used for fallback) +func (s *SkillSearchService) executeKeywordSearch(ctx context.Context, docEngine engine.DocEngine, indexName, query string, matchExpr *types.MatchTextExpr, config *entity.SkillSearchConfig) ([]entity.SkillSearchResult, error) { + logger.Debug("Executing fallback keyword search", + zap.String("indexName", indexName), + zap.String("query", query)) + + searchReq := &types.SearchRequest{ + IndexNames: []string{indexName}, + Offset: 0, + Limit: 100, + MatchExprs: []interface{}{matchExpr}, + } + + searchResult, err := docEngine.Search(ctx, searchReq) + if err != nil { + logger.Error("Keyword search fallback failed", err) + return nil, err + } + + results := s.convertChunksToResults(searchResult.Chunks, config.SimilarityThreshold) + logger.Debug("Keyword search fallback completed", + zap.Int("totalChunks", len(searchResult.Chunks)), + zap.Int("results", len(results))) + + return results, nil +} + +// convertChunksToResults converts search chunks to SkillSearchResult +// Deduplicates by skill name, keeping only the highest scored result for each skill +func (s *SkillSearchService) convertChunksToResults(chunks []map[string]interface{}, threshold float64) []entity.SkillSearchResult { + // Use a map to deduplicate by skill name, keeping the highest scored version + skillMap := make(map[string]entity.SkillSearchResult) + + for _, chunk := range chunks { + // Get score + score := 0.0 + if scoreVal, ok := chunk["_score"].(float64); ok { + score = scoreVal + } + + // Extract BM25 and vector scores from Infinity columns + // Infinity returns "SCORE" for fulltext match and "SIMILARITY" for vector match + // Note: 
SCORE/SIMILARITY may be float32 or float64 depending on Infinity version + bm25Score := 0.0 + if scoreVal, ok := chunk["SCORE"]; ok { + if f, ok := utility.ToFloat64(scoreVal); ok { + bm25Score = f + } + } + vectorScore := 0.0 + if simVal, ok := chunk["SIMILARITY"]; ok { + if f, ok := utility.ToFloat64(simVal); ok { + vectorScore = f + } + } + // If _score is set but individual scores are 0, _score IS the BM25 score + if score > 0 && bm25Score == 0 && vectorScore == 0 { + bm25Score = score + } + + // Filter by threshold + if score < threshold { + continue + } + + // Extract fields + skillID := getString(chunk, "skill_id") + folderID := getString(chunk, "folder_id") + name := getString(chunk, "name") + description := getString(chunk, "description") + + // Extract tags (Infinity stores as comma-separated string, ES may return as string too) + var tags []string + if tagsVal, ok := chunk["tags"].([]interface{}); ok { + for _, tag := range tagsVal { + if tagStr, ok := tag.(string); ok { + tags = append(tags, tagStr) + } + } + } else if tagsStr, ok := chunk["tags"].(string); ok && tagsStr != "" { + for _, tag := range strings.Split(tagsStr, ",") { + tag = strings.TrimSpace(tag) + if tag != "" { + tags = append(tags, tag) + } + } + } + + // Use skill name as the deduplication key (skillID may contain version suffix) + skillKey := name + if skillKey == "" { + skillKey = skillID + } + + // Extract create_time + var createTime int64 + if ctVal, ok := chunk["create_time"].(float64); ok { + createTime = int64(ctVal) + } else if ctVal, ok := chunk["create_time"].(int64); ok { + createTime = ctVal + } + + // Extract version + version := getString(chunk, "version") + + result := entity.SkillSearchResult{ + SkillID: skillID, + FolderID: folderID, + Name: name, + Description: description, + Tags: tags, + Score: score, + BM25Score: bm25Score, + VectorScore: vectorScore, + CreateTime: createTime, + Version: version, + } + + // Keep only the highest scored result for each skill + if 
existing, ok := skillMap[skillKey]; !ok || score > existing.Score { + skillMap[skillKey] = result + } + } + + // Convert map to slice + var results []entity.SkillSearchResult + for _, result := range skillMap { + results = append(results, result) + } + + // Sort by score descending + sortResults(results) + + return results +} + +// getEmbedding generates embedding for text using the specified model +func (s *SkillSearchService) getEmbedding(ctx context.Context, text, embdID, tenantID string) ([]float64, error) { + if s.modelProvider == nil { + return nil, fmt.Errorf("model provider not set") + } + + if embdID == "" { + return nil, fmt.Errorf("embedding model ID not configured") + } + + embeddingModel, err := s.modelProvider.GetEmbeddingModel(tenantID, embdID) + if err != nil { + return nil, fmt.Errorf("failed to get embedding model: %w", err) + } + + // Truncate text to prevent exceeding model's max input length + maxLen := embeddingModel.MaxTokens + if maxLen <= 0 { + maxLen = defaultMaxLength + } + truncatedText := truncate(text, maxLen-10) + + vectors, err := embeddingModel.ModelDriver.Encode(embeddingModel.ModelName, []string{truncatedText}, embeddingModel.APIConfig, nil) + if err != nil { + return nil, fmt.Errorf("failed to encode query: %w", err) + } + if len(vectors) == 0 { + return nil, fmt.Errorf("embedding returned empty result") + } + + return vectors[0], nil +} + +// Helper functions +func getSkillIndexName(tenantID, spaceID string) string { + spaceID = normalizeSpaceID(spaceID) + spaceID = strings.ToLower(spaceID) + replacer := strings.NewReplacer("-", "_", "/", "_", "\\", "_", " ", "_", ".", "_", ":", "_") + sanitizedSpaceID := replacer.Replace(spaceID) + + // Generate unique, deterministic suffix from full IDs to avoid collisions + // Use SHA-256 hash of the combined tenantID and sanitizedSpaceID + hash := sha256.Sum256([]byte(tenantID + "_" + sanitizedSpaceID)) + hashStr := hex.EncodeToString(hash[:])[:16] // Take first 16 hex chars (64-bit entropy) 
+ + // Use full IDs if they fit within reasonable length, otherwise use hash to ensure uniqueness + const maxIDLen = 32 // Maximum length for each ID component + uniqueTenant := tenantID + if len(tenantID) > maxIDLen { + uniqueTenant = tenantID[:maxIDLen] + "_" + hashStr[:8] + } + uniqueSpace := sanitizedSpaceID + if len(sanitizedSpaceID) > maxIDLen { + uniqueSpace = sanitizedSpaceID[:maxIDLen] + "_" + hashStr[8:16] + } + + return fmt.Sprintf("skill_%s_%s", uniqueTenant, uniqueSpace) +} + +func normalizeSpaceID(spaceID string) string { + spaceID = strings.TrimSpace(spaceID) + if spaceID == "" { + return "default" + } + return spaceID +} + + + +func getString(m map[string]interface{}, key string) string { + if v, ok := m[key].(string); ok { + return v + } + return "" +} + +func sortResults(results []entity.SkillSearchResult) { + // Simple bubble sort for now, could use sort.Slice + for i := 0; i < len(results); i++ { + for j := i + 1; j < len(results); j++ { + if results[j].Score > results[i].Score { + results[i], results[j] = results[j], results[i] + } + } + } +} + +// GenerateID generates a unique ID +func generateID() string { + return strings.ReplaceAll(uuid.New().String(), "-", "")[:32] +} + +// CalculateContentHash calculates SHA256 hash of skill content +func CalculateContentHash(name, description string, tags []string, content string) string { + h := sha256.New() + h.Write([]byte(name)) + h.Write([]byte(description)) + for _, tag := range tags { + h.Write([]byte(tag)) + } + h.Write([]byte(content)) + return hex.EncodeToString(h.Sum(nil)) +} + +// BuildVectorText builds the text for vector generation +func BuildVectorText(name, description string, tags []string, content string, fieldConfig entity.FieldConfig) string { + var parts []string + + if fieldConfig.Name.Enabled && name != "" { + parts = append(parts, name) + } + if fieldConfig.Tags.Enabled && len(tags) > 0 { + parts = append(parts, strings.Join(tags, " ")) + } + if fieldConfig.Description.Enabled && 
description != "" { + parts = append(parts, description) + } + if fieldConfig.Content.Enabled && content != "" { + parts = append(parts, content) + } + + return strings.Join(parts, "\n\n") +} + +// analyzeQuery analyzes the search query and extracts keywords +// Similar to Python's FulltextQueryer.question method +func (s *SkillSearchService) analyzeQuery(query string) (matchText string, keywords []string) { + if query == "" { + return "", nil + } + + // Clean and normalize query + cleaned := s.cleanQueryText(query) + + // Extract keywords by tokenizing + keywords = s.tokenize(cleaned) + + // Build match text for ES query_string + // Similar to Python's query building logic + matchText = s.buildMatchText(cleaned, keywords) + + return matchText, keywords +} + +// cleanQueryText cleans and normalizes query text +func (s *SkillSearchService) cleanQueryText(text string) string { + // Convert to lowercase + text = strings.ToLower(text) + + // Replace special characters with spaces + // Similar to Python: re.sub(r"[ :|\r\n\t,,。??/`!!&^%%()\[\]{}<>]+", " ", text) + specialChars := []string{ + ":", "|", "\r", "\n", "\t", ",", ",", "。", "?", "?", "/", "`", + "!", "!", "&", "^", "%", "(", ")", "[", "]", "{", "}", "<", ">", + } + for _, char := range specialChars { + text = strings.ReplaceAll(text, char, " ") + } + + // Remove extra spaces + fields := strings.Fields(text) + return strings.Join(fields, " ") +} + +// tokenize splits text into tokens/keywords +func (s *SkillSearchService) tokenize(text string) []string { + if text == "" { + return nil + } + + // Simple tokenization by splitting on whitespace + // For Chinese text, this keeps characters together + fields := strings.Fields(text) + + // Remove duplicates and empty strings + seen := make(map[string]bool) + var keywords []string + for _, field := range fields { + field = strings.TrimSpace(field) + if field == "" || seen[field] { + continue + } + seen[field] = true + keywords = append(keywords, field) + + // For 
longer tokens, also add sub-tokens (for Chinese fine-grained tokenization) + if len([]rune(field)) > 2 { + runes := []rune(field) + for i := 0; i < len(runes)-1; i++ { + bigram := string(runes[i : i+2]) + if !seen[bigram] { + seen[bigram] = true + keywords = append(keywords, bigram) + } + } + } + } + + // Limit keywords to avoid too many + if len(keywords) > 32 { + keywords = keywords[:32] + } + + return keywords +} + +// buildMatchText builds the match text for ES query_string +// Similar to Python's FulltextQueryer.question output +func (s *SkillSearchService) buildMatchText(originalText string, keywords []string) string { + if len(keywords) == 0 { + return originalText + } + + // Build boosted query for keywords + // Similar to Python: "(keyword1^weight1 keyword2^weight2 ...)" + var parts []string + + // Add the original text with high boost + if originalText != "" { + parts = append(parts, fmt.Sprintf("(\"%s\")^2.0", originalText)) + } + + // Add individual keywords with decreasing weights + for i, keyword := range keywords { + if keyword == "" { + continue + } + // First few keywords get higher weight + weight := 1.0 + if i < 3 { + weight = 1.5 + } else if i < 6 { + weight = 1.2 + } + + // Escape special characters in keyword + escaped := s.escapeQueryString(keyword) + parts = append(parts, fmt.Sprintf("(%s)^%.1f", escaped, weight)) + } + + // Join with OR operator + return strings.Join(parts, " OR ") +} + +// escapeQueryString escapes special characters for ES query_string +func (s *SkillSearchService) escapeQueryString(text string) string { + specialChars := []string{"\\", "+", "-", "=", "&&", "||", ">", "<", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", ":", "/"} + result := text + for _, char := range specialChars { + result = strings.ReplaceAll(result, char, "\\"+char) + } + return result +} + +// SkillInfo represents skill information for indexing +type SkillInfo struct { + ID string `json:"id"` + FolderID string `json:"folder_id"` // File 
system folder ID for retrieving files + Name string `json:"name"` + Description string `json:"description"` + Tags []string `json:"tags"` + Content string `json:"content"` + Version string `json:"version"` // Skill version (e.g., "1.0.0") +} + +// IndexSkillsRequest represents the request to index skills +type IndexSkillsRequest struct { + TenantID string `json:"tenant_id" binding:"required"` + Skills []SkillInfo `json:"skills" binding:"required"` +} + +// ReindexRequest represents the request to reindex all skills +type ReindexRequest struct { + TenantID string `json:"tenant_id" binding:"required"` + SpaceID string `json:"space_id" binding:"required"` + EmbdID string `json:"embd_id"` // Optional, will use config's embd_id if empty +} + +// buildOrderBy builds the order_by string for sorting +// For empty queries (list all), default sort is by update_time desc +// For search queries, default sort is by relevance (score) +func (s *SkillSearchService) buildOrderBy(sortBy, sortOrder string, isEmptyQuery bool) string { + // Normalize sort_by + if sortBy == "" { + if isEmptyQuery { + sortBy = "update_time" + } else { + return "" // Use default relevance sorting for search + } + } + + // Normalize sort_order + order := strings.ToLower(sortOrder) + if order != "asc" && order != "desc" { + // Default order: desc for time fields, asc for name + if sortBy == "name" { + order = "asc" + } else { + order = "desc" + } + } + + // Map frontend field names to backend field names + fieldMapping := map[string]string{ + "name": "name", + "update_time": "update_time", + "create_time": "create_time", + "updateTime": "update_time", + "createTime": "create_time", + "relevance": "", // Empty means sort by score/relevance + "updated_at": "update_time", + "created_at": "create_time", + } + + backendField, ok := fieldMapping[sortBy] + if !ok { + backendField = sortBy + } + + if backendField == "" { + return "" // Relevance sorting + } + + return backendField + " " + order +} + +// 
buildOrderByExpr converts sort parameters to types.OrderByExpr for the unified engine interface +func buildOrderByExpr(sortBy, sortOrder string, isEmptyQuery bool) *types.OrderByExpr { + // Normalize sort_by + if sortBy == "" { + if isEmptyQuery { + sortBy = "update_time" + } else { + return nil // Use default relevance sorting for search + } + } + + // Normalize sort_order + order := strings.ToLower(sortOrder) + if order != "asc" && order != "desc" { + if sortBy == "name" { + order = "asc" + } else { + order = "desc" + } + } + + // Map frontend field names to backend field names + fieldMapping := map[string]string{ + "name": "name", + "update_time": "update_time", + "create_time": "create_time", + "updateTime": "update_time", + "createTime": "create_time", + "relevance": "", + "updated_at": "update_time", + "created_at": "create_time", + } + + backendField, ok := fieldMapping[sortBy] + if !ok { + backendField = sortBy + } + + if backendField == "" { + return nil // Relevance sorting + } + + orderType := types.SortAsc + if order == "desc" { + orderType = types.SortDesc + } + + return &types.OrderByExpr{ + Fields: []types.OrderByField{ + {Field: backendField, Type: orderType}, + }, + } +} diff --git a/internal/service/skill_space.go b/internal/service/skill_space.go new file mode 100644 index 0000000000..29d1b780f9 --- /dev/null +++ b/internal/service/skill_space.go @@ -0,0 +1,645 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// SkillSpaceService handles business logic for skills space operations.
// It owns the DAOs it needs plus an in-memory, per-tenant cache of the
// "skills" folder ID and two sync.Map registries of mutexes used to
// serialize folder creation and space creation.
type SkillSpaceService struct {
	spaceDAO             *dao.SkillSpaceDAO
	fileDAO              *dao.FileDAO
	configDAO            *dao.SkillSearchConfigDAO
	tenantDAO            *dao.TenantDAO
	skillsFolderCache    map[string]string // tenant-keyed cache for skills folder ID
	skillsFolderMu       sync.RWMutex      // protects skillsFolderCache
	skillsFolderCreateMu sync.Map          // tenant-scoped locks for folder creation
	spaceCreateMu        sync.Map          // tenant-scoped locks for space creation (prevents TOCTOU races)
}

// NewSkillSpaceService creates a new SkillSpaceService instance with freshly
// constructed DAOs and an empty skills-folder cache. The two sync.Map fields
// are usable at their zero value.
func NewSkillSpaceService() *SkillSpaceService {
	return &SkillSpaceService{
		spaceDAO:          dao.NewSkillSpaceDAO(),
		fileDAO:           dao.NewFileDAO(),
		configDAO:         dao.NewSkillSearchConfigDAO(),
		tenantDAO:         dao.NewTenantDAO(),
		skillsFolderCache: make(map[string]string),
	}
}

// CreateSpaceRequest represents the request to create a skills space.
// TenantID and Name are required by the binding tags; the rest are optional.
type CreateSpaceRequest struct {
	TenantID    string `json:"tenant_id" binding:"required"`
	Name        string `json:"name" binding:"required"`
	Description string `json:"description"`
	EmbdID      string `json:"embd_id"`
	RerankID    string `json:"rerank_id"`
}

// UpdateSpaceRequest represents the request to update a skills space.
// No field carries a binding tag, so all of them are optional at the
// binding layer.
type UpdateSpaceRequest struct {
	Name        string `json:"name"`
	Description string `json:"description"`
	EmbdID      string `json:"embd_id"`
	RerankID    string `json:"rerank_id"`
	TopK        int    `json:"top_k"`
}
tenant-scoped locking to prevent duplicate folder creation +func (s *SkillSpaceService) getSkillsFolderID(tenantID string) (string, error) { + // Return cached value if available (read lock) + s.skillsFolderMu.RLock() + if cachedID, ok := s.skillsFolderCache[tenantID]; ok && cachedID != "" { + s.skillsFolderMu.RUnlock() + return cachedID, nil + } + s.skillsFolderMu.RUnlock() + + // Acquire tenant-scoped creation lock + lock, _ := s.skillsFolderCreateMu.LoadOrStore(tenantID, &sync.Mutex{}) + lock.(*sync.Mutex).Lock() + defer lock.(*sync.Mutex).Unlock() + + // Double-check cache after acquiring lock + s.skillsFolderMu.RLock() + if cachedID, ok := s.skillsFolderCache[tenantID]; ok && cachedID != "" { + s.skillsFolderMu.RUnlock() + return cachedID, nil + } + s.skillsFolderMu.RUnlock() + + // Get root folder + rootFolder, err := s.fileDAO.GetRootFolder(tenantID) + if err != nil { + return "", fmt.Errorf("failed to get root folder: %w", err) + } + + // Look for skills folder under root + files, _, err := s.fileDAO.GetByPfID(tenantID, rootFolder.ID, 0, 0, "name", false, "") + if err != nil { + return "", fmt.Errorf("failed to list root folder contents: %w", err) + } + + for _, file := range files { + if file.Type == "folder" && file.Name == "skills" { + // Cache the result (write lock) + s.skillsFolderMu.Lock() + s.skillsFolderCache[tenantID] = file.ID + s.skillsFolderMu.Unlock() + return file.ID, nil + } + } + + // Skills folder not found, create it + logger.Info("Creating skills folder", zap.String("tenant_id", tenantID)) + folderID := generateSpaceID() + now := time.Now() + createTime := now.UnixMilli() + folder := &entity.File{ + ID: folderID, + ParentID: rootFolder.ID, + TenantID: tenantID, + CreatedBy: tenantID, + Name: "skills", + Type: "folder", + Size: 0, + SourceType: "system", + BaseModel: entity.BaseModel{ + CreateTime: &createTime, + UpdateTime: &createTime, + CreateDate: &now, + UpdateDate: &now, + }, + } + + if err := s.fileDAO.Create(folder); err != nil { 
+ return "", fmt.Errorf("failed to create skills folder: %w", err) + } + + // Cache the result (write lock) + s.skillsFolderMu.Lock() + s.skillsFolderCache[tenantID] = folderID + s.skillsFolderMu.Unlock() + + return folderID, nil +} + +// CreateSpace creates a new skills space with associated folder +func (s *SkillSpaceService) CreateSpace(req *CreateSpaceRequest) (map[string]interface{}, common.ErrorCode, error) { + // Validate name + if req.Name == "" { + return nil, common.CodeDataError, fmt.Errorf("space name is required") + } + + // Tenant-scoped serialization to prevent concurrent create/delete races + tenantKey := req.TenantID + ":" + req.Name + mu, _ := s.spaceCreateMu.LoadOrStore(tenantKey, &sync.Mutex{}) + tenantMu := mu.(*sync.Mutex) + tenantMu.Lock() + defer func() { + tenantMu.Unlock() + s.spaceCreateMu.Delete(tenantKey) + }() + + // Double-check after acquiring lock: Check if space with same name already exists (active status) + existingSpace, err := s.spaceDAO.GetByTenantAndName(req.TenantID, req.Name) + if err != nil { + // Space doesn't exist, continue + } else if existingSpace != nil { + return nil, common.CodeDataError, fmt.Errorf("space with name '%s' already exists", req.Name) + } + + // Check if there's a space with the same name that is currently being deleted + existingSpaceAny, err := s.spaceDAO.GetByTenantAndNameAnyStatus(req.TenantID, req.Name) + if err == nil && existingSpaceAny != nil && existingSpaceAny.Status == entity.SpaceStatusDeleting { + return nil, common.CodeDataError, fmt.Errorf("space with name '%s' is being deleted, please try again later", req.Name) + } + + // Check if there's a deleted/non-active space with the same name and permanently delete it + // This handles the case where a previous creation failed partially + // Only delete non-active spaces (status != '1') to prevent TOCTOU race + if err := s.spaceDAO.DeletePermanentByName(req.TenantID, req.Name); err != nil { + logger.Warn("Failed to delete permanent space by 
name", zap.Error(err)) + } + + // Get skills folder ID + skillsFolderID, err := s.getSkillsFolderID(req.TenantID) + if err != nil { + logger.Error("Failed to get skills folder ID", err) + return nil, common.CodeOperatingError, err + } + + // Check if there's an existing folder with the same name under skills folder + // If exists, delete it to prevent duplicate folder names + existingFolders := s.fileDAO.Query(req.Name, skillsFolderID) + for _, f := range existingFolders { + if f.Type == "folder" && f.Name == req.Name { + logger.Info("Deleting existing space folder with same name", zap.String("folderID", f.ID), zap.String("name", req.Name)) + if err := s.deleteFolderRecursive(f.ID); err != nil { + logger.Warn("Failed to delete existing folder", zap.String("folderID", f.ID), zap.Error(err)) + } + break + } + } + + // Generate space ID and folder ID + spaceID := generateSpaceID() + folderID := generateSpaceID() + timestamp := time.Now().UnixMilli() + now := time.Now() + + // Create folder for the space under skills folder + folder := &entity.File{ + ID: folderID, + ParentID: skillsFolderID, + TenantID: req.TenantID, + CreatedBy: req.TenantID, + Name: req.Name, + Type: "folder", + Size: 0, + SourceType: "skill_space", + } + + if err := s.fileDAO.Create(folder); err != nil { + logger.Error("Failed to create space folder", err) + return nil, common.CodeOperatingError, fmt.Errorf("failed to create space folder: %w", err) + } + + // Create the space + space := &entity.SkillSpace{ + ID: spaceID, + TenantID: req.TenantID, + Name: req.Name, + FolderID: folderID, + Description: req.Description, + EmbdID: req.EmbdID, + RerankID: req.RerankID, + TopK: 10, + Status: "1", + CreateTime: ×tamp, + UpdateTime: &now, + } + + if err := s.spaceDAO.Create(space); err != nil { + // Rollback: delete the created folder + logger.Error("Failed to create space in database", err) + s.fileDAO.DeleteByIDs([]string{folderID}) + return nil, common.CodeOperatingError, fmt.Errorf("failed to create 
space: %w", err) + } + + // Create default search config for this space + defaultEmbdID := req.EmbdID + if defaultEmbdID == "" { + tenant, err := s.tenantDAO.GetByID(req.TenantID) + if err == nil && tenant != nil && tenant.EmbdID != "" { + defaultEmbdID = tenant.EmbdID + logger.Info("Using tenant default embedding model", zap.String("tenantID", req.TenantID), zap.String("embdID", defaultEmbdID)) + } else { + logger.Warn("Tenant has no default embedding model, skill search will not work until configured", zap.String("tenantID", req.TenantID)) + } + } + if defaultEmbdID != "" { + if _, err := s.configDAO.GetOrCreate(req.TenantID, spaceID, defaultEmbdID); err != nil { + logger.Warn("Failed to create skill search config for new space", + zap.String("tenantID", req.TenantID), + zap.String("spaceID", spaceID), + zap.String("embdID", defaultEmbdID), + zap.Error(err)) + } + } + + return space.ToMap(), common.CodeSuccess, nil +} + +// ListSpaces lists all skills spaces for a tenant +func (s *SkillSpaceService) ListSpaces(tenantID string) (map[string]interface{}, common.ErrorCode, error) { + spaces, err := s.spaceDAO.GetByTenantID(tenantID) + if err != nil { + return nil, common.CodeOperatingError, fmt.Errorf("failed to list spaces: %w", err) + } + + // Convert to maps + spaceList := make([]map[string]interface{}, len(spaces)) + for i, space := range spaces { + spaceList[i] = space.ToMap() + } + + return map[string]interface{}{ + "spaces": spaceList, + "total": len(spaceList), + }, common.CodeSuccess, nil +} + +// GetSpace retrieves a skills space by ID (includes deleting status for visibility) +func (s *SkillSpaceService) GetSpace(spaceID, tenantID string) (map[string]interface{}, common.ErrorCode, error) { + space, err := s.spaceDAO.GetByIDAnyStatus(spaceID) + if err != nil { + return nil, common.CodeDataError, fmt.Errorf("space not found") + } + + // Verify tenant ownership + if space.TenantID != tenantID { + return nil, common.CodeDataError, fmt.Errorf("space not found") 
+ } + + // Return deleted spaces as not found + if space.Status == entity.SpaceStatusDeleted { + return nil, common.CodeDataError, fmt.Errorf("space not found") + } + + return space.ToMap(), common.CodeSuccess, nil +} + +// UpdateSpace updates a skills space +func (s *SkillSpaceService) UpdateSpace(spaceID string, tenantID string, req *UpdateSpaceRequest) (map[string]interface{}, common.ErrorCode, error) { + space, err := s.spaceDAO.GetByID(spaceID) + if err != nil { + return nil, common.CodeDataError, fmt.Errorf("space not found") + } + + // Verify tenant ownership + if space.TenantID != tenantID { + return nil, common.CodeDataError, fmt.Errorf("space not found") + } + + // Build updates + updates := make(map[string]interface{}) + + if req.Name != "" && req.Name != space.Name { + // Check if name already exists + existingSpace, _ := s.spaceDAO.GetByTenantAndName(tenantID, req.Name) + if existingSpace != nil && existingSpace.ID != spaceID { + return nil, common.CodeDataError, fmt.Errorf("space with name '%s' already exists", req.Name) + } + + originalName := space.Name + updates["name"] = req.Name + + // Update space first, then folder (atomic-like behavior with rollback on failure) + if err := s.spaceDAO.UpdateByID(spaceID, updates); err != nil { + return nil, common.CodeOperatingError, fmt.Errorf("failed to update space name: %w", err) + } + + // Update folder name as well - if this fails, rollback space name + if err := s.fileDAO.UpdateByID(space.FolderID, map[string]interface{}{"name": req.Name}); err != nil { + logger.Error("Failed to update folder name, rolling back space name", err) + // Rollback space name + if rollbackErr := s.spaceDAO.UpdateByID(spaceID, map[string]interface{}{"name": originalName}); rollbackErr != nil { + logger.Error("Failed to rollback space name after folder rename failure", rollbackErr) + } + return nil, common.CodeOperatingError, fmt.Errorf("failed to update folder name: %w", err) + } + + // Clear updates map since we've already 
applied name change + delete(updates, "name") + } + + if req.Description != space.Description { + updates["description"] = req.Description + } + if req.EmbdID != "" && req.EmbdID != space.EmbdID { + updates["embd_id"] = req.EmbdID + } + if req.RerankID != space.RerankID { + updates["rerank_id"] = req.RerankID + } + if req.TopK > 0 && req.TopK != space.TopK { + updates["top_k"] = req.TopK + } + + if len(updates) > 0 { + if err := s.spaceDAO.UpdateByID(spaceID, updates); err != nil { + return nil, common.CodeOperatingError, fmt.Errorf("failed to update space: %w", err) + } + } + + // Refresh space data + space, _ = s.spaceDAO.GetByID(spaceID) + return space.ToMap(), common.CodeSuccess, nil +} + +// getPythonServiceURL returns the Python service URL from environment or default +func getPythonServiceURL() string { + url := os.Getenv("PYTHON_SERVICE_URL") + if url == "" { + url = "http://127.0.0.1:9380" + } + // Ensure URL has scheme + if !strings.HasPrefix(url, "http://") && !strings.HasPrefix(url, "https://") { + url = "http://" + url + } + // Ensure URL has the API path + if !strings.HasSuffix(url, "/api/v1/files") { + url = strings.TrimSuffix(url, "/") + url = url + "/api/v1/files" + } + return url +} + +// deleteFolderViaPythonAPI calls Python backend API to delete folder and its storage +func (s *SkillSpaceService) deleteFolderViaPythonAPI(folderID, tenantID, authHeader string) error { + pythonURL := getPythonServiceURL() + + reqBody := map[string]interface{}{ + "ids": []string{folderID}, + } + jsonData, err := json.Marshal(reqBody) + if err != nil { + return fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("DELETE", pythonURL, bytes.NewBuffer(jsonData)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + // Use request context with timeout to prevent indefinite blocking + deleteCtx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + req = 
req.WithContext(deleteCtx) + + req.Header.Set("Content-Type", "application/json") + // Extract raw token from "Bearer <token>" format if present + // Python backend needs the raw token for authentication + authToken := authHeader + if strings.HasPrefix(strings.ToLower(authHeader), "bearer ") { + authToken = strings.TrimSpace(authHeader[7:]) + } + req.Header.Set("Authorization", authToken) + // Set tenant ID header for Python backend + req.Header.Set("X-tenant-id", tenantID) + + logger.Info("Calling Python API to delete folder", zap.String("folderID", folderID), zap.String("tenantID", tenantID)) + + client := &http.Client{Timeout: 60 * time.Second} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("failed to call Python API: %w", err) + } + defer resp.Body.Close() + + body, _ := io.ReadAll(resp.Body) + logger.Info("Python API delete folder response", zap.String("folderID", folderID), zap.Int("status", resp.StatusCode), zap.String("body", string(body))) + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("Python API returned status %d: %s", resp.StatusCode, string(body)) + } + + // Parse response to check if deletion was successful + var result map[string]interface{} + if err := json.Unmarshal(body, &result); err != nil { + return fmt.Errorf("failed to parse response: %w", err) + } + + if code, ok := result["code"].(float64); !ok || int(code) != 0 { + message := "unknown error" + if msg, ok := result["message"].(string); ok { + message = msg + } + return fmt.Errorf("Python API returned error: %s", message) + } + + logger.Info("Successfully deleted folder via Python API", zap.String("folderID", folderID)) + return nil +} + +// DeleteSpace starts asynchronous deletion of a skills space and returns immediately. +// The space status is set to "deleting" and the actual cleanup runs in a background goroutine. 
+func (s *SkillSpaceService) DeleteSpace(spaceID, tenantID string, docEngine engine.DocEngine, authHeader string) (common.ErrorCode, error) { + // Get space regardless of status (could be retrying a failed delete) + space, err := s.spaceDAO.GetByIDAnyStatus(spaceID) + if err != nil { + return common.CodeDataError, fmt.Errorf("space not found") + } + + // Verify tenant ownership + if space.TenantID != tenantID { + return common.CodeDataError, fmt.Errorf("space not found") + } + + // If already deleting, return success (idempotent) + if space.Status == entity.SpaceStatusDeleting { + logger.Info("Space is already being deleted", zap.String("spaceID", spaceID)) + return common.CodeSuccess, nil + } + + // If already deleted, return success (idempotent) + if space.Status == entity.SpaceStatusDeleted { + logger.Info("Space is already deleted", zap.String("spaceID", spaceID)) + return common.CodeSuccess, nil + } + + // CAS: status must be "1" (active) → "2" (deleting) to prevent concurrent deletes + swapped, err := s.spaceDAO.CASStatus(spaceID, entity.SpaceStatusActive, entity.SpaceStatusDeleting) + if err != nil { + return common.CodeOperatingError, fmt.Errorf("failed to update space status: %w", err) + } + if !swapped { + // Another request already changed the status + return common.CodeOperatingError, fmt.Errorf("space is being modified by another request") + } + + logger.Info("Space marked as deleting, starting async cleanup", zap.String("spaceID", spaceID), zap.String("tenantID", tenantID)) + + // Launch async deletion in background goroutine + go s.asyncDeleteSpace(spaceID, space.FolderID, tenantID, docEngine, authHeader) + + return common.CodeSuccess, nil +} + +// asyncDeleteSpace performs the actual deletion work in the background. +// It deletes the search index, removes files via Python API, and soft-deletes the space record. 
+// asyncDeleteSpace performs the slow part of deleting a skill space whose
+// record is expected to already be in the "deleting" status:
+//
+//  1. drop the space's search index (skipped when docEngine is nil),
+//  2. delete the backing folder through the Python API (one retry after 5s),
+//  3. flip the record from "deleting" to "deleted" with a compare-and-swap.
+//
+// Steps 1 and 2 are best effort: their failures are logged and the teardown
+// continues, so the record is still marked deleted. The function returns
+// nothing; every outcome is reported through the logger only.
+// NOTE(review): judging by the name this is meant to run in its own
+// goroutine — confirm at the call site.
+func (s *SkillSpaceService) asyncDeleteSpace(spaceID, folderID, tenantID string, docEngine engine.DocEngine, authHeader string) {
+	defer func() {
+		if r := recover(); r != nil {
+			logger.Warn("Panic in asyncDeleteSpace, marking space as deleted", zap.Any("recover", r), zap.String("spaceID", spaceID))
+			// Still try to leave the "deleting" state, and surface a failure
+			// instead of dropping it so a stuck space can be diagnosed.
+			if _, casErr := s.spaceDAO.CASStatus(spaceID, entity.SpaceStatusDeleting, entity.SpaceStatusDeleted); casErr != nil {
+				logger.Error(fmt.Sprintf("Failed to mark space as deleted after panic, spaceID=%s", spaceID), casErr)
+			}
+		}
+	}()
+
+	// Step 1: Delete the search index
+	if docEngine != nil {
+		indexName := getSkillIndexName(tenantID, spaceID)
+		logger.Info("Async deleting space index", zap.String("index", indexName), zap.String("spaceID", spaceID))
+		// Scoped in a closure so the deferred cancel releases the timeout
+		// context right after the drop, even if DropTable panics.
+		func() {
+			deleteCtx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+			defer cancel()
+			if err := docEngine.DropTable(deleteCtx, indexName); err != nil {
+				logger.Warn("Failed to delete space index during async delete", zap.String("index", indexName), zap.Error(err))
+				// Continue with other cleanup steps
+			} else {
+				logger.Info("Successfully deleted space index", zap.String("index", indexName))
+			}
+		}()
+	}
+
+	// Step 2: Delete folder and storage via Python API
+	logger.Info("Async deleting space folder via Python API", zap.String("folderID", folderID), zap.String("spaceID", spaceID))
+	if err := s.deleteFolderViaPythonAPI(folderID, tenantID, authHeader); err != nil {
+		logger.Error(fmt.Sprintf("Failed to delete space folder via Python API during async delete, spaceID=%s", spaceID), err)
+		// Retry once with a delay
+		time.Sleep(5 * time.Second)
+		if retryErr := s.deleteFolderViaPythonAPI(folderID, tenantID, authHeader); retryErr != nil {
+			logger.Error(fmt.Sprintf("Retry failed to delete space folder, marking space as deleted anyway, spaceID=%s", spaceID), retryErr)
+			// Mark as deleted even if folder deletion fails - orphaned folders can be cleaned up later
+		}
+	} else {
+		logger.Info("Successfully deleted space folder via Python API", zap.String("folderID", folderID))
+	}
+
+	// Step 3: Soft delete the space record (status "2" → "0")
+	// First, permanently remove any previously deleted spaces with the same tenant+name
+	// to avoid UNIQUE INDEX constraint violation when changing status from "2" to "0"
+	space, err := s.spaceDAO.GetByIDAnyStatus(spaceID)
+	if err != nil {
+		// Not fatal: the status swap below is still attempted, but record why
+		// the purge was skipped in case that swap then fails.
+		logger.Warn("Failed to load space before purging stale records with the same name", zap.String("spaceID", spaceID), zap.Error(err))
+	} else if space != nil {
+		// Best effort; a real conflict shows up as a CASStatus error below.
+		_ = s.spaceDAO.DeletePermanentByName(space.TenantID, space.Name)
+	}
+
+	swapped, err := s.spaceDAO.CASStatus(spaceID, entity.SpaceStatusDeleting, entity.SpaceStatusDeleted)
+	if err != nil {
+		logger.Error(fmt.Sprintf("Failed to update space status to deleted, spaceID=%s", spaceID), err)
+		return
+	}
+	if !swapped {
+		logger.Warn("Space status was not 'deleting' when trying to mark as deleted", zap.String("spaceID", spaceID))
+		return
+	}
+
+	logger.Info("Successfully completed async space deletion", zap.String("spaceID", spaceID))
+}
+
+// deleteFolderRecursive removes the file records under folderID depth-first
+// and then the record of folderID itself, all through fileDAO.
+//
+// Failures while deleting a child folder or the batch of child files are
+// logged and skipped. The returned error is either the failure to list the
+// children or the result of deleting the folder's own record.
+func (s *SkillSpaceService) deleteFolderRecursive(folderID string) error {
+	// Get all children
+	children, err := s.fileDAO.ListByParentID(folderID)
+	if err != nil {
+		logger.Error(fmt.Sprintf("Failed to list children for folder %s", folderID), err)
+		return err
+	}
+
+	logger.Info("Deleting folder contents", zap.String("folder_id", folderID), zap.Int("child_count", len(children)))
+
+	// Collect file IDs (non-folder) and recurse into subfolders
+	var fileIDs []string
+	for _, child := range children {
+		if child.Type == "folder" {
+			logger.Debug("Recursively deleting child folder", zap.String("folder_id", child.ID), zap.String("folder_name", child.Name))
+			if err := s.deleteFolderRecursive(child.ID); err != nil {
+				logger.Warn("Failed to delete child folder", zap.String("folder_id", child.ID), zap.Error(err))
+			}
+		} else {
+			// Collect non-folder files for batch deletion
+			logger.Debug("Collecting file for deletion", zap.String("file_id", child.ID), zap.String("file_name", child.Name))
+			fileIDs = append(fileIDs, child.ID)
+		}
+	}
+
+	// Delete all non-folder files in batch
+	if len(fileIDs) > 0 {
+		logger.Info("Deleting files in folder", zap.String("folder_id", folderID), zap.Int("file_count", len(fileIDs)))
+		if _, err := s.fileDAO.DeleteByIDs(fileIDs); err != nil {
+			logger.Warn("Failed to delete files in folder", zap.String("folder_id", folderID), zap.Strings("file_ids", fileIDs), zap.Error(err))
+			// Continue to delete folder even if file deletion fails
+		}
+	}
+
+	// Delete the folder itself
+	logger.Info("Deleting folder", zap.String("folder_id", folderID))
+	_, err = s.fileDAO.DeleteByIDs([]string{folderID})
+	if err != nil {
+		logger.Error(fmt.Sprintf("Failed to delete folder %s", folderID), err)
+	}
+	return err
+}
+
+// GetSpaceByFolderID retrieves a skills space by its folder ID.
+//
+// It returns the space as a map with common.CodeSuccess, or
+// common.CodeDataError when the lookup fails, yields no space, or the space
+// belongs to a different tenant. The tenant-mismatch message is deliberately
+// a plain "not found" so it does not confirm that the space exists.
+func (s *SkillSpaceService) GetSpaceByFolderID(folderID, tenantID string) (map[string]interface{}, common.ErrorCode, error) {
+	space, err := s.spaceDAO.GetByFolderID(folderID)
+	// Guard against a (nil, nil) result as well, mirroring the nil check that
+	// asyncDeleteSpace applies to this DAO's lookups.
+	if err != nil || space == nil {
+		return nil, common.CodeDataError, fmt.Errorf("space not found for folder")
+	}
+
+	// Verify tenant ownership
+	if space.TenantID != tenantID {
+		return nil, common.CodeDataError, fmt.Errorf("space not found")
+	}
+
+	return space.ToMap(), common.CodeSuccess, nil
+}
+
+// generateSpaceID generates a unique ID for space: a random UUID rendered
+// without dashes. That is already 32 characters, so the slice is only a
+// length guard.
+func generateSpaceID() string {
+	return strings.ReplaceAll(uuid.New().String(), "-", "")[:32]
+}
diff --git a/test/testcases/test_http_api/test_file_app/test_file_routes.py b/test/testcases/test_http_api/test_file_app/test_file_routes.py index e0cbe5f84d..93774d2908 100644 --- a/test/testcases/test_http_api/test_file_app/test_file_routes.py +++ b/test/testcases/test_http_api/test_file_app/test_file_routes.py @@ -221,7 +221,7 @@ def test_upload_file_success_uses_new_service_layer(monkeypatch): monkeypatch.setattr( module.FileService, "create_folder", - lambda _file, parent_id, _names, _len_id: SimpleNamespace(id=parent_id), + lambda _file, parent_id, _names, _len_id, *_args: SimpleNamespace(id=parent_id), ) monkeypatch.setattr(module.settings, "STORAGE_IMPL", 
SimpleNamespace( obj_exist=lambda *_args, **_kwargs: False, diff --git a/web/.env.production b/web/.env.production index f33f3bef5c..977e2151ca 100644 --- a/web/.env.production +++ b/web/.env.production @@ -1 +1,2 @@ -VITE_BASE_URL='/' \ No newline at end of file +VITE_BASE_URL='/' +API_PROXY_SCHEME='python' diff --git a/web/src/assets/svg/home-icon/skill-folder.svg b/web/src/assets/svg/home-icon/skill-folder.svg new file mode 100644 index 0000000000..ceff7ce661 --- /dev/null +++ b/web/src/assets/svg/home-icon/skill-folder.svg @@ -0,0 +1,19 @@ +<svg width="32" height="32" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg"> +<path d="M14.667 20.0004H17.3337C18.0409 20.0004 18.7192 19.7194 19.2193 19.2193C19.7194 18.7192 20.0003 18.0409 20.0003 17.3337C20.0003 16.6265 19.7194 15.9482 19.2193 15.4481C18.7192 14.948 18.0409 14.667 17.3337 14.667H13.3337C12.5337 14.667 11.867 14.9337 11.467 15.467L4.00033 22.667M9.33366 28.0004L11.467 26.1337C11.867 25.6004 12.5337 25.3337 13.3337 25.3337H18.667C20.1337 25.3337 21.467 24.8004 22.4003 23.7337L28.5337 17.867C29.0482 17.3808 29.3485 16.7101 29.3685 16.0025C29.3885 15.2948 29.1266 14.6082 28.6403 14.0937C28.1541 13.5792 27.4834 13.2789 26.7757 13.2589C26.0681 13.2389 25.3815 13.5008 24.867 13.987L19.267 19.187M2.66699 21.3337L10.667 29.3337" stroke="url(#paint0_linear_2839_15297)" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/> +<path d="M12.2916 5.3066L12.4457 4.95317C12.7159 4.32755 13.2105 3.82586 13.832 3.54695L14.3069 3.33605C14.3645 3.30967 14.4133 3.26729 14.4476 3.21394C14.4818 3.1606 14.5 3.09853 14.5 3.03514C14.5 2.97174 14.4818 2.90968 14.4476 2.85633C14.4133 2.80298 14.3645 2.7606 14.3069 2.73422L13.8587 2.53416C13.2212 2.24802 12.7178 1.72795 12.4524 1.08126L12.2941 0.699488C12.2706 0.640617 12.2301 0.590136 12.1777 0.554573C12.1252 0.51901 12.0633 0.5 12 0.5C11.9367 0.5 11.8748 0.51901 11.8223 0.554573C11.7699 0.590136 11.7294 0.640617 11.7059 0.699488L11.5476 
1.08126C11.2824 1.72826 10.779 2.24866 10.1413 2.535L9.69312 2.73422C9.63551 2.7606 9.58667 2.80298 9.55244 2.85633C9.5182 2.90968 9.5 2.97174 9.5 3.03514C9.5 3.09853 9.5182 3.1606 9.55244 3.21394C9.58667 3.26729 9.63551 3.30967 9.69312 3.33605L10.168 3.54695C10.7845 3.82202 11.2794 4.32299 11.5543 4.95317L11.7084 5.3066C11.7326 5.36394 11.7732 5.41286 11.8251 5.44726C11.8769 5.48166 11.9378 5.5 12 5.5C12.0622 5.5 12.1231 5.48166 12.1749 5.44726C12.2268 5.41286 12.2674 5.36394 12.2916 5.3066Z" fill="url(#paint1_linear_2839_15297)"/> +<path d="M16 8.968L17.2307 8.45867M17.2307 5.39733L16 4.88667M19.396 3.232L18.8853 2M19.396 10.624L18.8853 11.8547M22.4573 3.232L22.968 2M22.9667 11.856L22.4573 10.624M24.6227 5.39733L25.8547 4.88667M24.6227 8.45867L25.8547 8.96933M24.9267 6.928C24.9267 9.13714 23.1358 10.928 20.9267 10.928C18.7175 10.928 16.9267 9.13714 16.9267 6.928C16.9267 4.71886 18.7175 2.928 20.9267 2.928C23.1358 2.928 24.9267 4.71886 24.9267 6.928Z" stroke="url(#paint2_linear_2839_15297)" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/> +<defs> +<linearGradient id="paint0_linear_2839_15297" x1="9.00049" y1="3.00195" x2="30.0005" y2="29.002" gradientUnits="userSpaceOnUse"> +<stop stop-color="#00BEB4"/> +<stop offset="1" stop-color="#00BE6F"/> +</linearGradient> +<linearGradient id="paint1_linear_2839_15297" x1="6.31224" y1="0.503555" x2="13.7116" y2="10.3087" gradientUnits="userSpaceOnUse"> +<stop stop-color="#01BEB3"/> +<stop offset="1" stop-color="#01C48D"/> +</linearGradient> +<linearGradient id="paint2_linear_2839_15297" x1="5.91825" y1="3.62484" x2="23.9137" y2="28.7657" gradientUnits="userSpaceOnUse"> +<stop stop-color="#00BEB4"/> +<stop offset="1" stop-color="#00BE6F"/> +</linearGradient> +</defs> +</svg> diff --git a/web/src/assets/svg/home-icon/skill-space.svg b/web/src/assets/svg/home-icon/skill-space.svg new file mode 100644 index 0000000000..cb8822bff3 --- /dev/null +++ b/web/src/assets/svg/home-icon/skill-space.svg @@ -0,0 +1,14 @@ 
+<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg"> +<g clip-path="url(#clip0_2839_15291)"> +<path d="M3.99986 0.570312C4.90917 0.570313 5.78124 0.931536 6.42423 1.57452C7.06721 2.2175 7.42843 3.08957 7.42843 3.99888C7.42843 4.9082 7.06721 5.78027 6.42423 6.42325C5.78124 7.06623 4.90917 7.42746 3.99986 7.42746C3.09055 7.42746 2.21848 7.06623 1.57549 6.42325C0.932512 5.78027 0.571289 4.9082 0.571289 3.99888C0.571289 3.08957 0.932512 2.2175 1.57549 1.57452C2.21848 0.931536 3.09055 0.570313 3.99986 0.570312ZM3.99986 1.71317C3.6997 1.71317 3.40247 1.77229 3.12516 1.88716C2.84784 2.00203 2.59586 2.17039 2.38362 2.38264C2.17137 2.59489 2.003 2.84686 1.88814 3.12418C1.77327 3.40149 1.71415 3.69872 1.71415 3.99888C1.71415 4.29905 1.77327 4.59627 1.88814 4.87359C2.003 5.1509 2.17137 5.40288 2.38362 5.61513C2.59586 5.82738 2.84784 5.99574 3.12516 6.11061C3.40247 6.22548 3.6997 6.2846 3.99986 6.2846C4.60607 6.2846 5.18745 6.04378 5.6161 5.61513C6.04476 5.18647 6.28557 4.60509 6.28557 3.99888C6.28557 3.39267 6.04476 2.81129 5.6161 2.38264C5.18745 1.95399 4.60607 1.71317 3.99986 1.71317ZM3.99986 8.57031C4.90917 8.57031 5.78124 8.93154 6.42423 9.57452C7.06721 10.2175 7.42843 11.0896 7.42843 11.9989C7.42843 12.9082 7.06721 13.7803 6.42423 14.4233C5.78124 15.0662 4.90917 15.4275 3.99986 15.4275C3.09055 15.4275 2.21848 15.0662 1.57549 14.4233C0.932512 13.7803 0.571289 12.9082 0.571289 11.9989C0.571289 11.0896 0.932512 10.2175 1.57549 9.57452C2.21848 8.93154 3.09055 8.57031 3.99986 8.57031ZM3.99986 9.71317C3.6997 9.71317 3.40247 9.77229 3.12516 9.88716C2.84784 10.002 2.59586 10.1704 2.38362 10.3826C2.17137 10.5949 2.003 10.8469 1.88814 11.1242C1.77327 11.4015 1.71415 11.6987 1.71415 11.9989C1.71415 12.299 1.77327 12.5963 1.88814 12.8736C2.003 13.1509 2.17137 13.4029 2.38362 13.6151C2.59586 13.8274 2.84784 13.9957 3.12516 14.1106C3.40247 14.2255 3.6997 14.2846 3.99986 14.2846C4.60607 14.2846 5.18745 14.0438 5.6161 13.6151C6.04476 13.1865 
6.28557 12.6051 6.28557 11.9989C6.28557 11.3927 6.04476 10.8113 5.6161 10.3826C5.18745 9.95399 4.60607 9.71317 3.99986 9.71317ZM11.9999 8.57031C12.9092 8.57031 13.7812 8.93154 14.4242 9.57452C15.0672 10.2175 15.4284 11.0896 15.4284 11.9989C15.4284 12.9082 15.0672 13.7803 14.4242 14.4233C13.7812 15.0662 12.9092 15.4275 11.9999 15.4275C11.0905 15.4275 10.2185 15.0662 9.57549 14.4233C8.93251 13.7803 8.57129 12.9082 8.57129 11.9989C8.57129 11.0896 8.93251 10.2175 9.57549 9.57452C10.2185 8.93154 11.0905 8.57031 11.9999 8.57031ZM11.9999 9.71317C11.6997 9.71317 11.4025 9.77229 11.1252 9.88716C10.8478 10.002 10.5959 10.1704 10.3836 10.3826C10.1714 10.5949 10.003 10.8469 9.88814 11.1242C9.77327 11.4015 9.71415 11.6987 9.71415 11.9989C9.71415 12.299 9.77327 12.5963 9.88814 12.8736C10.003 13.1509 10.1714 13.4029 10.3836 13.6151C10.5959 13.8274 10.8478 13.9957 11.1252 14.1106C11.4025 14.2255 11.6997 14.2846 11.9999 14.2846C12.6061 14.2846 13.1875 14.0438 13.6161 13.6151C14.0448 13.1865 14.2856 12.6051 14.2856 11.9989C14.2856 11.3927 14.0448 10.8113 13.6161 10.3826C13.1875 9.95399 12.6061 9.71317 11.9999 9.71317ZM12.1896 7.7326L12.401 7.24803C12.7717 6.39027 13.4501 5.70243 14.3027 5.32003L14.9541 5.03088C15.0332 4.99472 15.1002 4.93661 15.1471 4.86347C15.1941 4.79032 15.2191 4.70523 15.2191 4.61831C15.2191 4.53139 15.1941 4.4463 15.1471 4.37316C15.1002 4.30002 15.0332 4.24191 14.9541 4.20574L14.3393 3.93146C13.4648 3.53914 12.7742 2.82609 12.4101 1.93946L12.193 1.41603C12.1608 1.33531 12.1052 1.2661 12.0333 1.21734C11.9614 1.16858 11.8765 1.14252 11.7896 1.14252C11.7027 1.14252 11.6178 1.16858 11.5459 1.21734C11.474 1.2661 11.4183 1.33531 11.3861 1.41603L11.169 1.93946C10.8052 2.82652 10.1146 3.54001 9.23986 3.9326L8.625 4.20574C8.54596 4.24191 8.47898 4.30002 8.43201 4.37316C8.38505 4.4463 8.36008 4.53139 8.36008 4.61831C8.36008 4.70523 8.38505 4.79032 8.43201 4.86347C8.47898 4.93661 8.54596 4.99472 8.625 5.03088L9.27643 5.32003C10.1221 5.69717 10.801 6.38403 11.1781 
7.24803L11.3896 7.7326C11.4228 7.81121 11.4785 7.87829 11.5496 7.92545C11.6208 7.97261 11.7042 7.99776 11.7896 7.99776C11.8749 7.99776 11.9584 7.97261 12.0295 7.92545C12.1007 7.87829 12.1563 7.81121 12.1896 7.7326Z" fill="url(#paint0_linear_2839_15291)"/> +</g> +<defs> +<linearGradient id="paint0_linear_2839_15291" x1="3.98714" y1="1.14745" x2="14.1304" y2="14.5961" gradientUnits="userSpaceOnUse"> +<stop stop-color="#01BEB3"/> +<stop offset="1" stop-color="#01C48D"/> +</linearGradient> +<clipPath id="clip0_2839_15291"> +<rect width="16" height="16" fill="white"/> +</clipPath> +</defs> +</svg> diff --git a/web/src/assets/svg/home-icon/skills.svg b/web/src/assets/svg/home-icon/skills.svg new file mode 100644 index 0000000000..82d5b73007 --- /dev/null +++ b/web/src/assets/svg/home-icon/skills.svg @@ -0,0 +1,19 @@ +<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg"> +<g clip-path="url(#clip0_2839_15284)"> +<path d="M7.33325 9.9987H8.66659C9.02021 9.9987 9.35935 9.85822 9.60939 9.60817C9.85944 9.35813 9.99992 9.01899 9.99992 8.66537C9.99992 8.31174 9.85944 7.9726 9.60939 7.72256C9.35935 7.47251 9.02021 7.33203 8.66659 7.33203H6.66659C6.26659 7.33203 5.93325 7.46537 5.73325 7.73203L1.99992 11.332M4.66659 13.9987L5.73325 13.0654C5.93325 12.7987 6.26659 12.6654 6.66659 12.6654H9.33325C10.0666 12.6654 10.7333 12.3987 11.1999 11.8654L14.2666 8.93203C14.5238 8.68892 14.674 8.35356 14.684 7.99974C14.694 7.64593 14.563 7.30262 14.3199 7.04536C14.0768 6.7881 13.7414 6.63796 13.3876 6.62796C13.0338 6.61795 12.6905 6.74892 12.4333 6.99203L9.63325 9.59203M1.33325 10.6654L5.33325 14.6654M12.1666 4.9987C12.1666 5.9652 11.3831 6.7487 10.4166 6.7487C9.45009 6.7487 8.66659 5.9652 8.66659 4.9987C8.66659 4.0322 9.45009 3.2487 10.4166 3.2487C11.3831 3.2487 12.1666 4.0322 12.1666 4.9987ZM5.99992 2.58203C5.99992 3.6866 5.10449 4.58203 3.99992 4.58203C2.89535 4.58203 1.99992 3.6866 1.99992 2.58203C1.99992 1.47746 2.89535 0.582031 3.99992 
0.582031C5.10449 0.582031 5.99992 1.47746 5.99992 2.58203Z" stroke="url(#paint0_linear_2839_15284)" stroke-linecap="round" stroke-linejoin="round"/> +<path d="M8.42495 3.13396L8.51743 2.9219C8.67955 2.54653 8.9763 2.24552 9.3492 2.07817L9.63413 1.95163C9.6687 1.9358 9.698 1.91037 9.71854 1.87837C9.73908 1.84636 9.75 1.80912 9.75 1.77108C9.75 1.73304 9.73908 1.69581 9.71854 1.6638C9.698 1.63179 9.6687 1.60636 9.63413 1.59053L9.3652 1.4705C8.9827 1.29881 8.68067 0.986768 8.52143 0.598757L8.42645 0.369693C8.41238 0.33437 8.38805 0.304082 8.35659 0.282744C8.32513 0.261406 8.288 0.25 8.25 0.25C8.212 0.25 8.17487 0.261406 8.14341 0.282744C8.11195 0.304082 8.08762 0.33437 8.07355 0.369693L7.97857 0.598757C7.81944 0.986956 7.51739 1.29919 7.1348 1.471L6.86587 1.59053C6.8313 1.60636 6.802 1.63179 6.78146 1.6638C6.76092 1.69581 6.75 1.73304 6.75 1.77108C6.75 1.80912 6.76092 1.84636 6.78146 1.87837C6.802 1.91037 6.8313 1.9358 6.86587 1.95163L7.1508 2.07817C7.5207 2.24321 7.81762 2.5438 7.98257 2.9219L8.07505 3.13396C8.08959 3.16836 8.11393 3.19772 8.14505 3.21836C8.17616 3.23899 8.21267 3.25 8.25 3.25C8.28733 3.25 8.32384 3.23899 8.35495 3.21836C8.38607 3.19772 8.41041 3.16836 8.42495 3.13396Z" fill="url(#paint1_linear_2839_15284)"/> +</g> +<defs> +<linearGradient id="paint0_linear_2839_15284" x1="4.5" y1="1.5" x2="15" y2="14.5" gradientUnits="userSpaceOnUse"> +<stop stop-color="#00BEB4"/> +<stop offset="1" stop-color="#00BE6F"/> +</linearGradient> +<linearGradient id="paint1_linear_2839_15284" x1="4.83734" y1="0.252133" x2="9.27694" y2="6.13523" gradientUnits="userSpaceOnUse"> +<stop stop-color="#01BEB3"/> +<stop offset="1" stop-color="#01C48D"/> +</linearGradient> +<clipPath id="clip0_2839_15284"> +<rect width="16" height="16" fill="white"/> +</clipPath> +</defs> +</svg> diff --git a/web/src/components/empty/constant.tsx b/web/src/components/empty/constant.tsx index 641920041d..9c24350878 100644 --- a/web/src/components/empty/constant.tsx +++ 
b/web/src/components/empty/constant.tsx @@ -11,6 +11,7 @@ export enum EmptyCardType { Chat = 'chat', Search = 'search', Memory = 'memory', + Skills = 'skills', } export const EmptyCardData = { @@ -39,4 +40,9 @@ export const EmptyCardData = { titleKey: 'empty.memoryTitle', notFoundKey: 'empty.notFoundMemory', }, + [EmptyCardType.Skills]: { + icon: <HomeIcon name="skills" width={'24'} />, + titleKey: 'empty.skillsTitle', + notFoundKey: 'empty.notFoundSkills', + }, }; diff --git a/web/src/components/icon-font.tsx b/web/src/components/icon-font.tsx index d08d951db7..fc48e53d79 100644 --- a/web/src/components/icon-font.tsx +++ b/web/src/components/icon-font.tsx @@ -2,6 +2,7 @@ import { FileIconMap } from '@/constants/file'; import { cn } from '@/lib/utils'; import { getExtension } from '@/utils/document-util'; import { CSSProperties } from 'react'; +import SvgIcon from './svg-icon'; type IconFontType = { name: string; @@ -36,6 +37,14 @@ export function FileIcon({ type, }: IconFontType & { type?: string }) { const isFolder = type === 'folder'; + const isSkills = type === 'skills'; + if (isSkills) { + return ( + <span className={cn('size-4', className)}> + <SvgIcon name="home-icon/skills" width={16} height={16} /> + </span> + ); + } return ( <span className={cn('size-4', className)}> <IconFont diff --git a/web/src/components/list-filter-bar/index.tsx b/web/src/components/list-filter-bar/index.tsx index 9cd12d4c42..4e1e306d58 100644 --- a/web/src/components/list-filter-bar/index.tsx +++ b/web/src/components/list-filter-bar/index.tsx @@ -16,6 +16,7 @@ interface IProps { searchString?: string; onSearchChange?: ChangeEventHandler<HTMLInputElement>; showFilter?: boolean; + showSearch?: boolean; leftPanel?: ReactNode; preChildren?: ReactNode; } @@ -58,6 +59,7 @@ export default function ListFilterBar({ searchString, onSearchChange, showFilter = true, + showSearch = true, leftPanel, value, onChange, @@ -121,12 +123,14 @@ export default function ListFilterBar({ </FilterPopover> )} 
- <SearchInput - value={searchString} - onChange={onSearchChange} - className="w-32" - role="searchbox" - ></SearchInput> + {showSearch && ( + <SearchInput + value={searchString} + onChange={onSearchChange} + className="w-32" + role="searchbox" + ></SearchInput> + )} {children} </div> </div> diff --git a/web/src/components/ui/sheet.tsx b/web/src/components/ui/sheet.tsx index 56a1b029d2..d8d16a6e6f 100644 --- a/web/src/components/ui/sheet.tsx +++ b/web/src/components/ui/sheet.tsx @@ -21,7 +21,7 @@ const SheetOverlay = React.forwardRef< >(({ className, ...props }, ref) => ( <SheetPrimitive.Overlay className={cn( - 'fixed inset-0 z-50 bg-black/80 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0', + 'fixed inset-0 z-50 bg-black/80 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:pointer-events-none', className, )} {...props} @@ -31,7 +31,7 @@ const SheetOverlay = React.forwardRef< SheetOverlay.displayName = SheetPrimitive.Overlay.displayName; const sheetVariants = cva( - 'fixed z-50 gap-4 bg-bg-base rounded-lg p-6 shadow-lg transition ease-in-out data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:duration-300 data-[state=open]:duration-500', + 'fixed z-50 gap-4 bg-bg-base rounded-lg p-6 shadow-lg transition ease-in-out data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:duration-300 data-[state=open]:duration-500 data-[state=closed]:pointer-events-none', { variants: { side: { diff --git a/web/src/components/ui/slider.tsx b/web/src/components/ui/slider.tsx index 992f9ebed3..0b6967c8fd 100644 --- a/web/src/components/ui/slider.tsx +++ b/web/src/components/ui/slider.tsx @@ -17,15 +17,15 @@ const Slider = React.forwardRef< )} {...props} > - <SliderPrimitive.Track className="relative h-2 w-full grow overflow-hidden rounded-full 
bg-colors-background-inverse-strong"> - <SliderPrimitive.Range className="absolute h-full bg-primary" /> + <SliderPrimitive.Track className="relative h-1 w-full grow overflow-hidden rounded-full bg-border-button"> + <SliderPrimitive.Range className="absolute h-full bg-accent-primary" /> </SliderPrimitive.Track> <SliderPrimitive.Thumb className=" - block h-5 w-5 rounded-full border-2 border-primary bg-colors-text-core-standard transition-colors ring-offset-background + block h-2.5 w-2.5 rounded-full border-2 border-accent-primary bg-white ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-foreground focus-visible:ring-offset-2 - disabled:pointer-events-none disabled:opacity-50" + disabled:pointer-events-none disabled:opacity-50 cursor-pointer" /> </SliderPrimitive.Root> )); diff --git a/web/src/components/ui/spin.tsx b/web/src/components/ui/spin.tsx index 45e2a95d34..30db32b673 100644 --- a/web/src/components/ui/spin.tsx +++ b/web/src/components/ui/spin.tsx @@ -26,13 +26,16 @@ export const Spin: React.FC<SpinProps> = ({ className, children, }) => { + // When used without children (standalone), don't show mask background + const hasChildren = React.Children.count(children) > 0; + return ( <div className={cn( 'relative', { 'after:content-[""] after:absolute after:inset-0 after:z-10 after:bg-text-primary/40 after:transition-all after:duration-300 h-full w-full': - spinning, + spinning && hasChildren, }, className, )} @@ -40,7 +43,9 @@ export const Spin: React.FC<SpinProps> = ({ {spinning && ( <div className={cn( - 'absolute inset-0 z-10 flex items-center justify-center bg-text-primary/30', + hasChildren + ? 
'absolute inset-0 z-10 flex items-center justify-center bg-text-primary/30' + : 'flex items-center justify-center', minSizeClasses[size], )} > diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index fa8a4c25f1..ee4da4d148 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -62,6 +62,7 @@ export default { openInNewTab: 'Chat in new tab', previousPage: 'Previous', nextPage: 'Next', + previous: 'Previous', add: 'Add', remove: 'Remove', search: 'Search', @@ -113,12 +114,174 @@ export default { setting: 'User settings', logout: 'Log out', fileManager: 'File', + skills: 'Skills', flow: 'Agent', search: 'Search', welcome: 'Welcome to', dataset: 'Dataset', memories: 'Memory', }, + skills: { + title: 'Skills', + selectSpace: 'Select a skill space to get started', + spacePlaceholder: 'Enter space name', + createSpace: 'Create skill space', + createSpaceTitle: 'Create new skill space', + createSpaceDescription: + 'Create a new space to organize and manage your skills.', + spaceName: 'Space name', + spaceNamePlaceholder: 'e.g., my-space', + spaceNameRequired: 'Please enter space name', + noSpaces: 'No skill space yet. Create your first one!', + enterSpace: 'Enter', + spaceCreated: 'Skill space created successfully', + spaceDeleted: 'Skill space deleted successfully', + fetchError: 'Failed to fetch skills', + deleteSpaceTitle: 'Delete skill space', + deleteSpaceDescription: + 'Are you sure you want to delete this skill space? This action cannot be undone and all skills in this space will be permanently deleted.', + deleteSpaceName: 'Space name', + uploadSuccess: 'Skill uploaded successfully', + uploadError: 'Failed to upload skill', + deleteSuccess: 'Skill deleted successfully', + deleteError: 'Failed to delete skill', + skillExists: + 'A skill with this name already exists. Please delete it first or use a different name.', + uploadSkill: 'Upload skill', + searchPlaceholder: 'Search skills...', + noSkills: 'No skills yet. 
Upload your first skill.', + noSearchResults: 'No skills matching your search', + filesCount: '{{count}} files', + foldersCount: '{{count}} folders', + pageInfo: 'Page {{current}} of {{total}}', + totalSkills: '{{total}} skills total', + backToSkills: 'Back to skills', + selectFileToView: 'Select a file to view', + skillName: 'Skill name', + skillNamePlaceholder: 'e.g., my-awesome-skill', + skillNameHelp: 'Only letters, numbers, hyphens and underscores allowed', + source: 'Source', + version: 'Version', + skillVersion: 'Version', + skillVersionPlaceholder: 'e.g., 1.0.0', + versionFormatHelp: 'Version must be in semver format (e.g., 1.0.0)', + versionRequired: 'Version is required', + selectFilesOrFolder: 'Select files or folder', + uploadDescription: + 'Upload skill files. You can drag and drop files or select a folder.', + selectFolder: 'Select folder', + dragFilesHint: 'or drag files below', + dragFilesTitle: 'Drag skill folder here', + dragFilesDescription: + 'Drag and drop a skill folder here, or use the "Select Folder" button below.', + filesSelected: '{{count}} files selected', + uploading: 'Uploading...', + files: 'Files', + noFiles: 'No files', + versionHistory: 'Version history', + selectVersion: 'Select version to preview', + latest: 'Latest', + metadata: { + basic: 'Basic info', + emoji: 'Emoji', + skillKey: 'Skill key', + always: 'Always active', + primaryEnv: 'Primary Environment Variable', + requires: 'Requirements', + requiredBins: 'Required Binaries', + requiredEnv: 'Required Environment Variables', + anyBins: 'At Least One Required', + install: 'Dependencies', + links: 'Links', + homepage: 'Homepage', + repository: 'Repository', + documentation: 'Documentation', + }, + validation: { + missing_skill_md: + 'Invalid skill: SKILL.md not found. 
Please ensure your skill directory contains a valid SKILL.md file.', + invalid_frontmatter: + 'Invalid skill: SKILL.md must have valid frontmatter (start and end with ---).', + missing_name: + 'Invalid skill: SKILL.md frontmatter must include a "name" field.', + invalid_name_format: + 'Invalid skill: "name" must be lowercase and URL-safe (letters, numbers, hyphens only).', + invalid_version: + 'Invalid skill: "version" must be valid semver (e.g., 1.0.0).', + invalid_metadata: 'Invalid skill: metadata contains invalid fields.', + invalid_file_type: 'Invalid skill: Only text-based files are allowed.', + invalid_path: 'Invalid skill: File path contains invalid characters.', + file_too_large: + 'Invalid skill: Individual file size exceeds 5MB limit.', + total_size_exceeded: + 'Invalid skill: Total bundle size exceeds 50MB limit.', + no_files: 'No files selected. Please select a skill folder.', + noValidFiles: 'No valid files found. Please check your selection.', + junkFilesFound: + 'Temporary files detected (e.g., .DS_Store). Please remove them before uploading.', + read_failed: 'Invalid skill: Failed to read SKILL.md file.', + invalid: 'Invalid skill format.', + valid: 'Valid skill format. Ready to upload.', + versionExists: + 'This version already exists. Please use a different version number.', + error: 'Validation failed', + }, + parsedMetadata: 'Parsed from SKILL.md:', + addSkill: 'Add Skill', + upload: 'Upload', + importFromGit: 'Import from Git', + gitPlatform: 'Platform', + repoUrl: 'Repository URL', + repoUrlHelp: 'Supports repository URL with optional path', + accessToken: 'Access Token', + githubTokenHelp: + 'For private repos or higher rate limits (5000 req/hour)', + giteeTokenHelp: 'For private repos or higher rate limits (2000 req/hour)', + rateLimitInfo: 'Rate Limit Info', + githubRateLimit: + 'Public repos: 60 requests/hour per IP. Use token for 5000 req/hour.', + giteeRateLimit: + 'Public repos: 1000 requests/hour per IP. 
Use token for 2000 req/hour.', + import: 'Import', + importing: 'Importing...', + configureSearch: 'Configure Search', + }, + skillSearch: { + configTitle: 'Skill Search Configuration', + configDesc: 'Configure how skills are indexed and searched', + embeddingModel: 'Embedding Model', + embeddingModelPlaceholder: 'Select an embedding model', + vectorSimilarityWeight: 'Vector Similarity Weight', + similarityThreshold: 'Similarity Threshold', + topK: 'Top K Results', + indexFields: 'Index Fields', + indexFieldsDesc: 'Select which fields to include in the search index', + fieldName: 'Name', + fieldNameDesc: 'Skill name', + fieldTags: 'Tags', + fieldTagsDesc: 'Skill tags', + fieldDescription: 'Description', + fieldDescriptionDesc: 'Skill description', + fieldContent: 'Content', + fieldContentDesc: 'Skill content (e.g., README)', + weight: 'Weight', + pureVector: 'Vector Only', + hybrid: 'Hybrid', + keyword: 'Keyword', + vector: 'Vector', + keywordOnly: 'Keyword Only', + balanced: 'Balanced', + vectorOnly: 'Vector Only', + reindex: 'Reindex All', + reindexing: 'Reindexing...', + reindexSuccess: 'Reindexed successfully', + pleaseSelectEmbeddingModel: 'Please select an embedding model', + saveSuccess: 'Saved successfully', + saveError: 'Failed to save', + semanticSearchPlaceholder: 'Search skills by meaning...', + switchToSemantic: 'Switch to semantic search', + switchToLocal: 'Switch to local search', + }, memories: { llmTooltip: 'Analyzes conversation content, extracts key information, and generates structured memory summaries.', @@ -2628,6 +2791,8 @@ Important structured information may include: names, dates, locations, events, k notFoundSearch: 'Search app not found', memoryTitle: 'No memory created yet', notFoundMemory: 'Memory not found', + skillsTitle: 'No skill space created yet', + notFoundSkills: 'Skill space not found', addNow: 'Add Now', }, diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index eb2047e355..8bbcc6a7e3 100644 --- 
a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -52,6 +52,7 @@ export default { openInNewTab: '在新标签页中聊天', previousPage: '上一页', nextPage: '下一页', + previous: '上一步', add: '添加', remove: '移除', search: '搜索', @@ -97,12 +98,166 @@ export default { setting: '用户设置', logout: '登出', fileManager: '文件管理', + skills: '技能', flow: '智能体', search: '搜索', welcome: '欢迎来到', dataset: '知识库', memories: '记忆', }, + skills: { + title: '技能', + selectSpace: '选择一个技能空间开始使用', + spacePlaceholder: '输入技能空间名称', + createSpace: '创建技能空间', + createSpaceTitle: '创建新技能空间', + createSpaceDescription: '创建一个新的技能空间来组织和管理您的技能。', + spaceName: '技能空间名称', + spaceNamePlaceholder: '例如:my-space', + spaceNameRequired: '请输入技能空间名称', + noSpaces: '暂无技能空间,创建您的第一个!', + enterSpace: '进入', + spaceCreated: '技能空间创建成功', + spaceDeleted: '技能空间删除成功', + fetchError: '获取技能列表失败', + deleteSpaceTitle: '删除技能空间', + deleteSpaceDescription: + '确定要删除此技能空间吗?此操作无法撤销,该技能空间中的所有技能将被永久删除。', + deleteSpaceName: '技能空间名称', + uploadSuccess: '技能上传成功', + uploadError: '技能上传失败', + deleteSuccess: '技能删除成功', + deleteError: '技能删除失败', + skillExists: '同名技能已存在,请先删除或使用其他名称', + uploadSkill: '上传技能', + searchPlaceholder: '搜索技能...', + noSkills: '暂无技能,上传您的第一个技能', + noSearchResults: '没有找到匹配的技能', + filesCount: '{{count}} 个文件', + foldersCount: '{{count}} 个文件夹', + pageInfo: '第 {{current}} 页,共 {{total}} 页', + totalSkills: '共 {{total}} 个技能', + backToSkills: '返回技能列表', + selectFileToView: '选择文件查看', + skillName: '技能名称', + skillNamePlaceholder: '例如:my-awesome-skill', + skillNameHelp: '只允许字母、数字、连字符和下划线', + source: '来源', + version: '版本', + skillVersion: '版本号', + skillVersionPlaceholder: '例如:1.0.0', + versionFormatHelp: '版本号必须符合语义化版本格式(例如:1.0.0)', + versionRequired: '版本号为必填项', + selectFilesOrFolder: '选择文件或文件夹', + uploadDescription: '上传技能文件。您可以拖拽文件或选择文件夹上传。', + selectFolder: '选择文件夹', + dragFilesHint: '或拖拽文件到下方', + dragFilesTitle: '拖拽技能文件夹到此处', + dragFilesDescription: + '将技能文件夹拖拽到此处,或使用下方的"选择文件夹"按钮。', + filesSelected: '已选择 {{count}} 个文件', + uploading: '上传中...', + files: '文件', + noFiles: '没有文件', 
+ versionHistory: '版本历史', + selectVersion: '选择版本预览', + latest: '最新', + metadata: { + basic: '基本信息', + emoji: '表情符号', + skillKey: '技能键名', + always: '始终激活', + primaryEnv: '主环境变量', + requires: '运行要求', + requiredBins: '必需的二进制文件', + requiredEnv: '必需的环境变量', + anyBins: '至少需要一个', + install: '依赖项', + links: '链接', + homepage: '主页', + repository: '代码仓库', + documentation: '文档', + }, + validation: { + missing_skill_md: + '无效的技能:未找到 SKILL.md 文件。请确保技能目录包含有效的 SKILL.md 文件。', + invalid_frontmatter: + '无效的技能:SKILL.md 必须包含有效的 frontmatter(以 --- 开头和结尾)。', + missing_name: '无效的技能:SKILL.md frontmatter 必须包含 "name" 字段。', + invalid_name_format: + '无效的技能:"name" 必须是小写且 URL 安全的(仅字母、数字、连字符)。', + invalid_version: + '无效的技能:"version" 必须是有效的语义化版本(例如:1.0.0)。', + invalid_metadata: '无效的技能:metadata 包含无效字段。', + invalid_file_type: '无效的技能:只允许文本类型的文件。', + invalid_path: '无效的技能:文件路径包含无效字符。', + file_too_large: '无效的技能:单个文件大小超过 5MB 限制。', + total_size_exceeded: '无效的技能:总包大小超过 50MB 限制。', + no_files: '未选择文件。请选择技能文件夹。', + noValidFiles: '未找到有效文件,请检查您的选择。', + junkFilesFound: '检测到临时文件(如 .DS_Store),请删除后再上传。', + read_failed: '无效的技能:无法读取 SKILL.md 文件。', + invalid: '无效的技能格式。', + valid: '有效的技能格式,可以上传。', + error: '验证失败', + versionExists: '该版本已存在,请使用不同的版本号。', + }, + parsedMetadata: '从 SKILL.md 解析:', + addSkill: '添加技能', + upload: '本地上传', + importFromGit: '从 Git 导入', + gitPlatform: '平台', + repoUrl: '仓库地址', + repoUrlHelp: '支持带路径的仓库地址', + accessToken: '访问令牌', + githubTokenHelp: '用于私有仓库或更高配额(5000 次/小时)', + giteeTokenHelp: '用于私有仓库或更高配额(2000 次/小时)', + rateLimitInfo: '速率限制说明', + githubRateLimit: + '公开仓库:每小时 60 次请求/IP。使用令牌可达 5000 次/小时。', + giteeRateLimit: + '公开仓库:每小时 1000 次请求/IP。使用令牌可达 2000 次/小时。', + import: '导入', + importing: '导入中...', + configureSearch: '配置搜索', + }, + skillSearch: { + configTitle: '技能搜索配置', + configDesc: '配置技能的索引和搜索方式', + embeddingModel: '嵌入模型', + embeddingModelPlaceholder: '选择嵌入模型', + vectorSimilarityWeight: '向量相似度权重', + similarityThreshold: '相似度阈值', + topK: '返回结果数量', + indexFields: '索引字段', + indexFieldsDesc: 
'选择要包含在搜索索引中的字段', + fieldName: '名称', + fieldNameDesc: '技能名称', + fieldTags: '标签', + fieldTagsDesc: '技能标签', + fieldDescription: '描述', + fieldDescriptionDesc: '技能描述', + fieldContent: '内容', + fieldContentDesc: '技能内容(如 README)', + weight: '权重', + pureKeyword: '仅关键词', + pureVector: '仅向量', + hybrid: '混合搜索', + keyword: '关键词', + vector: '向量', + keywordOnly: '仅关键词', + balanced: '平衡', + vectorOnly: '仅向量', + reindex: '重建索引', + reindexing: '重建索引中...', + reindexSuccess: '重建索引成功', + pleaseSelectEmbeddingModel: '请选择嵌入模型', + saveSuccess: '保存成功', + saveError: '保存失败', + semanticSearchPlaceholder: '按语义搜索技能...', + switchToSemantic: '切换到语义搜索', + switchToLocal: '切换到本地搜索', + }, memories: { llmTooltip: '分析对话内容,提取关键信息,并生成结构化的记忆摘要。', embeddingModelTooltip: @@ -2309,6 +2464,8 @@ Tokenizer 会根据所选方式将内容存储为对应的数据结构。`, notFoundSearch: '未查询到搜索应用', memoryTitle: '尚未创建记忆', notFoundMemory: '未查询到记忆', + skillsTitle: '尚未创建技能空间', + notFoundSkills: '未查询到技能空间', addNow: '立即添加', }, diff --git a/web/src/pages/files/action-cell.tsx b/web/src/pages/files/action-cell.tsx index 9e5285b60b..5d479b1012 100644 --- a/web/src/pages/files/action-cell.tsx +++ b/web/src/pages/files/action-cell.tsx @@ -49,6 +49,7 @@ export function ActionCell({ const { downloadFile } = useDownloadFile(); const isFolder = isFolderType(record.type); + const isSkillsFolder = isFolder && record.name.toLowerCase() === 'skills'; const extension = getExtension(record.name); const isKnowledgeBase = isKnowledgeBaseType(record.source_type); @@ -77,6 +78,10 @@ export function ActionCell({ handleRemoveFile([documentId]); }, [handleRemoveFile, documentId]); + if (isSkillsFolder) { + return null; + } + return ( <section className="flex gap-2 items-center text-text-sub-title-invert opacity-0 group-hover:opacity-100 transition-opacity"> {isKnowledgeBase || ( diff --git a/web/src/pages/files/files-table.tsx b/web/src/pages/files/files-table.tsx index 472ed0386f..339b21583f 100644 --- a/web/src/pages/files/files-table.tsx +++ 
b/web/src/pages/files/files-table.tsx @@ -41,6 +41,7 @@ import { formatDate } from '@/utils/date'; import { pick } from 'lodash'; import { useMemo } from 'react'; import { useTranslation } from 'react-i18next'; +import { useNavigate } from 'react-router'; import { ActionCell } from './action-cell'; import { useHandleConnectToKnowledge, useRenameCurrentFile } from './hooks'; import { KnowledgeCell } from './knowledge-cell'; @@ -49,6 +50,8 @@ import { UseMoveDocumentShowType } from './use-move-file'; import { useNavigateToOtherFolder } from './use-navigate-to-folder'; import { isFolderType, isKnowledgeBaseType } from './util'; +declare const __API_PROXY_SCHEME__: string; + type FilesTableProps = Pick< ReturnType<typeof useFetchFileList>, 'files' | 'loading' | 'pagination' | 'setPagination' | 'total' @@ -76,6 +79,7 @@ export function FilesTable({ keyPrefix: 'fileManager', }); const navigateToOtherFolder = useNavigateToOtherFolder(); + const navigate = useNavigate(); const { connectToKnowledgeVisible, hideConnectToKnowledgeModal, @@ -93,6 +97,44 @@ export function FilesTable({ fileRenameLoading, } = useRenameCurrentFile(); + // Check if skills feature is enabled (only in hybrid or go mode) + const isSkillsEnabled = useMemo(() => { + const scheme = + typeof __API_PROXY_SCHEME__ !== 'undefined' + ? __API_PROXY_SCHEME__ + : 'python'; + return scheme === 'hybrid' || scheme === 'go'; + }, []); + + // Sort files with skills folder first, then by time + // Filter out skills folder if not in hybrid/go mode + const sortedFiles = useMemo(() => { + if (!files) return []; + + // Filter out skills folder if feature is disabled + const filteredFiles = isSkillsEnabled + ? 
files + : files.filter((file) => { + const isSkills = + isFolderType(file.type) && file.name.toLowerCase() === 'skills'; + return !isSkills; + }); + + return [...filteredFiles].sort((a, b) => { + const aIsSkills = + isFolderType(a.type) && a.name.toLowerCase() === 'skills'; + const bIsSkills = + isFolderType(b.type) && b.name.toLowerCase() === 'skills'; + + // Skills folder always comes first + if (aIsSkills && !bIsSkills) return -1; + if (!aIsSkills && bIsSkills) return 1; + + // Then sort by create_time desc (newest first) + return (b.create_time || 0) - (a.create_time || 0); + }); + }, [files, isSkillsEnabled]); + const columns: ColumnDef<IFile>[] = [ { id: 'select', @@ -141,9 +183,12 @@ export function FilesTable({ const type = row.original.type; const id = row.original.id; const isFolder = isFolderType(type); + const isSkillsFolder = isFolder && name.toLowerCase() === 'skills'; const handleNameClick = () => { - if (isFolder) { + if (isSkillsFolder) { + navigate('/files/skills'); + } else if (isFolder) { navigateToOtherFolder(id); } }; @@ -156,7 +201,7 @@ export function FilesTable({ onClick={handleNameClick} className="max-w-full p-0 flex justify-start gap-2 text-text-primary" > - <FileIcon name={name} type={type} /> + <FileIcon name={name} type={isSkillsFolder ? 
/**
 * Lower-cased file extension -> syntax-highlighter language id.
 * Extensions that are not listed are passed through unchanged by `getLang`.
 */
const EXT_LANG: Record<string, string> = {
  ts: 'typescript',
  tsx: 'tsx',
  js: 'javascript',
  jsx: 'jsx',
  py: 'python',
  rs: 'rust',
  go: 'go',
  rb: 'ruby',
  java: 'java',
  kt: 'kotlin',
  swift: 'swift',
  c: 'c',
  cpp: 'cpp',
  h: 'c',
  hpp: 'cpp',
  cs: 'csharp',
  css: 'css',
  scss: 'scss',
  less: 'less',
  html: 'html',
  xml: 'xml',
  json: 'json',
  yaml: 'yaml',
  yml: 'yaml',
  toml: 'toml',
  sh: 'bash',
  bash: 'bash',
  zsh: 'bash',
  sql: 'sql',
  dockerfile: 'docker',
  lua: 'lua',
  r: 'r',
  dart: 'dart',
  php: 'php',
  pl: 'perl',
  ex: 'elixir',
  exs: 'elixir',
  erl: 'erlang',
  hs: 'haskell',
  vim: 'vim',
  ini: 'ini',
  cfg: 'ini',
};

/**
 * Resolve the highlighting language for a file name or a `/`-separated path.
 *
 * Only the last path segment is inspected, so `docker/Dockerfile` and
 * `build/Makefile` are recognised the same way as the bare names.
 *
 * @param filename File name or relative path (case-insensitive).
 * @returns The mapped language id, the raw extension when it is not in
 *   `EXT_LANG`, or `'text'` when the name has no extension at all.
 */
const getLang = (filename: string): string => {
  const base = filename.toLowerCase().split('/').pop() ?? '';

  if (base === 'dockerfile' || base.startsWith('dockerfile.')) return 'docker';
  if (base === 'makefile' || base === 'gnumakefile') return 'makefile';

  // No dot means no extension: do not report the whole name as a language.
  const dot = base.lastIndexOf('.');
  if (dot === -1) return 'text';

  const ext = base.slice(dot + 1);
  if (!ext) return 'text';

  // Own-property check keeps inherited keys such as `constructor` or
  // `toString` from leaking a non-string value out of the lookup.
  const mapped = Object.prototype.hasOwnProperty.call(EXT_LANG, ext)
    ? EXT_LANG[ext]
    : undefined;
  return mapped ?? ext;
};
import { useIsDarkTheme } from '@/components/theme-provider';
import { Badge } from '@/components/ui/badge';
import React, { memo, useMemo } from 'react';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
import {
  oneDark,
  oneLight,
} from 'react-syntax-highlighter/dist/esm/styles/prism';

interface CodeViewerProps {
  /** Full text of the file to display. */
  content: string;
  /** File name (or path); shown in the header and used to pick the language. */
  filename: string;
}

/** Format a byte count as `B`, `KB` or `MB` with one decimal place. */
const formatSize = (bytes: number): string => {
  if (bytes < 1024) return `${bytes} B`;
  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
};

/**
 * Read-only source viewer: a header with file name, language badge and
 * line/size statistics, followed by the syntax-highlighted content.
 */
const CodeViewer: React.FC<CodeViewerProps> = ({ content, filename }) => {
  const isDarkTheme = useIsDarkTheme();
  const language = getLang(filename);

  const { lineCount, byteSize } = useMemo(
    () => ({
      lineCount: content.split('\n').length,
      // `content.length` counts UTF-16 code units, not bytes; encode to UTF-8
      // so the figure labelled B/KB/MB is correct for non-ASCII text.
      byteSize: new TextEncoder().encode(content).length,
    }),
    [content],
  );

  return (
    <div>
      {/* File Header */}
      <div className="flex items-center justify-between px-4 py-3 border-b bg-background">
        <span className="font-semibold">{filename}</span>
        <div className="flex items-center gap-2">
          <Badge variant="secondary">{language}</Badge>
          <span className="text-xs text-muted-foreground">
            {lineCount} lines | {formatSize(byteSize)}
          </span>
        </div>
      </div>

      {/* Code Content */}
      <div className="bg-bg-component">
        <SyntaxHighlighter
          language={language}
          style={isDarkTheme ? oneDark : oneLight}
          showLineNumbers
          lineNumberStyle={{ minWidth: 40, paddingRight: 16 }}
          customStyle={{
            margin: 0,
            padding: '16px',
            fontSize: 13,
            lineHeight: 1.6,
            backgroundColor: 'transparent',
          }}
        >
          {content || '// Empty file'}
        </SyntaxHighlighter>
      </div>
    </div>
  );
};

export default memo(CodeViewer);
import { Button } from '@/components/ui/button';
import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from '@/components/ui/dialog';
import { Input } from '@/components/ui/input';
import React from 'react';
import { useTranslation } from 'react-i18next';

interface CreateSpaceDialogProps {
  open: boolean;
  onOpenChange: (open: boolean) => void;
  /** Current value of the name field (state is owned by the parent). */
  spaceInput: string;
  onSpaceInputChange: (value: string) => void;
  /** Invoked when the user confirms with a non-blank name. */
  onCreate: () => void;
}

const SPACE_NAME_INPUT_ID = 'create-skill-space-name';

/**
 * Controlled dialog asking for the name of a new skill space.
 *
 * English fallbacks are passed as `defaultValue`: i18next returns the key
 * itself for a missing translation, so `t(key) || 'fallback'` never reaches
 * the fallback.
 */
export const CreateSpaceDialog: React.FC<CreateSpaceDialogProps> = ({
  open,
  onOpenChange,
  spaceInput,
  onSpaceInputChange,
  onCreate,
}) => {
  const { t } = useTranslation();
  const canCreate = spaceInput.trim().length > 0;

  const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
    // Enter that confirms an IME composition (e.g. pinyin) must not submit.
    if (e.key !== 'Enter' || e.nativeEvent.isComposing) return;
    if (canCreate) onCreate();
  };

  const handleCancel = () => {
    onOpenChange(false);
    onSpaceInputChange('');
  };

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className="sm:max-w-[425px]">
        <DialogHeader>
          <DialogTitle>
            {t('skills.createSpaceTitle', {
              defaultValue: 'Create New Skill Space',
            })}
          </DialogTitle>
          <DialogDescription>
            {t('skills.createSpaceDescription', {
              defaultValue:
                'Create a new space to organize and manage your skills.',
            })}
          </DialogDescription>
        </DialogHeader>
        <div className="py-4">
          <label
            htmlFor={SPACE_NAME_INPUT_ID}
            className="text-sm font-medium mb-2 block"
          >
            {t('skills.spaceName', { defaultValue: 'Space Name' })}
          </label>
          <Input
            id={SPACE_NAME_INPUT_ID}
            placeholder={t('skills.spaceNamePlaceholder', {
              defaultValue: 'e.g., my-space',
            })}
            value={spaceInput}
            onChange={(e) => onSpaceInputChange(e.target.value)}
            onKeyDown={handleKeyDown}
          />
        </div>
        <DialogFooter>
          <Button variant="outline" onClick={handleCancel}>
            {t('common.cancel')}
          </Button>
          <Button onClick={onCreate} disabled={!canCreate}>
            {t('common.create')}
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
};
import { Button } from '@/components/ui/button';
import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from '@/components/ui/dialog';
import React from 'react';
import { useTranslation } from 'react-i18next';

interface DeleteSelectedSpacesDialogProps {
  open: boolean;
  onOpenChange: (open: boolean) => void;
  /** Number of spaces currently selected; interpolated into the message. */
  selectedCount: number;
  /** Invoked when the user confirms the bulk deletion. */
  onDelete: () => void;
}

/**
 * Confirmation dialog for deleting several skill spaces at once.
 *
 * English fallbacks are passed as `defaultValue` because i18next returns the
 * key for a missing translation, which makes a `t(key) || '…'` fallback
 * unreachable.
 */
export const DeleteSelectedSpacesDialog: React.FC<
  DeleteSelectedSpacesDialogProps
> = ({ open, onOpenChange, selectedCount, onDelete }) => {
  const { t } = useTranslation();

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className="sm:max-w-[425px]">
        <DialogHeader>
          <DialogTitle>
            {t('skills.deleteSelectedTitle', {
              defaultValue: 'Delete Selected Spaces',
            })}
          </DialogTitle>
          <DialogDescription>
            {t('skills.deleteSelectedDescription', {
              count: selectedCount,
              defaultValue:
                'Are you sure you want to delete {{count}} selected spaces? This action cannot be undone.',
            })}
          </DialogDescription>
        </DialogHeader>
        <DialogFooter>
          <Button variant="outline" onClick={() => onOpenChange(false)}>
            {t('common.cancel')}
          </Button>
          <Button variant="destructive" onClick={onDelete}>
            {t('common.delete')}
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
};

export default DeleteSelectedSpacesDialog;
import { Button } from '@/components/ui/button';
import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from '@/components/ui/dialog';
import React from 'react';
import { useTranslation } from 'react-i18next';
import type { SkillSpace } from '../types';

interface DeleteSpaceDialogProps {
  open: boolean;
  onOpenChange: (open: boolean) => void;
  /** Space pending deletion; its name is shown for confirmation. */
  spaceToDelete: SkillSpace | null;
  /** Invoked when the user confirms the deletion. */
  onDelete: () => void;
}

/**
 * Confirmation dialog for deleting a single skill space.
 *
 * English fallbacks are passed as `defaultValue` because i18next returns the
 * key for a missing translation, which makes a `t(key) || '…'` fallback
 * unreachable.
 */
export const DeleteSpaceDialog: React.FC<DeleteSpaceDialogProps> = ({
  open,
  onOpenChange,
  spaceToDelete,
  onDelete,
}) => {
  const { t } = useTranslation();

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className="sm:max-w-[425px]">
        <DialogHeader>
          <DialogTitle>
            {t('skills.deleteSpaceTitle', {
              defaultValue: 'Delete Skill Space',
            })}
          </DialogTitle>
          <DialogDescription>
            {t('skills.deleteSpaceDescription', {
              defaultValue:
                'Are you sure you want to delete this skill space? This action cannot be undone and all skills in this space will be permanently deleted.',
            })}
          </DialogDescription>
        </DialogHeader>
        <div className="py-4">
          <p className="text-sm text-text-secondary">
            {t('skills.deleteSpaceName', { defaultValue: 'Space name' })}:{' '}
            <strong>{spaceToDelete?.name}</strong>
          </p>
        </div>
        <DialogFooter>
          <Button variant="outline" onClick={() => onOpenChange(false)}>
            {t('common.cancel')}
          </Button>
          <Button variant="destructive" onClick={onDelete}>
            {t('common.delete')}
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
};

export default DeleteSpaceDialog;
/**
 * Strip a leading YAML frontmatter section from markdown text.
 *
 * The section is recognised only when the first line is `---` (surrounding
 * whitespace ignored) and a later line closes it with another `---`.
 * Everything after the closing line is returned; input without a complete
 * frontmatter section is returned unchanged.
 */
const removeFrontmatter = (content: string): string => {
  const rows = content.split('\n');
  const isFence = (row: string | undefined): boolean => row?.trim() === '---';

  if (!isFence(rows[0])) {
    return content;
  }

  for (let i = 1; i < rows.length; i += 1) {
    if (isFence(rows[i])) {
      return rows.slice(i + 1).join('\n');
    }
  }

  // Opening fence without a closing one: not frontmatter, keep as-is.
  return content;
};
import { useIsDarkTheme } from '@/components/theme-provider';
import React, { memo } from 'react';
import ReactMarkdown from 'react-markdown';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
import {
  oneDark,
  oneLight,
} from 'react-syntax-highlighter/dist/esm/styles/prism';
import remarkGfm from 'remark-gfm';

interface MarkdownViewerProps {
  /** Raw markdown text; a leading YAML frontmatter section is not rendered. */
  content: string;
}

const INLINE_CODE_CLASS =
  'bg-bg-elevated text-text-primary px-1.5 py-0.5 rounded font-mono text-sm';

/**
 * Renders skill markdown (GFM) with the app's typography. Fenced code blocks
 * that declare a language are syntax-highlighted.
 */
const MarkdownViewer: React.FC<MarkdownViewerProps> = ({ content }) => {
  const isDarkTheme = useIsDarkTheme();
  const cleanContent = removeFrontmatter(content);

  return (
    <div className="markdown-body max-w-[900px] mx-auto">
      <ReactMarkdown
        remarkPlugins={[remarkGfm]}
        components={{
          h1: ({ children }) => (
            <h1 className="text-3xl font-bold mb-2 text-text-primary">
              {children}
            </h1>
          ),
          h2: ({ children }) => (
            <h2 className="text-2xl font-bold mt-4 mb-2 text-text-primary">
              {children}
            </h2>
          ),
          h3: ({ children }) => (
            <h3 className="text-xl font-bold mt-4 mb-2 text-text-primary">
              {children}
            </h3>
          ),
          h4: ({ children }) => (
            <h4 className="text-lg font-bold mt-4 mb-2 text-text-primary">
              {children}
            </h4>
          ),
          p: ({ children }) => (
            <p className="text-text-primary mb-2 leading-relaxed">{children}</p>
          ),
          code: ({ className, children }) => {
            const match = /language-(\w+)/.exec(className || '');
            const language = match ? match[1] : '';

            if (language) {
              return (
                <SyntaxHighlighter
                  style={isDarkTheme ? oneDark : oneLight}
                  language={language}
                  PreTag="div"
                  customStyle={{
                    backgroundColor: 'var(--bg-component)',
                    borderRadius: '8px',
                    marginBottom: '1em',
                  }}
                >
                  {String(children).replace(/\n$/, '')}
                </SyntaxHighlighter>
              );
            }

            // `className` is undefined for plain inline code; interpolating
            // it into a template string would emit a literal "undefined"
            // class, so only join the parts that are actually present.
            const inlineClass = [className, INLINE_CODE_CLASS]
              .filter(Boolean)
              .join(' ');

            return <code className={inlineClass}>{children}</code>;
          },
          img: ({ src, alt }) => (
            <img src={src} alt={alt} className="max-w-full h-auto rounded" />
          ),
          table: ({ children }) => (
            <table className="w-full border-collapse mb-4">{children}</table>
          ),
          th: ({ children }) => (
            <th className="border border-border-secondary px-3 py-2 bg-bg-elevated font-semibold text-text-primary text-left">
              {children}
            </th>
          ),
          td: ({ children }) => (
            <td className="border border-border-secondary px-3 py-2 text-text-primary">
              {children}
            </td>
          ),
          li: ({ children }) => (
            <li className="text-text-primary">{children}</li>
          ),
          a: ({ children, href }) => (
            <a href={href} className="text-accent-primary hover:underline">
              {children}
            </a>
          ),
          blockquote: ({ children }) => (
            <blockquote className="border-l-4 border-border-secondary pl-4 italic text-text-secondary my-4">
              {children}
            </blockquote>
          ),
          hr: () => <hr className="border-border-secondary my-4" />,
          pre: ({ children }) => (
            <pre className="bg-bg-elevated rounded-lg p-4 overflow-x-auto mb-4">
              {children}
            </pre>
          ),
          ul: ({ children }) => (
            <ul className="list-disc list-inside mb-4 text-text-primary">
              {children}
            </ul>
          ),
          ol: ({ children }) => (
            <ol className="list-decimal list-inside mb-4 text-text-primary">
              {children}
            </ol>
          ),
          strong: ({ children }) => (
            <strong className="font-bold text-text-primary">{children}</strong>
          ),
          em: ({ children }) => (
            <em className="italic text-text-primary">{children}</em>
          ),
        }}
      >
        {cleanContent}
      </ReactMarkdown>
    </div>
  );
};

export default memo(MarkdownViewer);
import { Button } from '@/components/ui/button';
import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from '@/components/ui/dialog';
import { Input } from '@/components/ui/input';
import React from 'react';
import { useTranslation } from 'react-i18next';
import type { SkillSpace } from '../types';

interface RenameSpaceDialogProps {
  open: boolean;
  onOpenChange: (open: boolean) => void;
  /** Space being renamed; its current name is used to detect "no change". */
  spaceToRename: SkillSpace | null;
  /** Current value of the name field (state is owned by the parent). */
  renameSpaceInput: string;
  onRenameInputChange: (value: string) => void;
  /** Invoked when the user confirms a non-blank, changed name. */
  onRename: () => void;
}

const RENAME_INPUT_ID = 'rename-skill-space-name';

/**
 * Controlled dialog for renaming a skill space.
 *
 * The Save button and the Enter key share one `canRename` guard, so Enter
 * cannot submit a blank or unchanged name that the button would reject.
 * English fallbacks are passed as `defaultValue` because i18next returns the
 * key for a missing translation, which makes `t(key) || '…'` unreachable.
 */
export const RenameSpaceDialog: React.FC<RenameSpaceDialogProps> = ({
  open,
  onOpenChange,
  spaceToRename,
  renameSpaceInput,
  onRenameInputChange,
  onRename,
}) => {
  const { t } = useTranslation();

  const trimmedName = renameSpaceInput.trim();
  const canRename =
    trimmedName.length > 0 && trimmedName !== spaceToRename?.name;

  const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
    // Enter that confirms an IME composition (e.g. pinyin) must not submit.
    if (e.key !== 'Enter' || e.nativeEvent.isComposing) return;
    if (canRename) onRename();
  };

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className="sm:max-w-[425px]">
        <DialogHeader>
          <DialogTitle>
            {t('skills.renameSpaceTitle', {
              defaultValue: 'Rename Skill Space',
            })}
          </DialogTitle>
          <DialogDescription>
            {t('skills.renameSpaceDescription', {
              defaultValue: 'Enter a new name for this skill space.',
            })}
          </DialogDescription>
        </DialogHeader>
        <div className="py-4">
          <label
            htmlFor={RENAME_INPUT_ID}
            className="text-sm font-medium mb-2 block"
          >
            {t('skills.spaceName', { defaultValue: 'Space Name' })}
          </label>
          <Input
            id={RENAME_INPUT_ID}
            placeholder={t('skills.spaceNamePlaceholder', {
              defaultValue: 'e.g., my-space',
            })}
            value={renameSpaceInput}
            onChange={(e) => onRenameInputChange(e.target.value)}
            onKeyDown={handleKeyDown}
          />
        </div>
        <DialogFooter>
          <Button variant="outline" onClick={() => onOpenChange(false)}>
            {t('common.cancel')}
          </Button>
          <Button onClick={onRename} disabled={!canRename}>
            {t('common.save', { defaultValue: 'Save' })}
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
};

export default RenameSpaceDialog;
import {
  SelectWithSearch,
  SelectWithSearchFlagOptionType,
} from '@/components/originui/select-with-search';
import { Button } from '@/components/ui/button';
import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from '@/components/ui/dialog';
import { Form } from '@/components/ui/form';
import { Input } from '@/components/ui/input';
import { Label } from '@/components/ui/label';
import message from '@/components/ui/message';
import { Slider } from '@/components/ui/slider';
import { Switch } from '@/components/ui/switch';
import { LlmModelType } from '@/constants/knowledge';
import { useSelectLlmOptionsByModelType } from '@/hooks/use-llm-request';
import { SkillSearchConfig } from '@/services/skill-space-service';
import { useEffect, useMemo, useState } from 'react';
import { useForm } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import type {
  FieldConfig,
  FieldWeight,
  SearchConfigModalProps,
} from '../types';

// Use SearchConfig as alias for SkillSearchConfig for backward compatibility
type SearchConfig = SkillSearchConfig;

const defaultFieldConfig: FieldConfig = {
  name: { enabled: true, weight: 3.0 },
  tags: { enabled: true, weight: 2.0 },
  description: { enabled: true, weight: 1.0 },
  content: { enabled: false, weight: 0.5 },
};

const defaultConfig: SearchConfig = {
  id: '',
  tenant_id: '',
  space_id: '',
  embd_id: '',
  vector_similarity_weight: 0.3,
  similarity_threshold: 0.2,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- mirrors the service type
  field_config: defaultFieldConfig as Record<string, any>,
  rerank_id: undefined,
  tenant_rerank_id: undefined,
  top_k: 10,
  index_version: '',
  status: '',
};

// Limits mirrored from the `min`/`max` attributes of the numeric inputs.
// Those attributes do not constrain typed values, so they are enforced here.
const TOP_K_MIN = 1;
const TOP_K_MAX = 100;
const TOP_K_DEFAULT = 10;
const WEIGHT_MIN = 0;
const WEIGHT_MAX = 10;

/** One row of the "index fields" section. */
const FIELD_ROWS: ReadonlyArray<{
  field: keyof FieldConfig;
  labelKey: string;
  descKey: string;
}> = [
  {
    field: 'name',
    labelKey: 'skillSearch.fieldName',
    descKey: 'skillSearch.fieldNameDesc',
  },
  {
    field: 'tags',
    labelKey: 'skillSearch.fieldTags',
    descKey: 'skillSearch.fieldTagsDesc',
  },
  {
    field: 'description',
    labelKey: 'skillSearch.fieldDescription',
    descKey: 'skillSearch.fieldDescriptionDesc',
  },
  {
    field: 'content',
    labelKey: 'skillSearch.fieldContent',
    descKey: 'skillSearch.fieldContentDesc',
  },
];

/** Restrict `value` to the inclusive range `[min, max]`. */
const clamp = (value: number, min: number, max: number): number =>
  Math.min(max, Math.max(min, value));

/**
 * Merge a saved `field_config` over the defaults one field at a time, so an
 * entry that only carries `weight` (or only `enabled`) keeps the default for
 * the property it omits instead of leaving it undefined.
 */
const mergeFieldConfig = (
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- mirrors the service type
  saved?: Record<string, any> | null,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- mirrors the service type
): Record<string, any> => {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- mirrors the service type
  const merged: Record<string, any> = { ...defaultFieldConfig, ...saved };
  for (const { field } of FIELD_ROWS) {
    merged[field] = { ...defaultFieldConfig[field], ...saved?.[field] };
  }
  return merged;
};

/**
 * Modal for editing a skill space's search configuration: embedding model,
 * keyword/vector balance, similarity threshold, top-K and per-field index
 * weights. Saving requires an embedding model; the dialog closes only when
 * `onSave` resolves truthy.
 */
export const SearchConfigModal: React.FC<SearchConfigModalProps> = ({
  open,
  onOpenChange,
  config,
  onSave,
  onReindex,
  loading = false,
}) => {
  const { t } = useTranslation();

  const form = useForm<SearchConfig>({
    defaultValues: defaultConfig,
  });
  const { setValue, watch, handleSubmit, reset } = form;

  const formData = watch();
  const [saving, setSaving] = useState(false);
  const [reindexing, setReindexing] = useState(false);

  // Get embedding model options from user's configured LLMs
  const llmOptions = useSelectLlmOptionsByModelType();
  const embeddingModelOptions = useMemo(() => {
    return (llmOptions[LlmModelType.Embedding] ??
      []) as SelectWithSearchFlagOptionType[];
  }, [llmOptions]);

  // Re-seed the form every time the dialog opens so stale edits are dropped.
  useEffect(() => {
    if (!open) return;
    if (config) {
      reset({
        ...defaultConfig,
        ...config,
        field_config: mergeFieldConfig(config.field_config),
      });
    } else {
      reset(defaultConfig);
    }
  }, [open, config, reset]);

  const handleSave = handleSubmit(async (data) => {
    if (!data.embd_id) {
      message.error(t('skillSearch.pleaseSelectEmbeddingModel'));
      return;
    }
    setSaving(true);
    try {
      const success = await onSave(data);
      if (success) {
        onOpenChange(false);
      }
    } finally {
      setSaving(false);
    }
  });

  const handleReindex = async () => {
    if (!onReindex) return;
    const currentEmbdId = form.getValues('embd_id');
    if (!currentEmbdId) {
      message.error(t('skillSearch.pleaseSelectEmbeddingModel'));
      return;
    }
    setReindexing(true);
    try {
      await onReindex(currentEmbdId);
    } finally {
      setReindexing(false);
    }
  };

  const updateFieldWeight = (
    field: keyof FieldConfig,
    updates: Partial<FieldWeight>,
  ) => {
    const currentFieldConfig = form.getValues('field_config');
    setValue(
      'field_config',
      {
        ...currentFieldConfig,
        [field]: {
          ...currentFieldConfig[field],
          ...updates,
        },
      },
      { shouldDirty: true },
    );
  };

  const getSearchTypeLabel = (weight: number) => {
    if (weight === 0) return t('skillSearch.pureKeyword');
    if (weight === 1) return t('skillSearch.pureVector');
    return `${t('skillSearch.hybrid')} (${Math.round((1 - weight) * 100)}% ${t('skillSearch.keyword')} + ${Math.round(weight * 100)}% ${t('skillSearch.vector')})`;
  };

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className="max-w-2xl max-h-[90vh] overflow-y-auto">
        <DialogHeader>
          <DialogTitle>{t('skillSearch.configTitle')}</DialogTitle>
          <DialogDescription>{t('skillSearch.configDesc')}</DialogDescription>
        </DialogHeader>

        <Form {...form}>
          <div className="space-y-6 py-4">
            {/* Embedding Model */}
            <div className="space-y-2">
              <Label htmlFor="embd_id">{t('skillSearch.embeddingModel')}</Label>
              <SelectWithSearch
                value={formData.embd_id}
                onChange={(value) =>
                  setValue('embd_id', value, { shouldDirty: true })
                }
                options={embeddingModelOptions}
                placeholder={t('skillSearch.embeddingModelPlaceholder')}
              />
            </div>

            {/* Hybrid Search Weight */}
            <div className="space-y-4">
              <div className="flex justify-between items-center">
                <Label>{t('skillSearch.vectorSimilarityWeight')}</Label>
                <span className="text-sm text-muted-foreground">
                  {getSearchTypeLabel(formData.vector_similarity_weight)}
                </span>
              </div>
              <Slider
                value={[formData.vector_similarity_weight]}
                onValueChange={([value]) =>
                  setValue('vector_similarity_weight', value, {
                    shouldDirty: true,
                  })
                }
                min={0}
                max={1}
                step={0.1}
              />
              <div className="flex justify-between text-xs text-muted-foreground">
                <span>{t('skillSearch.keywordOnly')}</span>
                <span>{t('skillSearch.balanced')}</span>
                <span>{t('skillSearch.vectorOnly')}</span>
              </div>
            </div>

            {/* Similarity Threshold */}
            <div className="space-y-4">
              <div className="flex justify-between items-center">
                <Label>{t('skillSearch.similarityThreshold')}</Label>
                <span className="text-sm text-muted-foreground">
                  {/* The slider step is 0.05, so two decimals are needed to
                      show the stored value rather than a rounded one. */}
                  {formData.similarity_threshold.toFixed(2)}
                </span>
              </div>
              <Slider
                value={[formData.similarity_threshold]}
                onValueChange={([value]) =>
                  setValue('similarity_threshold', value, { shouldDirty: true })
                }
                min={0}
                max={1}
                step={0.05}
              />
            </div>

            {/* Top K */}
            <div className="space-y-2">
              <Label htmlFor="top_k">{t('skillSearch.topK')}</Label>
              <Input
                id="top_k"
                type="number"
                min={TOP_K_MIN}
                max={TOP_K_MAX}
                value={formData.top_k}
                onChange={(e) => {
                  const parsed = Number.parseInt(e.target.value, 10);
                  setValue(
                    'top_k',
                    Number.isNaN(parsed)
                      ? TOP_K_DEFAULT
                      : clamp(parsed, TOP_K_MIN, TOP_K_MAX),
                    { shouldDirty: true },
                  );
                }}
              />
            </div>

            {/* Field Configuration */}
            <div className="space-y-4">
              <Label className="text-base font-medium">
                {t('skillSearch.indexFields')}
              </Label>
              <p className="text-sm text-muted-foreground">
                {t('skillSearch.indexFieldsDesc')}
              </p>

              {FIELD_ROWS.map(({ field, labelKey, descKey }) => {
                const fieldState = formData.field_config?.[field];
                const enabled = Boolean(fieldState?.enabled);

                return (
                  <div
                    key={String(field)}
                    className="flex items-center justify-between p-3 border rounded-lg"
                  >
                    <div className="flex items-center gap-3">
                      <Switch
                        checked={enabled}
                        onCheckedChange={(checked) =>
                          updateFieldWeight(field, { enabled: checked })
                        }
                      />
                      <div>
                        <p className="font-medium">{t(labelKey)}</p>
                        <p className="text-xs text-muted-foreground">
                          {t(descKey)}
                        </p>
                      </div>
                    </div>
                    <div className="flex items-center gap-2">
                      <span className="text-sm text-muted-foreground">
                        {t('skillSearch.weight')}:
                      </span>
                      <Input
                        type="number"
                        step={0.1}
                        min={WEIGHT_MIN}
                        max={WEIGHT_MAX}
                        value={fieldState?.weight ?? 0}
                        onChange={(e) =>
                          updateFieldWeight(field, {
                            weight: clamp(
                              Number.parseFloat(e.target.value) || 0,
                              WEIGHT_MIN,
                              WEIGHT_MAX,
                            ),
                          })
                        }
                        className="w-20"
                        disabled={!enabled}
                      />
                    </div>
                  </div>
                );
              })}
            </div>

            <DialogFooter className="gap-2">
              {onReindex && (
                <Button
                  variant="outline"
                  onClick={handleReindex}
                  disabled={reindexing || loading}
                >
                  {reindexing
                    ? t('skillSearch.reindexing')
                    : t('skillSearch.reindex')}
                </Button>
              )}
              <Button variant="outline" onClick={() => onOpenChange(false)}>
                {t('common.cancel')}
              </Button>
              <Button onClick={handleSave} disabled={saving || loading}>
                {saving ? t('common.saving') : t('common.save')}
              </Button>
            </DialogFooter>
          </div>
        </Form>
      </DialogContent>
    </Dialog>
  );
};

export default SearchConfigModal;
import SvgIcon from '@/components/svg-icon';
import {
  AlertDialog,
  AlertDialogAction,
  AlertDialogCancel,
  AlertDialogContent,
  AlertDialogDescription,
  AlertDialogFooter,
  AlertDialogHeader,
  AlertDialogTitle,
  AlertDialogTrigger,
} from '@/components/ui/alert-dialog';
import { Badge } from '@/components/ui/badge';
import { Button } from '@/components/ui/button';
import { Card } from '@/components/ui/card';
import {
  Tooltip,
  TooltipContent,
  TooltipProvider,
  TooltipTrigger,
} from '@/components/ui/tooltip';
import { Eye, Tag, Trash2 } from 'lucide-react';
import React, { memo } from 'react';
import { useTranslation } from 'react-i18next';
import type { Skill } from '../types';

interface SkillCardProps {
  skill: Skill;
  /** Open the skill (card click or the eye button). */
  onView: (skill: Skill) => void;
  /** Delete the skill once the confirmation dialog is accepted. */
  onDelete: (skillId: string, skillName: string, folderId?: string) => void;
  /** Formats `skill.updated_at` for display. */
  formatRelative: (timestamp: number) => string;
}

/** Maximum number of tag badges shown before collapsing into "+N". */
const MAX_VISIBLE_TAGS = 4;

/**
 * Card summarising one skill: name, description, tags, file count, version
 * and last-update time, with view and delete actions.
 *
 * All user-visible strings go through i18next; `defaultValue` supplies the
 * English text for keys that may not exist in every locale.
 */
const SkillCard: React.FC<SkillCardProps> = ({
  skill,
  onView,
  onDelete,
  formatRelative,
}) => {
  const { t } = useTranslation();

  // `_folderId` is read off the skill object although the `as any` cast in
  // the original suggests `Skill` does not declare it; narrow once here.
  const folderId = (skill as Skill & { _folderId?: string })._folderId;
  const fileCount = skill.files.filter((f) => !f.is_dir).length;
  // Shown as "..." below: no files listed yet, but a folder id is present.
  const filesLoading = skill.files.length === 0 && Boolean(folderId);
  const tags = skill.metadata?.tags ?? [];

  let filesLabel = '';
  if (filesLoading) {
    filesLabel = '...';
  } else if (fileCount > 0) {
    filesLabel = t('skills.filesCount', {
      count: fileCount,
      defaultValue: '{{count}} files',
    });
  }

  return (
    <TooltipProvider>
      <Card
        className="cursor-pointer hover:shadow-md transition-all bg-bg-card border border-border rounded-xl p-4"
        onClick={() => onView(skill)}
      >
        <div className="flex gap-4">
          <div className="flex-shrink-0 mt-1">
            <SvgIcon name="home-icon/skill-folder" width={24} height={24} />
          </div>

          <div className="flex-1 min-w-0">
            <div className="flex justify-between items-start">
              <h5 className="font-semibold text-base m-0 mb-2 truncate pr-2">
                {skill.name}
              </h5>

              {/* Stop clicks on the actions (including those bubbling out of
                  the portalled dialog) from triggering the card's onClick. */}
              <div
                className="flex items-center gap-1"
                onClick={(e) => e.stopPropagation()}
              >
                <Tooltip>
                  <TooltipTrigger asChild>
                    <Button
                      variant="ghost"
                      size="icon-xs"
                      onClick={(e: React.MouseEvent) => {
                        e.stopPropagation();
                        onView(skill);
                      }}
                    >
                      <Eye className="size-4" />
                    </Button>
                  </TooltipTrigger>
                  <TooltipContent>
                    {t('common.view', { defaultValue: 'View' })}
                  </TooltipContent>
                </Tooltip>

                <AlertDialog>
                  <Tooltip>
                    <TooltipTrigger asChild>
                      <AlertDialogTrigger asChild>
                        <Button
                          variant="ghost"
                          size="icon-xs"
                          onClick={(e: React.MouseEvent) => e.stopPropagation()}
                        >
                          <Trash2 className="size-4 text-state-error" />
                        </Button>
                      </AlertDialogTrigger>
                    </TooltipTrigger>
                    <TooltipContent>
                      {t('common.delete', { defaultValue: 'Delete' })}
                    </TooltipContent>
                  </Tooltip>
                  <AlertDialogContent>
                    <AlertDialogHeader>
                      <AlertDialogTitle>
                        {t('skills.deleteSkillTitle', {
                          defaultValue: 'Delete Skill',
                        })}
                      </AlertDialogTitle>
                      <AlertDialogDescription>
                        {t('skills.deleteSkillDescription', {
                          defaultValue:
                            'Are you sure you want to delete this skill? This action cannot be undone.',
                        })}
                      </AlertDialogDescription>
                    </AlertDialogHeader>
                    <AlertDialogFooter>
                      <AlertDialogCancel>
                        {t('common.cancel', { defaultValue: 'Cancel' })}
                      </AlertDialogCancel>
                      <AlertDialogAction
                        onClick={() => onDelete(skill.id, skill.name, folderId)}
                        className="bg-state-error hover:bg-state-error/90"
                      >
                        {t('common.delete', { defaultValue: 'Delete' })}
                      </AlertDialogAction>
                    </AlertDialogFooter>
                  </AlertDialogContent>
                </AlertDialog>
              </div>
            </div>

            {skill.description && (
              <p className="text-text-secondary text-sm mb-3 line-clamp-2">
                {skill.description}
              </p>
            )}

            <div className="flex flex-wrap gap-1 mb-2">
              {tags.slice(0, MAX_VISIBLE_TAGS).map((tag) => (
                <Badge key={tag} variant="secondary">
                  {tag}
                </Badge>
              ))}
              {tags.length > MAX_VISIBLE_TAGS && (
                <Badge variant="secondary">
                  +{tags.length - MAX_VISIBLE_TAGS}
                </Badge>
              )}
            </div>

            <div className="flex justify-between items-center mt-2">
              <span className="text-text-secondary text-xs">{filesLabel}</span>

              <div className="flex items-center gap-2">
                {skill.metadata?.version && (
                  <Badge variant="outline" className="text-xs">
                    <Tag className="size-3 mr-1" />v{skill.metadata.version}
                  </Badge>
                )}
                <span className="text-text-secondary text-xs">
                  {formatRelative(skill.updated_at)}
                </span>
              </div>
            </div>
          </div>
        </div>
      </Card>
    </TooltipProvider>
  );
};

export default memo(SkillCard);
`${fileCount} files` + : ''} + </span> + + <div className="flex items-center gap-2"> + {skill.metadata?.version && ( + <Badge variant="outline" className="text-xs"> + <Tag className="size-3 mr-1" />v{skill.metadata.version} + </Badge> + )} + <span className="text-text-secondary text-xs"> + {formatRelative(skill.updated_at)} + </span> + </div> + </div> + </div> + </div> + </Card> + </TooltipProvider> + ); +}; + +export default memo(SkillCard); diff --git a/web/src/pages/skills/components/skill-detail.tsx b/web/src/pages/skills/components/skill-detail.tsx new file mode 100644 index 0000000000..c378a0cb0f --- /dev/null +++ b/web/src/pages/skills/components/skill-detail.tsx @@ -0,0 +1,505 @@ +import { Badge } from '@/components/ui/badge'; +import { Button } from '@/components/ui/button'; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from '@/components/ui/dropdown-menu'; +import { Spin } from '@/components/ui/spin'; +import { TreeDataItem, TreeView } from '@/components/ui/tree-view'; +import { + ArrowBigLeft, + ChevronDown, + FileCode, + FileText, + FolderOpen, + GitBranch, + Tag, +} from 'lucide-react'; +import React, { memo, useCallback, useEffect, useMemo, useState } from 'react'; +import { useTranslation } from 'react-i18next'; +import { isMarkdownFile } from '../hooks'; +import type { Skill, SkillFileEntry } from '../types'; +import CodeViewer from './code-viewer'; +import MarkdownViewer from './markdown-viewer'; + +interface SkillDetailProps { + skill: Skill | null; + open: boolean; + onClose: () => void; + getFileContent: ( + skillId: string, + filePath: string, + version?: string, + skillObj?: Skill, + ) => Promise<string | null>; + getVersionFiles?: ( + skillId: string, + version: string, + skillObj?: Skill, + ) => Promise<SkillFileEntry[]>; +} + +const getFileIcon = (filename: string, isDir: boolean) => { + if (isDir) return FolderOpen; + if (isMarkdownFile(filename)) return FileCode; + return FileText; +}; + 
+/**
+ * Builds the nested tree shown in the sidebar from a flat file listing.
+ *
+ * Siblings are ordered directories first, then by `name`. A node is created
+ * for every entry before any linking happens, so a nested directory is
+ * attached correctly even when it sorts ahead of its own parent (e.g. `z/a`
+ * before `z`). Entries whose parent path has no directory entry in `files`
+ * are left out of the tree.
+ *
+ * @param files Flat listing; `path` uses `/` as the separator.
+ * @returns The top-level nodes (entries whose path has no `/`).
+ */
+const buildFileTree = (files: SkillFileEntry[]): TreeDataItem[] => {
+  const root: TreeDataItem[] = [];
+  const nodesByPath = new Map<string, TreeDataItem>();
+
+  // Sort files: directories first, then alphabetically
+  const sortedFiles = [...files].sort((a, b) => {
+    if (a.is_dir !== b.is_dir) return a.is_dir ? -1 : 1;
+    return a.name.localeCompare(b.name);
+  });
+
+  // Pass 1: create every node up front so parent lookups do not depend on
+  // the iteration order.
+  const nodes = sortedFiles.map((file) => {
+    const parts = file.path.split('/');
+    const name = parts[parts.length - 1];
+
+    const node: TreeDataItem = {
+      name: name,
+      id: file.path,
+      icon: getFileIcon(name, file.is_dir),
+    };
+
+    if (file.is_dir) {
+      node.children = [];
+    }
+
+    nodesByPath.set(file.path, node);
+    return node;
+  });
+
+  // Pass 2: attach each node to its parent directory, or to the root when the
+  // path has no separator. Iterating in sorted order keeps sibling ordering.
+  sortedFiles.forEach((file, index) => {
+    const separatorIndex = file.path.lastIndexOf('/');
+    if (separatorIndex === -1) {
+      root.push(nodes[index]);
+      return;
+    }
+
+    const parent = nodesByPath.get(file.path.slice(0, separatorIndex));
+    if (parent?.children) {
+      parent.children.push(nodes[index]);
+    }
+  });
+
+  return root;
+};
+
+/**
+ * Full-screen viewer for one skill: a sidebar with metadata, the version list
+ * and the file tree, and a main pane that renders the selected file as
+ * Markdown or code. Renders nothing while `open` is false or `skill` is null.
+ */
+const SkillDetail: React.FC<SkillDetailProps> = ({
+  skill,
+  open,
+  onClose,
+  getFileContent,
+  getVersionFiles,
+}) => {
+  const { t } = useTranslation();
+  const [selectedFile, setSelectedFile] = useState<string | null>(null);
+  const [fileContent, setFileContent] = useState<string>('');
+  const [loading, setLoading] = useState(false);
+  const [selectedVersion, setSelectedVersion] = useState<string>('');
+  const [versionFiles, setVersionFiles] = useState<SkillFileEntry[]>([]);
+  const [versionLoading, setVersionLoading] = useState(false);
+
+  // Check if skill has multiple versions
+  const hasVersions = skill?.versions && skill.versions.length > 0;
+  // Memoised so the fallback `[]` keeps one identity across renders. A fresh
+  // array here would change the reset effect's dependencies on every render,
+  // and its closed branch stores a new `[]` in state each time it runs.
+  const availableVersions = useMemo(
+    () => skill?.versions || [],
+    [skill?.versions],
+  );
+
+  // Reset state when skill changes or drawer opens/closes
+  useEffect(() => {
+    if (open && skill) {
+      // Initialize version
+      // For multi-version skill, check if metadata.version is in availableVersions
+      // If not, use the first available version
+      let defaultVersion = skill.metadata?.version || '';
+      if (hasVersions) {
+        if (!defaultVersion || !availableVersions.includes(defaultVersion)) {
+          defaultVersion = availableVersions[0];
+        }
+      }
+      setSelectedVersion(defaultVersion);
+    } else {
+      // Reset when closed
+      setSelectedVersion('');
+      setVersionFiles([]);
+      setVersionLoading(false);
+      setSelectedFile(null);
+      setFileContent('');
+    }
+  }, [
+    open,
+    skill?.id,
+    hasVersions,
+    skill?.metadata?.version,
+    availableVersions,
+  ]);
+
+  const resolvedVersion = useMemo(() => {
+    if (!skill) return '';
+    return (
+      selectedVersion || skill.metadata?.version || skill.versions?.[0] || ''
+    );
+  }, [selectedVersion, skill?.id, skill?.metadata?.version, skill?.versions]);
+
+  // Load files when version or skill changes
+  useEffect(() => {
+    // Cleared by the cleanup below so a superseded request cannot write state.
+    let isActive = true;
+
+    const loadVersionFiles = async () => {
+      if (!skill || !getVersionFiles) {
+        if (isActive) {
+          setVersionFiles([]);
+          setVersionLoading(false);
+        }
+        return;
+      }
+
+      // Check if skill has _folderId (required for file operations)
+      if (!(skill as any)._folderId) {
+        console.warn(
+          `[Skill Detail] Skill "${skill.name}" has no folder_id. ` +
+            'Please reindex skills in settings to fix this issue.',
+        );
+        if (isActive) {
+          setVersionFiles([]);
+          setVersionLoading(false);
+        }
+        return;
+      }
+
+      // If it's the default version and skill.files is not empty, use skill.files
+      // Only for local skills (not search results which have empty files array)
+      if (
+        resolvedVersion ===
+          (skill.metadata?.version || skill.versions?.[0] || '') &&
+        skill.files.length > 0 &&
+        skill.source_type !== 'search'
+      ) {
+        if (isActive) {
+          setVersionFiles(skill.files);
+          setVersionLoading(false);
+        }
+        return;
+      }
+
+      // Load files for the selected version
+      if (isActive) setVersionLoading(true);
+      try {
+        const versionToLoad = resolvedVersion;
+        // Pass skill object to handle search results not in skills state
+        const files = await getVersionFiles(skill.id, versionToLoad, skill);
+        if (isActive) setVersionFiles(files);
+      } catch (error) {
+        console.error('Failed to load version files:', error);
+        if (isActive) setVersionFiles([]);
+      } finally {
+        if (isActive) setVersionLoading(false);
+      }
+    };
+
+    loadVersionFiles();
+
+    return () => {
+      isActive = false;
+    };
+  }, [
+    skill?.id,
+    skill?.source_type,
+    skill?.metadata?.version,
+    skill?.versions,
+    (skill as any)?._folderId,
+    skill?.files,
+    resolvedVersion,
+    getVersionFiles,
+  ]);
+
+  // Use version files if available, otherwise use skill.files
+  const currentFiles = useMemo(() => {
+    if (hasVersions && versionFiles.length > 0) {
+      return versionFiles;
+    }
+    if (skill?.files && skill.files.length > 0) {
+      return skill.files;
+    }
+    return versionFiles;
+  }, [skill?.files, versionFiles, hasVersions]);
+
+  const treeData = useMemo(() => buildFileTree(currentFiles), [currentFiles]);
+
+  const handleSelect = useCallback(
+    async (item: TreeDataItem | undefined) => {
+      if (!skill || !item) return;
+
+      const file = currentFiles.find((f) => f.path === item.id);
+      if (!file || file.is_dir) return;
+
+      setSelectedFile(item.id);
+      setLoading(true);
+
+      try {
+        // Pass skill object to handle search results not in skills state
+        const content = await getFileContent(
+          skill.id,
+          file.path,
+          selectedVersion || undefined,
+          skill,
+        );
+        setFileContent(content || '');
+      } catch (error) {
+        console.error('Failed to load file content:', error);
+        // Do not keep showing the previously selected file's content under
+        // the newly selected file name.
+        setFileContent('');
+      } finally {
+        setLoading(false);
+      }
+    },
+    [skill, currentFiles, selectedVersion, getFileContent],
+  );
+
+  // Auto-select SKILL.md or README on open
+  useEffect(() => {
+    if (open && skill && currentFiles.length > 0 && !selectedFile) {
+      // Priority: SKILL.md > README.md > index.md
+      const priorityFiles = ['skill.md', 'readme.md', 'index.md'];
+      let targetFile: SkillFileEntry | undefined;
+
+      for (const priority of priorityFiles) {
+        targetFile = currentFiles.find(
+          (f) => f.name.toLowerCase() === priority && !f.is_dir,
+        );
+        if (targetFile) break;
+      }
+
+      if (targetFile) {
+        handleSelect({ id: targetFile.path } as TreeDataItem);
+      }
+    }
+  }, [open, skill?.id, currentFiles.length]);
+
+  const renderFileContent = () => {
+    if (!selectedFile) {
+      return (
+        <div className="flex flex-col items-center justify-center py-24 text-text-secondary">
+          <FileText className="size-12 mb-4 opacity-50" />
+          <p>Select a file to view</p>
+        </div>
+      );
+    }
+
+    if (loading) {
+      return (
+        <div className="flex justify-center py-10">
+          <Spin size="large" />
+        </div>
+      );
+    }
+
+    const filename = selectedFile.split('/').pop() || '';
+
+    if (isMarkdownFile(filename)) {
+      return <MarkdownViewer content={fileContent} />;
+    }
+
+    return <CodeViewer content={fileContent} filename={filename} />;
+  };
+
+  if (!open || !skill) {
+    return null;
+  }
+
+  return (
+    <div className="fixed inset-0 z-50 flex flex-col bg-bg-base">
+      {/* Page Header with Back Button - aligned with container below */}
+      <header className="flex items-center justify-between px-6 py-4 bg-bg-base">
+        <Button variant="outline" onClick={onClose}>
+          <ArrowBigLeft />
+          {t('common.back')}
+        </Button>
+        <div className="flex items-center gap-2">
+          {hasVersions ? (
+            <DropdownMenu>
+              <DropdownMenuTrigger asChild>
+                <Button
+                  variant="outline"
+                  size="sm"
+                  className="h-8 px-3 text-xs gap-1"
+                  disabled={versionLoading}
+                >
+                  <GitBranch className="size-3.5" />
+                  <span className="max-w-[120px] truncate">
+                    {t('skills.versionHistory')}
+                  </span>
+                  <ChevronDown className="size-3 ml-1" />
+                </Button>
+              </DropdownMenuTrigger>
+              <DropdownMenuContent align="end" className="w-[200px]">
+                <div className="px-2 py-1.5 text-xs font-medium text-text-secondary border-b border-border mb-1">
+                  {t('skills.selectVersion')}
+                </div>
+                {availableVersions.map((version, index) => (
+                  <DropdownMenuItem
+                    key={version}
+                    onClick={() => setSelectedVersion(version)}
+                    className={`flex items-center justify-between cursor-pointer ${
+                      selectedVersion === version ? 'bg-accent-primary/10' : ''
+                    }`}
+                  >
+                    <div className="flex items-center gap-2">
+                      <Tag className="size-3 text-text-secondary" />
+                      <span
+                        className={
+                          selectedVersion === version ? 'font-medium' : ''
+                        }
+                      >
+                        v{version}
+                      </span>
+                    </div>
+                    <div className="flex items-center gap-1">
+                      {index === 0 && (
+                        <Badge
+                          variant="secondary"
+                          className="text-[10px] h-4 px-1"
+                        >
+                          {t('skills.latest')}
+                        </Badge>
+                      )}
+                      {selectedVersion === version && (
+                        <div className="w-1.5 h-1.5 rounded-full bg-accent-primary" />
+                      )}
+                    </div>
+                  </DropdownMenuItem>
+                ))}
+              </DropdownMenuContent>
+            </DropdownMenu>
+          ) : (
+            skill.metadata?.version && (
+              <Badge variant="outline" className="text-xs h-8 px-2">
+                <Tag className="size-3 mr-1" />v{skill.metadata.version}
+              </Badge>
+            )
+          )}
+        </div>
+      </header>
+
+      {/* Main Content Area with Border */}
+      <div className="flex-1 mx-6 mb-4 border-0.5 border-border-button rounded-lg overflow-hidden bg-bg-base">
+        <div className="flex h-full">
+          {/* Sidebar - File Tree */}
+          <div className="w-80 border-r border-r-0.5 border-border-button flex flex-col bg-bg-base">
+            <div className="p-4 border-b border-b-0.5 border-border-button bg-bg-base">
+              <h2 className="font-semibold text-lg truncate">{skill.name}</h2>
+              {skill.metadata?.description && (
+                <p className="text-text-secondary text-xs mt-2">
+                  {skill.metadata.description}
+                </p>
+              )}
+              <div className="flex flex-wrap gap-1 mt-2">
+                {skill.metadata?.tags?.map((tag) => (
+                  <Badge key={tag} variant="secondary">
+                    {tag}
+                  </Badge>
+                ))}
+              </div>
+            </div>
+
+            {/* Version History Section */}
+            {hasVersions && (
+              <div className="border-b border-b-0.5 border-border-button bg-bg-secondary/30">
+                <div className="px-4 py-2 flex items-center gap-2 text-xs font-medium text-text-secondary">
+                  <GitBranch className="size-3.5" />
+                  <span>{t('skills.versionHistory')}</span>
+                  <span className="text-text-tertiary">
+                    ({availableVersions.length})
+                  </span>
+                </div>
+                <div className="px-2 pb-2 max-h-[120px] overflow-y-auto">
+                  {availableVersions.map((version, index) => (
+                    <button
+                      key={version}
+                      onClick={() => setSelectedVersion(version)}
+                      disabled={versionLoading}
+                      className={`w-full flex items-center justify-between px-2 py-1.5 text-xs rounded-md transition-colors ${
+                        selectedVersion === version
+                          ? 'bg-accent-primary/10 text-accent-primary'
+                          : 'hover:bg-bg-secondary text-text-secondary'
+                      } ${versionLoading ? 'opacity-50 cursor-not-allowed' : 'cursor-pointer'}`}
+                    >
+                      <div className="flex items-center gap-2">
+                        <Tag className="size-3" />
+                        <span
+                          className={
+                            selectedVersion === version ? 'font-medium' : ''
+                          }
+                        >
+                          v{version}
+                        </span>
+                      </div>
+                      <div className="flex items-center gap-1">
+                        {index === 0 && (
+                          <Badge
+                            variant="secondary"
+                            className="text-[10px] h-4 px-1"
+                          >
+                            {t('skills.latest')}
+                          </Badge>
+                        )}
+                        {selectedVersion === version && (
+                          <div className="w-1.5 h-1.5 rounded-full bg-accent-primary" />
+                        )}
+                      </div>
+                    </button>
+                  ))}
+                </div>
+              </div>
+            )}
+
+            <div className="flex-1 overflow-auto p-2">
+              {/* File Tree */}
+              {versionLoading ? (
+                <div className="flex justify-center py-10">
+                  <Spin size="default" />
+                </div>
+              ) : currentFiles.length === 0 ? (
+                <div className="flex flex-col items-center justify-center py-10 text-text-secondary">
+                  <FolderOpen className="size-8 mb-2 opacity-50" />
+                  <p className="text-sm">
+                    {skill?.source_type === 'search' &&
+                    !(skill as any)._folderId
+                      ? 'Please reindex skills in settings to view files'
+                      : t('skills.noFiles')}
+                  </p>
+                </div>
+              ) : (
+                <div>
+                  <p className="text-text-secondary text-xs pl-2 mb-2">
+                    {t('skills.files')}
+                    {currentFiles.length > 0 && (
+                      <span className="ml-1 text-text-tertiary">
+                        ({currentFiles.filter((f) => !f.is_dir).length} files)
+                      </span>
+                    )}
+                  </p>
+                  <TreeView
+                    data={treeData}
+                    initialSelectedItemId={selectedFile || undefined}
+                    onSelectChange={handleSelect}
+                    expandAll
+                    defaultNodeIcon={FolderOpen}
+                    defaultLeafIcon={FileText}
+                  />
+                </div>
+              )}
+            </div>
+          </div>
+
+          {/* Main Content */}
+          <div className="flex-1 overflow-auto p-6 bg-bg-base">
+            {renderFileContent()}
+          </div>
+        </div>
+      </div>
+    </div>
+  );
+};
+
+export default memo(SkillDetail);
diff --git a/web/src/pages/skills/components/upload-modal.tsx b/web/src/pages/skills/components/upload-modal.tsx
new file mode 100644
index 0000000000..be5f9ea6e0
--- /dev/null
+++ b/web/src/pages/skills/components/upload-modal.tsx
@@ -0,0 +1,1055 @@
+import { FileUploader } from '@/components/file-uploader';
+import { RAGFlowFormItem } from '@/components/ragflow-form';
+import { Button } from '@/components/ui/button';
+import { Form } from '@/components/ui/form';
+import { Input } from '@/components/ui/input';
+import { Label } from '@/components/ui/label';
+import { Modal } from '@/components/ui/modal/modal';
+import { Progress } from '@/components/ui/progress';
+import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
+import { CheckCircle, FolderOpen, Globe, Loader2, XCircle } from 'lucide-react';
+import React, { useCallback, useEffect, useState } from 'react';
+import { useForm } from 'react-hook-form';
+import { useTranslation } from 'react-i18next';
+import { 
validateSkillFormat } from '../hooks'; +import type { ValidationError } from '../types'; +import { findJunkFiles } from '../validation'; + +interface UploadModalProps { + open: boolean; + onCancel: () => void; + onUpload: (name: string, version: string, files: File[]) => Promise<boolean>; + loading?: boolean; +} + +type GitPlatform = 'github' | 'gitee'; + +interface GitFile { + path: string; + download_url: string; + type: 'file' | 'dir'; + size: number; +} + +const PLATFORM_CONFIG: Record< + GitPlatform, + { name: string; apiBase: string; rawBase: string; defaultBranch: string } +> = { + github: { + name: 'GitHub', + apiBase: 'https://api.github.com', + rawBase: 'https://raw.githubusercontent.com', + defaultBranch: 'main', + }, + gitee: { + name: 'Gitee', + apiBase: 'https://gitee.com/api/v5', + rawBase: 'https://gitee.com', + defaultBranch: 'master', + }, +}; + +interface UploadFormData { + name: string; + version: string; +} + +const UploadModal: React.FC<UploadModalProps> = ({ + open, + onCancel, + onUpload, +}) => { + const { t } = useTranslation(); + const [activeTab, setActiveTab] = useState('upload'); + + // Upload form with react-hook-form + const form = useForm<UploadFormData>({ + defaultValues: { + name: '', + version: '', + }, + }); + const { register, handleSubmit, setValue, watch, reset } = form; + const nameValue = watch('name'); + + const [files, setFiles] = useState<File[]>([]); + const [uploading, setUploading] = useState(false); + const [progress, setProgress] = useState(0); + const [validationStatus, setValidationStatus] = useState< + 'valid' | 'invalid' | 'pending' | null + >(null); + const [validationMessage, setValidationMessage] = useState<string>(''); + const [, setValidationErrors] = useState<ValidationError[]>([]); + const [parsedMetadata, setParsedMetadata] = useState<{ + name?: string; + description?: string; + } | null>(null); + + // Git import tab state + const [gitPlatform, setGitPlatform] = useState<GitPlatform>('github'); + const 
[repoUrl, setRepoUrl] = useState(''); + const [gitVersion, setGitVersion] = useState(''); + const [gitToken, setGitToken] = useState(''); + const [gitImporting, setGitImporting] = useState(false); + const [gitProgress, setGitProgress] = useState(''); + const [gitValidationStatus, setGitValidationStatus] = useState< + 'valid' | 'invalid' | 'pending' | null + >(null); + const [gitValidationMessage, setGitValidationMessage] = useState<string>(''); + + const validateName = (value: string): boolean | string => { + if (!value) { + return t('skills.skillNameHelp'); + } + if (!/^[a-zA-Z0-9_-]+$/.test(value)) { + return t('skills.skillNameHelp'); + } + return true; + }; + + const validateVersion = (value: string): boolean | string => { + if (!value) { + return t('skills.versionRequired') || 'Version is required'; + } + // Semantic versioning format: x.y.z + if (!/^\d+\.\d+\.\d+/.test(value)) { + return ( + t('skills.versionFormatHelp') || + 'Version must be in semver format (e.g., 1.0.0)' + ); + } + return true; + }; + + const validateGitVersion = (value: string): boolean => { + if (!value) { + return false; + } + return /^\d+\.\d+\.\d+/.test(value); + }; + + const handleOk = handleSubmit(async (data) => { + if (files.length === 0) { + return; + } + + setUploading(true); + setProgress(0); + + try { + const success = await onUpload(data.name, data.version, files); + + if (success) { + reset({ name: '', version: '' }); + setFiles([]); + onCancel(); + } + } catch (error) { + console.error('Upload error:', error); + } finally { + setUploading(false); + setProgress(0); + } + }); + + const handleCancel = useCallback(() => { + if (!uploading && !gitImporting) { + // Reset upload tab state + reset({ name: '', version: '' }); + setFiles([]); + setValidationStatus(null); + setValidationMessage(''); + setValidationErrors([]); + setParsedMetadata(null); + // Reset git import tab state + setActiveTab('upload'); + setRepoUrl(''); + setGitVersion(''); + setGitToken(''); + 
setGitValidationStatus(null); + setGitValidationMessage(''); + setGitProgress(''); + onCancel(); + } + }, [uploading, gitImporting, onCancel, reset]); + + // Handle files change from FileUploader + const handleFilesChange = useCallback( + (newFiles: File[]) => { + setFiles(newFiles); + + // Auto-fill name from folder name if empty + if (newFiles.length > 0 && !nameValue) { + const firstFile = newFiles[0]; + const path = (firstFile as any).webkitRelativePath || firstFile.name; + const folderName = path.split('/')[0]; + if (folderName) { + setValue('name', folderName, { shouldValidate: true }); + } + } + }, + [nameValue, setValue], + ); + + // Validate files when files change + useEffect(() => { + const validateFilesAsync = async () => { + if (files.length === 0) { + setValidationStatus(null); + setValidationMessage(''); + setValidationErrors([]); + setParsedMetadata(null); + return; + } + + setValidationStatus('pending'); + + try { + // Check for junk files first + const junkFiles = findJunkFiles(files); + if (junkFiles.length > 0) { + setValidationStatus('invalid'); + const fileNames = junkFiles.slice(0, 3).join(', '); + const more = + junkFiles.length > 3 ? 
` (+${junkFiles.length - 3} more)` : ''; + setValidationMessage( + `${t('skills.validation.junkFilesFound') || 'Please remove temporary files before uploading'}: ${fileNames}${more}`, + ); + setValidationErrors([]); + setParsedMetadata(null); + return; + } + + const result = await validateSkillFormat(files); + + if (result.valid) { + setValidationStatus('valid'); + setValidationMessage( + t('skills.validation.valid') || 'Valid skill format', + ); + setValidationErrors([]); + setParsedMetadata({ + name: result.name, + description: result.description, + }); + // Auto-fill name if extracted from SKILL.md + if (result.name && !nameValue) { + setValue('name', result.name, { shouldValidate: true }); + } + } else { + setValidationStatus('invalid'); + setParsedMetadata(null); + + // Build detailed error message + let errorMsg = ''; + if (result.details) { + errorMsg = `${t(`skills.validation.${result.error}`) || t('skills.validation.invalid')}: ${result.details}`; + } else { + errorMsg = + t(`skills.validation.${result.error}`) || + t('skills.validation.invalid'); + } + setValidationMessage(errorMsg); + } + } catch (err) { + console.error('Validation error:', err); + setValidationStatus('invalid'); + const errorMsg = err instanceof Error ? 
err.message : String(err); + setValidationMessage( + `${t('skills.validation.error') || 'Validation failed'}: ${errorMsg}`, + ); + setValidationErrors([]); + setParsedMetadata(null); + } + }; + + validateFilesAsync(); + }, [files, t, nameValue, setValue]); + + const isUploadDisabled = validationStatus === 'invalid' || files.length === 0; + + // ===== Git Import Functions ===== + + // Parse Git repository URL + const parseGitUrl = useCallback((url: string, platform: GitPlatform) => { + const config = PLATFORM_CONFIG[platform]; + + if (platform === 'github') { + // GitHub URL patterns: + // https://github.com/owner/repo + // https://github.com/owner/repo/tree/branch/path + // https://github.com/owner/repo/blob/branch/path/file + const patterns = [ + /github\.com\/([^/]+)\/([^/]+)\/tree\/([^/]+)\/(.+)/, + /github\.com\/([^/]+)\/([^/]+)\/blob\/([^/]+)\/(.+)/, + /github\.com\/([^/]+)\/([^/]+)(?:\/|$)/, + ]; + + for (const pattern of patterns) { + const match = url.match(pattern); + if (match) { + return { + owner: match[1], + repo: match[2].replace('.git', ''), + ref: match[3] || config.defaultBranch, + path: match[4] || '', + }; + } + } + } else if (platform === 'gitee') { + // Gitee URL patterns: + // https://gitee.com/owner/repo + // https://gitee.com/owner/repo/tree/branch/path + // https://gitee.com/owner/repo/blob/branch/path/file + const patterns = [ + /gitee\.com\/([^/]+)\/([^/]+)\/tree\/([^/]+)\/(.+)/, + /gitee\.com\/([^/]+)\/([^/]+)\/blob\/([^/]+)\/(.+)/, + /gitee\.com\/([^/]+)\/([^/]+)(?:\/|$)/, + ]; + + for (const pattern of patterns) { + const match = url.match(pattern); + if (match) { + return { + owner: match[1], + repo: match[2].replace('.git', ''), + ref: match[3] || config.defaultBranch, + path: match[4] || '', + }; + } + } + } + + return null; + }, []); + + // Fetch directory contents recursively from Git API + const fetchGitDirectoryContents = useCallback( + async ( + platform: GitPlatform, + owner: string, + repo: string, + path: string, + ref: 
string, + token?: string, + ): Promise<GitFile[]> => { + const config = PLATFORM_CONFIG[platform]; + const headers: HeadersInit = { + Accept: 'application/json', + }; + + if (token) { + if (platform === 'github') { + headers.Authorization = `token ${token}`; + } else { + headers['PRIVATE-TOKEN'] = token; + } + } + + let url: string; + if (platform === 'github') { + url = `${config.apiBase}/repos/${owner}/${repo}/contents/${path}?ref=${ref}`; + } else { + url = `${config.apiBase}/repos/${owner}/${repo}/contents/${path}?ref=${ref}`; + if (token) { + url += `&access_token=${token}`; + } + } + + const response = await fetch(url, { headers }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + const message = errorData.message || `HTTP ${response.status}`; + + if (response.status === 403) { + const limit = platform === 'github' ? '60' : '1000'; + throw new Error( + `API rate limit exceeded. ${limit} requests/hour for unauthenticated requests.`, + ); + } + if (response.status === 404) { + throw new Error( + 'Repository or path not found. 
Please check the URL and ensure the repository is public.', + ); + } + throw new Error(`Failed to fetch: ${message}`); + } + + const items = await response.json(); + const files: GitFile[] = []; + + // Handle single file case + if (!Array.isArray(items)) { + if (items.type === 'file') { + files.push({ + path: items.path, + download_url: items.download_url, + type: 'file', + size: items.size, + }); + } + return files; + } + + for (const item of items) { + if (item.type === 'file') { + files.push({ + path: item.path, + download_url: item.download_url, + type: 'file', + size: item.size, + }); + } else if (item.type === 'dir') { + // Recursively fetch subdirectories + const subFiles = await fetchGitDirectoryContents( + platform, + owner, + repo, + item.path, + ref, + token, + ); + files.push(...subFiles); + } + } + + return files; + }, + [], + ); + + // Infer MIME type from file extension + const getMimeTypeFromExtension = (filePath: string): string => { + const ext = filePath.split('.').pop()?.toLowerCase() ?? 
''; + const mimeTypes: Record<string, string> = { + md: 'text/markdown', + mdx: 'text/markdown', + txt: 'text/plain', + json: 'application/json', + json5: 'application/json', + yaml: 'application/yaml', + yml: 'application/yaml', + toml: 'application/toml', + js: 'application/javascript', + cjs: 'application/javascript', + mjs: 'application/javascript', + ts: 'application/typescript', + tsx: 'application/typescript', + jsx: 'application/javascript', + py: 'text/x-python', + sh: 'text/x-shellscript', + rb: 'text/x-ruby', + go: 'text/x-go', + rs: 'text/x-rust', + swift: 'text/x-swift', + kt: 'text/x-kotlin', + java: 'text/x-java', + cs: 'text/x-csharp', + cpp: 'text/x-c++', + c: 'text/x-c', + h: 'text/x-c', + hpp: 'text/x-c++', + sql: 'text/x-sql', + csv: 'text/csv', + ini: 'text/x-ini', + cfg: 'text/x-config', + env: 'text/x-env', + xml: 'application/xml', + html: 'text/html', + htm: 'text/html', + css: 'text/css', + scss: 'text/x-scss', + sass: 'text/x-sass', + svg: 'image/svg+xml', + }; + return mimeTypes[ext] || 'text/plain'; + }; + + // Download file from Git + const downloadGitFile = useCallback( + async ( + platform: GitPlatform, + file: GitFile, + owner: string, + repo: string, + ref: string, + ): Promise<File> => { + let downloadUrl = file.download_url; + const config = PLATFORM_CONFIG[platform]; + + // If download_url is not provided, construct raw URL + if (!downloadUrl) { + if (platform === 'github') { + // https://raw.githubusercontent.com/owner/repo/ref/path + downloadUrl = `${config.rawBase}/${owner}/${repo}/${ref}/${file.path}`; + } else if (platform === 'gitee') { + // https://gitee.com/owner/repo/raw/ref/path + downloadUrl = `${config.rawBase}/${owner}/${repo}/raw/${ref}/${file.path}`; + } + } + + if (!downloadUrl) { + throw new Error(`Download URL not available for file: ${file.path}`); + } + + const response = await fetch(downloadUrl); + if (!response.ok) { + throw new Error( + `Failed to download ${file.path}: ${response.status} 
${response.statusText}`, + ); + } + + const blob = await response.blob(); + const fileName = file.path.split('/').pop() || 'file'; + + // Use MIME type from extension if blob.type is empty or generic + let fileType = blob.type; + if ( + !fileType || + fileType === 'application/octet-stream' || + fileType === 'text/plain' + ) { + fileType = getMimeTypeFromExtension(file.path); + } + + const downloadedFile = new File([blob], fileName, { + type: fileType, + }); + + // Add webkitRelativePath to maintain directory structure + Object.defineProperty(downloadedFile, 'webkitRelativePath', { + value: file.path, + writable: false, + }); + + return downloadedFile; + }, + [], + ); + + // Handle Git import + const handleGitImport = useCallback(async () => { + if (!repoUrl || !gitVersion) { + return; + } + + if (!validateGitVersion(gitVersion)) { + setGitValidationStatus('invalid'); + setGitValidationMessage( + t('skills.versionFormatHelp') || + 'Version must be in semver format (e.g., 1.0.0)', + ); + return; + } + + setGitImporting(true); + setGitProgress('Parsing repository URL...'); + setGitValidationStatus(null); + setGitValidationMessage(''); + + try { + const parsed = parseGitUrl(repoUrl, gitPlatform); + if (!parsed) { + throw new Error( + `Invalid ${PLATFORM_CONFIG[gitPlatform].name} URL format`, + ); + } + + const { owner, repo, ref, path } = parsed; + + // 1. 
Fetch file list from Git API + setGitProgress('Fetching file list...'); + const gitFiles = await fetchGitDirectoryContents( + gitPlatform, + owner, + repo, + path, + ref, + gitToken || undefined, + ); + + if (gitFiles.length === 0) { + throw new Error('No files found in the repository'); + } + + // Filter out common non-skill files + const filteredGitFiles = gitFiles.filter((f) => { + const name = f.path.split('/').pop()?.toLowerCase(); + // Skip common non-code files + if ( + [ + '.gitignore', + 'license', + 'copying', + 'makefile', + 'dockerfile', + ].includes(name || '') + ) { + return false; + } + return true; + }); + + // 2. Download all files + setGitProgress(`Downloading ${filteredGitFiles.length} files...`); + const downloadedFiles: File[] = []; + const downloadErrors: string[] = []; + + for (let i = 0; i < filteredGitFiles.length; i++) { + const file = filteredGitFiles[i]; + setGitProgress( + `Downloading ${i + 1}/${filteredGitFiles.length}: ${file.path}`, + ); + + try { + const downloadedFile = await downloadGitFile( + gitPlatform, + file, + owner, + repo, + ref, + ); + downloadedFiles.push(downloadedFile); + } catch (err) { + const errorMsg = err instanceof Error ? err.message : String(err); + console.warn(`Failed to download ${file.path}:`, err); + downloadErrors.push(`${file.path}: ${errorMsg}`); + } + } + + if (downloadedFiles.length === 0) { + throw new Error( + `No files could be downloaded. Errors:\n${downloadErrors.slice(0, 3).join('\n')}`, + ); + } + + // 3. Validate skill format + setGitProgress('Validating skill format...'); + + const validation = await validateSkillFormat(downloadedFiles); + + if (!validation.valid) { + setGitValidationStatus('invalid'); + const errorKey = `skills.validation.${validation.error}`; + const errorMessage = t(errorKey) || validation.error; + const details = validation.details ? 
`: ${validation.details}` : ''; + setGitValidationMessage(`${errorMessage}${details}`); + setGitImporting(false); + setGitProgress(''); + return; + } + + setGitValidationStatus('valid'); + setGitValidationMessage( + t('skills.validation.valid') || 'Valid skill format', + ); + + // 4. Upload to RAGFlow + setGitProgress('Uploading to RAGFlow...'); + const skillName = + validation.name || repo.toLowerCase().replace(/[^a-z0-9_-]/g, '-'); + + const success = await onUpload(skillName, gitVersion, downloadedFiles); + + if (success) { + handleCancel(); + } + } catch (error) { + console.error('Git import error:', error); + setGitValidationStatus('invalid'); + setGitValidationMessage( + error instanceof Error ? error.message : 'Import failed', + ); + } finally { + setGitImporting(false); + setGitProgress(''); + } + }, [ + repoUrl, + gitVersion, + gitPlatform, + gitToken, + t, + parseGitUrl, + fetchGitDirectoryContents, + downloadGitFile, + onUpload, + handleCancel, + ]); + + // Check if Git import can be submitted + const isGitImportDisabled = + !repoUrl || !gitVersion || !validateGitVersion(gitVersion) || gitImporting; + + // Handle tab change + const handleTabChange = (value: string) => { + setActiveTab(value); + }; + + return ( + <Modal + open={open} + onOpenChange={(v: boolean) => !v && handleCancel()} + title={t('skills.addSkill') || 'Add Skill'} + showfooter={false} + onCancel={handleCancel} + size="large" + > + <Tabs value={activeTab} onValueChange={handleTabChange} className="mt-4"> + <TabsList className="grid w-full grid-cols-2"> + <TabsTrigger value="upload" disabled={gitImporting}> + <FolderOpen className="mr-2 size-4" /> + {t('skills.upload') || 'Upload'} + </TabsTrigger> + <TabsTrigger value="git" disabled={uploading}> + <Globe className="mr-2 size-4" /> + {t('skills.importFromGit') || 'Import from Git'} + </TabsTrigger> + </TabsList> + + {/* Upload Tab */} + <TabsContent value="upload" className="space-y-4 mt-4"> + <Form {...form}> + <RAGFlowFormItem + 
name="name" + label={ + <> + {t('skills.skillName')} + <span className="text-state-error ml-1">*</span> + </> + } + rules={{ validate: validateName }} + > + <Input + id="skill-name" + placeholder={t('skills.skillNamePlaceholder')} + disabled={uploading} + {...register('name', { validate: validateName })} + /> + </RAGFlowFormItem> + + <RAGFlowFormItem + name="version" + label={ + <> + {t('skills.skillVersion') || 'Version'} + <span className="text-state-error ml-1">*</span> + </> + } + rules={{ validate: validateVersion }} + > + <Input + id="skill-version" + placeholder={ + t('skills.skillVersionPlaceholder') || 'e.g., 1.0.0' + } + disabled={uploading} + {...register('version', { validate: validateVersion })} + /> + </RAGFlowFormItem> + <p className="text-xs text-text-secondary"> + {t('skills.versionFormatHelp') || + 'Version must be in semver format (e.g., 1.0.0)'} + </p> + </Form> + + <div className="bg-bg-card border border-border-button rounded-lg p-4"> + <p className="font-medium text-sm"> + {t('skills.selectFilesOrFolder')} + </p> + <p className="text-text-secondary text-sm mt-1"> + {t('skills.uploadDescription')} + </p> + </div> + + {/* File Uploader */} + <FileUploader + value={files} + onValueChange={handleFilesChange} + disabled={uploading} + multiple + accept={{ '*/*': [] }} + title={t('skills.dragFilesTitle')} + description={t('skills.dragFilesDescription')} + /> + + {/* Validation Status */} + {validationStatus && ( + <div + className={`border rounded-lg p-4 ${ + validationStatus === 'valid' + ? 'bg-state-success/5 border-state-success/20' + : validationStatus === 'invalid' + ? 'bg-state-error/5 border-state-error/20' + : 'bg-bg-card border-border-button' + }`} + > + <div className="flex items-start gap-3"> + {validationStatus === 'valid' ? ( + <CheckCircle className="size-5 text-state-success flex-shrink-0 mt-0.5" /> + ) : validationStatus === 'invalid' ? 
( + <XCircle className="size-5 text-state-error flex-shrink-0 mt-0.5" /> + ) : null} + <div className="flex-1"> + <p + className={`font-medium ${ + validationStatus === 'valid' + ? 'text-state-success' + : validationStatus === 'invalid' + ? 'text-state-error' + : 'text-text-primary' + }`} + > + {validationStatus === 'valid' + ? t('skills.validation.valid') || 'Valid skill format' + : t('skills.validation.invalid') || + 'Invalid skill format'} + </p> + <p className="text-text-secondary text-sm mt-1"> + {validationMessage} + </p> + {parsedMetadata && ( + <div className="mt-3 pt-3 border-t border-border-button"> + <p className="text-text-secondary text-sm font-medium"> + {t('skills.parsedMetadata') || 'Parsed from SKILL.md:'} + </p> + {parsedMetadata.name && ( + <div className="text-sm mt-1"> + <span className="text-text-secondary"> + {t('skills.name') || 'Name'}:{' '} + </span> + <span>{parsedMetadata.name}</span> + </div> + )} + {parsedMetadata.description && ( + <div className="text-sm mt-1"> + <span className="text-text-secondary"> + {t('skills.description') || 'Description'}:{' '} + </span> + <span> + {parsedMetadata.description.slice(0, 100)} + {parsedMetadata.description.length > 100 + ? '...' + : ''} + </span> + </div> + )} + </div> + )} + </div> + </div> + </div> + )} + + {uploading && progress > 0 && ( + <div className="space-y-2"> + <Progress value={progress} /> + <p className="text-text-secondary text-sm text-center"> + {t('skills.uploading')}... + </p> + </div> + )} + + {/* Upload Actions */} + <div className="flex justify-end gap-2 pt-4 border-t border-border-button"> + <Button + variant="outline" + onClick={handleCancel} + disabled={uploading} + > + {t('common.cancel')} + </Button> + <Button + onClick={handleOk} + disabled={isUploadDisabled || uploading} + loading={uploading} + > + {uploading ? 
t('skills.uploading') : t('common.upload')} + </Button> + </div> + </TabsContent> + + {/* Git Import Tab */} + <TabsContent value="git" className="space-y-4 mt-4"> + {/* Platform Selection */} + <div className="space-y-2"> + <Label>{t('skills.gitPlatform') || 'Platform'}</Label> + <div className="flex gap-2"> + <Button + type="button" + variant={gitPlatform === 'github' ? 'default' : 'outline'} + onClick={() => setGitPlatform('github')} + disabled={gitImporting} + className="flex-1" + > + <Globe className="mr-2 size-4" /> + GitHub + </Button> + <Button + type="button" + variant={gitPlatform === 'gitee' ? 'default' : 'outline'} + onClick={() => setGitPlatform('gitee')} + disabled={gitImporting} + className="flex-1" + > + <Globe className="mr-2 size-4" /> + Gitee + </Button> + </div> + </div> + + {/* Repository URL */} + <div className="space-y-2"> + <Label htmlFor="git-repo-url"> + {t('skills.repoUrl') || 'Repository URL'} + <span className="text-state-error ml-1">*</span> + </Label> + <Input + id="git-repo-url" + placeholder={ + gitPlatform === 'github' + ? 
'https://github.com/owner/repo/tree/main/skill-path' + : 'https://gitee.com/owner/repo/tree/master/skill-path' + } + disabled={gitImporting} + value={repoUrl} + onChange={(e) => setRepoUrl(e.target.value)} + /> + <p className="text-xs text-text-secondary"> + {t('skills.repoUrlHelp') || + `Supports: ${PLATFORM_CONFIG[gitPlatform].name} repository URL with optional path`} + </p> + </div> + + {/* Version */} + <div className="space-y-2"> + <Label htmlFor="git-version"> + {t('skills.skillVersion') || 'Version'} + <span className="text-state-error ml-1">*</span> + </Label> + <Input + id="git-version" + placeholder="1.0.0" + disabled={gitImporting} + value={gitVersion} + onChange={(e) => setGitVersion(e.target.value)} + /> + <p className="text-xs text-text-secondary"> + {t('skills.versionFormatHelp') || + 'Version must be in semver format (e.g., 1.0.0)'} + </p> + </div> + + {/* Access Token (Optional) */} + <div className="space-y-2"> + <Label htmlFor="git-token"> + {t('skills.accessToken') || 'Access Token'} + <span className="text-text-secondary ml-1"> + ({t('common.optional') || 'optional'}) + </span> + </Label> + <Input + id="git-token" + type="password" + placeholder={ + gitPlatform === 'github' ? 'ghp_xxxxxxxxxxxx' : 'gitee token' + } + disabled={gitImporting} + value={gitToken} + onChange={(e) => setGitToken(e.target.value)} + /> + <p className="text-xs text-text-secondary"> + {gitPlatform === 'github' + ? t('skills.githubTokenHelp') || + 'For private repos or higher rate limits (5000 req/hour)' + : t('skills.giteeTokenHelp') || + 'For private repos or higher rate limits (2000 req/hour)'} + </p> + </div> + + {/* Rate Limit Info */} + <div className="bg-bg-card border border-border-button rounded-lg p-4"> + <p className="text-sm font-medium"> + {t('skills.rateLimitInfo') || 'Rate Limit Info'} + </p> + <p className="text-text-secondary text-sm mt-1"> + {gitPlatform === 'github' + ? t('skills.githubRateLimit') || + 'Public repos: 60 requests/hour per IP. 
Use token for 5000 req/hour.' + : t('skills.giteeRateLimit') || + 'Public repos: 1000 requests/hour per IP. Use token for 2000 req/hour.'} + </p> + </div> + + {/* Progress */} + {gitImporting && gitProgress && ( + <div className="bg-bg-card border border-border-button rounded-lg p-4"> + <div className="flex items-center gap-3"> + <Loader2 className="size-5 animate-spin text-accent-primary" /> + <span className="text-sm">{gitProgress}</span> + </div> + </div> + )} + + {/* Validation Status */} + {gitValidationStatus && ( + <div + className={`border rounded-lg p-4 ${ + gitValidationStatus === 'valid' + ? 'bg-state-success/5 border-state-success/20' + : 'bg-state-error/5 border-state-error/20' + }`} + > + <div className="flex items-start gap-3"> + {gitValidationStatus === 'valid' ? ( + <CheckCircle className="size-5 text-state-success flex-shrink-0 mt-0.5" /> + ) : ( + <XCircle className="size-5 text-state-error flex-shrink-0 mt-0.5" /> + )} + <div className="flex-1"> + <p + className={`font-medium ${ + gitValidationStatus === 'valid' + ? 'text-state-success' + : 'text-state-error' + }`} + > + {gitValidationStatus === 'valid' + ? t('skills.validation.valid') || 'Valid' + : t('skills.validation.invalid') || 'Error'} + </p> + <p className="text-text-secondary text-sm mt-1"> + {gitValidationMessage} + </p> + </div> + </div> + </div> + )} + + {/* Git Import Actions */} + <div className="flex justify-end gap-2 pt-4 border-t border-border-button"> + <Button + variant="outline" + onClick={handleCancel} + disabled={gitImporting} + > + {t('common.cancel')} + </Button> + <Button + onClick={handleGitImport} + disabled={isGitImportDisabled} + loading={gitImporting} + > + {gitImporting + ? t('skills.importing') || 'Importing...' 
+                : t('skills.import') || 'Import'}
+            </Button>
+          </div>
+        </TabsContent>
+      </Tabs>
+    </Modal>
+  );
+};
+
+export default UploadModal;
diff --git a/web/src/pages/skills/hooks.ts b/web/src/pages/skills/hooks.ts
new file mode 100644
index 0000000000..099c83b989
--- /dev/null
+++ b/web/src/pages/skills/hooks.ts
@@ -0,0 +1,1567 @@
+import message from '@/components/ui/message';
+import fileManagerService from '@/services/file-manager-service';
+import skillSpaceService, {
+  SkillSearchConfig,
+} from '@/services/skill-space-service';
+import { getAuthorization } from '@/utils/authorization-util';
+import { useQuery } from '@tanstack/react-query';
+import { useCallback, useEffect, useMemo, useState } from 'react';
+import { useTranslation } from 'react-i18next';
+import type { Skill, SkillFileEntry, SkillMetadata, SkillSpace } from './types';
+import {
+  filterUploadFiles,
+  isTextFile,
+  parseFrontmatter,
+  validateSkillFormat as validateSkillFormatImpl,
+} from './validation';
+
+// Name of the folder that holds all skill spaces.
+const SKILLS_FOLDER = 'skills';
+
+// Helper to get the lowercase extension of a file name or '/'-separated path.
+// Returns '' when the last path segment contains no dot.
+// Only the last segment is inspected so that a dot in a directory name
+// (e.g. "docs.v2/README") is not mistaken for an extension separator.
+const getFileExt = (filename: string): string => {
+  const baseName = filename.slice(filename.lastIndexOf('/') + 1);
+  const dotIndex = baseName.lastIndexOf('.');
+  return dotIndex === -1 ? '' : baseName.slice(dotIndex + 1).toLowerCase();
+};
+
+// Helper to check if file is markdown (by extension, case-insensitive)
+export const isMarkdownFile = (filename: string): boolean => {
+  const mdExts = ['md', 'markdown', 'mdown', 'mkd'];
+  return mdExts.includes(getFileExt(filename));
+};
+
+// Helper to parse YAML-like metadata from markdown frontmatter.
+// Delegates to parseFrontmatter and returns only its metadata and body.
+export const parseMetadata = (
+  content: string,
+): { metadata: SkillMetadata; body: string } => {
+  const { metadata, body } = parseFrontmatter(content);
+  return { metadata, body };
+};
+
+// Normalize timestamp-like values from backend to milliseconds.
+// Supports epoch seconds, epoch milliseconds and ISO datetime strings.
+const toTimestampMs = (value: unknown): number | null => {
+  // Missing values carry no timestamp.
+  if (value === null || value === undefined || value === '') return null;
+
+  // Scale a raw epoch number to milliseconds based on its magnitude.
+  const normalizeEpoch = (raw: number): number | null => {
+    if (!Number.isFinite(raw)) return null;
+
+    let n = raw;
+    // Convert unit by magnitude: ns -> us -> ms -> s.
+    // Current epoch in ms is around 1e12.
+    if (n > 1e17)
+      n = n / 1e6; // nanoseconds
+    else if (n > 1e14)
+      n = n / 1e3; // microseconds
+    else if (n < 1e11) n = n * 1e3; // seconds
+
+    return Math.round(n);
+  };
+
+  if (typeof value === 'number' && Number.isFinite(value)) {
+    return normalizeEpoch(value);
+  }
+
+  if (typeof value === 'string') {
+    const trimmed = value.trim();
+    if (!trimmed) return null;
+
+    // Numeric strings are treated as epoch values, everything else as a
+    // date string understood by Date.parse.
+    const numeric = Number(trimmed);
+    if (!Number.isNaN(numeric)) {
+      return normalizeEpoch(numeric);
+    }
+
+    const parsed = Date.parse(trimmed);
+    return Number.isNaN(parsed) ? null : parsed;
+  }
+
+  // Any other type (objects, booleans, ...) is not a timestamp.
+  return null;
+};
+
+// Field spellings probed for a skill's "updated" and "created" timestamps.
+const UPDATED_TIMESTAMP_KEYS = [
+  'updated_at',
+  'updatedAt',
+  'update_time',
+  'updateTime',
+  'update_date',
+];
+const CREATED_TIMESTAMP_KEYS = [
+  'created_at',
+  'createdAt',
+  'create_time',
+  'createTime',
+  'create_date',
+];
+
+// Pick the first usable timestamp (in ms) from a search result.
+// "Updated" fields win over "created" fields; within each group the result
+// itself is probed first, then result.metadata, then result.skill.
+// Falls back to the current time when no field can be normalized.
+const pickSkillTimestamp = (result: any): number => {
+  const readAll = (source: any, keys: string[]) =>
+    keys.map((key) => source?.[key]);
+
+  const candidates = [
+    ...readAll(result, UPDATED_TIMESTAMP_KEYS),
+    // modified_* aliases are only probed on the result itself
+    result?.modified_at,
+    result?.modifiedAt,
+    ...readAll(result?.metadata, UPDATED_TIMESTAMP_KEYS),
+    ...readAll(result?.skill, UPDATED_TIMESTAMP_KEYS),
+    ...readAll(result, CREATED_TIMESTAMP_KEYS),
+    ...readAll(result?.metadata, CREATED_TIMESTAMP_KEYS),
+    ...readAll(result?.skill, CREATED_TIMESTAMP_KEYS),
+  ];
+
+  for (const candidate of candidates) {
+    const ts = toTimestampMs(candidate);
+    if (ts !== null) return ts;
+  }
+
+  return Date.now();
+};
+
+// Export validation function from validation module
+export { validateSkillFormatImpl as validateSkillFormat };
+
+// Re-export validation utilities for use in components
+export {
+  isMacJunkPath,
+  isTextFile,
+  parseFrontmatter,
+  sanitizeRelPath,
+} from './validation';
+
+// Query key for file content
+const fileContentQueryKey = (fileId: string) => ['skillFileContent', fileId];
+
+// Read a Blob as text with FileReader; rejects with the reader's error.
+// Shared by useFileContent and useSkills' fetchFileContent.
+const readBlobAsText = (blob: Blob): Promise<string> =>
+  new Promise((resolve, reject) => {
+    const reader = new FileReader();
+    reader.onload = () => resolve(reader.result as string);
+    reader.onerror = () => reject(reader.error);
+    reader.readAsText(blob);
+  });
+
+// Hook to fetch file content using TanStack Query.
+// The query is disabled while fileId is null/empty; results are considered
+// fresh for five minutes.
+export const useFileContent = (fileId: string | null) => {
+  return useQuery({
+    queryKey: fileContentQueryKey(fileId || ''),
+    queryFn: async (): Promise<string | null> => {
+      if (!fileId) return null;
+      const response = await fileManagerService.getFile({}, fileId);
+      return readBlobAsText(response.data as Blob);
+    },
+    enabled: !!fileId,
+    staleTime: 5 * 60 * 1000, // 5 minutes
+  });
+};
+
+// Hook to manage skills
+export const useSkills = () => {
+  const { t } = useTranslation();
+  const [skills, setSkills] = useState<Skill[]>([]);
+  const [loading, setLoading] = useState(false);
+  const [searchQuery, setSearchQuery] = useState('');
+
+  // Fetch file content (kept for backward compatibility).
+  // Resolves to null instead of throwing when the download or read fails.
+  const fetchFileContent = async (fileId: string): Promise<string | null> => {
+    try {
+      const response = await fileManagerService.getFile({}, fileId);
+      return await readBlobAsText(response.data as Blob);
+    } catch (error) {
+      console.error('Error fetching file content:', error);
+      return null;
+    }
+  };
+
+  // Fetch details of a specific skill (with version support)
+  const 
fetchSkillDetails = async (
+    folderId: string,
+    folderName: string,
+  ): Promise<Skill | null> => {
+    // Builds a Skill from a folder laid out as <skill>/<version>/<files...>.
+    // Uses the highest version folder; folders without any version folder are
+    // delegated to fetchSkillDetailsLegacy. Returns null on any failure.
+    try {
+      // First, list the skill folder to find version folders
+      const { data: skillFolderData } = await fileManagerService.listFile({
+        parent_id: folderId,
+      });
+
+      if (skillFolderData.code !== 0) return null;
+
+      const skillItems = skillFolderData.data?.files || [];
+
+      // Find version folders (names starting with a semver core like x.y.z)
+      const versionFolders = skillItems.filter(
+        (f: any) => f.type === 'folder' && /^\d+\.\d+\.\d+/.test(f.name),
+      );
+
+      if (versionFolders.length === 0) {
+        // No version folders found - fallback to legacy structure
+        // eslint-disable-next-line @typescript-eslint/no-use-before-define
+        return fetchSkillDetailsLegacy(folderId, folderName, skillItems);
+      }
+
+      // Numeric major/minor/patch of a version folder name. A suffix such as
+      // "-rc.1" is ignored here so it cannot turn the patch number into NaN.
+      const versionCore = (name: string): number[] =>
+        (/^(\d+)\.(\d+)\.(\d+)/.exec(name) || []).slice(1, 4).map(Number);
+      const isPlainRelease = (name: string): boolean =>
+        /^\d+\.\d+\.\d+$/.test(name);
+
+      // Sort versions by version number (descending) without mutating the
+      // filtered list.
+      const sortedVersions = [...versionFolders].sort((a: any, b: any) => {
+        const va = versionCore(a.name);
+        const vb = versionCore(b.name);
+        for (let i = 0; i < 3; i++) {
+          if (va[i] !== vb[i]) return vb[i] - va[i]; // Descending order
+        }
+        // Same x.y.z: a plain release ranks above a suffixed name
+        // (e.g. "1.0.0" before "1.0.0-beta").
+        const aPlain = isPlainRelease(a.name);
+        const bPlain = isPlainRelease(b.name);
+        if (aPlain !== bPlain) return aPlain ? -1 : 1;
+        // Both suffixed: newest suffix first, comparing digit runs as numbers.
+        return b.name.localeCompare(a.name, undefined, { numeric: true });
+      });
+
+      const allVersions = sortedVersions.map((v: any) => v.name);
+      const latestVersionFolder = sortedVersions[0];
+      const versionFolderId = latestVersionFolder.id;
+      const versionName = latestVersionFolder.name;
+
+      // Get all files recursively in the latest version folder
+      const fileEntries: SkillFileEntry[] = [];
+      let firstFileDate: string | null = null;
+
+      // Metadata documents in priority order: SKILL.md wins over README.md,
+      // which wins over index.md, regardless of the order they are listed in.
+      const metadataDocNames = ['skill.md', 'readme.md', 'index.md'];
+      const metadataDocs: { id: string; rank: number }[] = [];
+
+      // Recursively fetch all files
+      const fetchFilesRecursive = async (
+        parentId: string,
+        basePath: string = '',
+      ) => {
+        const { data } = await fileManagerService.listFile({
+          parent_id: parentId,
+        });
+        if (data.code !== 0) return;
+
+        const files = data.data?.files || [];
+
+        // Track date from first encountered file
+        if (!firstFileDate && files.length > 0) {
+          firstFileDate = files[0]?.create_date || files[0]?.update_date;
+        }
+
+        for (const f of files) {
+          const path = basePath ? `${basePath}/${f.name}` : f.name;
+
+          fileEntries.push({
+            name: f.name,
+            path: path,
+            is_dir: f.type === 'folder',
+            size: f.size || 0,
+          });
+
+          // Remember metadata documents; the best one is read after the walk
+          const rank = metadataDocNames.indexOf(f.name.toLowerCase());
+          if (rank !== -1) {
+            metadataDocs.push({ id: f.id, rank });
+          }
+
+          // Recursively fetch subfolder contents
+          if (f.type === 'folder') {
+            await fetchFilesRecursive(f.id, path);
+          }
+        }
+      };
+
+      await fetchFilesRecursive(versionFolderId);
+
+      // Read the highest-priority metadata document that yields content.
+      // The sort is stable, so documents of equal rank keep traversal order.
+      metadataDocs.sort((a, b) => a.rank - b.rank);
+      let readmeContent: string | null = null;
+      for (const doc of metadataDocs) {
+        readmeContent = await fetchFileContent(doc.id);
+        if (readmeContent) break;
+      }
+
+      // Parse metadata from the chosen document
+      let metadata: SkillMetadata = {};
+      let description = '';
+
+      if (readmeContent) {
+        const parsed = parseMetadata(readmeContent);
+        metadata = parsed.metadata;
+        description = metadata.description || parsed.body.slice(0, 200);
+      }
+
+      // Get dates: normalize the backend value like search results do, and
+      // use the current time when it is missing or cannot be parsed.
+      const createdAt = toTimestampMs(firstFileDate) ?? Date.now();
+
+      // Priority: folderName (user-specified) > metadata.name (from SKILL.md)
+      // This allows users to override the skill name from SKILL.md
+      const skillName = folderName || metadata.name || 'unnamed-skill';
+
+      return {
+        id: skillName, // Use skill name as ID (consistent with search results)
+        name: skillName,
+        description,
+        source_type: 'local',
+        created_at: createdAt,
+        updated_at: createdAt,
+        files: fileEntries,
+        metadata: { ...metadata, version: versionName },
+        versions: allVersions,
+        _folderId: folderId, // Internal use for file operations
+      };
+    } catch (error) {
+      console.error('Error fetching skill details:', error);
+      return null;
+    }
+  };
+
+  // Legacy fetch for skills without version structure
+  const fetchSkillDetailsLegacy = async (
+    folderId: string,
+    folderName: string,
+    skillItems: any[],
+  ): Promise<Skill | null> => {
+    
try {
+      const fileEntries: SkillFileEntry[] = [];
+      // Seed the date from the first root item so that a flat skill (no
+      // sub-folders) does not always report the current time.
+      let firstFileDate: string | null =
+        skillItems[0]?.create_date || skillItems[0]?.update_date || null;
+
+      // Metadata documents in priority order: SKILL.md wins over README.md,
+      // which wins over index.md, regardless of the order they are listed in.
+      const metadataDocNames = ['skill.md', 'readme.md', 'index.md'];
+      const metadataDocs: { id: string; rank: number }[] = [];
+      const noteMetadataDoc = (f: any) => {
+        const rank = metadataDocNames.indexOf(f.name.toLowerCase());
+        if (rank !== -1) {
+          metadataDocs.push({ id: f.id, rank });
+        }
+      };
+
+      // Recursively fetch all files
+      const fetchFilesRecursive = async (
+        parentId: string,
+        basePath: string = '',
+      ) => {
+        const { data } = await fileManagerService.listFile({
+          parent_id: parentId,
+        });
+        if (data.code !== 0) return;
+
+        const files = data.data?.files || [];
+
+        if (!firstFileDate && files.length > 0) {
+          firstFileDate = files[0]?.create_date || files[0]?.update_date;
+        }
+
+        for (const f of files) {
+          const path = basePath ? `${basePath}/${f.name}` : f.name;
+
+          fileEntries.push({
+            name: f.name,
+            path: path,
+            is_dir: f.type === 'folder',
+            size: f.size || 0,
+          });
+
+          noteMetadataDoc(f);
+
+          if (f.type === 'folder') {
+            await fetchFilesRecursive(f.id, path);
+          }
+        }
+      };
+
+      // Process items from the skill folder.
+      // NOTE(review): top-level folders are walked but not added to
+      // fileEntries themselves, unlike nested folders - confirm whether the
+      // file tree UI relies on that.
+      for (const f of skillItems) {
+        if (f.type === 'folder') {
+          await fetchFilesRecursive(f.id, f.name);
+        } else {
+          fileEntries.push({
+            name: f.name,
+            path: f.name,
+            is_dir: false,
+            size: f.size || 0,
+          });
+
+          noteMetadataDoc(f);
+        }
+      }
+
+      // Read the highest-priority metadata document that yields content.
+      // The sort is stable, so documents of equal rank keep traversal order.
+      metadataDocs.sort((a, b) => a.rank - b.rank);
+      let readmeContent: string | null = null;
+      for (const doc of metadataDocs) {
+        readmeContent = await fetchFileContent(doc.id);
+        if (readmeContent) break;
+      }
+
+      let metadata: SkillMetadata = {};
+      let description = '';
+
+      if (readmeContent) {
+        const parsed = parseMetadata(readmeContent);
+        metadata = parsed.metadata;
+        description = metadata.description || parsed.body.slice(0, 200);
+      }
+
+      // Normalize the backend date like search results do; use the current
+      // time when it is missing or cannot be parsed.
+      const createdAt = toTimestampMs(firstFileDate) ?? Date.now();
+
+      // Priority: folderName (user-specified) > metadata.name (from SKILL.md)
+      // This allows users to override the skill name from SKILL.md
+      const skillName = folderName || metadata.name || 'unnamed-skill';
+
+      return {
+        id: skillName, // Use skill name as ID (consistent with search results)
+        name: skillName,
+        description,
+        source_type: 'local',
+        created_at: createdAt,
+        updated_at: createdAt,
+        files: fileEntries,
+        metadata,
+        _folderId: folderId, // Internal use for file operations
+      };
+    } catch (error) {
+      console.error('Error fetching legacy skill details:', error);
+      return null;
+    }
+  };
+
+  // Ensure skills folder exists, returns folder ID.
+  // Looks for a folder named SKILLS_FOLDER in the root listing and creates it
+  // under the root when absent. Resolves to null on any failure.
+  const ensureSkillsFolder = useCallback(async (): Promise<string | null> => {
+    try {
+      // List root files to find skills folder
+      const { data } = await fileManagerService.listFile({});
+
+      if (data.code !== 0) return null;
+
+      const rootId = data.data?.parent_folder?.id;
+      const files = data.data?.files || [];
+
+      // Check if skills folder exists
+      const skillsFolder = files.find(
+        (f: any) => f.name === SKILLS_FOLDER && f.type === 'folder',
+      );
+
+      if (skillsFolder) {
+        return skillsFolder.id;
+      }
+
+      // Create skills folder
+      const createRes = await fileManagerService.createFolder({
+        name: SKILLS_FOLDER,
+        type: 'folder',
+        parent_id: rootId,
+      });
+
+      if (createRes.data.code === 0) {
+        return createRes.data.data?.id || null;
+      }
+
+      return null;
+    } catch (error) {
+      console.error('Error ensuring skills folder:', error);
+      return null;
+    }
+  }, []);
+
+  // List skill spaces, keeping only id, name, create_time and folder_id.
+  // Resolves to an empty list when the request fails.
+  const fetchSpaces = useCallback(async (): Promise<SkillSpace[]> => {
+    try {
+      const result = await skillSpaceService.listSpaces();
+      return result.spaces.map((space) => ({
+        id: space.id,
+        name: space.name,
+        create_time: space.create_time,
+        folder_id: space.folder_id,
+      }));
+    } catch (error) {
+      console.error('Error fetching skill hubs:', error);
+      return [];
+    }
+  }, []);
+
+  const ensureSkillSpaceFolder = useCallback(
+    async (
+      spaceName: string,
+      createIfMissing = false,
+    ): Promise<string | null> => {
+      const skillsFolderId = await ensureSkillsFolder();
+      if 
(!skillsFolderId) return null;
+
+      // Look for the space folder directly under the skills folder
+      const { data } = await fileManagerService.listFile({
+        parent_id: skillsFolderId,
+      });
+
+      if (data.code !== 0) return null;
+
+      const hubFolder = (data.data?.files || []).find(
+        (f: any) => f.name === spaceName && f.type === 'folder',
+      );
+      if (hubFolder) return hubFolder.id;
+
+      // Not found: only create it when the caller asked for that
+      if (!createIfMissing) return null;
+
+      const createRes = await fileManagerService.createFolder({
+        name: spaceName,
+        type: 'folder',
+        parent_id: skillsFolderId,
+      });
+
+      if (createRes.data.code !== 0) return null;
+      return createRes.data.data?.id || null;
+    },
+    [ensureSkillsFolder],
+  );
+
+  // Create a skill space; shows a success or error toast.
+  // Resolves to the created space, or null when the request fails.
+  const createSpace = useCallback(
+    async (spaceName: string): Promise<{ id: string; name: string } | null> => {
+      try {
+        const space = await skillSpaceService.createSpace({ name: spaceName });
+        message.success(
+          t('skills.spaceCreated') || 'Skill Space created successfully',
+        );
+        return space;
+      } catch (error: any) {
+        console.error('Error creating skill space:', error);
+        message.error(error.message || t('skills.fetchError'));
+        return null;
+      }
+    },
+    [t],
+  );
+
+  // Delete a skill space; shows a success or error toast.
+  // Resolves to true on success and false when the request fails.
+  const deleteSpace = useCallback(
+    async (spaceId: string): Promise<boolean> => {
+      try {
+        await skillSpaceService.deleteSpace(spaceId);
+        message.success(
+          t('skills.spaceDeleted') || 'Skill Space deleted successfully',
+        );
+        return true;
+      } catch (error: any) {
+        console.error('Error deleting skill space:', error);
+        message.error(error.message || t('skills.fetchError'));
+        return false;
+      }
+    },
+    [t],
+  );
+
+  // Update a skill space (rename); shows a success or error toast.
+  // Resolves to true on success and false when the request fails.
+  const updateSpace = useCallback(
+    async (spaceId: string, spaceName: string): Promise<boolean> => {
+      try {
+        await skillSpaceService.updateSpace(spaceId, { name: spaceName });
+        message.success(
+          t('skills.spaceUpdated') || 'Skill Space renamed successfully',
+        );
+        return true;
+      } catch (error: any) {
+        console.error('Error updating skill space:', error);
+        message.error(error.message || t('skills.fetchError'));
+        return false;
+      }
+    },
+    [t],
+  );
+
+  // Fetch skills from file system (fallback when search returns empty).
+  // Every folder directly under the space folder is treated as one skill.
+  // Resolves to an empty result when the space is unknown or anything fails.
+  const fetchSkillsFromFileSystem = useCallback(
+    async (spaceName?: string): Promise<{ skills: Skill[]; total: number }> => {
+      if (!spaceName) {
+        return { skills: [], total: 0 };
+      }
+      try {
+        const spaceFolderId = await ensureSkillSpaceFolder(spaceName, false);
+        if (!spaceFolderId) {
+          return { skills: [], total: 0 };
+        }
+
+        const { data } = await fileManagerService.listFile({
+          parent_id: spaceFolderId,
+        });
+
+        const skillFolders =
+          data.code === 0
+            ? data.data?.files?.filter((f: any) => f.type === 'folder') || []
+            : [];
+
+        // Fetch details for each skill; folders that fail to load are dropped
+        const skillsData: Skill[] = (
+          await Promise.all(
+            skillFolders.map(async (folder: any) => {
+              const skill = await fetchSkillDetails(folder.id, folder.name);
+              return skill;
+            }),
+          )
+        ).filter(Boolean);
+
+        return { skills: skillsData, total: skillsData.length };
+      } catch (error) {
+        console.error('Error fetching skills from file system:', error);
+        return { skills: [], total: 0 };
+      }
+    },
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+    [ensureSkillSpaceFolder],
+  );
+
+  // Fetch skills using search API (supports pagination and sorting)
+  // Falls back to file system if search returns empty (skills not indexed yet)
+  // or if the search request fails. Updates the `skills` state and resolves
+  // to the same list together with the total count.
+  const fetchSkills = useCallback(
+    async (
+      spaceName?: string,
+      spaceId?: string,
+      page = 1,
+      pageSize = 50,
+      sortBy = 'update_time',
+      sortOrder: 'asc' | 'desc' = 'desc',
+    ) => {
+      if (!spaceName || !spaceId) {
+        setSkills([]);
+        return { skills: [], total: 0 };
+      }
+      setLoading(true);
+      try {
+        // Use search API with empty query to list all skills
+        const response = await fetch('/api/v1/skills/search', {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+            Authorization: getAuthorization(),
+          },
+          body: JSON.stringify({
+            space_id: spaceId,
+            query: '', // Empty query = list all
+            page,
+            page_size: pageSize,
+            sort_by: sortBy,
+            sort_order: sortOrder,
+          }),
+        });
+
+        if (!response.ok) {
+          throw new Error('Failed to fetch skills');
+        }
+
+        const result = await response.json();
+        if (result.code !== 0) {
+          throw new Error(result.message || 'Failed to fetch skills');
+        }
+
+        const searchSkills = result.data?.skills || [];
+        const total = result.data?.total || 0;
+
+        // If search returned results, use them
+        if (searchSkills.length > 0) {
+          const skillsData: Skill[] = searchSkills.map((item: any) => {
+            const timestamp = pickSkillTimestamp(item);
+            const skillId = item.skill_id || item.name;
+
+            return {
+              id: skillId,
+              name: item.name,
+              description: item.description || '',
+              source_type: 'search',
+              created_at: timestamp,
+              updated_at: timestamp,
+              metadata: {
+                tags: item.tags || [],
+                version: item.version,
+              },
+              files: [],
+              _folderId: item.folder_id,
+            };
+          });
+
+          setSkills(skillsData);
+
+          // Asynchronously load file counts for search results
+          Promise.all(
+            skillsData.map(async (skill) => {
+              if (!skill._folderId) return skill;
+              try {
+                const detail = await fetchSkillDetails(
+                  skill._folderId,
+                  skill.name,
+                );
+                if (detail) {
+                  return { ...skill, files: detail.files };
+                }
+              } catch {
+                // Ignore errors for individual skill file loading
+              }
+              return skill;
+            }),
+          ).then((enrichedSkills) => {
+            // Apply the enriched list only if the state still holds the list
+            // it was derived from. A newer fetch or any other state change in
+            // the meantime must not be overwritten by this late result.
+            setSkills((current) =>
+              current === skillsData ? enrichedSkills : current,
+            );
+          });
+
+          return { skills: skillsData, total };
+        }
+
+        // Search returned empty, fall back to file system
+        // eslint-disable-next-line no-console
+        console.log(
+          '[Skills] Search returned empty, falling back to file system',
+        );
+        const fsResult = await fetchSkillsFromFileSystem(spaceName);
+        setSkills(fsResult.skills);
+        return fsResult;
+      } catch (error) {
+        console.error('Error fetching skills:', error);
+        // Fall back to file system on error
+        const fsResult = await fetchSkillsFromFileSystem(spaceName);
+        setSkills(fsResult.skills);
+        return fsResult;
+      } finally {
+        setLoading(false);
+      }
+    },
+    // eslint-disable-next-line 
/**
 * Upload one version of a skill into a skill space and index it for search.
 *
 * Folder layout created through the file manager:
 * `<space folder>/<normalized skill name>/<version>/<uploaded tree…>`.
 *
 * @param name      Display name; whitespace runs become `-` and the result is lower-cased.
 * @param version   Version folder name (trimmed). Upload is refused if it already exists.
 * @param files     Files selected by the user; junk files are filtered before validation.
 * @param spaceName Space name, used for file-system operations (required).
 * @param spaceId   Space id, used for indexing; falls back to the space name.
 * @param embdId    Optional embedding model id forwarded to the index API.
 * @returns `true` when the files were uploaded (indexing failures are only logged),
 *          `false` when validation failed, the version exists, or any step threw.
 */
const uploadSkill = useCallback(
  async (
    name: string,
    version: string,
    files: File[],
    spaceName?: string,
    spaceId?: string,
    embdId?: string,
  ): Promise<boolean> => {
    // Candidate files for skill metadata, in decreasing order of preference.
    const metadataFileNames = ['skill.md', 'readme.md', 'index.md'];

    try {
      setLoading(true);
      if (!spaceName) throw new Error('Space name is required');

      // Use spaceName for file system operations, spaceId for indexing
      const normalizedSpaceName = spaceName.trim();
      const normalizedSpaceId = spaceId?.trim() || normalizedSpaceName;

      // Trim first so " My Skill " does not become "-my-skill-"
      const skillNameNormalized = name
        .trim()
        .replace(/\s+/g, '-')
        .toLowerCase();
      const normalizedVersion = version.trim();
      if (!skillNameNormalized) throw new Error('Skill name is required');
      if (!normalizedVersion) throw new Error('Skill version is required');

      // Filter out ignored/junk files first
      const filteredFiles = filterUploadFiles(files);

      // Validate skill format
      const validation = await validateSkillFormatImpl(filteredFiles);
      if (!validation.valid) {
        // i18next returns the key itself for unknown entries, so `t(key) || x`
        // never reaches `x`; `defaultValue` is the supported fallback.
        message.error(
          t(`skills.validation.${validation.error}`, {
            defaultValue: t('skills.validation.invalid'),
          }),
        );
        return false;
      }

      // Get space folder ID (using space name for file system)
      const spaceFolderId = await ensureSkillSpaceFolder(
        normalizedSpaceName,
        true,
      );
      if (!spaceFolderId) throw new Error('Skills space not found');

      // Check if skill folder exists
      const { data: existingData } = await fileManagerService.listFile({
        parent_id: spaceFolderId,
      });
      if (existingData.code !== 0) {
        throw new Error('Failed to list skills folder');
      }

      const existingSkill = existingData.data?.files?.find(
        (f: any) => f.name === skillNameNormalized && f.type === 'folder',
      );

      let skillFolderId: string | undefined;
      if (existingSkill) {
        // Skill exists, refuse to overwrite an existing version
        const { data: versionData } = await fileManagerService.listFile({
          parent_id: existingSkill.id,
        });
        const versionTaken =
          versionData.code === 0 &&
          versionData.data?.files?.some(
            (f: any) => f.name === normalizedVersion && f.type === 'folder',
          );
        if (versionTaken) {
          message.error(
            t('skills.versionExists', {
              defaultValue: 'This version already exists',
            }),
          );
          return false;
        }
        skillFolderId = existingSkill.id;
      } else {
        // Create skill folder
        const folderRes = await fileManagerService.createFolder({
          name: skillNameNormalized,
          type: 'folder',
          parent_id: spaceFolderId,
        });
        if (folderRes.data.code !== 0) {
          throw new Error('Failed to create skill folder');
        }
        skillFolderId = folderRes.data.data?.id;
      }

      if (!skillFolderId) throw new Error('Failed to get skill folder ID');

      // Create version folder
      const versionRes = await fileManagerService.createFolder({
        name: normalizedVersion,
        type: 'folder',
        parent_id: skillFolderId,
      });
      if (versionRes.data.code !== 0) {
        throw new Error('Failed to create version folder');
      }

      const versionFolderId = versionRes.data.data?.id;
      if (!versionFolderId) throw new Error('Failed to get version folder ID');

      // Directories already resolved during this upload, keyed by
      // "<parentId>/<dirName>", so each one is listed/created only once.
      const directoryIds = new Map<string, string>();

      // Return the id of `dirName` under `parentId`, creating it when missing.
      const resolveDirectory = async (
        parentId: string,
        dirName: string,
      ): Promise<string> => {
        const cacheKey = `${parentId}/${dirName}`;
        const cachedId = directoryIds.get(cacheKey);
        if (cachedId) return cachedId;

        const { data: listData } = await fileManagerService.listFile({
          parent_id: parentId,
        });
        if (listData.code !== 0) {
          throw new Error(`Failed to list directory: ${dirName}`);
        }

        const existingDir = listData.data?.files?.find(
          (f: any) => f.name === dirName && f.type === 'folder',
        );
        let directoryId: string | undefined = existingDir?.id;

        if (!directoryId) {
          const createRes = await fileManagerService.createFolder({
            name: dirName,
            type: 'folder',
            parent_id: parentId,
          });
          if (createRes.data.code !== 0) {
            throw new Error(`Failed to create directory: ${dirName}`);
          }
          directoryId = createRes.data.data?.id;
        }

        // Without this guard a missing id would be sent as parent_id="undefined"
        if (!directoryId) {
          throw new Error(`Failed to create directory: ${dirName}`);
        }

        directoryIds.set(cacheKey, directoryId);
        return directoryId;
      };

      // Upload one file, recreating its relative directory chain under parentId
      const uploadFileWithStructure = async (file: File, parentId: string) => {
        const relativePath: string =
          (file as any).webkitRelativePath || file.name;
        const directories = relativePath
          .split('/')
          .slice(0, -1)
          .filter(Boolean);

        let currentParentId = parentId;
        for (const dirName of directories) {
          currentParentId = await resolveDirectory(currentParentId, dirName);
        }

        const formData = new FormData();
        formData.append('parent_id', currentParentId);
        formData.append('file', file);
        const uploadRes: any = await fileManagerService.uploadFile(formData);

        // NOTE(review): assumes uploadFile answers with the same
        // `{ data: { code } }` envelope as listFile/createFolder; the check is
        // skipped when no code is present.
        const uploadCode = uploadRes?.data?.code;
        if (uploadCode !== undefined && uploadCode !== 0) {
          throw new Error(`Failed to upload file: ${relativePath}`);
        }
      };

      // Upload all files sequentially to avoid race conditions
      for (const file of filteredFiles) {
        await uploadFileWithStructure(file, versionFolderId);
      }

      // Build search index for the uploaded skill
      try {
        let skillMetadata: SkillMetadata = {};
        let skillDescription = '';
        // Lower rank wins: shallowest file first, then skill.md > readme.md > index.md
        let bestMetadataRank = Number.POSITIVE_INFINITY;
        const fileContents: { path: string; content: string }[] = [];

        for (const file of filteredFiles) {
          const relativePath: string =
            (file as any).webkitRelativePath || file.name;
          if (!isTextFile(relativePath, file.type)) {
            continue;
          }

          const content = await file.text();
          fileContents.push({ path: relativePath, content });

          const priority = metadataFileNames.indexOf(file.name.toLowerCase());
          if (priority === -1) continue;

          const depth = relativePath.split('/').length;
          const rank = depth * metadataFileNames.length + priority;
          if (rank >= bestMetadataRank) continue;

          bestMetadataRank = rank;
          const parsed = parseMetadata(content);
          skillMetadata = parsed.metadata;
          skillDescription =
            skillMetadata.description || parsed.body.slice(0, 200);
        }

        // Build concatenated content for indexing
        const concatenatedContent = fileContents
          .map((f) => `${f.path}\n===\n${f.content}`)
          .join('\n\n');

        // The normalized name is used as both skill id and name so the folder
        // name, index id and display name stay consistent.
        const indexResponse = await fetch('/api/v1/skills/index', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            Authorization: getAuthorization(),
          },
          body: JSON.stringify({
            space_id: normalizedSpaceId,
            embd_id: embdId,
            skills: [
              {
                id: skillNameNormalized,
                folder_id: skillFolderId,
                name: skillNameNormalized,
                description: skillDescription,
                tags: skillMetadata.tags || [],
                content: concatenatedContent,
              },
            ],
          }),
        });

        if (!indexResponse.ok) {
          console.warn(
            '[Skill Index] Failed to index skill:',
            await indexResponse.text(),
          );
        }
      } catch (indexError) {
        // Indexing failure should not block upload success
        console.warn('[Skill Index] Error indexing skill:', indexError);
      }

      message.success(t('skills.uploadSuccess'));
      await fetchSkills(normalizedSpaceName, normalizedSpaceId);
      return true;
    } catch (error) {
      console.error('Error uploading skill:', error);
      message.error(t('skills.uploadError'));
      return false;
    } finally {
      setLoading(false);
    }
  },
  [t, fetchSkills, ensureSkillSpaceFolder],
);
ensureSkillSpaceFolder( + normalizedSpaceName, + false, + ); + if (spaceFolderId) { + const { data: listData } = await fileManagerService.listFile({ + parent_id: spaceFolderId, + }); + + if (listData.code === 0) { + const skillFolder = (listData.data?.files || []).find( + (f: any) => f.type === 'folder' && f.name === skillId, + ); + if (skillFolder) { + targetFolderId = skillFolder.id; + } + } + } + } + + if (!targetFolderId) { + throw new Error('Skill not found'); + } + + // Get versions by listing the skill folder + const { data: versionData } = await fileManagerService.listFile({ + parent_id: targetFolderId, + }); + + let versionsToDelete: string[] = ['latest']; + if (versionData.code === 0) { + const versionFolders = (versionData.data?.files || []).filter( + (f: any) => f.type === 'folder' && /^\d+\.\d+\.\d+/.test(f.name), + ); + if (versionFolders.length > 0) { + versionsToDelete = versionFolders.map((f: any) => f.name); + } + } + + // Delete search index for all versions + // Backend uses skillName_version as doc_id (replacing '/' with '_') + // We need to delete each version's index separately + // eslint-disable-next-line no-console + console.log( + `[deleteSkill] Starting index deletion for skillId: ${skillId}, spaceId: ${normalizedSpaceId}`, + ); + // eslint-disable-next-line no-console + console.log(`[deleteSkill] versionsToDelete:`, versionsToDelete); + + for (const version of versionsToDelete) { + const indexId = + version === 'latest' ? 
/**
 * Resolve `targetPath` (segments separated by `/`) to a file-manager entry
 * below a skill folder.
 *
 * The starting folder is chosen first:
 * - `version` given: the sub-folder with exactly that name (null if absent);
 * - otherwise: the highest `x.y.z…` sub-folder, or the skill folder itself
 *   when no version folders exist (legacy layout).
 *
 * @returns the matching entry, or `null` when a listing fails or any segment
 *          is missing.
 */
const findFileByPath = async (
  folderId: string,
  targetPath: string,
  version?: string,
): Promise<any | null> => {
  // List a folder; `null` signals a non-zero response code.
  const listChildren = async (parentId: string): Promise<any[] | null> => {
    const { data } = await fileManagerService.listFile({
      parent_id: parentId,
    });
    return data.code !== 0 ? null : data.data?.files || [];
  };

  const isFolderNamed = (entry: any, wanted: string) =>
    entry.name === wanted && entry.type === 'folder';

  // Newest version first; missing or non-numeric components count as 0.
  const byVersionDescending = (left: any, right: any) => {
    const leftParts = left.name.split('.').map(Number);
    const rightParts = right.name.split('.').map(Number);
    const width = Math.max(leftParts.length, rightParts.length);
    for (let position = 0; position < width; position++) {
      const leftValue = leftParts[position] || 0;
      const rightValue = rightParts[position] || 0;
      if (leftValue !== rightValue) return rightValue - leftValue;
    }
    return 0;
  };

  const rootEntries = await listChildren(folderId);
  if (rootEntries === null) return null;

  let cursorId = folderId;
  if (version) {
    const pinnedFolder = rootEntries.find((entry: any) =>
      isFolderNamed(entry, version),
    );
    if (!pinnedFolder) return null;
    cursorId = pinnedFolder.id;
  } else {
    const versionFolders = rootEntries.filter(
      (entry: any) =>
        entry.type === 'folder' && /^\d+\.\d+\.\d+/.test(entry.name),
    );
    if (versionFolders.length > 0) {
      versionFolders.sort(byVersionDescending);
      cursorId = versionFolders[0].id;
    }
    // No version folders: keep searching from the skill folder itself
  }

  const segments = targetPath.split('/');
  const lastIndex = segments.length - 1;

  for (const [index, segment] of segments.entries()) {
    const entries = await listChildren(cursorId);
    if (entries === null) return null;

    // The final segment may be a file or a folder; only the name has to match
    if (index === lastIndex) {
      return entries.find((entry: any) => entry.name === segment) || null;
    }

    const nextFolder = entries.find((entry: any) =>
      isFolderNamed(entry, segment),
    );
    if (!nextFolder) return null;
    cursorId = nextFolder.id;
  }

  return null;
};
skillObj || skills.find((s) => s.id === skillId); + if (!skill) return []; + + // Use internal _folderId for file operations + const folderId = (skill as any)._folderId; + if (!folderId) return []; + + // First, list the skill folder to find the version folder + const { data: skillFolderData } = await fileManagerService.listFile({ + parent_id: folderId, + }); + + if (skillFolderData.code !== 0) return []; + + const skillItems = skillFolderData.data?.files || []; + + // If version is not provided, find the latest version folder + let targetVersion = version; + if (!targetVersion) { + // Find all version folders (matching semver pattern x.y.z) + const versionFolders = skillItems.filter( + (f: any) => f.type === 'folder' && /^\d+\.\d+\.\d+/.test(f.name), + ); + if (versionFolders.length === 0) return []; + + // Sort by version number (descending) to get the latest + const sortedVersions = versionFolders.sort((a: any, b: any) => { + const va = a.name.split('.').map(Number); + const vb = b.name.split('.').map(Number); + for (let i = 0; i < Math.max(va.length, vb.length); i++) { + const na = va[i] || 0; + const nb = vb[i] || 0; + if (na !== nb) return nb - na; // Descending order + } + return 0; + }); + targetVersion = sortedVersions[0].name; + } + + const versionFolder = skillItems.find( + (f: any) => f.name === targetVersion && f.type === 'folder', + ); + + if (!versionFolder) return []; + + const fileEntries: SkillFileEntry[] = []; + + // Recursively fetch all files in the version folder + const fetchFilesRecursive = async ( + parentId: string, + basePath: string = '', + ) => { + const { data } = await fileManagerService.listFile({ + parent_id: parentId, + }); + if (data.code !== 0) return; + + const files = data.data?.files || []; + + for (const f of files) { + const path = basePath ? 
/** Build the TanStack Query key for a space's search config; empty parts are dropped. */
const skillSearchConfigQueryKey = (spaceId: string, embdId?: string) =>
  ['skillSearchConfig', spaceId, embdId].filter(Boolean);

/** Pull a non-empty string `message` off an unknown thrown value, if it has one. */
const getSkillSearchErrorMessage = (error: unknown): string | undefined => {
  if (typeof error !== 'object' || error === null || !('message' in error)) {
    return undefined;
  }
  const { message: text } = error as { message?: unknown };
  return typeof text === 'string' && text ? text : undefined;
};

/**
 * Skill search configuration hook for one skill space.
 *
 * Exposes the cached config plus helpers to save it, reindex, make sure the
 * index exists, run a search, and read the index status. Every helper reports
 * failure through its return value (`false`, `null`, or an empty result)
 * instead of throwing.
 *
 * @param spaceId Space whose config is loaded; the query stays disabled while empty.
 */
export const useSkillSearchConfig = (spaceId?: string) => {
  const { t } = useTranslation();
  const [saving, setSaving] = useState(false);

  // Use TanStack Query to fetch and cache config
  const {
    data: config,
    refetch: refetchConfig,
    isLoading: configLoading,
  } = useQuery({
    queryKey: skillSearchConfigQueryKey(spaceId || '', undefined),
    queryFn: async () => {
      if (!spaceId) return null;
      const data = await skillSpaceService.getConfig(spaceId);
      return data as SkillSearchConfig | null;
    },
    enabled: !!spaceId,
    staleTime: 5 * 60 * 1000, // 5 minutes
  });

  // Fetch config; `currentSpaceId` overrides the hook's space when provided
  const fetchConfig = useCallback(
    async (_embdId?: string, currentSpaceId?: string) => {
      const targetSpaceId = currentSpaceId || spaceId;
      if (!targetSpaceId) return null;

      if (targetSpaceId !== spaceId) {
        // The cached query is keyed by the hook's own spaceId, so refetching it
        // would answer for the wrong space. Ask the service directly instead.
        try {
          const data = await skillSpaceService.getConfig(targetSpaceId);
          return data as SkillSearchConfig | null;
        } catch (error) {
          console.error('Error fetching skill search config:', error);
          return null;
        }
      }

      const { data } = await refetchConfig();
      return data as SkillSearchConfig | null;
    },
    [spaceId, refetchConfig],
  );

  // Save config
  const saveConfig = useCallback(
    async (configData: SkillSearchConfig): Promise<boolean> => {
      try {
        setSaving(true);
        if (!spaceId) throw new Error('Space ID is required');
        await skillSpaceService.updateConfig({
          ...configData,
          space_id: spaceId,
        });
        // Refetch config after save
        await refetchConfig();
        message.success(t('skillSearch.saveSuccess'));
        return true;
      } catch (error: unknown) {
        console.error('Error saving skill search config:', error);
        message.error(
          getSkillSearchErrorMessage(error) || t('skillSearch.saveError'),
        );
        return false;
      } finally {
        setSaving(false);
      }
    },
    [t, spaceId, refetchConfig],
  );

  // Reindex all skills
  const reindex = useCallback(
    async (embdId?: string): Promise<boolean> => {
      try {
        setSaving(true);
        if (!spaceId) throw new Error('Space ID is required');
        await skillSpaceService.reindex({
          skills: [],
          space_id: spaceId,
          embd_id: embdId,
        });
        message.success(t('skillSearch.reindexSuccess'));
        return true;
      } catch (error: unknown) {
        console.error('Error reindexing skills:', error);
        message.error(
          getSkillSearchErrorMessage(error) || t('skillSearch.reindexError'),
        );
        return false;
      } finally {
        setSaving(false);
      }
    },
    [t, spaceId],
  );

  // Initialize index
  const initializeIndex = useCallback(async (): Promise<boolean> => {
    try {
      if (!spaceId) throw new Error('Space ID is required');
      // An index call with no skills is used to make sure the index exists;
      // embd_id is resolved from the skill search config by the backend.
      await skillSpaceService.indexSkills({ skills: [], space_id: spaceId });
      return true;
    } catch (error) {
      console.error('Error initializing skill search index:', error);
      return false;
    }
  }, [spaceId]);

  // Search skills
  const searchSkills = useCallback(
    async (query: string, page = 1, pageSize = 10) => {
      try {
        if (!spaceId) return { skills: [], total: 0 };
        const data = await skillSpaceService.search({
          space_id: spaceId,
          query,
          page,
          page_size: pageSize,
        });

        // Transform backend results to Skill[] format
        const skills: Skill[] = (data.skills || []).map((result: any) => {
          // Prefer the backend timestamp so cards do not all show "just now"
          const timestamp = pickSkillTimestamp(result);

          // skill_id from backend is the skill name (without version suffix)
          const skillId = result.skill_id || result.name;

          return {
            id: skillId, // Use skill name as ID (consistent with list view)
            name: result.name,
            // Default to '' like the list-view mapping in fetchSkills
            description: result.description || '',
            source_type: 'search',
            created_at: timestamp,
            updated_at: timestamp,
            metadata: {
              tags: result.tags || [],
              score: result.score,
              bm25_score: result.bm25_score,
              vector_score: result.vector_score,
            },
            files: [],
            _folderId: result.folder_id, // Needed for file operations
          };
        });

        return {
          skills,
          total: data.total || 0,
        };
      } catch (error) {
        console.error('Error searching skills:', error);
        return { skills: [], total: 0 };
      }
    },
    [spaceId],
  );

  // Get index status
  const getIndexStatus = useCallback(async () => {
    try {
      const response = await fetch('/api/v1/skills/status', {
        headers: {
          Authorization: getAuthorization(),
        },
      });
      // An HTTP error page is not JSON; report "no status" without parsing it
      if (!response.ok) return null;

      const data = await response.json();
      return data.code === 0 ? data.data : null;
    } catch (error) {
      console.error('Error getting skill index status:', error);
      return null;
    }
  }, []);

  return {
    config,
    configLoading,
    saving,
    fetchConfig,
    saveConfig,
    reindex,
    initializeIndex,
    searchSkills,
    getIndexStatus,
  };
};
/**
 * Format a timestamp as a short relative label such as `5m ago` or `3d ago`.
 *
 * The unit is guessed from the magnitude of the value: above 1e17 it is read
 * as nanoseconds, above 1e14 as microseconds, below 1e11 as seconds, and as
 * milliseconds otherwise.
 *
 * @param timestamp Epoch timestamp in any of the units above.
 * @returns `just now` for future, sub-minute, missing (<= 0) or non-finite
 *          values; otherwise minutes, hours, days, 30-day months, or years.
 */
const formatRelative = (timestamp: number): string => {
  // Unset or corrupt values would otherwise render as "NaNy ago" / "5Ny ago"
  if (!Number.isFinite(timestamp) || timestamp <= 0) return 'just now';

  let normalized = timestamp;
  if (normalized > 1e17) normalized = normalized / 1e6;
  else if (normalized > 1e14) normalized = normalized / 1e3;
  else if (normalized < 1e11) normalized = normalized * 1e3;

  const diff = Date.now() - normalized;
  if (diff < 0) return 'just now';

  const minutes = Math.floor(diff / 60000);
  if (minutes < 1) return 'just now';
  if (minutes < 60) return `${minutes}m ago`;

  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours}h ago`;

  const days = Math.floor(hours / 24);
  if (days < 30) return `${days}d ago`;

  const months = Math.floor(days / 30);
  if (months < 12) return `${months}mo ago`;

  const years = Math.floor(months / 12);
  return `${years}y ago`;
};
[spaceToDelete, setSpaceToDelete] = useState<{ + id: string; + name: string; + } | null>(null); + const [renameSpaceModalOpen, setRenameSpaceModalOpen] = useState(false); + const [spaceToRename, setSpaceToRename] = useState<{ + id: string; + name: string; + } | null>(null); + const [renameSpaceInput, setRenameSpaceInput] = useState(''); + const [rowSelection, setRowSelection] = useState<Record<string, boolean>>({}); + const [spaceDetails, setSpaceDetails] = useState< + Record<string, { size: number; createTime: number }> + >({}); + const [deleteSpacesModalOpen, setDeleteSpacesModalOpen] = useState(false); + const [searchResults, setSearchResults] = useState<Skill[]>([]); + const [isSearching, setIsSearching] = useState(false); + const [hasSearched, setHasSearched] = useState(false); + const [skillDetailLoading, setSkillDetailLoading] = useState(false); + + // Pagination and sorting state + const [currentPage, setCurrentPage] = useState(1); + const [pageSize] = useState(20); + const [totalSkills, setTotalSkills] = useState(0); + const [sortBy] = useState<'name' | 'update_time' | 'create_time'>( + 'update_time', + ); + const [sortOrder, setSortOrder] = useState<'asc' | 'desc'>('desc'); + + // Selection state derived values (must be declared before any functions that use them) + const selectedSpaceCount = useMemo( + () => Object.keys(rowSelection).length, + [rowSelection], + ); + const selectedSpaceIds = useMemo( + () => Object.keys(rowSelection), + [rowSelection], + ); + const hasSelectedSpaces = selectedSpaceCount > 0; + + const clearModalLocks = useCallback(() => { + setDetailOpen(false); + setUploadModalOpen(false); + setConfigModalOpen(false); + setSelectedSkill(null); + document.body.style.removeProperty('pointer-events'); + document.body.style.removeProperty('overflow'); + }, []); + + useEffect(() => { + clearModalLocks(); + }, [pathname, clearModalLocks]); + + useEffect(() => { + return () => { + document.body.style.removeProperty('pointer-events'); + 
document.body.style.removeProperty('overflow'); + }; + }, []); + + const loadSpaces = useCallback(async () => { + setSpaceLoading(true); + setRowSelection({}); // Clear selection when loading new data + try { + const nextSpaces = await fetchSpaces(); + setSpaces(nextSpaces); + // Fetch folder details for each space + const details: Record<string, { size: number; createTime: number }> = {}; + for (const space of nextSpaces) { + if (space.folder_id) { + try { + const { data } = await fileManagerService.listFile({ + parent_id: space.folder_id, + }); + if (data.code === 0) { + const files = data.data?.files || []; + const totalSize = files.reduce( + (sum: number, f: any) => sum + (f.size || 0), + 0, + ); + details[space.id] = { + size: totalSize, + createTime: space.create_time || Date.now(), + }; + } + } catch (e) { + console.warn('Failed to fetch space folder details:', e); + } + } + } + setSpaceDetails(details); + } finally { + setSpaceLoading(false); + } + }, [fetchSpaces]); + + useEffect(() => { + loadSpaces(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + // Function to load skills with pagination and sorting + const loadSkills = useCallback(async () => { + const result = await fetchSkills( + selectedSpaceName, + selectedSpaceId, + currentPage, + pageSize, + sortBy, + sortOrder, + ); + setTotalSkills(result.total); + }, [ + fetchSkills, + selectedSpaceName, + selectedSpaceId, + currentPage, + pageSize, + sortBy, + sortOrder, + ]); + + // Load skills when space changes or pagination/sorting changes + useEffect(() => { + if (!selectedSpaceId || !selectedSpaceName) return; + // Clear search results when switching spaces + setSearchResults([]); + setHasSearched(false); + setSearchQuery(''); + setCurrentPage(1); + fetchConfig(undefined, selectedSpaceId); + // Use search API with pagination and sorting + loadSkills(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [selectedSpaceId, selectedSpaceName]); + + // Load skills when 
pagination or sorting changes + useEffect(() => { + if (!selectedSpaceId || !selectedSpaceName || hasSearched) return; + loadSkills(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [currentPage, sortBy, sortOrder]); + + const handleViewSkill = useCallback( + async (skill: Skill) => { + // If skill already has versions, use it directly + if (skill.versions && skill.versions.length > 0) { + setSelectedSkill(skill); + setDetailOpen(true); + return; + } + + // Try to enrich skill data with versions from existing skills list + if (!(skill as any)._folderId || !skill.versions) { + const existingSkill = filteredSkills.find((s) => s.id === skill.id); + if (existingSkill) { + if ((existingSkill as any)._folderId) { + skill = { + ...skill, + _folderId: (existingSkill as any)._folderId, + }; + } + if (existingSkill.versions && existingSkill.versions.length > 0) { + skill = { + ...skill, + versions: existingSkill.versions, + files: existingSkill.files, + }; + } + } + } + + // If still no versions but has folderId, fetch from file system + if ( + (!skill.versions || skill.versions.length === 0) && + (skill as any)._folderId + ) { + setSkillDetailLoading(true); + try { + const detailedSkill = await getSkillDetails( + (skill as any)._folderId, + skill.name, + ); + if (detailedSkill) { + skill = { + ...skill, + versions: detailedSkill.versions, + files: detailedSkill.files, + metadata: { + ...skill.metadata, + ...detailedSkill.metadata, + }, + }; + } + } catch (error) { + console.warn('Failed to fetch skill details:', error); + } finally { + setSkillDetailLoading(false); + } + } + + if (!(skill as any)._folderId) { + console.warn( + `[Skill Search] Skill "${skill.name}" has no folder_id. 
` + + 'Please reindex skills to fix this issue.', + ); + } + + setSelectedSkill(skill); + setDetailOpen(true); + }, + [filteredSkills, getSkillDetails], + ); + + const handleCloseDetail = useCallback(() => { + setDetailOpen(false); + setSelectedSkill(null); + }, []); + + const handleUpload = useCallback( + async (name: string, version: string, files: File[]) => { + // Pass space name (for file system), space ID (for indexing), and embd_id (for indexing) + return await uploadSkill( + name, + version, + files, + selectedSpaceName, + selectedSpaceId, + config?.embd_id, + ); + }, + [uploadSkill, selectedSpaceName, selectedSpaceId, config?.embd_id], + ); + + const handleDelete = useCallback( + async (skillId: string, skillName: string, folderId?: string) => { + // Pass both space ID (for index), space name (for file system), and folderId (for search results) + const success = await deleteSkill( + skillId, + skillName, + selectedSpaceId, + selectedSpaceName, + folderId, + ); + // If delete succeeded and we have search results, remove the skill from searchResults + if (success) { + setSearchResults((prev) => prev.filter((s) => s.id !== skillId)); + } + }, + [deleteSkill, selectedSpaceId, selectedSpaceName], + ); + + const handleCreateHub = useCallback(async () => { + const nextHubName = spaceInput.trim(); + if (!nextHubName) return; + const newHub = await createSpace(nextHubName); + if (!newHub) return; + setSpaceInput(''); + setCreateSpaceModalOpen(false); + await loadSpaces(); + // Select the newly created space + setSelectedSpaceId(newHub.id); + setSelectedSpaceName(newHub.name); + }, [spaceInput, createSpace, loadSpaces]); + + const handleDeleteHub = useCallback(async () => { + if (!spaceToDelete) return; + const success = await deleteSpace(spaceToDelete.id); + if (success) { + setDeleteSpaceModalOpen(false); + setSpaceToDelete(null); + await loadSpaces(); + } + }, [spaceToDelete, deleteSpace, loadSpaces]); + + const openDeleteSpaceModal = useCallback( + (space: { id: 
string; name: string }, e: React.MouseEvent) => { + e.stopPropagation(); + setSpaceToDelete(space); + setDeleteSpaceModalOpen(true); + }, + [], + ); + + const openRenameSpaceModal = useCallback( + (space: { id: string; name: string }, e: React.MouseEvent) => { + e.stopPropagation(); + setSpaceToRename(space); + setRenameSpaceInput(space.name); + setRenameSpaceModalOpen(true); + }, + [], + ); + + const handleRenameHub = useCallback(async () => { + if (!spaceToRename || !renameSpaceInput.trim()) return; + const success = await updateSpace( + spaceToRename.id, + renameSpaceInput.trim(), + ); + if (success) { + setRenameSpaceModalOpen(false); + setSpaceToRename(null); + setRenameSpaceInput(''); + await loadSpaces(); + // Update selected space name if it's the current space + if (selectedSpaceId === spaceToRename.id) { + setSelectedSpaceName(renameSpaceInput.trim()); + } + } + }, [ + spaceToRename, + renameSpaceInput, + updateSpace, + loadSpaces, + selectedSpaceId, + ]); + + const handleDeleteSelectedHubs = useCallback(async () => { + for (const hubId of selectedSpaceIds) { + await deleteSpace(hubId); + } + setDeleteSpacesModalOpen(false); + setRowSelection({}); + await loadSpaces(); + }, [selectedSpaceIds, deleteSpace, loadSpaces]); + + const handleOpenDeleteSelectedModal = useCallback(() => { + setDeleteSpacesModalOpen(true); + }, []); + + const handleSearch = useCallback( + async (query: string) => { + setSearchQuery(query); + if (!query.trim()) { + setSearchResults([]); + setHasSearched(false); + return; + } + setIsSearching(true); + setHasSearched(true); + try { + const results = await searchSkills(query, 1, 20); + if (results?.skills) { + const localSkillMap = new Map(skills.map((s) => [s.id, s])); + const localSkillNameMap = new Map( + skills.map((s) => [s.name.toLowerCase(), s]), + ); + const mergedResults = results.skills.map((skill) => { + const localSkill = + localSkillMap.get(skill.id) || + localSkillNameMap.get(skill.name.toLowerCase()); + if (!localSkill) 
return skill; + return { + ...skill, + created_at: localSkill.created_at, + updated_at: localSkill.updated_at, + _folderId: + (skill as any)._folderId || (localSkill as any)._folderId, + versions: localSkill.versions, + files: localSkill.files, + }; + }); + setSearchResults(mergedResults); + } else { + setSearchResults([]); + } + } catch (error) { + console.error('Search error:', error); + setSearchResults([]); + } finally { + setIsSearching(false); + } + }, + [searchSkills, setSearchQuery, skills], + ); + + const handleSearchInputChange = useCallback( + (e: React.ChangeEvent<HTMLInputElement>) => { + const value = e.target.value; + setSearchQuery(value); + if (!value.trim()) { + setSearchResults([]); + setHasSearched(false); + } + }, + [setSearchQuery], + ); + + const handleSearchKeyDown = useCallback( + (e: React.KeyboardEvent<HTMLInputElement>) => { + if (e.key === 'Enter') { + handleSearch(searchQuery); + } + }, + [handleSearch, searchQuery], + ); + + const handleHubSearchChange = useCallback( + (e: React.ChangeEvent<HTMLInputElement>) => { + setSpaceSearchString(e.target.value); + }, + [], + ); + + const filteredSpaces = useMemo(() => { + if (!spaceSearchString.trim()) return spaces; + return spaces.filter((space) => + space.name.toLowerCase().includes(spaceSearchString.toLowerCase()), + ); + }, [spaces, spaceSearchString]); + + const displayedSkills = useMemo(() => { + // Server-side sorting is already applied via API, no need to sort here + return hasSearched ? 
searchResults : filteredSkills; + }, [hasSearched, searchResults, filteredSkills]); + + const isLoading = loading || isSearching || configLoading; + + // Space list breadcrumb: root / skills + const hubListBreadcrumb = ( + <div className="flex items-center gap-2"> + <span + className="text-text-secondary cursor-pointer hover:text-text-primary" + onClick={() => navigate(Routes.Files)} + > + root + </span> + <span className="text-text-secondary">/</span> + <span>{t('skills.title')}</span> + </div> + ); + + // Skills list breadcrumb: root / skills / {hubName} + const skillsListBreadcrumb = ( + <div className="flex items-center gap-2"> + <span + className="text-text-secondary cursor-pointer hover:text-text-primary" + onClick={() => navigate(Routes.Files)} + > + root + </span> + <span className="text-text-secondary">/</span> + <span + className="text-text-secondary cursor-pointer hover:text-text-primary" + onClick={() => { + setSelectedSpaceId(''); + setSelectedSpaceName(''); + setSearchResults([]); + setHasSearched(false); + setSearchQuery(''); + fetchSkills(''); // Clear skills data + }} + > + {t('skills.title')} + </span> + <span className="text-text-secondary">/</span> + <span>{selectedSpaceName}</span> + </div> + ); + + // Space list page (no space selected) + if (!selectedSpaceId) { + return ( + <> + <article + className="size-full flex flex-col" + data-testid="skill-space-list" + > + <header className="px-5 pt-8 mb-4"> + <ListFilterBar + leftPanel={hubListBreadcrumb} + searchString={spaceSearchString} + onSearchChange={handleHubSearchChange} + showFilter={false} + icon="file" + > + <div className="flex items-center gap-2"> + <Segmented + value={spaceViewMode} + onChange={(v) => setSpaceViewMode(v as 'grid' | 'list')} + options={[ + { value: 'grid', label: <LayoutGrid className="size-4" /> }, + { value: 'list', label: <List className="size-4" /> }, + ]} + /> + <Button onClick={() => setCreateSpaceModalOpen(true)}> + <Plus className="size-[1em]" /> + 
{t('skills.createSpace') || 'Create Skill Space'} + </Button> + </div> + </ListFilterBar> + + {hasSelectedSpaces && spaceViewMode === 'list' && ( + <BulkOperateBar + className="mt-4" + count={selectedSpaceCount} + unit={t('skills.space') || 'spaces'} + list={[ + { + id: 'delete', + label: t('common.delete'), + icon: <Trash2 className="size-4" />, + onClick: handleOpenDeleteSelectedModal, + }, + ]} + /> + )} + </header> + + <div className="flex-1 px-5 flex flex-col overflow-hidden"> + {spaceLoading ? ( + <div className="flex-1 flex items-center justify-center"> + <Spin size="large" /> + </div> + ) : filteredSpaces.length ? ( + spaceViewMode === 'grid' ? ( + <CardContainer className="flex-1 overflow-auto"> + {filteredSpaces.map((space) => ( + <Card + key={space.id} + className="group flex flex-col cursor-pointer bg-bg-card hover:border-accent-primary hover:shadow-md transition-all" + onClick={() => { + setSelectedSpaceId(space.id); + setSelectedSpaceName(space.name); + }} + > + <CardContent className="flex-1 flex flex-col p-4"> + <div className="flex items-start justify-between mb-2"> + <div className="flex-1 min-w-0 flex items-center gap-2"> + <SvgIcon + name="home-icon/skill-space" + width={20} + height={20} + /> + <h3 className="font-semibold text-lg truncate"> + {space.name} + </h3> + </div> + <div className="flex opacity-0 group-hover:opacity-100 transition-opacity"> + <Button + variant="ghost" + size="icon" + className="h-8 w-8 text-text-secondary hover:text-accent-primary" + onClick={(e: React.MouseEvent) => + openRenameSpaceModal(space, e) + } + > + <Pencil className="size-4" /> + </Button> + <Button + variant="ghost" + size="icon" + className="h-8 w-8 text-text-secondary hover:text-red-500" + onClick={(e: React.MouseEvent) => + openDeleteSpaceModal(space, e) + } + > + <Trash2 className="size-4" /> + </Button> + </div> + </div> + </CardContent> + <CardFooter className="pt-0 pb-4 px-4"> + <span className="text-accent-primary text-sm"> + {t('skills.enterSpace') 
|| 'Enter'} → + </span> + </CardFooter> + </Card> + ))} + </CardContainer> + ) : ( + <div className="flex-1 overflow-auto border border-border rounded-lg"> + <Table> + <TableHeader className="bg-bg-title sticky top-0"> + <TableRow> + <TableHead className="w-[50px] text-center"> + <Checkbox + checked={ + filteredSpaces.length > 0 && + filteredSpaces.every( + (space) => rowSelection[space.id], + ) + } + onCheckedChange={(checked) => { + const newSelection = { ...rowSelection }; + filteredSpaces.forEach((space) => { + if (checked) { + newSelection[space.id] = true; + } else { + delete newSelection[space.id]; + } + }); + setRowSelection(newSelection); + }} + /> + </TableHead> + <TableHead className="w-[20vw]"> + {t('skills.spaceName') || 'Name'} + </TableHead> + <TableHead className="w-[160px]"> + {t('fileManager.uploadDate') || 'Upload Date'} + </TableHead> + <TableHead className="w-[96px]"> + {t('fileManager.size') || 'Size'} + </TableHead> + <TableHead className="w-[96px] text-right"> + {t('common.action') || 'Action'} + </TableHead> + </TableRow> + </TableHeader> + <TableBody> + {filteredSpaces.map((space) => ( + <TableRow + key={space.id} + className="hover:bg-bg-secondary/50 cursor-pointer transition-colors" + onClick={() => { + setSelectedSpaceId(space.id); + setSelectedSpaceName(space.name); + }} + > + <TableCell + className="text-center" + onClick={(e) => e.stopPropagation()} + > + <Checkbox + checked={!!rowSelection[space.id]} + onCheckedChange={(checked) => { + setRowSelection((prev) => { + const newSelection = { ...prev }; + if (checked) { + newSelection[space.id] = true; + } else { + delete newSelection[space.id]; + } + return newSelection; + }); + }} + /> + </TableCell> + <TableCell> + <div className="flex items-center gap-2 overflow-hidden"> + <SvgIcon + name="home-icon/skill-space" + width={16} + height={16} + /> + <span className="font-medium truncate"> + {space.name} + </span> + </div> + </TableCell> + <TableCell className="text-sm 
text-text-secondary"> + {spaceDetails[space.id]?.createTime + ? formatDate(spaceDetails[space.id].createTime) + : '-'} + </TableCell> + <TableCell className="text-sm text-text-secondary"> + {spaceDetails[space.id]?.size !== undefined + ? formatFileSize(spaceDetails[space.id].size) + : '-'} + </TableCell> + <TableCell + className="text-right" + onClick={(e) => e.stopPropagation()} + > + <Button + variant="ghost" + size="icon" + className="h-8 w-8 text-text-secondary hover:text-accent-primary" + onClick={(e: React.MouseEvent) => + openRenameSpaceModal(space, e) + } + > + <Pencil className="size-4" /> + </Button> + <Button + variant="ghost" + size="icon" + className="h-8 w-8 text-text-secondary hover:text-red-500" + onClick={(e: React.MouseEvent) => + openDeleteSpaceModal(space, e) + } + > + <Trash2 className="size-4" /> + </Button> + </TableCell> + </TableRow> + ))} + </TableBody> + </Table> + </div> + ) + ) : ( + <div className="flex-1 flex items-center justify-center"> + {spaceSearchString ? 
( + <EmptyAppCard + showIcon + size="large" + className="w-[480px] p-14" + isSearch + type={EmptyCardType.Skills} + /> + ) : ( + <EmptyAppCard + showIcon + size="large" + className="w-[480px] p-14" + type={EmptyCardType.Skills} + onClick={() => setCreateSpaceModalOpen(true)} + /> + )} + </div> + )} + </div> + </article> + + {/* Create Space Modal */} + <CreateSpaceDialog + open={createSpaceModalOpen} + onOpenChange={setCreateSpaceModalOpen} + spaceInput={spaceInput} + onSpaceInputChange={setSpaceInput} + onCreate={handleCreateHub} + /> + + {/* Delete Space Modal */} + <DeleteSpaceDialog + open={deleteSpaceModalOpen} + onOpenChange={(open) => { + setDeleteSpaceModalOpen(open); + if (!open) setSpaceToDelete(null); + }} + spaceToDelete={spaceToDelete} + onDelete={handleDeleteHub} + /> + + {/* Rename Space Modal */} + <RenameSpaceDialog + open={renameSpaceModalOpen} + onOpenChange={(open) => { + setRenameSpaceModalOpen(open); + if (!open) { + setSpaceToRename(null); + setRenameSpaceInput(''); + } + }} + spaceToRename={spaceToRename} + renameSpaceInput={renameSpaceInput} + onRenameInputChange={setRenameSpaceInput} + onRename={handleRenameHub} + /> + + {/* Delete Selected Spaces Modal */} + <DeleteSelectedSpacesDialog + open={deleteSpacesModalOpen} + onOpenChange={setDeleteSpacesModalOpen} + selectedCount={selectedSpaceCount} + onDelete={handleDeleteSelectedHubs} + /> + </> + ); + } + + // Inside a space (skills list page) + return ( + <article className="size-full flex flex-col" data-testid="skills-list"> + <header className="px-5 pt-8 mb-4"> + <ListFilterBar + leftPanel={skillsListBreadcrumb} + showFilter={false} + showSearch={false} + icon="file" + > + <div className="flex items-center gap-2"> + {/* Search skills */} + <SearchInput + placeholder={t('skills.searchPlaceholder') || 'Search skills...'} + value={searchQuery} + onChange={handleSearchInputChange} + onKeyDown={handleSearchKeyDown} + className="w-[200px]" + rootClassName="relative" + /> + {/* Sort order toggle 
*/} + <Button + variant="outline" + size="icon" + onClick={() => setSortOrder(sortOrder === 'asc' ? 'desc' : 'asc')} + title={ + sortOrder === 'asc' + ? t('skills.sortDesc') || 'Sort Descending' + : t('skills.sortAsc') || 'Sort Ascending' + } + > + {sortOrder === 'asc' ? ( + <svg + className="size-4" + viewBox="0 0 24 24" + fill="none" + stroke="currentColor" + strokeWidth="2" + > + <path d="M12 5v14M5 12l7-7 7 7" /> + </svg> + ) : ( + <svg + className="size-4" + viewBox="0 0 24 24" + fill="none" + stroke="currentColor" + strokeWidth="2" + > + <path d="M12 19V5M5 12l7 7 7-7" /> + </svg> + )} + </Button> + + {/* Grid/List toggle */} + <Segmented + value={viewMode} + onChange={(v) => setViewMode(v as 'grid' | 'list')} + options={[ + { value: 'grid', label: <LayoutGrid className="size-4" /> }, + { value: 'list', label: <List className="size-4" /> }, + ]} + /> + <TooltipProvider> + <Tooltip> + <TooltipTrigger asChild> + <Button + variant="outline" + size="icon" + onClick={() => setConfigModalOpen(true)} + disabled={loading} + > + <Settings className="size-4" /> + </Button> + </TooltipTrigger> + <TooltipContent>{t('skills.configureSearch')}</TooltipContent> + </Tooltip> + <Tooltip> + <TooltipTrigger asChild> + <Button + variant="outline" + size="icon" + onClick={() => loadSkills()} + disabled={loading} + > + <RefreshCw className={loading ? 'animate-spin' : ''} /> + </Button> + </TooltipTrigger> + <TooltipContent>{t('common.refresh')}</TooltipContent> + </Tooltip> + </TooltipProvider> + <Button onClick={() => setUploadModalOpen(true)}> + <Plus className="mr-2" /> + {t('skills.addSkill') || 'Add Skill'} + </Button> + </div> + </ListFilterBar> + </header> + + <div className="flex-1 px-5 flex flex-col overflow-hidden"> + {/* Skills List */} + {isLoading ? ( + <div className="flex-1 flex items-center justify-center"> + <Spin size="large" /> + </div> + ) : displayedSkills.length === 0 ? 
( + <div className="flex-1 flex flex-col items-center justify-center text-text-secondary"> + <FolderOpen className="size-16 mb-4 opacity-50" /> + {hasSearched ? ( + <p> + {t('skills.noSearchResults') || 'No search results'} + : "{searchQuery}" + </p> + ) : searchQuery ? ( + <p> + {t('skills.noSearchResults') || 'No search results'} + : "{searchQuery}" + </p> + ) : ( + <div className="text-center"> + <p className="mb-2">{t('skills.noSkills')}</p> + <button + className="text-accent-primary hover:underline" + onClick={() => setUploadModalOpen(true)} + > + {t('skills.addSkill') || 'Add Skill'} + </button> + </div> + )} + </div> + ) : viewMode === 'grid' ? ( + <CardContainer className="flex-1 overflow-auto"> + {displayedSkills.map((skill) => ( + <SkillCard + key={skill.id} + skill={skill} + onView={handleViewSkill} + onDelete={handleDelete} + formatRelative={formatRelative} + /> + ))} + </CardContainer> + ) : ( + <div className="flex-1 overflow-auto border border-border rounded-lg"> + <table className="w-full" style={{ tableLayout: 'fixed' }}> + <colgroup> + <col style={{ width: 'auto' }} /> + <col style={{ width: '120px' }} /> + <col style={{ width: '96px' }} /> + </colgroup> + <thead className="bg-bg-title sticky top-0"> + <tr> + <th className="px-4 py-3 text-left text-sm font-medium text-text-title"> + {t('skills.skillName') || 'Name'} + </th> + <th className="px-4 py-3 text-left text-sm font-medium text-text-title"> + {t('skills.version') || 'Version'} + </th> + <th className="px-4 py-3 text-right text-sm font-medium text-text-title"> + {t('common.action') || 'Action'} + </th> + </tr> + </thead> + <tbody className="divide-y divide-border"> + {displayedSkills.map((skill) => ( + <tr + key={skill.id} + className="hover:bg-bg-secondary/50 cursor-pointer transition-colors" + onClick={() => handleViewSkill(skill)} + > + <td className="px-4 py-3"> + <div className="flex items-center gap-2 overflow-hidden"> + <SvgIcon + name="home-icon/skill-folder" + width={16} + 
height={16} + /> + <span className="font-medium truncate"> + {skill.name} + </span> + </div> + {skill.description && ( + <p className="text-text-secondary text-xs mt-1 truncate"> + {skill.description} + </p> + )} + </td> + <td className="px-4 py-3 text-sm text-text-secondary"> + {skill.metadata?.version || '-'} + </td> + <td + className="px-4 py-3 text-right" + onClick={(e) => e.stopPropagation()} + > + <Button + variant="ghost" + size="icon" + className="h-8 w-8 text-text-secondary hover:text-accent-primary" + onClick={(e: React.MouseEvent) => { + e.stopPropagation(); + handleViewSkill(skill); + }} + > + <Eye className="size-4" /> + </Button> + <Button + variant="ghost" + size="icon" + className="h-8 w-8 text-text-secondary hover:text-red-500" + onClick={(e: React.MouseEvent) => { + e.stopPropagation(); + handleDelete( + skill.id, + skill.name, + (skill as any)._folderId, + ); + }} + > + <Trash2 className="size-4" /> + </Button> + </td> + </tr> + ))} + </tbody> + </table> + </div> + )} + + {/* Pagination */} + {!hasSearched && totalSkills > 0 && ( + <div className="flex items-center justify-between py-4 border-t border-border mt-4"> + <div className="text-sm text-text-secondary"> + {t('skills.totalSkills', { total: totalSkills })} + </div> + <div className="flex items-center gap-2"> + <Button + variant="outline" + size="sm" + disabled={currentPage <= 1 || loading} + onClick={() => setCurrentPage((p) => Math.max(1, p - 1))} + > + {t('common.previous')} + </Button> + <span className="text-sm text-text-secondary px-2"> + {t('skills.pageInfo', { + current: currentPage, + total: Math.ceil(totalSkills / pageSize), + })} + </span> + <Button + variant="outline" + size="sm" + disabled={ + currentPage >= Math.ceil(totalSkills / pageSize) || loading + } + onClick={() => setCurrentPage((p) => p + 1)} + > + {t('common.next')} + </Button> + </div> + </div> + )} + </div> + + {/* Skill Detail Drawer */} + {detailOpen && selectedSkill && ( + <SkillDetail + skill={selectedSkill} + 
open={detailOpen} + onClose={handleCloseDetail} + getFileContent={getSkillFileContent} + getVersionFiles={getSkillVersionFiles} + /> + )} + + {/* Skill Detail Loading Overlay */} + {skillDetailLoading && ( + <div className="fixed inset-0 z-[60] flex items-center justify-center bg-black/20"> + <Spin size="large" /> + </div> + )} + + {/* Upload Modal */} + {uploadModalOpen && ( + <UploadModal + open={uploadModalOpen} + onCancel={() => setUploadModalOpen(false)} + onUpload={handleUpload} + /> + )} + + {/* Search Config Modal */} + {configModalOpen && ( + <SearchConfigModal + open={configModalOpen} + onOpenChange={setConfigModalOpen} + config={config || undefined} + onSave={saveConfig} + onReindex={reindex} + loading={configLoading} + /> + )} + </article> + ); +}; + +export default SkillsPage; diff --git a/web/src/pages/skills/types.ts b/web/src/pages/skills/types.ts new file mode 100644 index 0000000000..6bd278c2a6 --- /dev/null +++ b/web/src/pages/skills/types.ts @@ -0,0 +1,166 @@ +// Skill types for Skill Space + +// ============================================================================ +// Core Skill Types +// ============================================================================ + +export interface Skill { + id: string; // Skill name (used as identifier, consistent with search results) + name: string; + description: string; + source_type: 'local' | 'git' | 'central' | 'search'; + source_ref?: string; + central_path?: string; + created_at: number; + updated_at: number; + files: SkillFileEntry[]; + metadata?: SkillMetadata; + versions?: string[]; // Available versions (for versioned skills) + _folderId?: string; // Internal: file system folder ID for file operations +} + +export interface SkillSpace { + id: string; + name: string; + folder_id?: string; + create_time?: number; +} + +export interface SkillFileEntry { + name: string; + path: string; + is_dir: boolean; + size: number; + content?: string; + contentType?: string; +} + +// 
============================================================================ +// Skill Metadata Types +// ============================================================================ + +export interface SkillMetadata { + // Basic fields + name?: string; + description?: string; + version?: string; + author?: string; + tags?: string[]; + tools?: string[]; + + // Legacy fields for backward compatibility + [key: string]: any; +} + +// ============================================================================ +// API Payload Types +// ============================================================================ + +export interface SkillUploadPayload { + name: string; + description?: string; + files: { path: string; content: string }[]; +} + +export interface SkillUpdatePayload { + id: string; + description?: string; + metadata?: SkillMetadata; +} + +// ============================================================================ +// Validation Types +// ============================================================================ + +export interface SkillValidationResult { + valid: boolean; + error?: string; + details?: string; + name?: string; + description?: string; +} + +export interface ValidationError { + field: string; + message: string; +} + +// ============================================================================ +// UI Types +// ============================================================================ + +export type ViewMode = 'grid' | 'list'; + +export interface SkillCardProps { + skill: Skill; + onView: (skill: Skill) => void; + onDelete: (skillId: string, skillName: string, folderId?: string) => void; + formatRelative: (timestamp: number) => string; +} + +export interface SkillDetailProps { + skill: Skill | null; + open: boolean; + onClose: () => void; + getFileContent: ( + skillId: string, + filePath: string, + version?: string, + ) => Promise<string | null>; + getVersionFiles?: ( + skillId: string, + version: string, + ) => 
Promise<SkillFileEntry[]>; +} + +export interface UploadModalProps { + open: boolean; + onCancel: () => void; + onUpload: (name: string, version: string, files: File[]) => Promise<boolean>; + loading?: boolean; +} + +// ============================================================================ +// Skill Search Types +// ============================================================================ + +export interface FieldWeight { + enabled: boolean; + weight: number; +} + +export interface FieldConfig { + name: FieldWeight; + tags: FieldWeight; + description: FieldWeight; + content: FieldWeight; +} + +// Re-export SkillSearchConfig from service to ensure consistency +export { SkillSearchConfig } from '@/services/skill-space-service'; + +export interface SkillSearchResult { + skill_id: string; + name: string; + description: string; + tags: string[]; + score: number; + bm25_score?: number; + vector_score?: number; +} + +export interface SkillSearchResponse { + results: SkillSearchResult[]; + total: number; + query: string; + search_type: string; +} + +export interface SearchConfigModalProps { + open: boolean; + onOpenChange: (open: boolean) => void; + config?: SkillSearchConfig; + onSave: (config: SkillSearchConfig) => Promise<boolean>; + onReindex?: (embdId: string) => Promise<boolean>; + loading?: boolean; +} diff --git a/web/src/pages/skills/utils.ts b/web/src/pages/skills/utils.ts new file mode 100644 index 0000000000..3b9cd5f0dd --- /dev/null +++ b/web/src/pages/skills/utils.ts @@ -0,0 +1,14 @@ +// Skill Space - Utility exports +// Re-export validation utilities for external use + +export { + DEFAULT_IGNORE_PATTERNS, + filterIgnoredFiles, + isMacJunkPath, + isTextFile, + parseFrontmatter, + sanitizeRelPath, + shouldIgnore, + validateSkillFormat, + validateSkillStructure, +} from './validation'; diff --git a/web/src/pages/skills/validation.ts b/web/src/pages/skills/validation.ts new file mode 100644 index 0000000000..0f45626d7e --- /dev/null +++ 
// b/web/src/pages/skills/validation.ts @@ -0,0 +1,563 @@
// Skill validation utilities

import type {
  SkillFileEntry,
  SkillMetadata,
  SkillValidationResult,
} from './types';

// ============================================================================
// Text File Validation
// ============================================================================

/** File extensions (lowercase, without the dot) accepted as text files. */
const TEXT_FILE_EXTENSIONS = [
  'md',
  'mdx',
  'txt',
  'json',
  'json5',
  'yaml',
  'yml',
  'toml',
  'js',
  'cjs',
  'mjs',
  'ts',
  'tsx',
  'jsx',
  'py',
  'sh',
  'rb',
  'go',
  'rs',
  'swift',
  'kt',
  'java',
  'cs',
  'cpp',
  'c',
  'h',
  'hpp',
  'sql',
  'csv',
  'ini',
  'cfg',
  'env',
  'xml',
  'html',
  'css',
  'scss',
  'sass',
  'svg',
] as const;

const TEXT_FILE_EXTENSION_SET = new Set<string>(TEXT_FILE_EXTENSIONS);

/** MIME types outside `text/*` that are still treated as text. */
const TEXT_CONTENT_TYPES = [
  'application/json',
  'application/xml',
  'application/yaml',
  'application/x-yaml',
  'application/toml',
  'application/javascript',
  'application/typescript',
  'application/markdown',
  'image/svg+xml',
] as const;

const TEXT_CONTENT_TYPE_SET = new Set<string>(TEXT_CONTENT_TYPES);

/**
 * Check if a content type is text-based.
 *
 * Parameters after the first `;` (e.g. `charset=utf-8`) are ignored and the
 * comparison is case-insensitive.
 *
 * @param contentType - MIME type string, possibly with parameters.
 * @returns `true` for any `text/*` type or a type listed in
 *   `TEXT_CONTENT_TYPES`; `false` for an empty string or anything else.
 */
export function isTextContentType(contentType: string): boolean {
  if (!contentType) return false;
  const normalized = contentType.split(';', 1)[0]?.trim().toLowerCase() ?? '';
  if (!normalized) return false;
  if (normalized.startsWith('text/')) return true;
  return TEXT_CONTENT_TYPE_SET.has(normalized);
}

/**
 * Check if a file is a text file.
 *
 * A text-based content type is sufficient on its own; otherwise the decision
 * falls back to the part of `filePath` after its last `.`.
 *
 * @param filePath - File name or relative path.
 * @param contentType - Optional MIME type reported for the file.
 * @returns `true` when the content type or the extension marks the file as text.
 */
export function isTextFile(filePath: string, contentType?: string): boolean {
  // Check content type first
  if (contentType && isTextContentType(contentType)) {
    return true;
  }

  // Check extension
  const ext = filePath.split('.').pop()?.toLowerCase() ?? '';
  if (!ext) return false;
  return TEXT_FILE_EXTENSION_SET.has(ext);
}

// ============================================================================
// Path Sanitization
// ============================================================================

/**
 * Sanitize relative path to prevent directory traversal attacks.
 *
 * A leading `./` and leading slashes are stripped. The path is rejected when
 * the result is empty, ends with `/`, contains `..` anywhere, or contains a
 * backslash.
 *
 * @param path - Relative path as supplied by the browser.
 * @returns The normalized path, or `null` when the path is rejected.
 */
export function sanitizeRelPath(path: string): string | null {
  const normalized = path.replace(/^\.\/+/, '').replace(/^\/+/, '');
  if (!normalized || normalized.endsWith('/')) return null;
  if (normalized.includes('..') || normalized.includes('\\')) return null;
  return normalized;
}

/**
 * Check if a path is a macOS junk file (should be ignored).
 *
 * Matching is case-insensitive and works on `/`-separated segments, so junk
 * is recognised at any depth. That matters because callers pass
 * `webkitRelativePath`, which is prefixed with the selected folder's name.
 *
 * @param path - Sanitized relative path.
 * @returns `true` for `.DS_Store` files, anything inside (or named)
 *   `__MACOSX`, and `._*` resource-fork entries.
 */
export function isMacJunkPath(path: string): boolean {
  const segments = path.toLowerCase().split('/');

  // .DS_Store files (any location, any case)
  if (segments[segments.length - 1] === '.ds_store') {
    return true;
  }

  // __MACOSX directories and resource fork files (._*) at any depth
  return segments.some(
    (segment) => segment === '__macosx' || segment.startsWith('._'),
  );
}

/**
 * Check if files contain any junk/temporary files.
 *
 * Paths that `sanitizeRelPath` rejects are not reported here.
 *
 * @param files - Files selected for upload.
 * @returns The original (unsanitized) paths of the junk files found.
 */
export function findJunkFiles(files: File[]): string[] {
  return files
    .map((file) => file.webkitRelativePath || file.name)
    .filter((path) => {
      const sanitized = sanitizeRelPath(path);
      return sanitized !== null && isMacJunkPath(sanitized);
    });
}

/**
 * Check if files contain any junk/temporary files.
 *
 * @param files - Files selected for upload.
 * @returns `true` if any junk files are found.
 */
export function hasJunkFiles(files: File[]): boolean {
  return findJunkFiles(files).length > 0;
}

// ============================================================================
// SKILL.md Validation
// ============================================================================

/**
 * Parse YAML frontmatter from markdown content.
 *
 * This is a deliberately small YAML subset, not a full parser:
 * - `key: value` lines become top-level entries, with the value parsed by
 *   `parseYamlValue`;
 * - a `key:` line without a value opens a container. Lines indented deeper
 *   than that key are collected into it, as nested `key: value` properties or
 *   as `- item` list entries (list items may also sit at the key's own
 *   indentation);
 * - only one level of nesting is modelled; deeper levels are flattened into
 *   the open container;
 * - blank lines and `#` comment lines are skipped, and anything else that
 *   does not match is ignored.
 *
 * @param content - Full markdown text, expected to start with a `---` line.
 * @returns Parsed metadata and the body after the closing `---`. When the
 *   opening or closing delimiter is missing, `valid` is `false`, `error` is
 *   `'invalid_frontmatter'` and `body` is the unmodified input.
 */
export function parseFrontmatter(content: string): {
  metadata: SkillMetadata;
  body: string;
  valid: boolean;
  error?: string;
} {
  const keyValuePattern = /^(\w+):\s*(.*)$/;
  const listItemPattern = /^-(?:\s+(.*))?$/;

  const lines = content.split('\n');
  const metadata: SkillMetadata = {};

  // Check frontmatter start
  if (lines[0]?.trim() !== '---') {
    return {
      metadata,
      body: content,
      valid: false,
      error: 'invalid_frontmatter',
    };
  }

  // Find end of frontmatter (index is relative to lines.slice(1))
  const endIndex = lines.slice(1).findIndex((line) => line.trim() === '---');
  if (endIndex === -1) {
    return {
      metadata,
      body: content,
      valid: false,
      error: 'invalid_frontmatter',
    };
  }

  const metaLines = lines.slice(1, endIndex + 1);
  const body = lines.slice(endIndex + 2).join('\n');

  // Top-level key that was declared without an inline value and is therefore
  // still collecting children; '' when no container is open.
  let openKey = '';
  let openIndent = 0;

  for (const line of metaLines) {
    const trimmedLine = line.trim();
    if (!trimmedLine || trimmedLine.startsWith('#')) continue;

    const indent = line.search(/\S/);
    const listItem = listItemPattern.exec(trimmedLine);

    // The indentation decides ownership BEFORE any key matching, otherwise a
    // nested `key: value` line would be mistaken for a top-level key.
    const belongsToOpenKey =
      openKey !== '' &&
      (indent > openIndent || (listItem !== null && indent === openIndent));

    if (belongsToOpenKey) {
      if (listItem) {
        let items = metadata[openKey];
        if (!Array.isArray(items)) {
          // Only an untouched placeholder may turn into a list; never discard
          // nested properties that were already collected.
          if (Object.keys(items).length > 0) continue;
          items = [];
          metadata[openKey] = items;
        }
        items.push(parseYamlValue(listItem[1] ?? ''));
        continue;
      }

      const nestedMatch = keyValuePattern.exec(trimmedLine);
      const parent = metadata[openKey];
      if (nestedMatch && !Array.isArray(parent)) {
        const [, nestedKey, nestedValue] = nestedMatch;
        (parent as Record<string, unknown>)[nestedKey] =
          parseYamlValue(nestedValue);
      }
      continue;
    }

    const topLevelMatch = keyValuePattern.exec(trimmedLine);
    if (!topLevelMatch) continue;

    const [, key, value] = topLevelMatch;
    if (value) {
      // Scalar (or inline array) value: nothing left to collect for this key.
      metadata[key] = parseYamlValue(value);
      openKey = '';
    } else {
      // Could be an object or array start
      metadata[key] = {};
      openKey = key;
      openIndent = indent;
    }
  }

  return { metadata, body, valid: true };
}

/**
 * Parse a YAML value string
 */
+function parseYamlValue(value: string): unknown { + const trimmed = value.trim(); + + // Boolean + if (trimmed === 'true') return true; + if (trimmed === 'false') return false; + + // Null + if (trimmed === 'null' || trimmed === '~') return null; + + // Number + if (/^-?\d+$/.test(trimmed)) return parseInt(trimmed, 10); + if (/^-?\d+\.\d+$/.test(trimmed)) return parseFloat(trimmed); + + // Array + if (trimmed.startsWith('[') && trimmed.endsWith(']')) { + return trimmed + .slice(1, -1) + .split(',') + .map((s) => s.trim()) + .filter((s) => s) + .map(parseYamlValue); + } + + // Quoted string + if ( + (trimmed.startsWith('"') && trimmed.endsWith('"')) || + (trimmed.startsWith("'") && trimmed.endsWith("'")) + ) { + return trimmed.slice(1, -1); + } + + // Unquoted string + return trimmed; +} + +// ============================================================================ +// Main Validation Function +// ============================================================================ + +const MAX_TOTAL_SIZE = 50 * 1024 * 1024; // 50MB +const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5MB per file + +/** + * Validate skill format + * This is the main validation function used before upload + */ +export async function validateSkillFormat( + files: File[], +): Promise<SkillValidationResult> { + // Check if there are any files + if (files.length === 0) { + return { valid: false, error: 'no_files' }; + } + + // Check total size + const totalSize = files.reduce((sum, f) => sum + f.size, 0); + if (totalSize > MAX_TOTAL_SIZE) { + return { valid: false, error: 'total_size_exceeded' }; + } + + // Check individual file sizes + for (const file of files) { + if (file.size > MAX_FILE_SIZE) { + return { valid: false, error: 'file_too_large' }; + } + } + + // Sanitize and filter paths + const validFiles: File[] = []; + for (const file of files) { + const path = file.webkitRelativePath || file.name; + const sanitized = sanitizeRelPath(path); + + if (!sanitized) { + return { valid: false, error: 
'invalid_path' }; + } + + if ( + isMacJunkPath(sanitized) || + shouldIgnore(sanitized, DEFAULT_IGNORE_PATTERNS) + ) { + continue; // Skip junk and ignored files + } + + validFiles.push(file); + } + + // Find SKILL.md file + const skillMdFile = validFiles.find((f) => { + const path = f.webkitRelativePath || f.name; + const normalized = path.toLowerCase(); + return normalized === 'skill.md' || normalized.endsWith('/skill.md'); + }); + + if (!skillMdFile) { + return { valid: false, error: 'missing_skill_md' }; + } + + // Read and validate SKILL.md content + try { + const content = await readFileAsText(skillMdFile); + const { + metadata, + valid: frontmatterValid, + error: frontmatterError, + } = parseFrontmatter(content); + + if (!frontmatterValid) { + return { valid: false, error: frontmatterError || 'invalid_frontmatter' }; + } + + // Validate required fields + if (!metadata.name) { + return { valid: false, error: 'missing_name' }; + } + + // Validate name format (slug format: lowercase, URL-safe) + if (!/^[a-z0-9][a-z0-9_-]*$/.test(metadata.name)) { + return { valid: false, error: 'invalid_name_format' }; + } + + // Validate version if provided (should be semver) + if (metadata.version) { + const version = String(metadata.version); + // Simple semver check: x.y.z format + if (!/^\d+\.\d+\.\d+/.test(version)) { + return { valid: false, error: 'invalid_version' }; + } + } + + // Validate all files are text-based + for (const file of validFiles) { + const path = file.webkitRelativePath || file.name; + if (!isTextFile(path, file.type)) { + return { valid: false, error: 'invalid_file_type', details: path }; + } + } + + return { + valid: true, + name: metadata.name, + description: metadata.description || '', + }; + } catch (error) { + console.error('Validation error:', error); + return { valid: false, error: 'read_failed' }; + } +} + +/** + * Read a File as text + */ +function readFileAsText(file: File): Promise<string> { + return new Promise((resolve, reject) => { + 
const reader = new FileReader(); + reader.onload = () => resolve(reader.result as string); + reader.onerror = () => reject(reader.error); + reader.readAsText(file); + }); +} + +// ============================================================================ +// Ignore Pattern Handling (simplified version of ignore package) +// ============================================================================ + +/** + * Simple ignore pattern matching + * Supports basic glob patterns: *, ?, ** + */ +export function shouldIgnore(filePath: string, patterns: string[]): boolean { + const normalizedPath = filePath.toLowerCase(); + for (const pattern of patterns) { + const trimmedPattern = pattern.trim(); + if (!trimmedPattern || trimmedPattern.startsWith('#')) continue; + + if (matchPattern(normalizedPath, trimmedPattern.toLowerCase())) { + return true; + } + } + return false; +} + +function matchPattern(filePath: string, pattern: string): boolean { + // Handle directory patterns (trailing slash) + if (pattern.endsWith('/')) { + const dirPattern = pattern.slice(0, -1); + return filePath.startsWith(dirPattern + '/') || filePath === dirPattern; + } + + // Handle exact match + if (filePath === pattern) return true; + + // Handle glob patterns + const regex = globToRegex(pattern); + return regex.test(filePath); +} + +function globToRegex(pattern: string): RegExp { + let regex = ''; + let i = 0; + + while (i < pattern.length) { + const c = pattern[i]; + + if (c === '*') { + if (pattern[i + 1] === '*') { + // ** matches any number of directories + regex += '.*'; + i += 2; + } else { + // * matches any characters except / + regex += '[^/]*'; + i++; + } + } else if (c === '?') { + // ? 
matches any single character except / + regex += '[^/]'; + i++; + } else if (c === '.') { + regex += '\\.'; + i++; + } else if ( + c === '\\' || + c === '/' || + c === '$' || + c === '^' || + c === '+' || + c === '(' || + c === ')' || + c === '[' || + c === ']' || + c === '{' || + c === '}' + ) { + regex += '\\' + c; + i++; + } else { + regex += c; + i++; + } + } + + return new RegExp(`^${regex}$`); +} + +// ============================================================================ +// Default Ignore Patterns +// ============================================================================ + +export const DEFAULT_IGNORE_PATTERNS = [ + '.git/', + '.svn/', + '.hg/', + 'node_modules/', + '__MACOSX/', + '.DS_Store', + '._*', + '*.log', + '*.tmp', + '*.temp', + '*.swp', + '*.swo', + '*~', + '.env', + '.env.*', + '.vscode/', + '.idea/', + 'Thumbs.db', + 'desktop.ini', + '.skill-meta.json', +]; + +// ============================================================================ +// File List Filtering +// ============================================================================ + +/** + * Filter files based on ignore patterns + */ +export function filterIgnoredFiles( + files: SkillFileEntry[], + ignorePatterns: string[] = DEFAULT_IGNORE_PATTERNS, +): SkillFileEntry[] { + return files.filter((file) => !shouldIgnore(file.path, ignorePatterns)); +} + +/** + * Filter upload files (File objects) based on ignore patterns + * Removes junk files like .DS_Store, __MACOSX, etc. 
+ */ +export function filterUploadFiles(files: File[]): File[] { + return files.filter((file) => { + const path = file.webkitRelativePath || file.name; + const sanitized = sanitizeRelPath(path); + if (!sanitized) return false; + return ( + !isMacJunkPath(sanitized) && + !shouldIgnore(sanitized, DEFAULT_IGNORE_PATTERNS) + ); + }); +} + +/** + * Check if a skill folder structure is valid + */ +export function validateSkillStructure(files: SkillFileEntry[]): { + valid: boolean; + error?: string; + skillMdPath?: string; +} { + // Find SKILL.md + const skillMdFile = files.find((f) => { + const normalized = f.path.toLowerCase(); + return normalized === 'skill.md' || normalized.endsWith('/skill.md'); + }); + + if (!skillMdFile) { + return { valid: false, error: 'missing_skill_md' }; + } + + return { valid: true, skillMdPath: skillMdFile.path }; +} diff --git a/web/src/routes.tsx b/web/src/routes.tsx index 90c6a9a074..89114318d3 100644 --- a/web/src/routes.tsx +++ b/web/src/routes.tsx @@ -33,6 +33,7 @@ export enum Routes { Chats = '/chats', Chat = '/chat', Files = '/files', + Skills = '/files/skills', ProfileSetting = '/profile-setting', Profile = '/profile', Api = '/api', @@ -246,6 +247,10 @@ const routeConfigOptions = [ path: Routes.Files, Component: () => import('@/pages/files'), }, + { + path: Routes.Skills, + Component: () => import('@/pages/skills'), + }, { path: Routes.UserSetting, Component: () => import('@/pages/user-setting'), diff --git a/web/src/services/skill-space-service.ts b/web/src/services/skill-space-service.ts new file mode 100644 index 0000000000..04bff34d77 --- /dev/null +++ b/web/src/services/skill-space-service.ts @@ -0,0 +1,257 @@ +import api from '@/utils/api'; +import request from '@/utils/request'; + +export interface SkillSpace { + id: string; + tenant_id: string; + name: string; + folder_id: string; + description?: string; + embd_id?: string; + rerank_id?: string; + top_k?: number; + status?: string; + create_time?: number; + update_time?: 
string; +} + +export interface CreateSpaceRequest { + name: string; + description?: string; + embd_id?: string; + rerank_id?: string; +} + +export interface UpdateSpaceRequest { + name?: string; + description?: string; + embd_id?: string; + rerank_id?: string; + top_k?: number; +} + +export interface SkillSearchConfig { + id: string; + tenant_id: string; + space_id: string; + embd_id: string; + vector_similarity_weight: number; + similarity_threshold: number; + field_config: Record<string, any>; + rerank_id?: string; + tenant_rerank_id?: number; + top_k: number; + index_version: string; + status: string; + create_time?: number; + update_time?: string; +} + +export interface UpdateConfigRequest { + tenant_id?: string; + space_id?: string; + embd_id: string; + vector_similarity_weight: number; + similarity_threshold: number; + field_config: Record<string, any>; + rerank_id?: string; + top_k: number; +} + +export interface SearchRequest { + tenant_id?: string; + space_id?: string; + query: string; + page?: number; + page_size?: number; +} + +export interface SearchResult { + skills: Array<{ + skill_id: string; + folder_id: string; + name: string; + description: string; + tags: string[]; + score: number; + bm25_score?: number; + vector_score?: number; + index_version?: string; + }>; + total: number; + query: string; + search_type: string; +} + +export interface SkillInfo { + id: string; + folder_id: string; + name: string; + description: string; + tags: string[]; + content: string; +} + +export interface IndexSkillsRequest { + tenant_id?: string; + space_id?: string; + skills: SkillInfo[]; + embd_id?: string; +} + +class SkillSpaceService { + private async request<T>( + method: string, + url: string, + data?: any, + params?: any, + ): Promise<T> { + const response: any = await request(url, { + method: method as any, + data, + params, + }); + + const jsonData = response?.data ?? 
response; + + if (jsonData?.code !== 0) { + throw new Error(jsonData?.message || 'Request failed'); + } + + return jsonData.data; + } + + // ==================== Skill Space Management ==================== + + // List all skill spaces + async listSpaces(): Promise<{ spaces: SkillSpace[]; total: number }> { + return await this.request<{ spaces: SkillSpace[]; total: number }>( + 'GET', + api.skillSpaces, + ); + } + + // Create a new skill space + async createSpace(request: CreateSpaceRequest): Promise<SkillSpace> { + return await this.request<SkillSpace>( + 'POST', + api.skillSpaces, + request, + ); + } + + // Get a skill space by ID + async getSpace(spaceId: string): Promise<SkillSpace> { + return await this.request<SkillSpace>( + 'GET', + api.skillSpace(spaceId), + ); + } + + // Update a skill space + async updateSpace( + spaceId: string, + request: UpdateSpaceRequest, + ): Promise<SkillSpace> { + return await this.request<SkillSpace>( + 'PUT', + api.skillSpace(spaceId), + request, + ); + } + + // Delete a skill space + async deleteSpace(spaceId: string): Promise<void> { + await this.request<void>( + 'DELETE', + api.skillSpace(spaceId), + ); + } + + // Get space by folder ID + async getSpaceByFolder(folderId: string): Promise<SkillSpace> { + return await this.request<SkillSpace>( + 'GET', + api.skillSpaceByFolder, + null, + { folder_id: folderId }, + ); + } + + // ==================== Skill Search Config ==================== + + // Get skill search config + async getConfig( + spaceId?: string, + embdId?: string, + ): Promise<SkillSearchConfig> { + const params: Record<string, string> = {}; + if (spaceId) params.space_id = spaceId; + if (embdId) params.embd_id = embdId; + + return await this.request<SkillSearchConfig>( + 'GET', + api.skillConfig, + null, + params, + ); + } + + // Update skill search config + async updateConfig(request: UpdateConfigRequest): Promise<SkillSearchConfig> { + return await this.request<SkillSearchConfig>( + 'POST', + api.skillConfig, + 
request, + ); + } + + // ==================== Skill Search ==================== + + // Search skills + async search(request: SearchRequest): Promise<SearchResult> { + return await this.request<SearchResult>( + 'POST', + api.skillSearch, + request, + ); + } + + // ==================== Skill Indexing ==================== + + // Index skills + async indexSkills( + request: IndexSkillsRequest, + ): Promise<{ indexed_count: number }> { + return await this.request<{ indexed_count: number }>( + 'POST', + api.skillIndex, + request, + ); + } + + // Delete skill index + async deleteSkillIndex(skillId: string, spaceId?: string): Promise<void> { + const params: Record<string, string> = { skill_id: skillId }; + if (spaceId) params.space_id = spaceId; + + await this.request<void>( + 'DELETE', + api.skillIndex, + null, + params, + ); + } + + // Reindex all skills + async reindex(request: IndexSkillsRequest): Promise<any> { + return await this.request<any>( + 'POST', + api.skillReindex, + request, + ); + } +} + +export const skillSpaceService = new SkillSpaceService(); +export default skillSpaceService; diff --git a/web/src/utils/api.ts b/web/src/utils/api.ts index 96ad830006..4ca23191ef 100644 --- a/web/src/utils/api.ts +++ b/web/src/utils/api.ts @@ -333,4 +333,13 @@ export default { adminGetSandboxConfig: `${restAPIv1}/admin/sandbox/config`, adminSetSandboxConfig: `${restAPIv1}/admin/sandbox/config`, adminTestSandboxConnection: `${restAPIv1}/admin/sandbox/test`, + + // Skill spaces + skillSpaces: `${restAPIv1}/skills/spaces`, + skillSpace: (spaceId: string) => `${restAPIv1}/skills/spaces/${spaceId}`, + skillSpaceByFolder: `${restAPIv1}/skills/space/by-folder`, + skillConfig: `${restAPIv1}/skills/config`, + skillSearch: `${restAPIv1}/skills/search`, + skillIndex: `${restAPIv1}/skills/index`, + skillReindex: `${restAPIv1}/skills/reindex`, }; diff --git a/web/vite.config.ts b/web/vite.config.ts index 7ee919a220..59598ded91 100644 --- a/web/vite.config.ts +++ b/web/vite.config.ts @@ 
-39,8 +39,23 @@ function resolveMinify(value: string | undefined): MinifyValue { // https://vitejs.dev/config/ export default defineConfig(({ mode }) => { + // Load env from .env file (also loads .env.local, .env.[mode], .env.[mode].local) const env = loadEnv(mode, process.cwd(), ''); + // Try to load from .env file explicitly if API_PROXY_SCHEME not found + let proxyScheme = env.API_PROXY_SCHEME; + if (!proxyScheme) { + try { + const envLocal = loadEnv('', process.cwd(), ''); + proxyScheme = envLocal.API_PROXY_SCHEME; + } catch { + // ignore + } + } + proxyScheme = proxyScheme || 'python'; + + console.log(`[vite.config] mode: ${mode}, API_PROXY_SCHEME: ${proxyScheme}`); + const proxySchemes = { python: { '/api/v1/admin': { @@ -107,10 +122,15 @@ export default defineConfig(({ mode }) => { }, }; - const proxy = - proxySchemes[env.API_PROXY_SCHEME || 'python'] || proxySchemes.python; + const proxy = proxySchemes[proxyScheme] || proxySchemes.python; return { + define: { + // Expose to client code via import.meta.env + 'import.meta.env.API_PROXY_SCHEME': JSON.stringify(proxyScheme), + // Keep backward compatibility + __API_PROXY_SCHEME__: JSON.stringify(proxyScheme), + }, plugins: [ inspectorBabelPlugin(), react(),