diff --git a/agent/templates/cajal_scientific_paper_agent.json b/agent/templates/cajal_scientific_paper_agent.json new file mode 100644 index 0000000000..3cc34d89b7 --- /dev/null +++ b/agent/templates/cajal_scientific_paper_agent.json @@ -0,0 +1,304 @@ +{ + "id": "41", + "title": { + "en": "CAJAL scientific paper agent", + "de": "CAJAL-Agent für wissenschaftliche Arbeiten", + "zh": "CAJAL 科学论文助手" + }, + "description": { + "en": "A local-first scientific paper generation agent for RAGFlow. It is preconfigured for Agnuxo/CAJAL-4B-P2PCLAW through Ollama, retrieves knowledge-base evidence, and drafts citation-grounded LaTeX-ready academic sections.", + "de": "Ein lokal ausgerichteter Agent zur Erstellung wissenschaftlicher Arbeiten in RAGFlow. Er ist für Agnuxo/CAJAL-4B-P2PCLAW über Ollama vorkonfiguriert, ruft Evidenz aus der Wissensdatenbank ab und erstellt zitationsgestützte, LaTeX-fähige akademische Abschnitte.", + "zh": "面向 RAGFlow 的本地优先科学论文生成助手。该模板预配置 Agnuxo/CAJAL-4B-P2PCLAW(Ollama),可检索知识库证据,并生成带引用依据、适合 LaTeX 的学术章节。" + }, + "canvas_type": "Agent", + "canvas_types": [ + "Agent", + "Recommended" + ], + "dsl": { + "components": { + "Agent:NewPumasLick": { + "downstream": [ + "Message:OrangeYearsShine" + ], + "obj": { + "component_name": "Agent", + "params": { + "delay_after_error": 1, + "description": "", + "exception_comment": "", + "exception_default_value": "", + "exception_goto": [], + "exception_method": null, + "frequencyPenaltyEnabled": false, + "frequency_penalty": 0.5, + "llm_id": "agnuxo/cajal-4b-p2pclaw@Ollama", + "maxTokensEnabled": true, + "max_retries": 3, + "max_rounds": 3, + "max_tokens": 32768, + "mcp": [], + "message_history_window_size": 12, + "outputs": { + "content": { + "type": "string", + "value": "" + } + }, + "parameter": "Precise", + "presencePenaltyEnabled": false, + "presence_penalty": 0.5, + "prompts": [ + { + "role": "user", + "content": "# Research request\n{sys.query}\n\nUse the configured knowledge base retrieval tool before 
drafting. If no evidence is retrieved, state that limitation clearly." + } + ], + "sys_prompt": "## Role & Task\nYou are **CAJAL**, a local-first scientific writing agent specialized in citation-grounded paper generation.\n\nUse RAGFlow retrieval results as the source of truth. Write precise academic content that can be pasted into a paper draft, technical report, or LaTeX manuscript. You are optimized for the local GGUF model `Agnuxo/CAJAL-4B-P2PCLAW` served through Ollama as `agnuxo/cajal-4b-p2pclaw`.\n\n## Operating Rules\n1. Decompose the user request into research goals, target section type, expected citation style, and missing evidence.\n2. Retrieve relevant knowledge-base passages before drafting factual claims.\n3. Ground every substantive claim in retrieved evidence. If evidence is missing, mark it as a limitation instead of inventing a citation.\n4. Prefer structured scientific writing: abstract, introduction, related work, methodology, experiments, results, limitations, and conclusion.\n5. Use LaTeX formatting for equations, symbols, algorithms, tables, and section headings when it helps the manuscript.\n6. Preserve traceability: cite source titles, document names, page numbers, or chunk identifiers when available in retrieved context.\n7. Keep language technical, concise, and reproducible. 
Avoid marketing language and vague generalizations.\n\n## Output Contract\nReturn one of the following, depending on the user request:\n- A complete paper section with citation markers and a short evidence map.\n- A literature review organized by themes, methods, findings, and gaps.\n- A methodology or experiment section with reproducible steps, assumptions, and limitations.\n- A LaTeX-ready abstract, introduction, or conclusion.\n\nAlways include:\n- **Draft**: the requested scientific content.\n- **Evidence used**: concise bullets mapping claims to retrieved sources.\n- **Limitations**: missing evidence, weak support, or assumptions that require verification.\n", + "temperature": 0.2, + "temperatureEnabled": true, + "tools": [ + { + "component_name": "Retrieval", + "name": "Retrieval", + "params": { + "cross_languages": [], + "description": "Retrieve papers, reports, datasets, and notes that ground CAJAL scientific writing outputs.", + "empty_response": "", + "kb_ids": [], + "keywords_similarity_weight": 0.7, + "outputs": { + "formalized_content": { + "type": "string", + "value": "" + } + }, + "rerank_id": "", + "similarity_threshold": 0.2, + "top_k": 1024, + "top_n": 10, + "use_kg": false + } + } + ], + "topPEnabled": false, + "top_p": 0.75, + "user_prompt": "", + "visual_files_var": "" + } + }, + "upstream": [ + "begin" + ] + }, + "Message:OrangeYearsShine": { + "downstream": [], + "obj": { + "component_name": "Message", + "params": { + "content": [ + "{Agent:NewPumasLick@content}" + ] + } + }, + "upstream": [ + "Agent:NewPumasLick" + ] + }, + "begin": { + "downstream": [ + "Agent:NewPumasLick" + ], + "obj": { + "component_name": "Begin", + "params": { + "enablePrologue": true, + "inputs": {}, + "mode": "conversational", + "prologue": "Hi, I am CAJAL in RAGFlow. Add research papers or datasets to your knowledge base, then ask me to draft a citation-grounded paper section, literature review, methodology, or LaTeX-ready abstract." 
+ } + }, + "upstream": [] + } + }, + "globals": { + "sys.conversation_turns": 0, + "sys.files": [], + "sys.query": "", + "sys.user_id": "" + }, + "graph": { + "edges": [ + { + "data": { + "isHovered": false + }, + "id": "xy-edge__beginstart-Agent:NewPumasLickend", + "source": "begin", + "sourceHandle": "start", + "target": "Agent:NewPumasLick", + "targetHandle": "end" + }, + { + "data": { + "isHovered": false + }, + "id": "xy-edge__Agent:NewPumasLickstart-Message:OrangeYearsShineend", + "markerEnd": "logo", + "source": "Agent:NewPumasLick", + "sourceHandle": "start", + "style": { + "stroke": "rgba(91, 93, 106, 1)", + "strokeWidth": 1 + }, + "target": "Message:OrangeYearsShine", + "targetHandle": "end", + "type": "buttonEdge", + "zIndex": 1001 + } + ], + "nodes": [ + { + "data": { + "form": { + "enablePrologue": true, + "inputs": {}, + "mode": "conversational", + "prologue": "Hi, I am CAJAL in RAGFlow. Add research papers or datasets to your knowledge base, then ask me to draft a citation-grounded paper section, literature review, methodology, or LaTeX-ready abstract." 
+ }, + "label": "Begin", + "name": "begin" + }, + "dragging": false, + "id": "begin", + "measured": { + "height": 48, + "width": 200 + }, + "position": { + "x": -9.569875358221438, + "y": 205.84018385864917 + }, + "selected": false, + "sourcePosition": "left", + "targetPosition": "right", + "type": "beginNode" + }, + { + "data": { + "form": { + "content": [ + "{Agent:NewPumasLick@content}" + ] + }, + "label": "Message", + "name": "Scientific Draft" + }, + "dragging": false, + "id": "Message:OrangeYearsShine", + "measured": { + "height": 56, + "width": 200 + }, + "position": { + "x": 734.4061285881053, + "y": 199.9706031723009 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "messageNode" + }, + { + "data": { + "form": { + "delay_after_error": 1, + "description": "", + "exception_comment": "", + "exception_default_value": "", + "exception_goto": [], + "exception_method": null, + "frequencyPenaltyEnabled": false, + "frequency_penalty": 0.5, + "llm_id": "agnuxo/cajal-4b-p2pclaw@Ollama", + "maxTokensEnabled": true, + "max_retries": 3, + "max_rounds": 3, + "max_tokens": 32768, + "mcp": [], + "message_history_window_size": 12, + "outputs": { + "content": { + "type": "string", + "value": "" + } + }, + "parameter": "Precise", + "presencePenaltyEnabled": false, + "presence_penalty": 0.5, + "prompts": [ + { + "role": "user", + "content": "# Research request\n{sys.query}\n\nUse the configured knowledge base retrieval tool before drafting. If no evidence is retrieved, state that limitation clearly." + } + ], + "sys_prompt": "## Role & Task\nYou are **CAJAL**, a local-first scientific writing agent specialized in citation-grounded paper generation.\n\nUse RAGFlow retrieval results as the source of truth. Write precise academic content that can be pasted into a paper draft, technical report, or LaTeX manuscript. 
You are optimized for the local GGUF model `Agnuxo/CAJAL-4B-P2PCLAW` served through Ollama as `agnuxo/cajal-4b-p2pclaw`.\n\n## Operating Rules\n1. Decompose the user request into research goals, target section type, expected citation style, and missing evidence.\n2. Retrieve relevant knowledge-base passages before drafting factual claims.\n3. Ground every substantive claim in retrieved evidence. If evidence is missing, mark it as a limitation instead of inventing a citation.\n4. Prefer structured scientific writing: abstract, introduction, related work, methodology, experiments, results, limitations, and conclusion.\n5. Use LaTeX formatting for equations, symbols, algorithms, tables, and section headings when it helps the manuscript.\n6. Preserve traceability: cite source titles, document names, page numbers, or chunk identifiers when available in retrieved context.\n7. Keep language technical, concise, and reproducible. Avoid marketing language and vague generalizations.\n\n## Output Contract\nReturn one of the following, depending on the user request:\n- A complete paper section with citation markers and a short evidence map.\n- A literature review organized by themes, methods, findings, and gaps.\n- A methodology or experiment section with reproducible steps, assumptions, and limitations.\n- A LaTeX-ready abstract, introduction, or conclusion.\n\nAlways include:\n- **Draft**: the requested scientific content.\n- **Evidence used**: concise bullets mapping claims to retrieved sources.\n- **Limitations**: missing evidence, weak support, or assumptions that require verification.\n", + "temperature": 0.2, + "temperatureEnabled": true, + "tools": [ + { + "component_name": "Retrieval", + "name": "Retrieval", + "params": { + "cross_languages": [], + "description": "Retrieve papers, reports, datasets, and notes that ground CAJAL scientific writing outputs.", + "empty_response": "", + "kb_ids": [], + "keywords_similarity_weight": 0.7, + "outputs": { + 
"formalized_content": { + "type": "string", + "value": "" + } + }, + "rerank_id": "", + "similarity_threshold": 0.2, + "top_k": 1024, + "top_n": 10, + "use_kg": false + } + } + ], + "topPEnabled": false, + "top_p": 0.75, + "user_prompt": "", + "visual_files_var": "" + }, + "label": "Agent", + "name": "CAJAL Writer" + }, + "dragging": false, + "id": "Agent:NewPumasLick", + "measured": { + "height": 84, + "width": 200 + }, + "position": { + "x": 347.00048227952215, + "y": 186.49109364794631 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "agentNode" + } + ] + }, + "history": [], + "memory": [], + "messages": [], + "path": [], + "retrieval": [] + }, + "avatar": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAH0klEQVR4nO2ZC1BU1wGG/3uRp/IygG+DGK0GOjE1U6cxI4tT03Y0E+kENbaJbKpj60wzgNMwnTjuEtu0miGasY+0krI202kMVEnVxtoOLG00oVa0LajVBDcSEI0REFBgkZv/3GWXfdzdvctuHs7kmzmec9//d+45914XCXc4Xwjk1+59VJGGF7C5QAFSWBvgyWmWLl7IKiny6QNL173B5YjB84bOyrpKA4B1DLySdQpLKAiZGtZ7a/KMVoQJz6UfEZyhTWwaEBmssiLvCueu6BJg8EwFqGTTAC+uvNWC9w82sRWcux/JwaSHstjywcogRt4RG0KExwWG4QsVYCebKSwe3L5lR9OOWjyzfg2WL/0a1/jncO3b2FHxGnKeWYqo+Giu8UEMrWJKWBACPMY/DG+63txhvnKshUu+DF2/hayMDFRsL+VScDb++AVc6OjAuInxXPJl2tfnIikrzUyJMi7qQmLRhOEr2fOFbX/7P6STF7BqoWevfdij4NWGQfx+57OYO2sG1wSnsek8Nm15EU8sikF6ouelXz9ph7JwDqYt+5IIZaGEkauDIrH4wPBmhjexCSEws+VdVG1M4NIoj+2xYzBuJtavWcEl/VS8dggx/ZdQvcGzQwp+cxOXsu5RBQQMVkYJM4LA/Txh+ELFMWFVPARS5kFiabZdx8Olh7l17BzdvhzZmROhdJ3j6D/nIyBgOCMlLAgA9xmF4TMV4BSbrgnrLiBl5rOsRCRRbDUsBzQFiJjY91PCBj9w+yiP1lXWsTLAjc9YQGB9I8+Yx1oTiUWFvW9QgDo2PdASaDp/EQ8/sRnhcPTVcuTMncXwQQVESL9DidscaPW+QEtAICRu9PSxFTpJiePV8AI9AsTvXZBY/Pa+wJ9ApNApIILm8S5Y4QXXQwhYFH6csemDP4G3G5v579i5d04mknknQhDYS4HCrCVr/mC3D305KnbCEpvVIia5Onw6WaWw+KAl0Np+FUXbdiMcyoqfUoeRHoFrJ1uRtnBG1/9Mf/3LtElp+VwF2wcd7woJib1vUPwMH4GWQCQJJtBa/V9cPmFD8uQUpMdNGDhY8bNYrobh8acHu270/l0ImJWRt64Wn6WACN9z5gq2lXwPW8pfweT0icP/fH23vO9QLYq3/QKyLBmFQI3CUcT9NdESEEPItKsSN3r7MBaSJoxHWZERM6ZmMLy2gDP8/p
d/og418dTL37hFSUpMUC5f+UiWZcnY9s5+ixCwUiCXx2iiJdDNx6f4pgkH8Q3lbxK7h8+enoHha1cRNdMp8axiHxo6+/5bVdk8DSROYIW1X7QEIom3wHD3gEf4vu1bVYEJZeWQ0zJQvmcfyiv2QZak6raG/QWfK4Ez9mTc5v8xPMJfuojoxXmIX/9DOMe+FCWbcHu4BJJ0YEwCx0824bFNW9HesB+CqYu+jepfPYcHF+aoPXS8sQl/+vU2bgmOU2C+qRc9/YrrPPbGBtzavd0nvCxLxui4pJrBm911PFwak4CYA80cj+JCAiGUzYkmxrSY4N2c3GLi6UEIFL/wRxxqkhmHnTEpDQcrfq6ea+hcE8bNy3GFzyq4H22HW1Kd4WMSkg1jmsSRpKj0Rzhy4gNUv/y8Gjrv8SJK3OWScA+fMn/ysVPPvTmeh6nh1TcxBUJ+jEaKYr7N36x7h+Edj0pB6+WrLokn87+BrTt/p4ZPzZ6MM7/8R2//h33vOcNzdwgBMwVMbGvySQmo4a0NqOZccU7YmGXLEfPQUlUid/XT6B8YdIU/99vjsPcOdEhDsfOd4QVCwKB8yp8SWuG1njbTl83DpMWz1PCKAswuWPDI0e8WebyAJBbxNdrF7cls+hBpAb3h3XtehL/3+4u7D35rQwpP4YFTwMJ91rHpQyQFQgmf9sAMNL9Ur4afv/FBjIuPVj+n4YVTwMD96tj0IVICoYYXv/q1VJ1Sl8UveQyaRwErvOB6B5SwKhqP00gI6A0vhsycJ7/KIzxhyHqGN0ADbnNAAYOicRfCFdAb/p50Gbfuc/wy5w1D5lOghk0fuG0USlgVr7sQjoDe8C8WxKGKPy2KjzlvAQb02/sCbh+FApngX1QUtyeSuwDi0hxFByV7L+LIf3r5kvpp4PBr07Hqvn71Y85bgOG6WS2ggA1+4D6eUKKQApVsqngI6KSkqh9HzsoM/3zg8Oz5VQ9E8wjf30YFDGdkeAsCwH18oYRZGXk7C4HuYxcwe6rjQsFovzaEvoFxqNkTOPzMjGikJso8wsF77XYkLx6dAwxWxvBmBIH7aUMJi8J3w0DnTVz7dyvX6KPzVBt+kL8cmzesRq9ps2Z48bRJmOIapS7E4zM2lXNt5CcU6ID7+ocSZkqY2NRN6ysnsHbJEpR8ZwV6t5Yg+iuLELf2KVd48VwXQf3BQGUMb4ZOuH9gKFEIYJfiNrEDcXZHHV4q3YRv5i7ikgM94RlETNgihrcgBHhccCiRCf7VhBK5rAPyr9I/Y/WKPEyfksH/9NjQ2dODhsYzwcLXsypkeBtCRGLRDUUMAMyKHxEx4dtrzyP97nQMygripiQiKi4aSbPvQmKW7+OXF69ntYvBa1iPCYklZEZECsGm4ja0Ops7EJsaj4SprlU+8IJiqIjAFga3Ikx4vvAYkTGALxyWFArlsnbBC9Sz6mI5zWKNRGh3JJY7mjte4GOz+r4tkRbxQQAAAABJRU5ErkJggg==" +} diff --git a/conf/llm_factories.json b/conf/llm_factories.json index 18c30e3abe..9648714fa2 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -1759,8 +1759,16 @@ "logo": "", "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION", "status": "1", - "rank": "988", - "llm": [] + "rank": "830", + "llm": [ + { + "llm_name": "agnuxo/cajal-4b-p2pclaw", + "tags": "LLM,CHAT,32K,SCIENTIFIC_WRITING", + "max_tokens": 32768, + "model_type": "chat", + "is_tools": false + } + ] }, { "name": 
"ModelScope",
diff --git a/test/test_cajal_template_unit.py b/test/test_cajal_template_unit.py
new file mode 100644
index 0000000000..ad47321996
--- /dev/null
+++ b/test/test_cajal_template_unit.py
@@ -0,0 +1,110 @@
+#
+# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Unit tests for the CAJAL agent template and its Ollama model registration."""
+
+import importlib.util
+import json
+from pathlib import Path
+
+# This file lives in <repo>/test/, so the repository root is one level up.
+REPO_ROOT = Path(__file__).resolve().parents[1]
+# Component id of the agent node inside the template DSL.
+AGENT_ID = "Agent:NewPumasLick"
+
+
+def _load_template_utils():
+    """Load api/db/template_utils.py from its file path and return the module object."""
+    module_path = REPO_ROOT / "api" / "db" / "template_utils.py"
+    spec = importlib.util.spec_from_file_location("template_utils", module_path)
+    # spec_from_file_location may return None; fail with a readable message
+    # instead of an AttributeError on the lines below.
+    assert spec is not None and spec.loader is not None, f"cannot build an import spec for {module_path}"
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+def _load_cajal_template():
+    """Parse the CAJAL template JSON and return it after normalize_canvas_template_categories."""
+    template_path = REPO_ROOT / "agent" / "templates" / "cajal_scientific_paper_agent.json"
+    with template_path.open(encoding="utf-8") as template_file:
+        raw_template = json.load(template_file)
+    return _load_template_utils().normalize_canvas_template_categories(raw_template)
+
+
+def _first_with(items, key, value):
+    """Return the first dict in ``items`` whose ``key`` equals ``value``; fail the test if none does."""
+    for item in items:
+        if item.get(key) == value:
+            return item
+    raise AssertionError(f"no entry with {key}={value!r}")
+
+
+def test_cajal_template_exposes_local_ollama_model_and_agent_categories():
+    """Template metadata and agent params reference the local CAJAL model served by Ollama."""
+    template = _load_cajal_template()
+
+    assert template["id"] == "41"
+    assert template["title"]["en"] == "CAJAL scientific paper agent"
+    assert template["canvas_type"] == "Agent"
+    assert template["canvas_types"] == ["Agent", "Recommended"]
+
+    agent_params = template["dsl"]["components"][AGENT_ID]["obj"]["params"]
+    assert agent_params["llm_id"] == "agnuxo/cajal-4b-p2pclaw@Ollama"
+    assert agent_params["max_tokens"] == 32768
+    assert "Agnuxo/CAJAL-4B-P2PCLAW" in agent_params["sys_prompt"]
+    assert "LaTeX" in agent_params["sys_prompt"]
+
+
+def test_cajal_template_keeps_retrieval_grounding_and_graph_form_in_sync():
+    """The agent has exactly one Retrieval tool and every graph node form mirrors its component params."""
+    template = _load_cajal_template()
+    components = template["dsl"]["components"]
+    agent_params = components[AGENT_ID]["obj"]["params"]
+    retrieval_tools = [tool for tool in agent_params["tools"] if tool["component_name"] == "Retrieval"]
+
+    assert len(retrieval_tools) == 1
+    assert retrieval_tools[0]["params"]["top_n"] == 10
+    assert "ground" in retrieval_tools[0]["params"]["description"].lower()
+    assert "{sys.query}" in agent_params["prompts"][0]["content"]
+
+    nodes = template["dsl"]["graph"]["nodes"]
+    agent_node = _first_with(nodes, "id", AGENT_ID)
+    begin_node = _first_with(nodes, "id", "begin")
+
+    assert agent_node["data"]["form"]["llm_id"] == agent_params["llm_id"]
+    assert agent_node["data"]["form"]["sys_prompt"] == agent_params["sys_prompt"]
+    assert "CAJAL" in begin_node["data"]["form"]["prologue"]
+
+    # Each component's params are stored twice in the template: under
+    # dsl.components[<id>].obj.params and under the graph node's data.form.
+    # Compare them in full so drift in any field is caught, not only in
+    # llm_id and sys_prompt.
+    assert {node["id"] for node in nodes} == set(components)
+    for node in nodes:
+        assert node["data"]["form"] == components[node["id"]]["obj"]["params"], node["id"]
+
+
+def test_cajal_is_registered_as_a_known_ollama_chat_model():
+    """conf/llm_factories.json lists the CAJAL model under the Ollama factory as a chat model."""
+    factories_path = REPO_ROOT / "conf" / "llm_factories.json"
+    factories = json.loads(factories_path.read_text(encoding="utf-8"))
+    ollama = _first_with(factories["factory_llm_infos"], "name", "Ollama")
+    cajal = _first_with(ollama["llm"], "llm_name", "agnuxo/cajal-4b-p2pclaw")
+
+    assert cajal["model_type"] == "chat"
+    assert cajal["max_tokens"] == 32768
+    assert "SCIENTIFIC_WRITING" in cajal["tags"]