diff --git a/common/data_source/moodle_connector.py b/common/data_source/moodle_connector.py index 39efcf07be..850ce5815d 100644 --- a/common/data_source/moodle_connector.py +++ b/common/data_source/moodle_connector.py @@ -21,14 +21,19 @@ from common.data_source.interfaces import ( LoadConnector, PollConnector, SecondsSinceUnixEpoch, + SlimConnectorWithPermSync, +) +from common.data_source.models import ( + Document, + GenerateSlimDocumentOutput, + SlimDocument, ) -from common.data_source.models import Document from common.data_source.utils import batch_generator, rl_requests logger = logging.getLogger(__name__) -class MoodleConnector(LoadConnector, PollConnector): +class MoodleConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync): """Moodle LMS connector for accessing course content""" def __init__(self, moodle_url: str, batch_size: int = INDEX_BATCH_SIZE) -> None: @@ -137,6 +142,78 @@ class MoodleConnector(LoadConnector, PollConnector): self._get_updated_content(courses, start, end) ) + @staticmethod + def _slim_doc_id_for_module(module) -> Optional[str]: + """Return the indexed document id for a Moodle module, or None. + + The id format must match the ones produced by the _process_* + helpers below. Module types that we never ingest (label, url) and + modules with no id return None. + """ + mtype = getattr(module, "modname", None) + mid = getattr(module, "id", None) + if not mtype or mid is None: + return None + if mtype in ("label", "url"): + return None + if mtype == "resource": + return f"moodle_resource_{mid}" + if mtype == "forum": + return f"moodle_forum_{mid}" + if mtype == "page": + return f"moodle_page_{mid}" + if mtype == "book": + return f"moodle_book_{mid}" + if mtype in ("assign", "quiz"): + return f"moodle_{mtype}_{mid}" + return None + + def retrieve_all_slim_docs_perm_sync( + self, + callback: Any = None, + ) -> GenerateSlimDocumentOutput: + """List the ids of every Moodle module that could be indexed. + + This is a lightweight pass over courses and modules with no file + downloads. The caller compares the returned ids against the index + and removes any indexed document whose id is not in this list. + """ + del callback + if not self.moodle_client: + raise ConnectorMissingCredentialError("Moodle client not initialized") + + logger.info("Starting Moodle slim snapshot for stale-document cleanup") + courses = self._get_enrolled_courses() + if not courses: + logger.warning("No courses found for slim snapshot") + return + + batch: list[SlimDocument] = [] + total = 0 + for course in courses: + try: + contents = self._get_course_contents(course.id) + for section in contents: + for module in section.modules: + slim_id = self._slim_doc_id_for_module(module) + if slim_id is None: + continue + batch.append(SlimDocument(id=slim_id)) + total += 1 + if len(batch) >= self.batch_size: + yield batch + batch = [] + except Exception as e: + self._log_error( + f"slim snapshot for course {getattr(course, 'fullname', '?')}", + e, + ) + + if batch: + yield batch + + logger.info(f"Moodle slim snapshot completed: {total} documents listed") + @retry(tries=3, delay=1, backoff=2) def _get_enrolled_courses(self) -> list: if not self.moodle_client: diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py index 697e3d5dee..b5801905db 100644 --- a/rag/svr/sync_data_source.py +++ b/rag/svr/sync_data_source.py @@ -976,19 +976,40 @@ class Moodle(SyncBase): # Determine the time range for synchronization based on reindex or poll_range_start poll_start = task.get("poll_range_start") + file_list = None if task["reindex"] == "1" or poll_start is None: document_generator = self.connector.load_from_state() _begin_info = "totally" else: + # Freeze the poll end time BEFORE the slim snapshot so that the + # snapshot and the poll cover the same point in time. Without + # this, a module created between the snapshot and the poll + # could be polled as new and at the same time be missing from + # the slim list, which would mark it as stale and delete it. + end_ts = datetime.now(timezone.utc).timestamp() + + if self.conf.get("sync_deleted_files"): + file_list = [] + try: + for slim_batch in self.connector.retrieve_all_slim_docs_perm_sync(): + file_list.extend(slim_batch) + except Exception: + logging.exception( + "Moodle slim snapshot failed; skipping stale-document cleanup " + "(connector_id=%s, kb_id=%s)", + task.get("connector_id"), + task.get("kb_id"), + ) + file_list = None document_generator = self.connector.poll_source( poll_start.timestamp(), - datetime.now(timezone.utc).timestamp(), + end_ts, ) _begin_info = f"from {poll_start}" self.log_connection("Moodle", self.conf["moodle_url"], task) - return document_generator + return document_generator, file_list class BOX(SyncBase): diff --git a/web/src/pages/next-chats/chat/app-settings/chat-basic-settings.tsx b/web/src/pages/next-chats/chat/app-settings/chat-basic-settings.tsx index 5794787d9a..c0715c7846 100644 --- a/web/src/pages/next-chats/chat/app-settings/chat-basic-settings.tsx +++ b/web/src/pages/next-chats/chat/app-settings/chat-basic-settings.tsx @@ -42,14 +42,23 @@ export default function ChatBasicSetting() { }, [metadataKeys]); useEffect(() => { - const currentFields = form.getValues('prompt_config.reference_metadata.fields'); - if (metadataInclude && Array.isArray(currentFields) && currentFields.length > 0 && metadataKeys) { - const validFields = currentFields.filter((field) => metadataKeys.includes(field)); + const currentFields = form.getValues( + 'prompt_config.reference_metadata.fields', + ); + if ( + metadataInclude && + Array.isArray(currentFields) && + currentFields.length > 0 && + metadataKeys + ) { + const validFields = currentFields.filter((field) => + metadataKeys.includes(field), + ); if (validFields.length !== currentFields.length) { form.setValue('prompt_config.reference_metadata.fields', validFields); } } else if (!metadataInclude) { - form.setValue('prompt_config.reference_metadata.fields', undefined); + form.setValue('prompt_config.reference_metadata.fields', undefined); } }, [kbIds, metadataKeys, metadataInclude, form]); diff --git a/web/src/pages/next-search/search-setting.tsx b/web/src/pages/next-search/search-setting.tsx index d9a381782d..c3c812306d 100644 --- a/web/src/pages/next-search/search-setting.tsx +++ b/web/src/pages/next-search/search-setting.tsx @@ -26,11 +26,7 @@ import { MultiSelect } from '@/components/ui/multi-select'; import { RAGFlowSelect } from '@/components/ui/select'; import { Spin } from '@/components/ui/spin'; import { Switch } from '@/components/ui/switch'; -import { Textarea } from '@/components/ui/textarea'; -import { - useFetchKnowledgeList, - useFetchKnowledgeMetadataKeys, -} from '@/hooks/use-knowledge-request'; +import { useFetchKnowledgeMetadataKeys } from '@/hooks/use-knowledge-request'; import { useComposeLlmOptionsByModelTypes, useSelectLlmOptionsByModelType, @@ -232,14 +228,29 @@ const SearchSetting: React.FC = ({ }, [metadataKeys]); useEffect(() => { - const currentFields = formMethods.getValues('search_config.reference_metadata.fields'); - if (referenceMetadataEnabled && Array.isArray(currentFields) && currentFields.length > 0 && metadataKeys) { - const validFields = currentFields.filter((field) => metadataKeys.includes(field)); + const currentFields = formMethods.getValues( + 'search_config.reference_metadata.fields', + ); + if ( + referenceMetadataEnabled && + Array.isArray(currentFields) && + currentFields.length > 0 && + metadataKeys + ) { + const validFields = currentFields.filter((field) => + metadataKeys.includes(field), + ); if (validFields.length !== currentFields.length) { - formMethods.setValue('search_config.reference_metadata.fields', validFields); + formMethods.setValue( + 'search_config.reference_metadata.fields', + validFields, + ); } } else if (!referenceMetadataEnabled) { - formMethods.setValue('search_config.reference_metadata.fields', undefined); + formMethods.setValue( + 'search_config.reference_metadata.fields', + undefined, + ); } }, [selectedKbIds, metadataKeys, referenceMetadataEnabled, formMethods]); diff --git a/web/src/pages/user-setting/data-source/constant/index.tsx b/web/src/pages/user-setting/data-source/constant/index.tsx index 0aae8868c5..0a5eb8c429 100644 --- a/web/src/pages/user-setting/data-source/constant/index.tsx +++ b/web/src/pages/user-setting/data-source/constant/index.tsx @@ -126,6 +126,9 @@ export const DataSourceFeatureVisibilityMap: Partial< [DataSourceKey.RSS]: { syncDeletedFiles: true, }, + [DataSourceKey.MOODLE]: { + syncDeletedFiles: true, + }, [DataSourceKey.MYSQL]: { syncDeletedFiles: true, }, diff --git a/web/src/utils/tests/chat.test.ts b/web/src/utils/tests/chat.test.ts index 82b4780719..a55ace5cb6 100644 --- a/web/src/utils/tests/chat.test.ts +++ b/web/src/utils/tests/chat.test.ts @@ -23,4 +23,4 @@ test('handles mixed double-escaped delimiters with HTML entities', () => { test('passes through already correct single-escaped delimiters unchanged', () => { const result = preprocessLaTeX('\\(x = 1\\)'); expect(result).toBe('$x = 1$'); -}); \ No newline at end of file +});