# ragflow/rag/svr/task_executor_refactor/embedding_service.py

#
#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""
Embedding Service Module.

Provides `EmbeddingService`, which batch-encodes text chunks with an embedding
model and attaches the resulting vectors to the chunks.
"""

from typing import Any, Dict, List, Optional, Tuple

import numpy as np
from common import settings
from common.misc_utils import thread_pool_exec
from common.token_utils import truncate
from rag.svr.task_executor_refactor.embedding_utils import EmbeddingUtils
from rag.svr.task_executor_refactor.task_context import TaskContext


class EmbeddingService:
    """Compute embedding vectors for document chunks.

    What this class does:
    - Encodes chunk contents in batches of ``embedding_batch_size`` texts.
    - Encodes the first title once and hands it, repeated per chunk, to
      ``EmbeddingUtils.combine_title_content_vectors`` together with the
      content vectors.
    - Makes every model call while holding ``ctx.embed_limiter``.

    NOTE(review): an earlier revision of this docstring stated that all
    intermediate results are recorded via RecordingContext. Nothing in this
    class performs such recording; confirm whether TaskContext does.
    """

    def __init__(
        self,
        ctx: TaskContext,
        embedding_batch_size: Optional[int] = None,
    ):
        """Initialize EmbeddingService.

        Args:
            ctx: TaskContext for the running task. This class reads
                ``embed_limiter`` and ``progress_cb`` from it.
            embedding_batch_size: Number of texts sent to the model per
                encode call. ``None`` or ``0`` selects
                ``settings.EMBEDDING_BATCH_SIZE``.
        """
        self._task_context = ctx

        # `or` rather than an `is None` test: a falsy value (None or 0) falls
        # back to the configured default.
        self._embedding_batch_size = embedding_batch_size or settings.EMBEDDING_BATCH_SIZE

    async def embed_chunks(
        self,
        docs: List[Dict[str, Any]],
        embedding_model,
        parser_config: Optional[Dict] = None,
    ) -> Tuple[int, int]:
        """Embed a list of chunks.

        Args:
            docs: List of chunk dictionaries to embed. They are passed to
                ``EmbeddingUtils.attach_vectors`` at the end (presumably
                mutated in place - verify against EmbeddingUtils).
            embedding_model: The embedding model bundle (LLMBundle). Must
                provide ``encode(texts) -> (vectors, token_count)`` and a
                ``max_length`` attribute.
            parser_config: Parser configuration; only ``filename_embd_weight``
                is read here. ``None`` is treated as an empty configuration.

        Returns:
            Tuple of (token_count, vector_size): the sum of the token counts
            reported by every ``encode`` call, and the value returned by
            ``EmbeddingUtils.attach_vectors``.

        Raises:
            AssertionError: If the number of resulting vectors differs from
                the number of chunks.
        """
        if parser_config is None:
            parser_config = {}

        # Text extraction from the chunk dicts is delegated to EmbeddingUtils.
        titles, contents = EmbeddingUtils.prepare_texts_for_embedding(docs)

        limiter = self._task_context.embed_limiter
        tk_count = 0

        # Only the first title is encoded; its vector is repeated for every
        # chunk. The title is sent to the model as-is (no truncation).
        tts = None
        if len(titles) > 0 and len(titles) == len(contents):
            async with limiter:
                title_vecs, used = await thread_pool_exec(embedding_model.encode, titles[0:1])
            tts = np.tile(title_vecs[0], (len(contents), 1))
            tk_count += used

        # Contents are truncated to leave a 10-token margin below the model
        # limit, then encoded batch by batch.
        max_tokens = embedding_model.max_length - 10
        batch_size = self._embedding_batch_size
        total = len(contents)
        vects_batches = []
        for start in range(0, total, batch_size):
            # Truncate before acquiring the limiter so it is held only for
            # the model call itself.
            batch = [truncate(text, max_tokens) for text in contents[start:start + batch_size]]
            async with limiter:
                batch_vecs, used = await thread_pool_exec(
                    self._batch_encode_wrapper,
                    batch,
                    embedding_model,
                )
            vects_batches.append(batch_vecs)
            tk_count += used
            if self._task_context.progress_cb:
                # Progress is derived from the batch's start index, not from
                # the number of texts processed, so it generally stops short
                # of 0.9. Kept as-is to avoid changing reported values.
                self._task_context.progress_cb(prog=0.7 + 0.2 * (start + 1) / total, msg="")

        cnts = EmbeddingUtils.stack_vectors(vects_batches)

        # dict.get() only applies its default when the key is absent; a
        # configuration that stores an explicit null would otherwise pass
        # None through as the weight.
        title_weight = parser_config.get("filename_embd_weight")
        if title_weight is None:
            title_weight = EmbeddingUtils.DEFAULT_TITLE_WEIGHT
        vects = EmbeddingUtils.combine_title_content_vectors(tts, cnts, title_weight)

        assert len(vects) == len(docs), (
            f"embedding produced {len(vects)} vectors for {len(docs)} chunks"
        )

        vector_size = EmbeddingUtils.attach_vectors(docs, vects)

        return tk_count, vector_size

    @staticmethod
    def _batch_encode_wrapper(txts: List[str], embedding_model) -> Tuple[np.ndarray, int]:
        """Call ``embedding_model.encode(txts)`` synchronously.

        Exists so ``thread_pool_exec`` has a plain callable to run; returns
        whatever ``encode`` returns (vectors and token count).
        """
        return embedding_model.encode(txts)