diff --git a/conf/llm_factories.json b/conf/llm_factories.json index b6d2b7ff18..0cadfe3679 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -790,6 +790,18 @@ "max_tokens": 1000000, "model_type": "chat", "is_tools": true + }, + { + "llm_name": "gte-rerank-v2", + "tags": "RE-RANK,4k", + "max_tokens": 4000, + "model_type": "rerank" + }, + { + "llm_name": "qwen3-rerank", + "tags": "RE-RANK,4k", + "max_tokens": 4000, + "model_type": "rerank" } ] }, diff --git a/rag/llm/rerank_model.py b/rag/llm/rerank_model.py index 5002fe7651..6730261ea7 100644 --- a/rag/llm/rerank_model.py +++ b/rag/llm/rerank_model.py @@ -375,7 +375,19 @@ class QWenRerank(Base): import dashscope - resp = dashscope.TextReRank.call(api_key=self.api_key, model=self.model_name, query=query, documents=texts, top_n=len(texts), return_documents=False) + # qwen3-rerank does not support return_documents parameter + if self.model_name.startswith("qwen3-rerank"): + resp = dashscope.TextReRank.call( + api_key=self.api_key, model=self.model_name, + query=query, documents=texts, top_n=len(texts) + ) + else: + resp = dashscope.TextReRank.call( + api_key=self.api_key, model=self.model_name, + query=query, documents=texts, + top_n=len(texts), return_documents=False + ) + rank = np.zeros(len(texts), dtype=float) if resp.status_code == HTTPStatus.OK: try: @@ -549,4 +561,4 @@ class RAGconRerank(Base): rank = Base._normalize_rank(rank) - return rank, token_count \ No newline at end of file + return rank, token_count