mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-02 08:45:42 +08:00
## Summary

TuShare required non-empty upstream input but filtered fetched news with the static `keyword` param (default empty string), so agent-provided keywords were ignored. Use `self._param.keyword or ans` when filtering, matching how AkShare uses upstream input for its query.

Fixes #16360

## Test plan

- [x] `test_tushare_filters_with_upstream_keyword_when_param_empty` mocks the API and asserts only rows matching the upstream keyword are returned

---------

Co-authored-by: yzc <yuzhichang@gmail.com>
Co-authored-by: Harsh Kashyap <harshkashyap@Harshs-MacBook-Pro.local>
98 lines
3.3 KiB
Python
98 lines
3.3 KiB
Python
#
|
|
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
import json
|
|
import logging
|
|
from abc import ABC
|
|
import pandas as pd
|
|
import time
|
|
import requests
|
|
from agent.component.base import ComponentBase, ComponentParamBase
|
|
from common.http_client import DEFAULT_TIMEOUT
|
|
|
|
|
|
class TuShareParam(ComponentParamBase):
    """
    Parameters for the TuShare quick-news component.

    Attributes set by ``__init__``:
        token: TuShare API token (placeholder ``"xxx"`` by default).
        src: news source identifier; validated by ``check``.
        start_date / end_date: query window as ``YYYY-MM-DD HH:MM:SS``
            strings; ``end_date`` defaults to the local time at which the
            parameter object is constructed.
        keyword: static filter keyword; empty by default.
    """

    def __init__(self):
        super().__init__()
        timestamp_format = "%Y-%m-%d %H:%M:%S"
        self.token = "xxx"
        self.src = "eastmoney"
        self.start_date = "2024-01-01 09:00:00"
        # strftime() without an explicit time tuple formats the current local time.
        self.end_date = time.strftime(timestamp_format)
        self.keyword = ""

    def check(self):
        """Validate that ``src`` is one of the supported quick-news sources."""
        supported_sources = [
            "sina",
            "wallstreetcn",
            "10jqka",
            "eastmoney",
            "yuncaijing",
            "fenghuang",
            "jinrongjie",
        ]
        self.check_valid_value(self.src, "Quick News Source", supported_sources)
|
|
|
|
|
|
class TuShare(ComponentBase, ABC):
    """Agent component that fetches TuShare quick news and filters it by keyword."""

    component_name = "TuShare"

    def _run(self, history, **kwargs):
        """
        Fetch news from the TuShare ``news`` API and keep rows matching a keyword.

        The filter keyword is ``self._param.keyword`` when it is non-empty,
        otherwise the comma-joined ``content`` values of the upstream input.
        Matching is a case-insensitive literal substring test on the
        ``content`` column of the fetched news.

        Args:
            history: not used by this component.
            **kwargs: not used by this component.

        Returns:
            ``None`` if the run was canceled; the result of
            ``TuShare.be_output(...)`` carrying an empty string (no upstream
            input), the API's ``msg`` (non-zero API ``code``) or an
            ``**ERROR**: ...`` message (any exception); otherwise a one-row
            ``pandas.DataFrame`` whose ``content`` cell is the filtered news
            rendered as a Markdown table.
        """
        if self.check_if_canceled("TuShare processing"):
            return

        ans = self.get_input()
        # NOTE(review): assumes get_input() returns a mapping/DataFrame whose
        # "content" entry is an iterable of strings -- confirm against the base class.
        ans = ",".join(ans["content"]) if "content" in ans else ""
        if not ans:
            return TuShare.be_output("")

        try:
            if self.check_if_canceled("TuShare processing"):
                return

            params = {
                "api_name": "news",
                "token": self._param.token,
                "params": {
                    "src": self._param.src,
                    "start_date": self._param.start_date,
                    "end_date": self._param.end_date,
                },
            }
            response = requests.post(
                url="http://api.tushare.pro",
                data=json.dumps(params).encode('utf-8'),
                timeout=DEFAULT_TIMEOUT,
            )
            response = response.json()
            if self.check_if_canceled("TuShare processing"):
                return
            if response['code'] != 0:
                return TuShare.be_output(response['msg'])

            # Supply the field names at construction time. Assigning
            # `df.columns` afterwards raises a length-mismatch ValueError when
            # the API returns zero rows, which used to turn a legitimately
            # empty result into an "**ERROR**" output.
            df = pd.DataFrame(response['data']['items'], columns=response['data']['fields'])
            if self.check_if_canceled("TuShare processing"):
                return

            # A configured keyword takes precedence; otherwise fall back to
            # the text received from the upstream component.
            keyword = self._param.keyword or ans
            logging.info(
                "TuShare news filter keyword source=%s",
                "param.keyword" if self._param.keyword else "upstream_input",
            )
            # regex=False: the keyword is matched literally, so characters
            # such as "(" or "*" in user input cannot break the filter.
            matches = df[df["content"].str.contains(keyword, case=False, na=False, regex=False)]
            news_markdown = matches.to_markdown()
        except Exception as e:
            if self.check_if_canceled("TuShare processing"):
                return
            return TuShare.be_output("**ERROR**: " + str(e))

        return pd.DataFrame([{"content": news_markdown}])
|