H
committed on
Commit
·
cab96b4
1
Parent(s):
c9f94cd
Fix component exception (#1603)
Browse files

### What problem does this PR solve?
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- graph/component/arxiv.py +16 -13
- graph/component/baidu.py +13 -9
- graph/component/duckduckgo.py +13 -10
- graph/component/pubmed.py +12 -8
- graph/component/wikipedia.py +7 -8
graph/component/arxiv.py
CHANGED
@@ -47,22 +47,25 @@ class ArXiv(ComponentBase, ABC):
|
|
47 |
if not ans:
|
48 |
return ArXiv.be_output("")
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
62 |
|
63 |
if not arxiv_res:
|
64 |
return ArXiv.be_output("")
|
65 |
|
66 |
df = pd.DataFrame(arxiv_res)
|
67 |
if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
|
68 |
-
return df
|
|
|
47 |
if not ans:
|
48 |
return ArXiv.be_output("")
|
49 |
|
50 |
+
try:
|
51 |
+
sort_choices = {"relevance": arxiv.SortCriterion.Relevance,
|
52 |
+
"lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
|
53 |
+
'submittedDate': arxiv.SortCriterion.SubmittedDate}
|
54 |
+
arxiv_client = arxiv.Client()
|
55 |
+
search = arxiv.Search(
|
56 |
+
query=ans,
|
57 |
+
max_results=self._param.top_n,
|
58 |
+
sort_by=sort_choices[self._param.sort_by]
|
59 |
+
)
|
60 |
+
arxiv_res = [
|
61 |
+
{"content": 'Title: ' + i.title + '\nPdf_Url: <a href="' + i.pdf_url + '"></a> \nSummary: ' + i.summary} for
|
62 |
+
i in list(arxiv_client.results(search))]
|
63 |
+
except Exception as e:
|
64 |
+
return ArXiv.be_output("**ERROR**: " + str(e))
|
65 |
|
66 |
if not arxiv_res:
|
67 |
return ArXiv.be_output("")
|
68 |
|
69 |
df = pd.DataFrame(arxiv_res)
|
70 |
if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
|
71 |
+
return df
|
graph/component/baidu.py
CHANGED
@@ -45,16 +45,20 @@ class Baidu(ComponentBase, ABC):
|
|
45 |
if not ans:
|
46 |
return Baidu.be_output("")
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
58 |
|
59 |
if not baidu_res:
|
60 |
return Baidu.be_output("")
|
|
|
45 |
if not ans:
|
46 |
return Baidu.be_output("")
|
47 |
|
48 |
+
try:
|
49 |
+
url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n)
|
50 |
+
headers = {
|
51 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'}
|
52 |
+
response = requests.get(url=url, headers=headers)
|
53 |
|
54 |
+
url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text)
|
55 |
+
title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text)
|
56 |
+
body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text)
|
57 |
+
baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body)} for
|
58 |
+
url, title, body in zip(url_res, title_res, body_res)]
|
59 |
+
del body_res, url_res, title_res
|
60 |
+
except Exception as e:
|
61 |
+
return Baidu.be_output("**ERROR**: " + str(e))
|
62 |
|
63 |
if not baidu_res:
|
64 |
return Baidu.be_output("")
|
graph/component/duckduckgo.py
CHANGED
@@ -46,16 +46,19 @@ class DuckDuckGo(ComponentBase, ABC):
|
|
46 |
if not ans:
|
47 |
return DuckDuckGo.be_output("")
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
59 |
|
60 |
if not duck_res:
|
61 |
return DuckDuckGo.be_output("")
|
|
|
46 |
if not ans:
|
47 |
return DuckDuckGo.be_output("")
|
48 |
|
49 |
+
try:
|
50 |
+
if self._param.channel == "text":
|
51 |
+
with DDGS() as ddgs:
|
52 |
+
# {'title': '', 'href': '', 'body': ''}
|
53 |
+
duck_res = [{"content": '<a href="' + i["href"] + '">' + i["title"] + '</a> ' + i["body"]} for i
|
54 |
+
in ddgs.text(ans, max_results=self._param.top_n)]
|
55 |
+
elif self._param.channel == "news":
|
56 |
+
with DDGS() as ddgs:
|
57 |
+
# {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
|
58 |
+
duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"]} for i
|
59 |
+
in ddgs.news(ans, max_results=self._param.top_n)]
|
60 |
+
except Exception as e:
|
61 |
+
return DuckDuckGo.be_output("**ERROR**: " + str(e))
|
62 |
|
63 |
if not duck_res:
|
64 |
return DuckDuckGo.be_output("")
|
graph/component/pubmed.py
CHANGED
@@ -46,14 +46,18 @@ class PubMed(ComponentBase, ABC):
|
|
46 |
if not ans:
|
47 |
return PubMed.be_output("")
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
57 |
|
58 |
if not pubmed_res:
|
59 |
return PubMed.be_output("")
|
|
|
46 |
if not ans:
|
47 |
return PubMed.be_output("")
|
48 |
|
49 |
+
try:
|
50 |
+
Entrez.email = self._param.email
|
51 |
+
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
|
52 |
+
pubmedcnt = ET.fromstring(
|
53 |
+
Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
|
54 |
+
pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
|
55 |
+
"ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
|
56 |
+
"MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + child.find(
|
57 |
+
"MedlineCitation").find("Article").find("Abstract").find("AbstractText").text} for child in
|
58 |
+
pubmedcnt.findall("PubmedArticle")]
|
59 |
+
except Exception as e:
|
60 |
+
return PubMed.be_output("**ERROR**: " + str(e))
|
61 |
|
62 |
if not pubmed_res:
|
63 |
return PubMed.be_output("")
|
graph/component/wikipedia.py
CHANGED
@@ -51,16 +51,15 @@ class Wikipedia(ComponentBase, ABC):
|
|
51 |
if not ans:
|
52 |
return Wikipedia.be_output("")
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
page = wiki_engine.page(title=wiki_key, auto_suggest=False)
|
60 |
wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary})
|
61 |
-
|
62 |
-
|
63 |
-
pass
|
64 |
|
65 |
if not wiki_res:
|
66 |
return Wikipedia.be_output("")
|
|
|
51 |
if not ans:
|
52 |
return Wikipedia.be_output("")
|
53 |
|
54 |
+
try:
|
55 |
+
wiki_res = []
|
56 |
+
wikipedia.set_lang(self._param.language)
|
57 |
+
wiki_engine = wikipedia
|
58 |
+
for wiki_key in wiki_engine.search(ans, results=self._param.top_n):
|
59 |
page = wiki_engine.page(title=wiki_key, auto_suggest=False)
|
60 |
wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary})
|
61 |
+
except Exception as e:
|
62 |
+
return Wikipedia.be_output("**ERROR**: " + str(e))
|
|
|
63 |
|
64 |
if not wiki_res:
|
65 |
return Wikipedia.be_output("")
|