H committed on
Commit
cab96b4
·
1 Parent(s): c9f94cd

Fix component exception (#1603)

Browse files

### What problem does this PR solve?


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

graph/component/arxiv.py CHANGED
@@ -47,22 +47,25 @@ class ArXiv(ComponentBase, ABC):
47
  if not ans:
48
  return ArXiv.be_output("")
49
 
50
- sort_choices = {"relevance": arxiv.SortCriterion.Relevance,
51
- "lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
52
- 'submittedDate': arxiv.SortCriterion.SubmittedDate}
53
- arxiv_client = arxiv.Client()
54
- search = arxiv.Search(
55
- query=ans,
56
- max_results=self._param.top_n,
57
- sort_by=sort_choices[self._param.sort_by]
58
- )
59
- arxiv_res = [
60
- {"content": 'Title: ' + i.title + '\nPdf_Url: <a href="' + i.pdf_url + '"></a> \nSummary: ' + i.summary} for
61
- i in list(arxiv_client.results(search))]
 
 
 
62
 
63
  if not arxiv_res:
64
  return ArXiv.be_output("")
65
 
66
  df = pd.DataFrame(arxiv_res)
67
  if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
68
- return df
 
47
  if not ans:
48
  return ArXiv.be_output("")
49
 
50
+ try:
51
+ sort_choices = {"relevance": arxiv.SortCriterion.Relevance,
52
+ "lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
53
+ 'submittedDate': arxiv.SortCriterion.SubmittedDate}
54
+ arxiv_client = arxiv.Client()
55
+ search = arxiv.Search(
56
+ query=ans,
57
+ max_results=self._param.top_n,
58
+ sort_by=sort_choices[self._param.sort_by]
59
+ )
60
+ arxiv_res = [
61
+ {"content": 'Title: ' + i.title + '\nPdf_Url: <a href="' + i.pdf_url + '"></a> \nSummary: ' + i.summary} for
62
+ i in list(arxiv_client.results(search))]
63
+ except Exception as e:
64
+ return ArXiv.be_output("**ERROR**: " + str(e))
65
 
66
  if not arxiv_res:
67
  return ArXiv.be_output("")
68
 
69
  df = pd.DataFrame(arxiv_res)
70
  if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
71
+ return df
graph/component/baidu.py CHANGED
@@ -45,16 +45,20 @@ class Baidu(ComponentBase, ABC):
45
  if not ans:
46
  return Baidu.be_output("")
47
 
48
- url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n)
49
- headers = {
50
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'}
51
- response = requests.get(url=url, headers=headers)
 
52
 
53
- url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text)
54
- title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text)
55
- body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text)
56
- baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body)} for url, title, body in zip(url_res, title_res, body_res)]
57
- del body_res, url_res, title_res
 
 
 
58
 
59
  if not baidu_res:
60
  return Baidu.be_output("")
 
45
  if not ans:
46
  return Baidu.be_output("")
47
 
48
+ try:
49
+ url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n)
50
+ headers = {
51
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'}
52
+ response = requests.get(url=url, headers=headers)
53
 
54
+ url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text)
55
+ title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text)
56
+ body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text)
57
+ baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body)} for
58
+ url, title, body in zip(url_res, title_res, body_res)]
59
+ del body_res, url_res, title_res
60
+ except Exception as e:
61
+ return Baidu.be_output("**ERROR**: " + str(e))
62
 
63
  if not baidu_res:
64
  return Baidu.be_output("")
graph/component/duckduckgo.py CHANGED
@@ -46,16 +46,19 @@ class DuckDuckGo(ComponentBase, ABC):
46
  if not ans:
47
  return DuckDuckGo.be_output("")
48
 
49
- if self._param.channel == "text":
50
- with DDGS() as ddgs:
51
- # {'title': '', 'href': '', 'body': ''}
52
- duck_res = [{"content": '<a href="' + i["href"] + '">' + i["title"] + '</a> ' + i["body"]} for i in
53
- ddgs.text(ans, max_results=self._param.top_n)]
54
- elif self._param.channel == "news":
55
- with DDGS() as ddgs:
56
- # {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
57
- duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"]} for i in
58
- ddgs.news(ans, max_results=self._param.top_n)]
 
 
 
59
 
60
  if not duck_res:
61
  return DuckDuckGo.be_output("")
 
46
  if not ans:
47
  return DuckDuckGo.be_output("")
48
 
49
+ try:
50
+ if self._param.channel == "text":
51
+ with DDGS() as ddgs:
52
+ # {'title': '', 'href': '', 'body': ''}
53
+ duck_res = [{"content": '<a href="' + i["href"] + '">' + i["title"] + '</a> ' + i["body"]} for i
54
+ in ddgs.text(ans, max_results=self._param.top_n)]
55
+ elif self._param.channel == "news":
56
+ with DDGS() as ddgs:
57
+ # {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
58
+ duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"]} for i
59
+ in ddgs.news(ans, max_results=self._param.top_n)]
60
+ except Exception as e:
61
+ return DuckDuckGo.be_output("**ERROR**: " + str(e))
62
 
63
  if not duck_res:
64
  return DuckDuckGo.be_output("")
graph/component/pubmed.py CHANGED
@@ -46,14 +46,18 @@ class PubMed(ComponentBase, ABC):
46
  if not ans:
47
  return PubMed.be_output("")
48
 
49
- Entrez.email = self._param.email
50
- pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
51
- pubmedcnt = ET.fromstring(
52
- Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
53
- pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
54
- "ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
55
- "MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + child.find("MedlineCitation").find(
56
- "Article").find("Abstract").find("AbstractText").text} for child in pubmedcnt.findall("PubmedArticle")]
 
 
 
 
57
 
58
  if not pubmed_res:
59
  return PubMed.be_output("")
 
46
  if not ans:
47
  return PubMed.be_output("")
48
 
49
+ try:
50
+ Entrez.email = self._param.email
51
+ pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
52
+ pubmedcnt = ET.fromstring(
53
+ Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
54
+ pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
55
+ "ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
56
+ "MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + child.find(
57
+ "MedlineCitation").find("Article").find("Abstract").find("AbstractText").text} for child in
58
+ pubmedcnt.findall("PubmedArticle")]
59
+ except Exception as e:
60
+ return PubMed.be_output("**ERROR**: " + str(e))
61
 
62
  if not pubmed_res:
63
  return PubMed.be_output("")
graph/component/wikipedia.py CHANGED
@@ -51,16 +51,15 @@ class Wikipedia(ComponentBase, ABC):
51
  if not ans:
52
  return Wikipedia.be_output("")
53
 
54
- wiki_res = []
55
- wikipedia.set_lang(self._param.language)
56
- wiki_engine = wikipedia
57
- for wiki_key in wiki_engine.search(ans, results=self._param.top_n):
58
- try:
59
  page = wiki_engine.page(title=wiki_key, auto_suggest=False)
60
  wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary})
61
- except Exception as e:
62
- print(e)
63
- pass
64
 
65
  if not wiki_res:
66
  return Wikipedia.be_output("")
 
51
  if not ans:
52
  return Wikipedia.be_output("")
53
 
54
+ try:
55
+ wiki_res = []
56
+ wikipedia.set_lang(self._param.language)
57
+ wiki_engine = wikipedia
58
+ for wiki_key in wiki_engine.search(ans, results=self._param.top_n):
59
  page = wiki_engine.page(title=wiki_key, auto_suggest=False)
60
  wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary})
61
+ except Exception as e:
62
+ return Wikipedia.be_output("**ERROR**: " + str(e))
 
63
 
64
  if not wiki_res:
65
  return Wikipedia.be_output("")