H
committed on
Commit
·
cab96b4
1
Parent(s):
c9f94cd
Fix component exception (#1603)
Browse files

### What problem does this PR solve?
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- graph/component/arxiv.py +16 -13
- graph/component/baidu.py +13 -9
- graph/component/duckduckgo.py +13 -10
- graph/component/pubmed.py +12 -8
- graph/component/wikipedia.py +7 -8
graph/component/arxiv.py
CHANGED
@@ -47,22 +47,25 @@ class ArXiv(ComponentBase, ABC):
|
|
47 |
if not ans:
|
48 |
return ArXiv.be_output("")
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
62 |
|
63 |
if not arxiv_res:
|
64 |
return ArXiv.be_output("")
|
65 |
|
66 |
df = pd.DataFrame(arxiv_res)
|
67 |
if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
|
68 |
-
return df
|
|
|
47 |
if not ans:
|
48 |
return ArXiv.be_output("")
|
49 |
|
50 |
+
try:
|
51 |
+
sort_choices = {"relevance": arxiv.SortCriterion.Relevance,
|
52 |
+
"lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
|
53 |
+
'submittedDate': arxiv.SortCriterion.SubmittedDate}
|
54 |
+
arxiv_client = arxiv.Client()
|
55 |
+
search = arxiv.Search(
|
56 |
+
query=ans,
|
57 |
+
max_results=self._param.top_n,
|
58 |
+
sort_by=sort_choices[self._param.sort_by]
|
59 |
+
)
|
60 |
+
arxiv_res = [
|
61 |
+
{"content": 'Title: ' + i.title + '\nPdf_Url: <a href="' + i.pdf_url + '"></a> \nSummary: ' + i.summary} for
|
62 |
+
i in list(arxiv_client.results(search))]
|
63 |
+
except Exception as e:
|
64 |
+
return ArXiv.be_output("**ERROR**: " + str(e))
|
65 |
|
66 |
if not arxiv_res:
|
67 |
return ArXiv.be_output("")
|
68 |
|
69 |
df = pd.DataFrame(arxiv_res)
|
70 |
if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
|
71 |
+
return df
|
graph/component/baidu.py
CHANGED
@@ -45,16 +45,20 @@ class Baidu(ComponentBase, ABC):
|
|
45 |
if not ans:
|
46 |
return Baidu.be_output("")
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
58 |
|
59 |
if not baidu_res:
|
60 |
return Baidu.be_output("")
|
|
|
45 |
if not ans:
|
46 |
return Baidu.be_output("")
|
47 |
|
48 |
+
try:
|
49 |
+
url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n)
|
50 |
+
headers = {
|
51 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'}
|
52 |
+
response = requests.get(url=url, headers=headers)
|
53 |
|
54 |
+
url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text)
|
55 |
+
title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text)
|
56 |
+
body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text)
|
57 |
+
baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body)} for
|
58 |
+
url, title, body in zip(url_res, title_res, body_res)]
|
59 |
+
del body_res, url_res, title_res
|
60 |
+
except Exception as e:
|
61 |
+
return Baidu.be_output("**ERROR**: " + str(e))
|
62 |
|
63 |
if not baidu_res:
|
64 |
return Baidu.be_output("")
|
graph/component/duckduckgo.py
CHANGED
@@ -46,16 +46,19 @@ class DuckDuckGo(ComponentBase, ABC):
|
|
46 |
if not ans:
|
47 |
return DuckDuckGo.be_output("")
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
59 |
|
60 |
if not duck_res:
|
61 |
return DuckDuckGo.be_output("")
|
|
|
46 |
if not ans:
|
47 |
return DuckDuckGo.be_output("")
|
48 |
|
49 |
+
try:
|
50 |
+
if self._param.channel == "text":
|
51 |
+
with DDGS() as ddgs:
|
52 |
+
# {'title': '', 'href': '', 'body': ''}
|
53 |
+
duck_res = [{"content": '<a href="' + i["href"] + '">' + i["title"] + '</a> ' + i["body"]} for i
|
54 |
+
in ddgs.text(ans, max_results=self._param.top_n)]
|
55 |
+
elif self._param.channel == "news":
|
56 |
+
with DDGS() as ddgs:
|
57 |
+
# {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
|
58 |
+
duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"]} for i
|
59 |
+
in ddgs.news(ans, max_results=self._param.top_n)]
|
60 |
+
except Exception as e:
|
61 |
+
return DuckDuckGo.be_output("**ERROR**: " + str(e))
|
62 |
|
63 |
if not duck_res:
|
64 |
return DuckDuckGo.be_output("")
|
graph/component/pubmed.py
CHANGED
@@ -46,14 +46,18 @@ class PubMed(ComponentBase, ABC):
|
|
46 |
if not ans:
|
47 |
return PubMed.be_output("")
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
57 |
|
58 |
if not pubmed_res:
|
59 |
return PubMed.be_output("")
|
|
|
46 |
if not ans:
|
47 |
return PubMed.be_output("")
|
48 |
|
49 |
+
try:
|
50 |
+
Entrez.email = self._param.email
|
51 |
+
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
|
52 |
+
pubmedcnt = ET.fromstring(
|
53 |
+
Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
|
54 |
+
pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
|
55 |
+
"ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
|
56 |
+
"MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + child.find(
|
57 |
+
"MedlineCitation").find("Article").find("Abstract").find("AbstractText").text} for child in
|
58 |
+
pubmedcnt.findall("PubmedArticle")]
|
59 |
+
except Exception as e:
|
60 |
+
return PubMed.be_output("**ERROR**: " + str(e))
|
61 |
|
62 |
if not pubmed_res:
|
63 |
return PubMed.be_output("")
|
graph/component/wikipedia.py
CHANGED
@@ -51,16 +51,15 @@ class Wikipedia(ComponentBase, ABC):
|
|
51 |
if not ans:
|
52 |
return Wikipedia.be_output("")
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
page = wiki_engine.page(title=wiki_key, auto_suggest=False)
|
60 |
wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary})
|
61 |
-
|
62 |
-
|
63 |
-
pass
|
64 |
|
65 |
if not wiki_res:
|
66 |
return Wikipedia.be_output("")
|
|
|
51 |
if not ans:
|
52 |
return Wikipedia.be_output("")
|
53 |
|
54 |
+
try:
|
55 |
+
wiki_res = []
|
56 |
+
wikipedia.set_lang(self._param.language)
|
57 |
+
wiki_engine = wikipedia
|
58 |
+
for wiki_key in wiki_engine.search(ans, results=self._param.top_n):
|
59 |
page = wiki_engine.page(title=wiki_key, auto_suggest=False)
|
60 |
wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary})
|
61 |
+
except Exception as e:
|
62 |
+
return Wikipedia.be_output("**ERROR**: " + str(e))
|
|
|
63 |
|
64 |
if not wiki_res:
|
65 |
return Wikipedia.be_output("")
|