shubhobm committed on
Commit
fd4c06d
·
1 Parent(s): 0fbbeae

feat(app): added scraper thanks to @leondz

Browse files
Files changed (2) hide show
  1. app.py +37 -24
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,12 +1,14 @@
1
  import gradio as gr
 
2
  from datetime import date
 
3
 
4
  from avidtools.datamodels.report import Report
5
  from avidtools.datamodels.components import *
6
  from avidtools.datamodels.enums import *
7
 
8
  # def generate_report():
9
- def generate_report(classof,type,risk_domain,sep,lifecycle):
10
  # def generate_report(scraped_input, selections):
11
  report = Report()
12
 
@@ -24,18 +26,18 @@ def generate_report(classof,type,risk_domain,sep,lifecycle):
24
  type = type,
25
  description = LangValue(
26
  lang = 'eng',
27
- value = scraped_input['title']
28
  )
29
  )
30
  report.references = [
31
  Reference(
32
- label = scraped_input['description'],
33
- url = scraped_input['url']
34
  )
35
  ]
36
  report.description = LangValue(
37
  lang = 'eng',
38
- value = scraped_input['description']
39
  )
40
  report.impact = Impact(
41
  avid = AvidTaxonomy(
@@ -49,24 +51,28 @@ def generate_report(classof,type,risk_domain,sep,lifecycle):
49
 
50
  return report.dict()
51
 
52
- scraped_input = {
53
- "title": "### title",
54
- "description": "description",
55
- "url": "https://link.to.arxiv.paper"
56
- }
57
-
58
- # selections = {
59
- # "classof": ClassEnum.llm,
60
- # "type": TypeEnum.detection,
61
- # "avid": {
62
- # "risk_domain": ["Security"],
63
- # "sep": [SepEnum.E0101],
64
- # "lifecycle": [LifecycleEnum.L05]
65
- # }
66
- # }
 
 
 
 
 
67
 
68
  demo = gr.Blocks(theme=gr.themes.Soft())
69
- # demo = gr.Blocks(theme='gradio/darkdefault')
70
 
71
  with demo:
72
 
@@ -88,9 +94,10 @@ with demo:
88
  ## Step 1: \n\
89
  Select a model and a method of detection.
90
  """)
 
91
  with gr.Box():
92
- title = gr.Markdown(scraped_input['title'])
93
- description = gr.Markdown(scraped_input['description'])
94
 
95
  with gr.Column(scale=3):
96
  gr.Markdown("""## Step 2: \
@@ -112,9 +119,15 @@ with demo:
112
  report_button = gr.Button("Generate Report")
113
  report_json = gr.Json(label="AVID Report")
114
 
 
 
 
 
 
 
115
  report_button.click(
116
  fn=generate_report,
117
- inputs=[classof,type,risk_domain,sep,lifecycle],
118
  outputs=[report_json]
119
  )
120
 
 
1
  import gradio as gr
2
+ import urllib.request
3
  from datetime import date
4
+ from bs4 import BeautifulSoup
5
 
6
  from avidtools.datamodels.report import Report
7
  from avidtools.datamodels.components import *
8
  from avidtools.datamodels.enums import *
9
 
10
  # def generate_report():
11
+ def generate_report(uri,title,abstract,classof,type,risk_domain,sep,lifecycle):
12
  # def generate_report(scraped_input, selections):
13
  report = Report()
14
 
 
26
  type = type,
27
  description = LangValue(
28
  lang = 'eng',
29
+ value = title
30
  )
31
  )
32
  report.references = [
33
  Reference(
34
+ label = title,
35
+ url = uri
36
  )
37
  ]
38
  report.description = LangValue(
39
  lang = 'eng',
40
+ value = abstract
41
  )
42
  report.impact = Impact(
43
  avid = AvidTaxonomy(
 
51
 
52
  return report.dict()
53
 
54
+ # parses out title and abstract: works only on arxiv or acl
55
def parse_uri(uri):
    """Fetch a paper page and extract its title and abstract.

    Only links starting with ``https://arxiv.org`` or
    ``https://aclanthology.org`` are accepted.

    Args:
        uri: Address of the paper's landing page.

    Returns:
        A ``(title_markdown, abstract)`` tuple. The title is formatted as a
        level-3 Markdown heading followed by a newline; the abstract has its
        newlines removed.

    Raises:
        gr.Error: If the link is for an unsupported site, the page cannot be
            downloaded or parsed, or the expected tags are not in the page.
    """
    is_arxiv = uri.startswith("https://arxiv.org")
    if not is_arxiv and not uri.startswith("https://aclanthology.org"):
        raise gr.Error("Please supply an arxiv.org or aclanthology.org link!")

    try:
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(uri) as response:
            html = response.read()
        # NOTE(review): features="lxml" needs the lxml parser to be installed;
        # confirm it is listed in the project's requirements.
        doc = BeautifulSoup(html, features="lxml")
    except Exception as err:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit still propagate; keep the cause chained.
        raise gr.Error("Cannot fetch information. Please check the link!") from err

    # Both sites expose the title through the same Open Graph meta tag.
    title_tag = doc.find("meta", property="og:title")
    if is_arxiv:
        abstract_tag = doc.find("meta", property="og:description")
    else:
        abstract_tag = doc.find("div", {"class": "card-body"})

    # find() returns None when nothing matches; report that to the user
    # instead of failing with an opaque TypeError.
    if title_tag is None or abstract_tag is None:
        raise gr.Error("Cannot fetch information. Please check the link!")

    title = title_tag["content"]
    if is_arxiv:
        abstract = abstract_tag["content"]
    else:
        # Presumably drops the 8-character "Abstract" heading that leads the
        # card text on aclanthology.org — verify against a live page.
        abstract = abstract_tag.text[8:]
    abstract = abstract.replace("\n", "")

    return f"### {title}\n", abstract
74
 
75
  demo = gr.Blocks(theme=gr.themes.Soft())
 
76
 
77
  with demo:
78
 
 
94
  ## Step 1: \n\
95
  Select a model and a method of detection.
96
  """)
97
+ input_uri = gr.Text("Enter URL")
98
  with gr.Box():
99
+ title = gr.Markdown("### ")
100
+ description = gr.Markdown("")
101
 
102
  with gr.Column(scale=3):
103
  gr.Markdown("""## Step 2: \
 
119
  report_button = gr.Button("Generate Report")
120
  report_json = gr.Json(label="AVID Report")
121
 
122
+ input_uri.change(
123
+ fn=parse_uri,
124
+ inputs=[input_uri],
125
+ outputs=[title,description]
126
+ )
127
+
128
  report_button.click(
129
  fn=generate_report,
130
+ inputs=[input_uri,title,description,classof,type,risk_domain,sep,lifecycle],
131
  outputs=[report_json]
132
  )
133
 
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  gradio==3.35.2
 
2
 
3
  # for avidtools
4
  pydantic
 
1
  gradio==3.35.2
2
+ bs4
3
 
4
  # for avidtools
5
  pydantic