# Scraped file-viewer header (not part of the program), kept for provenance:
#   author avatar: shubhobm
#   commit: feat(app): added scraper thanks to @leondz (fd4c06d)
#   file size: 4.75 kB
import gradio as gr
import urllib.request
from datetime import date
from bs4 import BeautifulSoup
from avidtools.datamodels.report import Report
from avidtools.datamodels.components import *
from avidtools.datamodels.enums import *
# def generate_report():
def generate_report(uri,title,abstract,classof,type,risk_domain,sep,lifecycle):
    """Build an AVID report from the paper details and taxonomy selections.

    Args:
        uri: Link to the paper; stored as the URL of the single reference.
        title: Paper title. It may still carry the "### " Markdown heading
            marker (and a trailing newline) that ``parse_uri`` adds for
            display; both are removed before the title is stored.
        abstract: Paper abstract; becomes the report description.
        classof: Value passed to ``Problemtype.classof``.
        type: Value passed to ``Problemtype.type``. The name shadows the
            builtin but is kept so the signature stays unchanged.
        risk_domain: Value passed to ``AvidTaxonomy.risk_domain``.
        sep: Value passed to ``AvidTaxonomy.sep_view``.
        lifecycle: Value passed to ``AvidTaxonomy.lifecycle_view``.

    Returns:
        The result of ``report.dict()``.
    """
    heading_marker = "### "

    # The title arrives formatted for on-screen display; the report should
    # hold the plain title text, not Markdown heading syntax.
    clean_title = title
    if isinstance(clean_title, str):
        if clean_title.startswith(heading_marker):
            clean_title = clean_title[len(heading_marker):]
        clean_title = clean_title.strip()

    report = Report()

    # NOTE: report.affects is not populated here; the earlier assignment
    # (developer / deployer / artifacts) was commented out in the source.

    report.problemtype = Problemtype(
        classof=classof,
        type=type,
        description=LangValue(
            lang='eng',
            value=clean_title,
        ),
    )
    report.references = [
        Reference(
            label=clean_title,
            url=uri,
        )
    ]
    report.description = LangValue(
        lang='eng',
        value=abstract,
    )
    report.impact = Impact(
        avid=AvidTaxonomy(
            risk_domain=risk_domain,
            sep_view=sep,
            lifecycle_view=lifecycle,
            taxonomy_version='0.2',
        )
    )
    report.reported_date = date.today()

    return report.dict()
# parses out title and abstract: works only on arxiv or acl
def parse_uri(uri):
    """Fetch a paper page and extract its title and abstract.

    Only HTTPS pages hosted on arxiv.org or aclanthology.org are accepted.

    Args:
        uri: Link to the paper's page.

    Returns:
        A ``(title, abstract)`` tuple. The title is formatted as a level-3
        Markdown heading followed by a newline; the abstract has its
        newline characters removed.

    Raises:
        gr.Error: If the link is not on a supported host, the page cannot
            be fetched or parsed, or the expected title/abstract elements
            are missing from the page.
    """
    from urllib.parse import urlsplit

    arxiv_host = "arxiv.org"
    acl_host = "aclanthology.org"
    acl_heading = "Abstract"  # heading text that precedes the ACL abstract
    fetch_timeout_s = 15

    uri = uri.strip() if isinstance(uri, str) else ""

    # Compare the parsed hostname rather than a string prefix, so look-alike
    # links such as "https://arxiv.org.example.com" or
    # "https://arxiv.org@example.com" are rejected before anything is fetched.
    try:
        parts = urlsplit(uri)
        scheme, host = parts.scheme, parts.hostname
    except ValueError:
        scheme, host = "", None
    if scheme != "https" or host not in (arxiv_host, acl_host):
        raise gr.Error("Please supply an arxiv.org or aclanthology.org link!")

    try:
        # The context manager closes the connection even if reading fails;
        # the timeout keeps an unresponsive server from hanging the request.
        with urllib.request.urlopen(uri, timeout=fetch_timeout_s) as response:
            html = response.read()
        doc = BeautifulSoup(html, features="lxml")
    except Exception as err:
        raise gr.Error("Cannot fetch information. Please check the link!") from err

    title_tag = doc.find("meta", property="og:title")
    title = title_tag.get("content") if title_tag is not None else None

    if host == arxiv_host:
        abstract_tag = doc.find("meta", property="og:description")
        abstract = abstract_tag.get("content") if abstract_tag is not None else None
    else:
        card = doc.find("div", {"class": "card-body"})
        abstract = card.text if card is not None else None
        # Drop the leading "Abstract" heading only when it is really there,
        # instead of blindly cutting the first eight characters.
        if abstract is not None and abstract.startswith(acl_heading):
            abstract = abstract[len(acl_heading):]

    if title is None or abstract is None:
        raise gr.Error("Could not find a title and abstract on that page. Please check the link!")

    return f"### {title}\n", abstract.replace("\n", "")
# Long-form Markdown copy for the page. Kept flush-left at module level so the
# text does not pick up code indentation.
# NOTE(review): the intro and the Step 1 blurb talk about choosing a model and
# a metric, while the widgets below take a paper URL - the copy looks
# inherited from another demo; confirm and reword.
_INTRO_MD = """
As language models become more prevalent in day-to-day technology, it's important to develop methods to \
investigate their biases and limitations. To this end, researchers are developing metrics like \
BOLD, HONEST, and WinoBias that calculate scores which represent their tendency to generate "unfair" text across \
different collections of prompts. With the widgets below, you can choose a model and a metric to run your own \
evaluations.
Generating these scores is only half the battle, though! What do you do with these numbers once you've evaluated \
a model? [AVID](https://avidml.org)'s data model makes it easy to collect and communicate your findings with \
structured reports.
"""

_STEP1_MD = """
## Step 1: \n\
Select a model and a method of detection.
"""

_STEP2_MD = """## Step 2: \
Categorize your report."""

_STEP3_MD = """
## Step 3: \n\
Generate a report that you can submit to AVID.
The title and abstract get auto-populated from Step 1. The taxonomy categories populate from your selections in Step 2.
"""

demo = gr.Blocks(theme=gr.themes.Soft())

with demo:
    gr.Markdown("# Report AI Vulnerability Research")
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        # Step 1: paper link plus the title/abstract scraped from it.
        with gr.Column(scale=2):
            gr.Markdown(_STEP1_MD)
            # "Enter URL" is the field's label. Passed positionally it would
            # be used as the textbox's initial value, which the user would
            # have to delete before typing a link.
            input_uri = gr.Text(label="Enter URL")
            with gr.Box():
                title = gr.Markdown("### ")
                description = gr.Markdown("")

        # Step 2: taxonomy selections.
        with gr.Column(scale=3):
            gr.Markdown(_STEP2_MD)
            classof = gr.Radio(label="Class", choices=[ce.value for ce in ClassEnum])
            # Named type_radio so the module does not shadow the builtin `type`.
            type_radio = gr.Radio(label="Type", choices=[te.value for te in TypeEnum])
            risk_domain = gr.CheckboxGroup(label="Risk Domain", choices=['Security','Ethics','Performance'])
            sep = gr.CheckboxGroup(label="Effect Categories", choices=[se.value for se in SepEnum])
            lifecycle = gr.CheckboxGroup(label="Lifecycle Categories", choices=[le.value for le in LifecycleEnum])

        # Step 3: the generated report.
        with gr.Column(scale=5):
            gr.Markdown(_STEP3_MD)
            report_button = gr.Button("Generate Report")
            report_json = gr.Json(label="AVID Report")

    # Re-scrape the title and abstract whenever the link changes.
    input_uri.change(
        fn=parse_uri,
        inputs=[input_uri],
        outputs=[title, description],
    )

    # Input order must match generate_report's positional parameters.
    report_button.click(
        fn=generate_report,
        inputs=[input_uri, title, description, classof, type_radio, risk_domain, sep, lifecycle],
        outputs=[report_json],
    )

demo.launch()