report-vuln-research

Running

File size: 4,814 Bytes

d8c1c18
 
 
 
a508253
3b232e3
fd4c06d
3b232e3
fd4c06d
3b232e3
 
 
 
 
d523c31
fd4c06d
d523c31
3b232e3
 
d523c31
 
 
 
 
 
 
 
3b232e3
d523c31
 
 
3b232e3
 
fd4c06d
3b232e3
 
d523c31
3b232e3
fd4c06d
 
3b232e3
 
 
 
fd4c06d
3b232e3
 
 
d523c31
 
 
3b232e3
 
 
 
 
 
 
fd4c06d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b232e3
2d23a0c
 
 
3b232e3
 
 
 
d523c31
3b232e3
2d23a0c
 
 
 
3b232e3
 
2d23a0c
3b232e3
2d23a0c
 
3b232e3
2d23a0c
 
fd4c06d
 
d523c31
2d23a0c
3b232e3
2d23a0c
d523c31
 
 
2d23a0c
d523c31
 
 
2d23a0c
3b232e3
2d23a0c
3b232e3
d523c31
3b232e3
 
 
 
2d23a0c
 
 
 
 
fd4c06d
 
 
 
 
3b232e3
 
fd4c06d
3b232e3

# # install correct gradio version
# import os
# os.system("pip uninstall -y gradio")
# os.system("pip install gradio==3.35.2")

import gradio as gr
import urllib.request
from datetime import date
from bs4 import BeautifulSoup

from avidtools.datamodels.report import Report
from avidtools.datamodels.components import *
from avidtools.datamodels.enums import *

def generate_report(uri, title, abstract, classof, type, risk_domain, sep, lifecycle):
    """Assemble an AVID report from paper metadata and taxonomy selections.

    Args:
        uri: Link to the paper; stored as the URL of the single reference.
        title: Paper title; used as the problem-type description and as the
            reference label.
        abstract: Paper abstract; stored as the report description.
        classof: Value for the problem type's ``classof`` field.
        type: Value for the problem type's ``type`` field.
        risk_domain: Value for the AVID taxonomy ``risk_domain`` field.
        sep: Value for the AVID taxonomy ``sep_view`` field.
        lifecycle: Value for the AVID taxonomy ``lifecycle_view`` field.

    Returns:
        The populated report converted with ``Report.dict()``.
    """
    # NOTE(review): the UI feeds this from the title Markdown component, so
    # `title` may still carry heading markup ("### ...") - confirm whether it
    # should be stripped before it lands in the report.
    report = Report()

    # The `affects` section (developer / deployer / artifacts) is not populated.
    headline = LangValue(lang='eng', value=title)
    report.problemtype = Problemtype(
        classof=classof,
        type=type,
        description=headline,
    )

    paper_link = Reference(label=title, url=uri)
    report.references = [paper_link]

    report.description = LangValue(lang='eng', value=abstract)

    taxonomy = AvidTaxonomy(
        risk_domain=risk_domain,
        sep_view=sep,
        lifecycle_view=lifecycle,
        taxonomy_version='0.2',
    )
    report.impact = Impact(avid=taxonomy)

    # Stamp the report with the day it was generated.
    report.reported_date = date.today()

    return report.dict()

def parse_uri(uri):
    """Fetch a paper page and pull out its title and abstract.

    Works only for HTTPS links whose host is exactly ``arxiv.org`` or
    ``aclanthology.org``.

    Args:
        uri: Link to the paper's landing page.

    Returns:
        A ``(title_markdown, abstract)`` tuple. The title is formatted as a
        level-3 Markdown heading followed by a newline; the abstract has all
        whitespace runs collapsed to single spaces.

    Raises:
        gr.Error: If the link is not on a supported site, the page cannot be
            fetched or parsed, or the expected title/abstract markup is
            missing from the page.
    """
    # Function-scope import so this block does not depend on a new
    # file-level import.
    from urllib.parse import urlparse

    supported_hosts = ("arxiv.org", "aclanthology.org")
    fetch_timeout_s = 30  # do not let a stalled server hang the UI callback

    # Validate the parsed host instead of a string prefix, so look-alikes such
    # as "https://arxiv.org.example.com" or "https://arxiv.org@example.com"
    # are rejected.
    try:
        parts = urlparse(uri) if isinstance(uri, str) else None
        scheme = parts.scheme if parts else None
        host = parts.hostname if parts else None
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. broken IPv6 brackets).
        scheme = host = None
    if scheme != "https" or host not in supported_hosts:
        raise gr.Error("Please supply an arxiv.org or aclanthology.org link!")

    try:
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(uri, timeout=fetch_timeout_s) as response:
            html = response.read()
        doc = BeautifulSoup(html, features="lxml")
    except Exception as err:
        raise gr.Error("Cannot fetch information. Please check the link!") from err

    # Both sites are read via the Open Graph title tag.
    title_tag = doc.find("meta", property="og:title")
    title = title_tag.get("content") if title_tag is not None else None

    if host == "arxiv.org":
        abstract_tag = doc.find("meta", property="og:description")
        raw_abstract = abstract_tag.get("content") if abstract_tag is not None else None
    else:
        abstract_tag = doc.find("div", {"class": "card-body"})
        # Drops the first 8 characters of the card text - presumably the
        # "Abstract" heading that precedes the abstract; verify against the
        # ACL Anthology page markup.
        raw_abstract = abstract_tag.text[8:] if abstract_tag is not None else None

    if title is None or raw_abstract is None:
        # The page loaded but lacks the expected markup (e.g. not a paper page).
        raise gr.Error("Cannot fetch information. Please check the link!")

    # Collapse whitespace so a hard line wrap does not fuse the words on
    # either side of the newline.
    abstract = " ".join(raw_abstract.split())

    return f"### {title}\n", abstract

def empty_all_fields():
    """Return blank values used to reset the form when the link changes.

    Returns:
        A 6-tuple: five ``None`` entries followed by a fresh empty dict. The
        UI maps these onto the five category selectors and the report viewer.
    """
    cleared_selectors = [None] * 5
    return (*cleared_selectors, {})

# UI layout and event wiring for the reporting demo:
#   Step 1 - paste a paper link; its title and abstract are fetched and shown.
#   Step 2 - pick taxonomy categories for the paper.
#   Step 3 - generate the AVID report as JSON.
demo = gr.Blocks(theme=gr.themes.Soft())

with demo:

    gr.Markdown("# Report AI Vulnerability Research")
    gr.Markdown("""
    AI Researchers are focusing increasingly on evaluating AI models and finding novel failure modes in them. \
    With the deluge of AI research coming up, it's really difficult to track and stay updated on recent papers on AI failures.\
    [AVID](https://avidml.org) makes it easy to do that. This space demonstrates how you can report an interesting research paper \
    (maybe yours!) on LLM vulnerabilities to AVID in a structured manner.
    """)
    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("""
            ## Step 1: Paste a link.\n\
            (arXiv or ACL Anthology)
            """)
            # Show the hint as a placeholder rather than as the field's value,
            # so the user does not have to delete it before pasting a link.
            input_uri = gr.Text(label="Link", placeholder="Enter URL")
        with gr.Column(scale=8):
            # Filled in from the fetched page once a link is entered.
            title = gr.Markdown("### ")
            description = gr.Markdown("")

    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("""## Step 2: Categorize the research.""")

            classof = gr.Radio(label="Class", choices=[ce.value for ce in ClassEnum])
            # Named `vuln_type` so the builtin `type` is not shadowed at module level.
            vuln_type = gr.Radio(label="Type", choices=[te.value for te in TypeEnum])
            lifecycle = gr.CheckboxGroup(label="Lifecycle Categories", choices=[le.value for le in LifecycleEnum])
            risk_domain = gr.CheckboxGroup(label="Risk Domain", choices=['Security','Ethics','Performance'])
            sep = gr.CheckboxGroup(label="Effect Categories", choices=[se.value for se in SepEnum])

        with gr.Column(scale=3):
            gr.Markdown("""
            ## Step 3: Generate a report to submit to AVID.

            The title and abstract get auto-populated from Step 1. The taxonomy categories populate from your selections in Step 2.
            """)
            report_button = gr.Button("Generate Report")
            report_json = gr.Json(label="AVID Report")

    # A new link invalidates earlier choices: clear the selectors and the report.
    input_uri.change(
        fn=empty_all_fields,
        inputs=[],
        outputs=[classof, vuln_type, risk_domain, sep, lifecycle, report_json]
    )
    # ...and fetch the title and abstract for the new link.
    input_uri.change(
        fn=parse_uri,
        inputs=[input_uri],
        outputs=[title, description]
    )
    # Input order must match generate_report's positional parameters.
    report_button.click(
        fn=generate_report,
        inputs=[input_uri, title, description, classof, vuln_type, risk_domain, sep, lifecycle],
        outputs=[report_json]
    )

demo.launch()