# URLGuardian / app.py
import streamlit as st
from transformers import pipeline, AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
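
# Note: the app assumes `streamlit` and `transformers` (with a PyTorch backend)
# are installed, e.g. `pip install streamlit transformers torch`, and is
# launched with `streamlit run app.py`.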

@st.cache_resource
def load_classifier(model_path: str):
    """Load the URLGuardian model and tokenizer as a cached text-classification pipeline."""
    # Map the model's output indices to human-readable labels.
    id2label = {0: "Safe", 1: "Unsafe"}
    label2id = {"Safe": 0, "Unsafe": 1}
    config = AutoConfig.from_pretrained(model_path, id2label=id2label, label2id=label2id)
    model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)
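
# The pipeline returns a list with one dict per input, e.g. (illustrative values):
#   classifier("https://example.com")  ->  [{"label": "Safe", "score": 0.97}]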

def defang_url(url: str) -> str:
    """
    Defangs the URL to prevent it from being rendered as a clickable link.
    The protocol and dots are replaced, for example:
        https://example.com --> hxxps://example[.]com
    """
    # Replace the protocol scheme.
    if url.startswith("https://"):
        url = url.replace("https://", "hxxps://")
    elif url.startswith("http://"):
        url = url.replace("http://", "hxxp://")
    # Replace periods in the rest of the URL.
    return url.replace(".", "[.]")
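
# Example with a hypothetical lookalike URL:
#   defang_url("http://paypa1-login.com/verify")
#   -> "hxxp://paypa1-login[.]com/verify"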

st.title("URL Typosquatting Detection with URLGuardian")
st.markdown(
    "This app uses the **URLGuardian** classifier developed by Anvilogic to detect potentially suspicious URLs. "
    "Enter a URL to assess it!"
)

model_path = "./URLGuardian"
classifier = load_classifier(model_path)

url = st.text_input("Enter the URL:", value="https://example.com")

if st.button("Check URL safety"):
    if url:
        result = classifier(url)[0]
        label = result["label"]
        score = result["score"]
        defanged_url = defang_url(url)
        if label == "Safe":
            st.success(
                f"The URL '{defanged_url}' is considered safe with a confidence of {score * 100:.2f}%."
            )
        else:
            st.error(
                f"The URL '{defanged_url}' is considered suspicious with a confidence of {score * 100:.2f}%."
            )
        # Optionally, display the full classification output for debugging:
        st.write("Full classification output:", result)
    else:
        st.error("Please enter a URL.")