Spaces:
Running
Running
import streamlit as st | |
from transformers import pipeline, AutoConfig, AutoModelForSequenceClassification, AutoTokenizer | |
def load_classifier(model_path: str):
    """Build a text-classification pipeline from the model at *model_path*.

    The model configuration is loaded with an explicit two-class label
    mapping (index 0 is "Safe", index 1 is "Unsafe"), and the same path is
    used to load both the sequence-classification model and its tokenizer.

    Args:
        model_path: Location passed to the ``from_pretrained`` loaders.

    Returns:
        A ``transformers`` "text-classification" pipeline wrapping the loaded
        model and tokenizer.
    """
    index_to_name = {0: "Safe", 1: "Unsafe"}
    # The reverse mapping is derived from the forward one so the two cannot
    # drift apart.
    name_to_index = {name: index for index, name in index_to_name.items()}

    model_config = AutoConfig.from_pretrained(
        model_path,
        id2label=index_to_name,
        label2id=name_to_index,
    )
    return pipeline(
        "text-classification",
        model=AutoModelForSequenceClassification.from_pretrained(
            model_path, config=model_config
        ),
        tokenizer=AutoTokenizer.from_pretrained(model_path),
    )
def defang_url(url: str) -> str:
    """Return *url* rewritten so that it is no longer clickable.

    Every ``http://`` / ``https://`` scheme marker becomes ``hxxp://`` /
    ``hxxps://`` and every dot is wrapped in square brackets.

    The scheme is matched case-insensitively and anywhere in the string, so
    ``HTTPS://...`` and URLs embedded inside another URL (for example in a
    redirect query parameter) are neutralised as well.

    Example:
        https://example.com --> hxxps://example[.]com

    Args:
        url: The URL (or URL-like text) to defang.

    Returns:
        The defanged string. Input without a scheme only has its dots
        replaced; an empty string is returned unchanged.
    """
    import re  # local import: the helper stays usable without file-level changes

    # Rewrite the scheme(s). The optional "s" is captured so http and https
    # map to hxxp and hxxps respectively; it is lower-cased to give a
    # canonical defanged form regardless of the input's casing.
    without_live_scheme = re.sub(
        r"http(s?)://",
        lambda match: f"hxxp{match.group(1).lower()}://",
        url,
        flags=re.IGNORECASE,
    )
    # Bracket every dot in what remains (host, path and query alike).
    return without_live_scheme.replace(".", "[.]")
# --- Streamlit page -------------------------------------------------------
st.title("URL Typosquatting Detection with URLGuardian")
st.markdown(
    "This app uses the **URLGuardian** classifier developed by Anvilogic to detect potential suspicious URL. "
    "Enter a URL to assess!"
)


@st.cache_resource
def _get_classifier(path: str):
    """Load the classifier once and reuse it across script reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the model and tokenizer would be reloaded on each button click.
    """
    return load_classifier(path)


model_path = "./URLGuardian"
classifier = _get_classifier(model_path)

# Strip surrounding whitespace so a blank-looking input is treated as empty
# and stray spaces from copy/paste do not reach the classifier.
url = st.text_input("Enter the URL:", value="https://example.com").strip()

if st.button("Check Safety of the url"):
    if url:
        # The pipeline returns a list with one result dict per input.
        result = classifier(url)[0]
        label = result["label"]
        score = result["score"]
        # Never echo the raw URL back: show a non-clickable form instead.
        defanged_url = defang_url(url)
        if label == 'Safe':
            st.success(
                f"The URL '{defanged_url}' is considered safe with a confidence of {score * 100:.2f}%."
            )
        else:
            st.error(
                f"The URL '{defanged_url}' is considered suspicious with a confidence of {score * 100:.2f}%."
            )
        # Optionally, you can display the full result for debugging purposes:
        st.write("Full classification output:", result)
    else:
        st.error("Please enter a URL.")