Spaces:
Running
Running
File size: 2,133 Bytes
f577cac f2d2a0e f577cac f2d2a0e f577cac 208bdc9 48ceccc 208bdc9 48ceccc 208bdc9 f577cac c859fdf f577cac 5c95703 f577cac 48ceccc f577cac 5c95703 f577cac 5c95703 208bdc9 f2d2a0e f577cac 208bdc9 f577cac 5c95703 208bdc9 f577cac 5c95703 f577cac 5c95703 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import re

import streamlit as st
from transformers import pipeline, AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
@st.cache_resource
def load_classifier(model_path: str):
    """Build the text-classification pipeline for the model at *model_path*.

    The model configuration is loaded with class index 0 named "Safe" and
    class index 1 named "Unsafe", so pipeline results carry those labels.
    The config, model and tokenizer are all read from the same path.
    """
    index_to_name = {0: "Safe", 1: "Unsafe"}
    # The reverse mapping is derived so the two tables cannot drift apart.
    name_to_index = {name: index for index, name in index_to_name.items()}

    model_config = AutoConfig.from_pretrained(
        model_path,
        id2label=index_to_name,
        label2id=name_to_index,
    )
    return pipeline(
        "text-classification",
        model=AutoModelForSequenceClassification.from_pretrained(
            model_path, config=model_config
        ),
        tokenizer=AutoTokenizer.from_pretrained(model_path),
    )
def defang_url(url: str) -> str:
    """Return *url* rewritten so that it is no longer clickable.

    Every ``http://`` / ``https://`` scheme marker becomes ``hxxp://`` /
    ``hxxps://`` and every period becomes ``[.]``.

    For example:
        https://example.com --> hxxps://example[.]com

    Schemes are matched case-insensitively and anywhere in the string, so
    ``HTTPS://...`` and a URL embedded in another URL's query string are
    neutralised as well. Input without a scheme only has its periods
    replaced; an empty string is returned unchanged.
    """
    # URI schemes are case-insensitive and a URL may carry another URL inside
    # it; a case-sensitive, prefix-only check would leave those clickable.
    without_scheme = re.sub(
        r"http(s?)://",
        lambda match: f"hxxp{match.group(1).lower()}://",
        url,
        flags=re.IGNORECASE,
    )
    # Replace periods in the rest of the URL.
    return without_scheme.replace(".", "[.]")
# Page header followed by a short description of what the app does.
st.title("URL Typosquatting Detection with URLGuardian")

intro_text = (
    "This app uses the **URLGuardian** classifier developed by Anvilogic to detect potential suspicious URL. "
    "Enter a URL to assess!"
)
st.markdown(intro_text)

# The model files are read from a directory next to the script.
model_path = "./URLGuardian"
classifier = load_classifier(model_path)
# Ask for the URL to assess; the field is pre-filled with an example value.
url = st.text_input("Enter the URL:", value="example.com")

if st.button("Check Safety of the url"):
    # Strip surrounding whitespace first: a pasted URL often carries stray
    # spaces, and whitespace-only input must count as "nothing entered"
    # instead of being sent to the classifier.
    url = url.strip()
    if url:
        # The classifier returns a list of results; the first entry holds
        # the predicted label and its score.
        result = classifier(url)[0]
        label = result["label"]
        score = result["score"]

        # Never echo the raw URL back: show a non-clickable form instead.
        defanged_url = defang_url(url)

        if label == "Safe":
            st.success(
                f"The URL '{defanged_url}' is considered safe with a confidence of {score * 100:.2f}%."
            )
        else:
            st.error(
                f"The URL '{defanged_url}' is considered suspicious with a confidence of {score * 100:.2f}%."
            )

        # Optionally, you can display the full result for debugging purposes:
        st.write("Full classification output:", result)
    else:
        st.error("Please enter a URL.")