File size: 2,141 Bytes
f577cac
f2d2a0e
f577cac
 
 
f2d2a0e
 
 
 
 
 
f577cac
208bdc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f577cac
 
c859fdf
 
f577cac
 
5c95703
f577cac
 
 
 
5c95703
 
f577cac
5c95703
 
208bdc9
f2d2a0e
f577cac
208bdc9
f577cac
 
5c95703
208bdc9
f577cac
5c95703
 
f577cac
5c95703
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import re

import streamlit as st
from transformers import pipeline, AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

@st.cache_resource
def load_classifier(model_path: str):
    """Build the text-classification pipeline for the model at *model_path*.

    The model configuration is loaded with explicit label mappings so that
    class index 0 is reported as "Safe" and class index 1 as "Unsafe".
    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
    returned pipeline instead of rebuilding it on every script rerun.

    Args:
        model_path: Location of the pretrained model, passed unchanged to
            each ``from_pretrained`` call.

    Returns:
        A ``transformers`` "text-classification" pipeline.
    """
    # Single source of truth for the class names; position == class index.
    class_names = ("Safe", "Unsafe")
    index_to_name = dict(enumerate(class_names))
    name_to_index = {name: index for index, name in index_to_name.items()}

    model_config = AutoConfig.from_pretrained(
        model_path,
        id2label=index_to_name,
        label2id=name_to_index,
    )
    return pipeline(
        "text-classification",
        model=AutoModelForSequenceClassification.from_pretrained(
            model_path, config=model_config
        ),
        tokenizer=AutoTokenizer.from_pretrained(model_path),
    )

def defang_url(url: str) -> str:
    """Return *url* rewritten so that it is no longer clickable.

    Every ``http://`` or ``https://`` scheme marker becomes ``hxxp://`` or
    ``hxxps://``, and every period is wrapped in square brackets.

    The scheme is matched case-insensitively and anywhere in the string, not
    only as a lowercase prefix. This way inputs such as ``HTTPS://...``,
    a URL with leading whitespace, or a URL embedded in a query parameter
    are defanged as well.

    Args:
        url: The URL (or arbitrary text) to defang.

    Returns:
        The defanged string. An empty input yields an empty string.

    Example:
        >>> defang_url("https://example.com")
        'hxxps://example[.]com'
    """
    # Keep the optional "s" (lower-cased) so http and https stay distinguishable.
    scheme_defanged = re.sub(
        r"http(s?)://",
        lambda match: f"hxxp{match.group(1).lower()}://",
        url,
        flags=re.IGNORECASE,
    )

    # Break every dot so hostnames and IPs are not auto-linked.
    return scheme_defanged.replace(".", "[.]")

# --- Page header -----------------------------------------------------------
st.title("URL Typosquatting Detection with URLGuardian")
st.markdown(
    "This app uses the **URLGuardian** classifier developed by Anvilogic to detect potential suspicious URL. Enter a URL to assess!"
)

# --- Model -----------------------------------------------------------------
# The model is read from a directory relative to the working directory.
model_path = "./URLGuardian"
classifier = load_classifier(model_path)

# Strip surrounding whitespace so that blank/whitespace-only input is treated
# as "no URL" and a pasted URL is classified without accidental padding.
url = st.text_input("Enter the URL:", value="https://example.com").strip()

if st.button("Check Safety of the url"):
    if url:
        # The pipeline returns a list with one result dict per input string.
        result = classifier(url)[0]
        label = result["label"]
        score = result["score"]

        # Never echo the raw URL back: show a non-clickable form instead.
        defanged_url = defang_url(url)

        if label == "Safe":
            st.success(
                f"The URL '{defanged_url}' is considered safe with a confidence of {score * 100:.2f}%."
            )
        else:
            st.error(
                f"The URL '{defanged_url}' is considered suspicious with a confidence of {score * 100:.2f}%."
            )
        # Optionally, you can display the full result for debugging purposes:
        st.write("Full classification output:", result)
    else:
        st.error("Please enter a URL.")