File size: 2,133 Bytes
f577cac
f2d2a0e
f577cac
 
 
f2d2a0e
 
 
 
 
 
f577cac
208bdc9
 
 
 
 
 
48ceccc
 
 
 
 
 
208bdc9
48ceccc
208bdc9
 
f577cac
 
c859fdf
 
f577cac
 
5c95703
f577cac
 
48ceccc
f577cac
5c95703
 
f577cac
5c95703
 
208bdc9
f2d2a0e
f577cac
208bdc9
f577cac
 
5c95703
208bdc9
f577cac
5c95703
 
f577cac
5c95703
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import re

import streamlit as st
from transformers import pipeline, AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

@st.cache_resource
def load_classifier(model_path: str):
    """Build the text-classification pipeline for the model at *model_path*.

    The model configuration is loaded with a fixed two-class label mapping
    (0 -> "Safe", 1 -> "Unsafe") so pipeline results carry those names in
    their ``label`` field. The function is wrapped in ``st.cache_resource``.

    Args:
        model_path: Location passed to each ``from_pretrained`` call.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        "text-classification" task.
    """
    index_to_name = {0: "Safe", 1: "Unsafe"}
    # The reverse mapping is derived so the two dicts cannot drift apart.
    name_to_index = {name: index for index, name in index_to_name.items()}

    model_config = AutoConfig.from_pretrained(
        model_path,
        id2label=index_to_name,
        label2id=name_to_index,
    )
    return pipeline(
        "text-classification",
        model=AutoModelForSequenceClassification.from_pretrained(
            model_path, config=model_config
        ),
        tokenizer=AutoTokenizer.from_pretrained(model_path),
    )

def defang_url(url: str) -> str:
    """Return a defanged copy of *url* that is no longer clickable.

    Every ``http://`` / ``https://`` scheme is rewritten to ``hxxp://`` /
    ``hxxps://`` and every dot is wrapped in square brackets.

    For example:
        https://example.com  -->  hxxps://example[.]com

    Scheme matching is case-insensitive (URL schemes are not case-sensitive,
    so ``HTTPS://`` is just as live as ``https://``) and applies to every
    occurrence, so a URL embedded in a query string is neutralised too.

    Args:
        url: The URL text to defang; may omit the scheme entirely.

    Returns:
        The defanged string. An empty input yields an empty string.
    """
    # One pass handles both schemes: the optional "s" group tells them apart.
    without_scheme = re.sub(
        r"http(s?)://",
        lambda match: "hxxps://" if match.group(1) else "hxxp://",
        url,
        flags=re.IGNORECASE,
    )

    # Replace periods in the rest of the URL
    return without_scheme.replace(".", "[.]")

# Page header shown on every run of the script.
st.title("URL Typosquatting Detection with URLGuardian")
st.markdown(
    "This app uses the **URLGuardian** classifier developed by Anvilogic to detect potential suspicious URL. "
    "Enter a URL to assess!"
)

# Model location handed to load_classifier.
model_path = "./URLGuardian"
classifier = load_classifier(model_path)

# Strip surrounding whitespace once, up front: a pasted " example.com " is
# then classified as the URL itself, and whitespace-only input is treated as
# empty so it reaches the "Please enter a URL." branch instead of the model.
url = st.text_input("Enter the URL:", value="example.com").strip()

if st.button("Check Safety of the url"):
    if url:
        # The classifier returns a list; the first entry is used for this input.
        result = classifier(url)[0]
        label = result["label"]
        score = result["score"]
        # Only the defanged form is echoed back so the page never renders a
        # live link to a potentially malicious URL.
        defanged_url = defang_url(url)
        if label == "Safe":
            st.success(
                f"The URL '{defanged_url}' is considered safe with a confidence of {score * 100:.2f}%."
            )
        else:
            st.error(
                f"The URL '{defanged_url}' is considered suspicious with a confidence of {score * 100:.2f}%."
            )
        # Optionally, you can display the full result for debugging purposes:
        st.write("Full classification output:", result)
    else:
        st.error("Please enter a URL.")