# URLGuardian / app.py
import streamlit as st
from transformers import pipeline, AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
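
# Note: the app assumes `streamlit` and `transformers` (with a PyTorch backend)
# are installed, e.g. `pip install streamlit transformers torch`, and is
# launched with `streamlit run app.py`.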

@st.cache_resource
def load_classifier(model_path: str):
    """Load the URLGuardian model and tokenizer as a cached text-classification pipeline."""
    # Map the model's output indices to human-readable labels.
    id2label = {0: "Safe", 1: "Unsafe"}
    label2id = {"Safe": 0, "Unsafe": 1}
    config = AutoConfig.from_pretrained(model_path, id2label=id2label, label2id=label2id)
    model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)
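
# The pipeline returns a list with one dict per input, e.g. (illustrative values):
#   classifier("https://example.com")  ->  [{"label": "Safe", "score": 0.97}]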

def defang_url(url: str) -> str:
    """
    Defangs the URL to prevent it from being rendered as a clickable link.
    The protocol and dots are replaced, for example:
        https://example.com --> hxxps://example[.]com
    """
    # Replace the protocol scheme.
    if url.startswith("https://"):
        url = url.replace("https://", "hxxps://")
    elif url.startswith("http://"):
        url = url.replace("http://", "hxxp://")
    # Replace periods in the rest of the URL.
    return url.replace(".", "[.]")
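
# Example with a hypothetical lookalike URL:
#   defang_url("http://paypa1-login.com/verify")
#   -> "hxxp://paypa1-login[.]com/verify"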

st.title("URL Typosquatting Detection with URLGuardian")
st.markdown(
    "This app uses the **URLGuardian** classifier developed by Anvilogic to detect potentially suspicious URLs. "
    "Enter a URL to assess it!"
)

model_path = "./URLGuardian"
classifier = load_classifier(model_path)

url = st.text_input("Enter the URL:", value="https://example.com")

if st.button("Check URL safety"):
    if url:
        result = classifier(url)[0]
        label = result["label"]
        score = result["score"]
        defanged_url = defang_url(url)
        if label == "Safe":
            st.success(
                f"The URL '{defanged_url}' is considered safe with a confidence of {score * 100:.2f}%."
            )
        else:
            st.error(
                f"The URL '{defanged_url}' is considered suspicious with a confidence of {score * 100:.2f}%."
            )
        # Optionally, display the full classification output for debugging:
        st.write("Full classification output:", result)
    else:
        st.error("Please enter a URL.")