Spaces:

HARISH20205
/

kebos-ai

Sleeping

App Files Files Community

kebos-ai / app.py

HARISH20205

no binary

8b6e40c 5 months ago

raw

history blame contribute delete

3.28 kB

	import asyncio
	import pandas as pd
	from concurrent.futures import ThreadPoolExecutor
	from flask import Flask, request, render_template
	from catboost import CatBoostClassifier # Import CatBoost
	from url_process import extract_url_features # Ensure you have the appropriate feature extraction function
	import os
	# Batch Processing: Ensures URLs are processed in manageable chunks
	def process_urls_in_batches(urls, batch_size=10):
	    """Yield consecutive slices of ``urls`` holding at most ``batch_size`` items.

	    Args:
	        urls: A sequence that supports ``len()`` and slicing (for example a
	            list of URL strings).
	        batch_size: Maximum number of items per batch; must be at least 1.

	    Yields:
	        Slices of ``urls`` in their original order. The final slice may be
	        shorter than ``batch_size``.

	    Raises:
	        ValueError: If ``batch_size`` is less than 1. Because this is a
	            generator, the error surfaces when iteration starts.
	    """
	    # Without this guard a zero size fails inside range() with an unrelated
	    # message and a negative size silently produces no batches at all.
	    if batch_size < 1:
	        raise ValueError(
	            f"batch_size must be a positive integer, got {batch_size!r}"
	        )

	    for start in range(0, len(urls), batch_size):
	        yield urls[start:start + batch_size]

	# Async function for non-blocking DNS lookups and HTTP requests
	async def async_extract_features(url):
	    """Run ``extract_url_features(url)`` in a worker thread and return its result.

	    The call is handed to ``asyncio.to_thread`` so the event loop is not
	    blocked while the extraction runs.
	    """
	    return await asyncio.to_thread(extract_url_features, url)

	# Thread pool for extracting features from many URLs concurrently; threads help with I/O-bound work (e.g. DNS/HTTP lookups), not CPU-bound work
	def extract_features_in_parallel(urls, *, max_workers=5):
	    """Extract features for every URL in ``urls`` using a pool of worker threads.

	    Args:
	        urls: Iterable of URLs; each one is passed to ``extract_url_features``.
	        max_workers: Upper bound on the number of worker threads. Defaults to
	            5, the value that was previously hard-coded.

	    Returns:
	        A list of the ``extract_url_features`` results, in the same order as
	        ``urls``.

	    Raises:
	        Any exception raised by ``extract_url_features`` for one of the URLs
	        propagates when its result is collected.
	    """
	    with ThreadPoolExecutor(max_workers=max_workers) as executor:
	        # executor.map preserves input order, so results line up with urls.
	        return list(executor.map(extract_url_features, urls))

	# Load the CatBoost model for inference
	def predict_with_catboost(features_df, model_path):
	    """Load a CatBoost classifier from ``model_path`` and predict on ``features_df``.

	    Args:
	        features_df: Feature table passed unchanged to ``model.predict``.
	        model_path: Filesystem path of the saved CatBoost model.

	    Returns:
	        Whatever ``CatBoostClassifier.predict`` returns for ``features_df``.

	    Raises:
	        Any exception raised while inspecting or loading the model file, or
	        while predicting, is logged to stdout and re-raised unchanged.
	    """
	    # NOTE(review): the model is re-read from disk on every call; consider
	    # caching the loaded model if this shows up as request latency.
	    try:
	        # Diagnostics to make deployment problems (wrong path, empty file) visible.
	        print(f"Attempting to load model from: {model_path}")
	        print(f"File exists: {os.path.exists(model_path)}")
	        print(f"File size: {os.path.getsize(model_path)}")

	        model = CatBoostClassifier()
	        model.load_model(model_path)
	    except Exception as e:
	        print(f"Error loading model: {str(e)}")
	        raise

	    # Prediction is handled separately so that a failure here is not
	    # misreported as a model-loading problem.
	    try:
	        return model.predict(features_df)
	    except Exception as e:
	        print(f"Error during prediction: {str(e)}")
	        raise

	# Flask App Setup: module-level application object that the route decorator
	# and the __main__ entry point below attach to.
	app = Flask(__name__)

	def _find_model_path():
	    """Return the first existing CatBoost model file path, or ``None`` if none exists."""
	    candidates = [
	        # A bare relative "catboost_model.bin" resolves to this same cwd path,
	        # so it is not listed separately.
	        os.path.join(os.getcwd(), "catboost_model.bin"),
	        "/app/catboost_model.bin",  # Docker container path
	        # Next to this source file, so the lookup still works when the
	        # process was started from a different working directory.
	        os.path.join(os.path.dirname(os.path.abspath(__file__)), "catboost_model.bin"),
	    ]
	    return next((path for path in candidates if os.path.exists(path)), None)


	@app.route("/", methods=["GET", "POST"])
	async def index():
	    """Render the URL-check page and, on POST, classify the submitted URL.

	    On GET the template is rendered with no result. On POST the ``url`` form
	    field is read, its features are extracted, and the CatBoost model's
	    prediction is mapped to "Malicious" (label 1) or "Legitimate" (any other
	    label). Any failure is reported on the page as an error message instead
	    of propagating.

	    Returns:
	        The rendered ``index.html`` template with ``result`` (a status or
	        error string, or ``None``) and ``url_features`` (the extracted
	        features, or ``None``).
	    """
	    result = None
	    url_features = None

	    if request.method == "POST":
	        # .get() instead of indexing: a missing field would otherwise abort
	        # the request with Flask's automatic 400 before any handling runs.
	        url = request.form.get("url", "").strip()

	        if not url:
	            result = "Error processing URL: no URL was provided"
	        else:
	            try:
	                # Asynchronously process the URL features
	                features = await async_extract_features(url)

	                # Single-row DataFrame: one submitted URL per request.
	                features_df = pd.DataFrame([features])

	                model_path = _find_model_path()
	                if model_path is None:
	                    raise FileNotFoundError("Model file not found in any expected location")

	                predictions = predict_with_catboost(features_df, model_path)

	                # Determine if the URL is malicious or legitimate
	                result = "Malicious" if predictions[0] == 1 else "Legitimate"

	                # Expose the extracted features to the template as well.
	                url_features = features
	            except Exception as e:
	                # Request boundary: show the failure on the page rather than a 500.
	                result = f"Error processing URL: {str(e)}"

	    return render_template("index.html", result=result, url_features=url_features)

	if __name__ == "__main__":
	    # Script entry point: serve on all interfaces at port 7860 with the
	    # debugger disabled.
	    app.run(host="0.0.0.0", port=7860, debug=False)