File size: 3,879 Bytes
f3352b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c58259
fca3ef1
 
 
 
5c58259
fca3ef1
 
 
 
 
 
 
 
 
 
 
 
 
 
5c58259
 
 
 
 
f3352b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c58259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3352b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f56e7e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import asyncio
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from flask import Flask, request, render_template
from catboost import CatBoostClassifier  # Import CatBoost
from url_process import extract_url_features  # Ensure you have the appropriate feature extraction function
import os
# Batch Processing: Ensures URLs are processed in manageable chunks
def process_urls_in_batches(urls, batch_size=10):
    """Yield consecutive slices of ``urls`` holding at most ``batch_size`` items.

    ``urls`` must support ``len()`` and slicing. The last batch is shorter
    when ``len(urls)`` is not a multiple of ``batch_size``; an empty input
    yields nothing.
    """
    batch_starts = range(0, len(urls), batch_size)
    yield from (urls[start:start + batch_size] for start in batch_starts)

# Async wrapper: runs the blocking feature extraction in a worker thread so the event loop is not blocked
async def async_extract_features(url):
    """Return ``extract_url_features(url)``, computed in a worker thread.

    The call is handed to ``asyncio.to_thread`` so the synchronous
    extraction does not block the running event loop while it executes.
    """
    return await asyncio.to_thread(extract_url_features, url)

# ThreadPoolExecutor fans feature extraction out over several URLs concurrently (threads help when the work is I/O-bound)
def extract_features_in_parallel(urls):
    """Apply ``extract_url_features`` to every URL using up to five threads.

    Returns a list with one result per URL, in the same order as ``urls``.
    """
    pool = ThreadPoolExecutor(max_workers=5)
    with pool:
        results = pool.map(extract_url_features, urls)
        return list(results)

# Load the CatBoost model and run inference on the extracted features
# Formats tried in order when loading the model file. "cbm" is CatBoost's
# native binary format (and the library default); the others are fallbacks
# for models that were exported differently.
_CATBOOST_LOAD_FORMATS = ("cbm", "json", "onnx")


def _load_catboost_model(model_path):
    """Load a CatBoost model from ``model_path``, trying each known format.

    Args:
        model_path: Path to the serialized model file.

    Returns:
        A ``CatBoostClassifier`` with the model loaded.

    Raises:
        Exception: The error from the first (default-format) attempt when
            every format fails; it is the most relevant one for a model
            saved in CatBoost's native format.
    """
    first_error = None
    for model_format in _CATBOOST_LOAD_FORMATS:
        # Use a fresh classifier per attempt so a failed load cannot leave
        # partially initialised state behind.
        model = CatBoostClassifier()
        print(f"Attempting to load model with format='{model_format}'...")
        try:
            model.load_model(model_path, format=model_format)
        except Exception as err:  # CatBoost error types vary by failure mode
            print(f"Load with format='{model_format}' failed: {err}")
            if first_error is None:
                first_error = err
        else:
            return model
    raise first_error


def predict_with_catboost(features_df, model_path):
    """Load the CatBoost model at ``model_path`` and predict on ``features_df``.

    Args:
        features_df: Feature rows to classify, passed unchanged to
            ``CatBoostClassifier.predict``.
        model_path: Path to the serialized CatBoost model file.

    Returns:
        The value returned by ``model.predict(features_df)``.

    Raises:
        Exception: Any error raised while inspecting or loading the model
            file is logged and re-raised. Errors raised by ``predict``
            propagate unchanged and are not reported as load failures.
    """
    try:
        print(f"Attempting to load model from: {model_path}")
        print(f"File exists: {os.path.exists(model_path)}")
        print(f"File size: {os.path.getsize(model_path)}")
        model = _load_catboost_model(model_path)
    except Exception as e:
        print(f"Error loading model: {e}")
        raise

    # Kept outside the try block so a prediction failure is not mislabelled
    # as a model-loading error.
    return model.predict(features_df)

# Flask App Setup
# Application object on which the route handler(s) below are registered via @app.route.
app = Flask(__name__)

def _find_model_path():
    """Return the first existing CatBoost model path, or ``None`` if none exists."""
    # A bare relative "catboost_model.bin" resolves against the current
    # working directory, so the CWD-joined candidate already covers it.
    candidates = (
        os.path.join(os.getcwd(), "catboost_model.bin"),
        "/app/catboost_model.bin",  # Docker container path
    )
    return next((path for path in candidates if os.path.exists(path)), None)


@app.route("/", methods=["GET", "POST"])
async def index():
    """Render the form and, on POST, classify the submitted URL.

    On POST the ``url`` form field is read, its features are extracted, and
    the CatBoost model labels it "Malicious" (prediction ``1``) or
    "Legitimate" (anything else). Any failure while processing is turned
    into an "Error processing URL: ..." message instead of an HTTP error.

    NOTE: Flask supports ``async def`` views only when installed with its
    async extra (``flask[async]``).

    Returns:
        The rendered ``index.html`` template with ``result`` (label or error
        text, ``None`` on GET) and ``url_features`` (the extracted features
        on success, otherwise ``None``).
    """
    result = None
    url_features = None

    if request.method == "POST":
        # Surrounding whitespace is a common copy/paste artefact and is not
        # part of the URL.
        url = request.form["url"].strip()

        if not url:
            # Do not run feature extraction on blank input.
            result = "Error processing URL: no URL provided"
        else:
            try:
                # Extract features without blocking the event loop.
                features = await async_extract_features(url)

                # The model expects tabular input: one row for this URL.
                features_df = pd.DataFrame([features])

                model_path = _find_model_path()
                if model_path is None:
                    raise FileNotFoundError("Model file not found in any expected location")

                predictions = predict_with_catboost(features_df, model_path)

                result = "Malicious" if predictions[0] == 1 else "Legitimate"

                # Expose the extracted features to the template only on success.
                url_features = features
            except Exception as e:
                # Top-level boundary: report the failure on the page rather
                # than returning a 500.
                result = f"Error processing URL: {str(e)}"

    return render_template("index.html", result=result, url_features=url_features)

if __name__ == "__main__":
    # Run Flask's built-in server with debug mode off, listening on all
    # interfaces at port 7860.
    app.run(
        debug=False,
        host="0.0.0.0",
        port=7860,
    )