File size: 4,874 Bytes
f3352b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f56e7e
f3352b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import asyncio
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from flask import Flask, request, render_template
from catboost import CatBoostClassifier  # Import CatBoost
from url_process import extract_url_features  # Ensure you have the appropriate feature extraction function
import os
# Batch processing: hand the URL collection back in fixed-size slices
def process_urls_in_batches(urls, batch_size=10):
    """Lazily yield consecutive slices of ``urls`` of at most ``batch_size`` items.

    ``urls`` must support ``len()`` and slicing. The last slice is shorter
    when the length is not an exact multiple of ``batch_size``; an empty
    input yields nothing.
    """
    offsets = range(0, len(urls), batch_size)
    yield from (urls[start:start + batch_size] for start in offsets)

# Awaitable wrapper that keeps feature extraction off the event loop thread
async def async_extract_features(url):
    """Run ``extract_url_features(url)`` in a worker thread and return its result.

    Any exception raised by ``extract_url_features`` propagates to the
    awaiting caller.
    """
    return await asyncio.to_thread(extract_url_features, url)

# Thread-pool fan-out for extracting features from many URLs at once
def extract_features_in_parallel(urls, *, max_workers=5):
    """Extract features for every URL in ``urls`` using a pool of worker threads.

    Args:
        urls: Iterable of URLs; each one is passed to ``extract_url_features``.
        max_workers: Maximum number of worker threads. Defaults to 5, the
            value that was previously hard-coded.

    Returns:
        A list with one extraction result per URL, in the same order as
        ``urls`` (empty when ``urls`` is empty).

    Raises:
        Whatever ``extract_url_features`` raises; the exception surfaces
        while the results are being collected.
    """
    # Threads only overlap work while the GIL is released (blocking I/O or
    # native code), so this helps when extraction is dominated by waiting.
    # NOTE(review): presumably extraction is network-bound — confirm.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(extract_url_features, urls))

# Loaded CatBoost models, keyed by the path they were read from, so the model
# file is deserialized once per process instead of once per prediction.
_CATBOOST_MODELS = {}


def _get_catboost_model(model_path):
    """Return the model stored at ``model_path``, loading it on first use only."""
    model = _CATBOOST_MODELS.get(model_path)
    if model is None:
        model = CatBoostClassifier()
        model.load_model(model_path)
        # Cache only after a successful load so a failed load is retried
        # on the next call rather than leaving a broken entry behind.
        _CATBOOST_MODELS[model_path] = model
    return model


# Run CatBoost inference on a frame of extracted features
def predict_with_catboost(features_df, model_path):
    """Predict labels for ``features_df`` with the CatBoost model at ``model_path``.

    Args:
        features_df: Feature rows to classify, passed unchanged to
            ``CatBoostClassifier.predict``.
        model_path: Location of the serialized CatBoost model file.

    Returns:
        The value returned by ``CatBoostClassifier.predict`` for
        ``features_df``.

    Raises:
        Whatever ``load_model`` or ``predict`` raises (for example when the
        model file is missing or the features do not match the model).

    Note:
        The model is cached per ``model_path`` for the lifetime of the
        process, so replacing the file on disk requires a restart to take
        effect.
        NOTE(review): the cached instance is shared by all callers — confirm
        concurrent ``predict`` calls are acceptable for this deployment.
    """
    return _get_catboost_model(model_path).predict(features_df)

# Flask App Setup: module-level application object; the route decorator and
# the __main__ entry point further down both operate on this instance.
app = Flask(__name__)

@app.route("/", methods=["GET", "POST"])
async def index():
    """Serve the URL form and, on POST, classify the submitted URL.

    A non-POST request renders ``index.html`` with no result. A POST reads
    the ``url`` form field, extracts its features, runs the CatBoost model
    found at ``catboost_model.bin`` in the current working directory, and
    renders the verdict together with the extracted features. Any failure
    during extraction or prediction is shown to the user as an error
    message instead of a verdict.
    """
    if request.method != "POST":
        # Nothing submitted yet: show the empty form.
        return render_template("index.html", result=None, url_features=None)

    submitted_url = request.form["url"]
    verdict = None
    shown_features = None

    try:
        # Feature extraction runs in a worker thread via the async helper.
        extracted = await async_extract_features(submitted_url)

        # The model expects tabular input, so wrap the single record in a frame.
        frame = pd.DataFrame([extracted])

        # model_path = "F:\\pyro guard\\model\\catboost_model.bin"  # Specify your CatBoost model path here
        weights_file = os.path.join(os.getcwd(), "catboost_model.bin")
        labels = predict_with_catboost(frame, weights_file)

        # Label 1 is reported as malicious, anything else as legitimate.
        verdict = "Malicious" if labels[0] == 1 else "Legitimate"

        # Expose the extracted features to the template as well.
        shown_features = extracted
    except Exception as exc:
        verdict = f"Error processing URL: {exc!s}"

    return render_template("index.html", result=verdict, url_features=shown_features)

# Script entry point: start Flask's built-in server only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    # Debug mode is off; the server listens on all interfaces, port 7860.
    app.run(debug=False,host="0.0.0.0",port=7860)

'''
import asyncio
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from flask import Flask, request, render_template
from catboost import CatBoostClassifier
from url_process import extract_url_features, predict_urls  # Import necessary functions

# Flask App Setup
app = Flask(__name__)

# Batch Processing: Ensures URLs are processed in manageable chunks
def process_urls_in_batches(urls, batch_size=10):
    for i in range(0, len(urls), batch_size):
        yield urls[i:i + batch_size]

# Async function for non-blocking DNS lookups and HTTP requests
async def async_extract_features(url):
    features = await asyncio.to_thread(extract_url_features, url)
    return features

# ThreadPoolExecutor for CPU-bound tasks like feature extraction
def extract_features_in_parallel(urls):
    with ThreadPoolExecutor(max_workers=5) as executor:
        return list(executor.map(extract_url_features, urls))

# Load the CatBoost model for inference
def predict_with_catboost(features_df, model_path):
    model = CatBoostClassifier()
    model.load_model(model_path)
    predictions = model.predict(features_df)
    return predictions

@app.route("/", methods=["GET", "POST"])
async def index():
    result = None
    url_features = None

    if request.method == "POST":
        # Get the URL input from the form
        url = request.form["url"]

        try:
            # Asynchronously process the URL features
            features = await async_extract_features(url)

            # Convert the features to a DataFrame for further processing
            features_df = pd.DataFrame([features])

            # Perform prediction using the CatBoost model
            model_path = "F:\\pyro guard\\model\\catboost_model.bin"  # Specify your CatBoost model path
            predictions = predict_with_catboost(features_df, model_path)

            # Determine if the URL is malicious or legitimate
            if predictions[0] == 1:
                result = "Malicious"
            else:
                result = "Legitimate"

        except Exception as e:
            result = f"Error processing URL: {str(e)}"

    return render_template("index.html", result=result, url_features=url_features)

if __name__ == "__main__":
    app.run(debug=True)

'''