import gradio as gr
import joblib
import pandas as pd
import datasets
import json
import numpy as np

# Load the model
pipe = joblib.load("./model.pkl")
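# model.pkl is assumed to hold a fitted scikit-learn estimator or pipeline
# exposing .predict() (config.json below carries the matching "sklearn" metadata).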

title = "Premium Amount Prediction"
description = "This model predicts the Premium Amount. Drag and drop any slice from the dataset or edit values as you wish in the dataframe component below."

# Load and prepare dataset
df = datasets.load_dataset("silvaKenpachi/mental_health")["train"].to_pandas()
df.dropna(axis=0, inplace=True)
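# dropna(axis=0) removes every row with at least one missing value, so the
# example rows shown in the UI are fully populated.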

# Load configuration
with open("./config.json") as f:
    config_dict = json.load(f)
all_headers = config_dict["sklearn"]["columns"]
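# config.json is assumed to look roughly like (hypothetical sketch; only the
# "sklearn" -> "columns" path is read here):
#   {"sklearn": {"columns": ["feature_a", "feature_b", "..."]}}
# i.e. the feature columns in the order the model was trained on.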

# Filter headers to only include those present in the dataset
headers = [col for col in all_headers if col in df.columns]

# Earlier interface definitions, kept for reference:
# inputs = [gr.Dataframe(headers=headers, row_count=(2, "dynamic"), col_count=(len(headers), "fixed"), label="Input Data", interactive=True)]

# Working version that returned only 2 rows in the output
# inputs = [gr.Dataframe(headers=all_headers, row_count=(2, "dynamic"), col_count=(len(all_headers), "fixed"), label="Input Data", interactive=True)]
# outputs = [gr.Dataframe(row_count=(2, "dynamic"), col_count=(1, "fixed"), label="Predictions", headers=["Depression"])]

# Define input and output interfaces with dynamic row counts
inputs = [gr.Dataframe(
    headers=headers,
    row_count=(10, "dynamic"),  # 10 initial rows; "dynamic" lets users add more rows
    col_count=(len(headers), "fixed"),
    label="Input Data",
    interactive=True,
    max_rows=100000
)]

outputs = [gr.Dataframe(
    row_count=(10, "dynamic"),  # 10 initial rows; "dynamic" lets the row count grow with the predictions
    col_count=(1, "fixed"),
    label="Predictions",
    headers=["Depression"],
    max_rows=100000
)]


#def infer(inputs):
    #data = pd.DataFrame(inputs, columns=headers)
    #predictions = pipe.predict(data)
    #return pd.DataFrame(predictions, columns=["Depression"])

# Earlier version that added any missing columns with a default value of 0
#def infer(inputs):
    #data = pd.DataFrame(inputs, columns=headers)
    # Add missing columns with default values (e.g., 0)
    #for col in all_headers:
        #if col not in data.columns:
            #data[col] = 0
    # Ensure the order of columns matches the training data
    #data = data[all_headers]
    #predictions = pipe.predict(data)
    #return pd.DataFrame(predictions, columns=["Depression"])


def infer(inputs):
    data = pd.DataFrame(inputs, columns=headers)
    
    # Replace empty strings with NaN
    data = data.replace('', np.nan)
    
    # Add missing columns with default values (e.g., 0)
    for col in all_headers:
        if col not in data.columns:
            data[col] = 0
    
    # Ensure the order of columns matches the training data
    data = data[all_headers]
    
    # Fill NaN values with default values (e.g., 0)
    data = data.fillna(0)
    
    # Convert all data to float
    data = data.astype(float)
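    # Note: this cast assumes every training column is numeric; string or
    # categorical inputs would need encoding before this step.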
    
    predictions = pipe.predict(data)
    return pd.DataFrame(predictions, columns=["Depression"])
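
# Optional local smoke test (a sketch, left commented out so the Space's launch
# behaviour is unchanged): run the example rows through infer() directly.
# print(infer(df[headers].head(3).values.tolist()))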



gr.Interface(
    fn=infer,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=[df[headers].head(3).values.tolist()],
    cache_examples=False
).launch(debug=True)