resolverkatla committed on
Commit
a822fa8
·
verified ·
1 Parent(s): b0355e8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -0
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import io
6
+
7
# Load the dataset directly from the file system.
# Make sure 'titanic (1).csv' is uploaded to your Hugging Face Space alongside app.py
try:
    df = pd.read_csv('titanic (1).csv')
except FileNotFoundError:
    gr.Warning("titanic (1).csv not found. Please ensure it's uploaded to your Space.")
    # Fall back to an empty frame with the expected columns so the rest of the
    # module can still run when the CSV is missing.
    df = pd.DataFrame(columns=['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Survived'])


# Preprocessing similar to the Kaggle notebook.

# Fill missing 'Age' with the median. The filled column is assigned back
# rather than calling fillna(inplace=True) on df['Age']: the in-place form is
# a chained assignment on an intermediate object, which pandas deprecates and
# which does not update df under copy-on-write.
df['Age'] = df['Age'].fillna(df['Age'].median())

# Fill missing 'Embarked' with the most frequent value. mode() returns an
# empty Series when the column holds no non-null values (as in the empty
# fallback frame above), so only take its first entry when one exists.
_embarked_mode = df['Embarked'].mode()
if not _embarked_mode.empty:
    df['Embarked'] = df['Embarked'].fillna(_embarked_mode.iloc[0])

# Convert 'Sex' to numerical codes (male -> 0, female -> 1). Values other
# than 'male'/'female' become NaN. Plain column assignment replaces the whole
# column, so it takes the dtype of the mapped values.
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
25
+
26
+
27
def _draw_survival_chart(ax, plot_df, feature):
    """Draw the survival chart for ``feature`` from ``plot_df`` onto ``ax``."""
    if feature in ('Sex', 'Pclass', 'Embarked'):
        sns.barplot(x=feature, y='Survived', data=plot_df, palette='viridis', ax=ax)
        ax.set_title(f'Survival Rate by {feature}')
        ax.set_ylabel('Survival Rate')
        if feature == 'Sex':
            # 'Sex' is stored as 0/1 codes; show readable labels instead.
            ax.set_xticks([0, 1])
            ax.set_xticklabels(['Male', 'Female'])
    elif feature == 'Age':
        # Bin age for better visualization in a bar plot context. Bins are
        # left-closed (right=False), so the last edge is open-ended; a finite
        # last edge would drop passengers whose age equals that edge.
        bins = [0, 12, 18, 35, 60, float('inf')]
        labels = ['Children', 'Teenagers', 'Young Adults', 'Adults', 'Seniors']
        plot_df['AgeGroup'] = pd.cut(plot_df['Age'], bins=bins, labels=labels, right=False)
        sns.barplot(x='AgeGroup', y='Survived', data=plot_df, palette='viridis', ax=ax)
        ax.set_title('Survival Rate by Age Group')
        ax.set_ylabel('Survival Rate')
        ax.set_xlabel('Age Group')
    elif feature == 'Fare':
        # Bin fare for better visualization. The last bin is open-ended for
        # the same reason as the age bins: fares at or above the old upper
        # edge would otherwise fall outside every bin.
        bins = [0, 10, 30, 100, float('inf')]
        labels = ['Low', 'Medium', 'High', 'Very High']
        plot_df['FareGroup'] = pd.cut(plot_df['Fare'], bins=bins, labels=labels, right=False)
        sns.barplot(x='FareGroup', y='Survived', data=plot_df, palette='viridis', ax=ax)
        ax.set_title('Survival Rate by Fare Group')
        ax.set_ylabel('Survival Rate')
        ax.set_xlabel('Fare Group')
    elif plot_df[feature].nunique() < 10:
        # Remaining features (e.g. SibSp, Parch) with fewer than 10 distinct
        # values are treated as categories.
        sns.barplot(x=feature, y='Survived', data=plot_df, palette='viridis', ax=ax)
        ax.set_title(f'Survival Rate by {feature}')
        ax.set_ylabel('Survival Rate')
    else:
        # Otherwise show the numeric distribution split by survival.
        sns.histplot(data=plot_df, x=feature, hue='Survived', kde=True, palette='viridis', ax=ax)
        ax.set_title(f'Survival Distribution by {feature}')
        ax.set_ylabel('Count')
        ax.set_xlabel(feature)

    ax.grid(axis='y', linestyle='--', alpha=0.7)


def plot_survival_by_feature(feature):
    """
    Render a chart of survival against the selected feature.

    'Sex', 'Pclass' and 'Embarked' are drawn as bar plots of 'Survived' per
    category. 'Age' and 'Fare' are binned into labelled groups first. Any
    other column is drawn as a bar plot when it has fewer than 10 distinct
    values, and as a histogram split by 'Survived' otherwise.

    Args:
        feature: Name of a column of the module-level ``df``.

    Returns:
        The rendered figure as an RGBA ``numpy`` array of shape
        (height, width, 4). An array is returned instead of raw PNG bytes
        because the function feeds a ``gr.Image`` output.
        NOTE(review): gr.Image is expected to accept arrays, PIL images and
        file paths but not bytes -- confirm against the installed Gradio.
    """
    # Imported locally because only this function needs them.
    import numpy as np
    from matplotlib.backends.backend_agg import FigureCanvasAgg
    from matplotlib.figure import Figure

    # Work on a copy: the helper adds AgeGroup/FareGroup columns.
    plot_df = df.copy()

    # Build a standalone Figure instead of going through pyplot's global
    # "current figure" state, so calls do not share any figure state and
    # nothing needs to be closed afterwards.
    fig = Figure(figsize=(8, 5))
    canvas = FigureCanvasAgg(fig)
    ax = fig.subplots()

    if plot_df.empty:
        # Nothing to aggregate (e.g. the CSV was missing); show a placeholder
        # rather than handing an empty frame to seaborn.
        ax.text(0.5, 0.5, 'No data available', ha='center', va='center', transform=ax.transAxes)
        ax.set_axis_off()
    else:
        _draw_survival_chart(ax, plot_df, feature)

    fig.tight_layout()
    canvas.draw()
    # Copy so the returned array does not alias the canvas buffer.
    return np.asarray(canvas.buffer_rgba()).copy()
80
+
81
+
82
# Gradio Interface
# `demo` is built at module level so it can be imported; the server is only
# started when this file is executed as a script (see the guard at the end).
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Titanic Survival Explorer
        Explore the factors influencing survival on the Titanic using the provided dataset.
        Select a feature from the dropdown to see its relationship with survival rates.
        """
    )

    with gr.Tab("Dataset Overview"):
        gr.Markdown("### Raw Titanic Dataset")
        # Read-only table of the module-level `df`, with row and column
        # counts fixed to the frame's current size.
        gr.Dataframe(
            value=df,
            headers=list(df.columns),
            # Infer datatype as much as possible, or specify if needed for precision
            # Gradio often does a good job inferring from a DataFrame
            row_count=(len(df), "fixed"),
            col_count=(len(df.columns), "fixed"),
            interactive=False
        )

    with gr.Tab("Survival Analysis by Feature"):
        feature_choice = gr.Dropdown(
            choices=['Sex', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked'],
            label="Select Feature for Analysis",
            value='Sex'  # Default value
        )
        plot_output = gr.Image(type="pil", label="Survival Plot")

        # Redraw the plot whenever the selected feature changes.
        feature_choice.change(plot_survival_by_feature, inputs=feature_choice, outputs=plot_output)
        # Initial plot when the app loads
        demo.load(plot_survival_by_feature, inputs=feature_choice, outputs=plot_output)

    gr.Markdown(
        """
        ---
        *Note: Age and Fare are binned for visualization purposes. Missing Age and Embarked values are imputed.*
        """
    )

# Start the server only when run as a script, so importing this module
# (e.g. from tooling or tests) does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()