Spaces:

resolverkatla
/

Titanic_Survival

Sleeping

App Files Files Community

resolverkatla committed on May 26

Commit

a822fa8

verified ·

1 Parent(s): b0355e8

Create app.py

Browse files

Files changed (1) hide show

app.py +123 -0

app.py ADDED Viewed

	@@ -0,0 +1,123 @@

+import gradio as gr
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import io
+# Load the Titanic dataset directly from the file system.
+# Make sure 'titanic (1).csv' is uploaded to your Hugging Face Space alongside app.py
+try:
+    df = pd.read_csv('titanic (1).csv')
+except FileNotFoundError:
+    # This runs while the module is imported, before any UI event exists, so the
+    # problem is reported on stdout (the Space log) rather than through gr.Warning,
+    # which is meant to be raised from inside event handlers.
+    print("titanic (1).csv not found. Please ensure it's uploaded to your Space.")
+    # Fall back to an empty frame with the expected columns so the UI can still be built.
+    df = pd.DataFrame(columns=['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Survived'])
+# Preprocessing similar to the Kaggle notebook
+# Fill missing 'Age' with the median. The column is reassigned instead of calling
+# df['Age'].fillna(..., inplace=True): that chained in-place form is deprecated in
+# pandas 2.x and does not modify df once copy-on-write is enabled.
+df['Age'] = df['Age'].fillna(df['Age'].median())
+# Fill missing 'Embarked' with the most frequent value. mode() returns an empty
+# Series when the column holds no values (e.g. the fallback frame above), so the
+# imputation is skipped in that case instead of failing on the first-element lookup.
+embarked_modes = df['Embarked'].mode()
+if not embarked_modes.empty:
+    df['Embarked'] = df['Embarked'].fillna(embarked_modes.iloc[0])
+# Encode 'Sex' numerically (male -> 0, female -> 1). Plain column assignment is
+# used because df is not a slice of another frame, so .loc adds nothing here.
+df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
+def plot_survival_by_feature(feature):
+    """
+    Render a chart relating the selected feature to survival.
+    'Sex', 'Pclass' and 'Embarked' are plotted as-is, 'Age' and 'Fare' are binned
+    into labelled groups first, and any other column is drawn as a bar plot when it
+    has fewer than 10 distinct values or as a histogram split by 'Survived' otherwise.
+    Args:
+        feature: Name of a column of the module-level DataFrame ``df``.
+    Returns:
+        The rendered chart as a uint8 image array decoded from the saved PNG,
+        a value type that a ``gr.Image`` output accepts (raw PNG bytes are not).
+    """
+    # Work on a copy so the helper bin columns never leak into the shared DataFrame.
+    plot_df = df.copy()
+    # Draw on an explicit figure/axes rather than pyplot's implicit current figure,
+    # so this callback never depends on (or disturbs) global pyplot state.
+    fig, ax = plt.subplots(figsize=(8, 5))
+    try:
+        if plot_df.empty:
+            # Nothing to aggregate (e.g. the CSV was missing): show a placeholder.
+            ax.text(0.5, 0.5, 'No data available', ha='center', va='center', transform=ax.transAxes)
+            ax.set_axis_off()
+        elif feature in ['Sex', 'Pclass', 'Embarked']:
+            # Pin the category order for 'Sex' so tick 0 is always male and tick 1
+            # is always female, whatever order the values appear in the data.
+            order = [0, 1] if feature == 'Sex' else None
+            sns.barplot(x=feature, y='Survived', data=plot_df, palette='viridis', order=order, ax=ax)
+            ax.set_title(f'Survival Rate by {feature}')
+            ax.set_ylabel('Survival Rate')
+            if feature == 'Sex':
+                ax.set_xticks([0, 1])
+                ax.set_xticklabels(['Male', 'Female'])
+        elif feature == 'Age':
+            # Bin age for better visualization in a bar plot context. The last bin
+            # is open-ended: with right=False a fixed upper edge of 80 would drop
+            # passengers aged exactly 80 (or older) from the chart.
+            bins = [0, 12, 18, 35, 60, float('inf')]
+            labels = ['Children', 'Teenagers', 'Young Adults', 'Adults', 'Seniors']
+            plot_df['AgeGroup'] = pd.cut(plot_df['Age'], bins=bins, labels=labels, right=False)
+            sns.barplot(x='AgeGroup', y='Survived', data=plot_df, palette='viridis', ax=ax)
+            ax.set_title('Survival Rate by Age Group')
+            ax.set_ylabel('Survival Rate')
+            ax.set_xlabel('Age Group')
+        elif feature == 'Fare':
+            # Bin fare for better visualization. The last bin is open-ended so
+            # fares of 500 and above are kept instead of being silently excluded.
+            bins = [0, 10, 30, 100, float('inf')]
+            labels = ['Low', 'Medium', 'High', 'Very High']
+            plot_df['FareGroup'] = pd.cut(plot_df['Fare'], bins=bins, labels=labels, right=False)
+            sns.barplot(x='FareGroup', y='Survived', data=plot_df, palette='viridis', ax=ax)
+            ax.set_title('Survival Rate by Fare Group')
+            ax.set_ylabel('Survival Rate')
+            ax.set_xlabel('Fare Group')
+        elif plot_df[feature].nunique() < 10:
+            # Few distinct values (e.g. SibSp, Parch): treat them as categories.
+            sns.barplot(x=feature, y='Survived', data=plot_df, palette='viridis', ax=ax)
+            ax.set_title(f'Survival Rate by {feature}')
+            ax.set_ylabel('Survival Rate')
+        else:
+            # Many distinct values: show the distribution split by survival instead.
+            sns.histplot(data=plot_df, x=feature, hue='Survived', kde=True, palette='viridis', ax=ax)
+            ax.set_title(f'Survival Distribution by {feature}')
+            ax.set_ylabel('Count')
+            ax.set_xlabel(feature)
+        ax.grid(axis='y', linestyle='--', alpha=0.7)
+        fig.tight_layout()
+        # Render the figure to an in-memory PNG.
+        buf = io.BytesIO()
+        fig.savefig(buf, format='png')
+    finally:
+        # Always release the figure, even when plotting raised.
+        plt.close(fig)
+    buf.seek(0)
+    # Decode the PNG back into pixels; imread yields floats in [0, 1] for PNG data,
+    # which are scaled to the conventional 0-255 uint8 range.
+    pixels = plt.imread(buf, format='png')
+    return (pixels * 255).round().astype('uint8')
+# Gradio Interface
+# Values shared by the layout below, computed once up front.
+_FEATURE_OPTIONS = ['Sex', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
+_row_total, _column_total = df.shape
+with gr.Blocks() as demo:
+    # Page header shown above both tabs.
+    gr.Markdown(
+        """
+        # Titanic Survival Explorer
+        Explore the factors influencing survival on the Titanic using the provided dataset.
+        Select a feature from the dropdown to see its relationship with survival rates.
+        """
+    )
+    # Tab 1: read-only table of the module-level DataFrame, with fixed dimensions.
+    with gr.Tab("Dataset Overview"):
+        gr.Markdown("### Raw Titanic Dataset")
+        gr.Dataframe(
+            value=df,
+            headers=df.columns.tolist(),
+            row_count=(_row_total, "fixed"),
+            col_count=(_column_total, "fixed"),
+            interactive=False,
+        )
+    # Tab 2: feature selector wired to the plotting callback.
+    with gr.Tab("Survival Analysis by Feature"):
+        feature_choice = gr.Dropdown(
+            choices=_FEATURE_OPTIONS,
+            label="Select Feature for Analysis",
+            value='Sex',
+        )
+        plot_output = gr.Image(type="pil", label="Survival Plot")
+        # Redraw whenever the selection changes, and once when the page first loads.
+        for _register in (feature_choice.change, demo.load):
+            _register(plot_survival_by_feature, inputs=feature_choice, outputs=plot_output)
+    # Footer note about how the data shown in the charts was prepared.
+    gr.Markdown(
+        """
+        ---
+        *Note: Age and Fare are binned for visualization purposes. Missing Age and Embarked values are imputed.*
+        """
+    )
+demo.launch()