Spaces:

resolverkatla
/

Titanic_Survival

Sleeping

App Files Files Community

resolverkatla commited on May 26

Commit

a2466a9

verified ·

1 Parent(s): 4adc3b1

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -140

app.py CHANGED Viewed

@@ -1,149 +1,47 @@
-import gradio as gr
 import pandas as pd
-from sklearn.model_selection import train_test_split
 from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import accuracy_score
-import io # Keep io, though not strictly used in this version, it's harmless.
-# --- Data Loading and Preprocessing ---
-# Load the dataset named 'titanic.csv'
-# Make sure 'titanic.csv' is uploaded to your Hugging Face Space or is in the same directory
-try:
-    df = pd.read_csv('titanic.csv')
-except FileNotFoundError:
-    raise FileNotFoundError("titanic.csv not found. Please ensure it's downloaded and named 'titanic.csv', then uploaded to your Hugging Face Space.")
-# Drop irrelevant columns and 'PassengerId' which is not a feature
-# These columns are typically present in a standard Titanic dataset.
-df = df.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
-# Handle missing 'Age' with median imputation
-df['Age'].fillna(df['Age'].median(), inplace=True)
-# Handle missing 'Fare' with median imputation (Fare can also have missing values sometimes)
-df['Fare'].fillna(df['Fare'].median(), inplace=True)
-# Handle missing 'Embarked' with mode imputation
-df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)
-# Convert categorical features to numerical using one-hot encoding
-# We drop 'Embarked_C' to avoid multicollinearity (as per common practice)
-df = pd.get_dummies(df, columns=['Sex', 'Embarked'], drop_first=True)
-# Define features (X) and target (y)
-X = df.drop('Survived', axis=1)
-y = df['Survived']
-# Split data into training and testing sets
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-# Train a RandomForestClassifier model
 model = RandomForestClassifier(n_estimators=100, random_state=42)
-model.fit(X_train, y_train)
-# --- Model Evaluation (for display in app) ---
-y_pred = model.predict(X_test)
-accuracy = accuracy_score(y_test, y_pred)
-accuracy_message = f"Model Accuracy on Test Set: {accuracy:.2f}"
-# --- Prediction Function for Gradio ---
-def predict_survival(pclass, sex, age, sibsp, parch, fare, embarked):
-    # Create a dictionary for the input values
-    input_dict = {
-        'Pclass': pclass,
-        'Age': age,
-        'SibSp': sibsp,
-        'Parch': parch,
-        'Fare': fare,
-        # These match the one-hot encoded columns created during training
-        'Sex_male': 1 if sex == 'male' else 0,
-        'Embarked_Q': 1 if embarked == 'Q' else 0, # Assuming 'Q' is 'Embarked_Q'
-        'Embarked_S': 1 if embarked == 'S' else 0  # Assuming 'S' is 'Embarked_S'
-    }
-    # Create a DataFrame from the input values
-    input_data = pd.DataFrame([input_dict])
-    # Ensure all columns expected by the model are present in the input_data, even if 0
-    # This handles cases where a category might not be present in a single input but was in training
-    for col in X.columns:
-        if col not in input_data.columns:
-            input_data[col] = 0
-    # Reorder columns to match the training data's column order
-    input_data = input_data[X.columns]
-    # Make prediction
     prediction = model.predict(input_data)[0]
-    prediction_proba = model.predict_proba(input_data)[0]
-    if prediction == 1:
-        return f"Prediction: Survived ({prediction_proba[1]:.2%} confidence)", "green"
-    else:
-        return f"Prediction: Did Not Survive ({prediction_proba[0]:.2%} confidence)", "red"
-# --- Gradio Interface ---
-# CSS to style the output textbox background
-with gr.Blocks(css=".green {background-color: #e6ffe6 !important;}.red {background-color: #ffe6e6 !important;}") as demo:
-    gr.Markdown(
-        """
-        # Titanic Survival Predictor
-        Enter passenger details to predict their survival on the Titanic.
-        """
-    )
-    gr.Markdown(f"### Model Performance: {accuracy_message}")
-    with gr.Row():
-        pclass_input = gr.Radio(choices=[1, 2, 3], label="Pclass", value=3)
-        sex_input = gr.Radio(choices=['male', 'female'], label="Sex", value='male')
-        age_input = gr.Slider(minimum=0.5, maximum=80, value=30, label="Age", step=0.5)
-    with gr.Row():
-        sibsp_input = gr.Number(label="SibSp (Siblings/Spouses Aboard)", value=0)
-        parch_input = gr.Number(label="Parch (Parents/Children Aboard)", value=0)
-        fare_input = gr.Number(label="Fare", value=30.0)
-    with gr.Row():
-        embarked_input = gr.Radio(choices=['C', 'Q', 'S'], label="Embarked (Port of Embarkation)", value='S')
-    predict_btn = gr.Button("Predict Survival")
-    output_text = gr.Textbox(label="Survival Prediction", interactive=False)
-    # This label is used internally to get the color, its content is not directly shown
-    output_color_indicator = gr.Label(visible=False)
-    # Function to update the textbox styling based on prediction
-    def update_output_style(text, color):
-        if color == "green":
-            return gr.Textbox(value=text, label="Survival Prediction", interactive=False, elem_classes="green")
-        elif color == "red":
-            return gr.Textbox(value=text, label="Survival Prediction", interactive=False, elem_classes="red")
-        else:
-            return gr.Textbox(value=text, label="Survival Prediction", interactive=False)
-    predict_btn.click(
-        fn=predict_survival,
-        inputs=[pclass_input, sex_input, age_input, sibsp_input, parch_input, fare_input, embarked_input],
-        outputs=[output_text, output_color_indicator]
-    ).then(
-        fn=update_output_style,
-        inputs=[output_text, output_color_indicator],
-        outputs=output_text
-    )
-    gr.Markdown(
-        """
-        ---
-        **Feature Definitions:**
-        * **Pclass:** Passenger Class (1 = 1st, 2 = 2nd, 3 = 3rd)
-        * **Sex:** Sex (male/female)
-        * **Age:** Age in years
-        * **SibSp:** Number of siblings/spouses aboard the Titanic
-        * **Parch:** Number of parents/children aboard the Titanic
-        * **Fare:** Passenger fare
-        * **Embarked:** Port of Embarkation (C = Cherbourg, Q = Queenstown, S = Southampton)
-        *Note: This app expects a `titanic.csv` file. Missing 'Age', 'Fare', and 'Embarked' values are imputed. Categorical features are one-hot encoded.*
-        """
-    )
-demo.launch()

 import pandas as pd
+import numpy as np
+import gradio as gr
 from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+# Load dataset
+df = pd.read_csv("titanic.csv")
+df = df[["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Survived"]].dropna()
+# Encode 'Sex'
+df["Sex"] = LabelEncoder().fit_transform(df["Sex"])  # male=1, female=0
+# Features & target
+X = df.drop("Survived", axis=1)
+y = df["Survived"]
+# Train model
 model = RandomForestClassifier(n_estimators=100, random_state=42)
+model.fit(X, y)
+# Prediction function
+def predict_survival(pclass, sex, age, sibsp, parch, fare):
+    sex_encoded = 1 if sex == "male" else 0
+    input_data = np.array([[pclass, sex_encoded, age, sibsp, parch, fare]])
     prediction = model.predict(input_data)[0]
+    return "✅ Survived" if prediction == 1 else "❌ Did not survive"
+# Gradio interface
+iface = gr.Interface(
+    fn=predict_survival,
+    inputs=[
+        gr.Dropdown([1, 2, 3], label="Passenger Class"),
+        gr.Radio(["male", "female"], label="Sex"),
+        gr.Slider(0, 80, step=1, label="Age"),
+        gr.Slider(0, 10, step=1, label="Siblings/Spouses Aboard"),
+        gr.Slider(0, 10, step=1, label="Parents/Children Aboard"),
+        gr.Slider(0, 500, step=1, label="Fare"),
+    ],
+    outputs="text",
+    title="🚢 Titanic Survival Predictor",
+    description="Enter passenger details to predict their survival on the Titanic."
+)
+if __name__ == "__main__":
+    iface.launch()