Tzetha's picture
Update app.py
af75592 verified
import streamlit as st
import pandas as pd
import numpy as np
import pickle
import os
import subprocess
# Ensure scikit-learn is installed
try:
    import sklearn
except ImportError:
    import importlib
    import sys

    # Run pip through the interpreter that is executing this script so the
    # package is installed into the same environment; a bare "pip" on PATH
    # may belong to a different Python installation.
    # check=True surfaces a failed install immediately instead of letting it
    # show up as a second, less informative ImportError below.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "scikit-learn", "--quiet"],
        check=True,
    )
    # Packages installed after interpreter start-up may be missed by the
    # import system's cached directory listings; refresh them before retrying.
    importlib.invalidate_caches()
    import sklearn
from sklearn.linear_model import LinearRegression # Now it should work
from sklearn.preprocessing import LabelEncoder
# Load dataset
@st.cache_data
def load_data(path: str = "car_data.csv") -> pd.DataFrame:
    """Read the car dataset from a CSV file.

    The function is wrapped in ``st.cache_data`` so Streamlit can reuse the
    parsed frame instead of re-reading the file on every script rerun.

    Args:
        path: Location of the CSV file. Defaults to ``"car_data.csv"``, the
            file the app originally hard-coded; pass another path to use a
            different dataset.

    Returns:
        The parsed CSV contents as a DataFrame.
    """
    return pd.read_csv(path)
def preprocess_data(df):
    """Label-encode the categorical columns of the car dataset.

    The columns ``brand``, ``model`` and ``fuel_type`` are each replaced by
    the integer codes produced by a dedicated ``LabelEncoder``.

    Args:
        df: DataFrame containing at least the three categorical columns.
            It is not modified; encoding is applied to a copy.

    Returns:
        A ``(encoded_df, label_encoders)`` tuple where ``encoded_df`` is the
        encoded copy and ``label_encoders`` maps each column name to the
        fitted encoder for that column.

    Raises:
        KeyError: If any of the categorical columns is absent from ``df``.
    """
    categorical_columns = ("brand", "model", "fuel_type")

    # Validate up front so a missing column is reported before any work is
    # done, rather than after some columns have already been encoded.
    missing = [name for name in categorical_columns if name not in df.columns]
    if missing:
        raise KeyError(f"Missing categorical column(s): {missing}")

    # Work on a copy so the caller's frame keeps its original string values.
    encoded = df.copy()
    label_encoders = {}
    for column in categorical_columns:  # Encode categorical features
        encoder = LabelEncoder()
        encoded[column] = encoder.fit_transform(encoded[column])
        label_encoders[column] = encoder
    return encoded, label_encoders
# Train the model (Manual Data Split)
def train_model(df):
    """Fit a linear regression that predicts ``price`` from the car features.

    The features are ``year``, ``mileage``, ``brand``, ``model`` and
    ``fuel_type`` (the last three are expected to be numerically encoded
    already). Only the first 80% of the rows, in their existing order, are
    used for fitting; the remaining rows are left out and not used here.

    Args:
        df: DataFrame holding the feature columns and a ``price`` column.

    Returns:
        The fitted ``LinearRegression`` model.

    Raises:
        ValueError: If ``df`` has no rows.
    """
    feature_columns = ["year", "mileage", "brand", "model", "fuel_type"]
    X = df[feature_columns].values
    y = df["price"].values

    if len(X) == 0:
        raise ValueError("Cannot train the model on an empty dataset")

    # Manual 80-20 split: keep the leading 80% of rows for training.
    # max(1, ...) guards very small datasets, where int(0.8 * n) would be 0
    # and the model would otherwise be fitted on zero rows.
    split_index = max(1, int(0.8 * len(X)))

    model = LinearRegression()
    model.fit(X[:split_index], y[:split_index])
    return model
# Load the data, encode the categorical columns and fit the model.
df = load_data()
df, label_encoders = preprocess_data(df)
model = train_model(df)

# Streamlit UI
st.title("Car Price Prediction App")
st.sidebar.header("Enter Car Details")

# The slider's initial value must lie inside the dataset's year range, so
# clamp the preferred default of 2015 to [year_min, year_max].
year_min = int(df["year"].min())
year_max = int(df["year"].max())
default_year = min(max(2015, year_min), year_max)
year = st.sidebar.slider("Year", year_min, year_max, default_year)

mileage = st.sidebar.number_input("Mileage (in km)", min_value=0, max_value=500000, value=50000)

brand = st.sidebar.selectbox("Brand", label_encoders["brand"].classes_)
# Encode the chosen brand once; it is used both to filter the model list and
# as a feature for the prediction.
brand_encoded = label_encoders["brand"].transform([brand])[0]

# Offer only the models that occur together with the selected brand, shown
# under their original (decoded) names.
filtered_models = df[df["brand"] == brand_encoded]["model"].unique()
model_name = st.sidebar.selectbox("Model", label_encoders["model"].inverse_transform(filtered_models))

fuel_type = st.sidebar.selectbox("Fuel Type", label_encoders["fuel_type"].classes_)

if st.sidebar.button("Predict Price"):
    model_encoded = label_encoders["model"].transform([model_name])[0]
    fuel_encoded = label_encoders["fuel_type"].transform([fuel_type])[0]

    # Feature order must match the column order used when fitting the model.
    input_data = np.array([[year, mileage, brand_encoded, model_encoded, fuel_encoded]])
    predicted_price = model.predict(input_data)[0]
    st.write(f"### Predicted Car Price: ${predicted_price:,.2f}")