"""Streamlit app that fits a linear regression on car listings and predicts a price.

The script reads ``car_data.csv``, label-encodes the categorical columns,
fits a ``LinearRegression`` on the leading 80% of the rows and exposes a
sidebar form whose values are fed to the fitted model.
"""

import importlib
import os
import pickle
import subprocess
import sys

import numpy as np
import pandas as pd
import streamlit as st

# Ensure scikit-learn is installed
try:
    import sklearn
except ImportError:
    # Install with the interpreter that is running this app so the package
    # lands in the same environment, and fail loudly if the install fails
    # rather than hitting a second, less informative ImportError below.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "scikit-learn", "--quiet"],
        check=True,
    )
    # Packages installed after interpreter start-up may be invisible to the
    # import system until its finder caches are invalidated.
    importlib.invalidate_caches()
    import sklearn

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

# Columns that hold categorical values and must be label-encoded.
CATEGORICAL_COLUMNS = ["brand", "model", "fuel_type"]
# Feature order expected by the model at both fit and predict time.
FEATURE_COLUMNS = ["year", "mileage", "brand", "model", "fuel_type"]
# Fraction of rows (taken from the top of the dataset) used for fitting.
TRAIN_FRACTION = 0.8
# Preferred initial position of the "Year" slider.
DEFAULT_YEAR = 2015


# Load dataset
@st.cache_data
def load_data():
    """Read ``car_data.csv`` into a DataFrame (result cached by Streamlit).

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    df = pd.read_csv("car_data.csv")  # Replace with your dataset
    return df


def preprocess_data(df):
    """Label-encode the categorical columns of *df*.

    The input frame is left untouched; encoding is applied to a copy.

    Args:
        df: DataFrame containing the "brand", "model" and "fuel_type" columns.

    Returns:
        A tuple ``(encoded_df, label_encoders)`` where ``label_encoders`` maps
        each categorical column name to the ``LabelEncoder`` fitted on it.
    """
    encoded = df.copy()
    label_encoders = {}
    for column in CATEGORICAL_COLUMNS:
        encoder = LabelEncoder()
        encoded[column] = encoder.fit_transform(encoded[column])
        label_encoders[column] = encoder
    return encoded, label_encoders


# Train the model (Manual Data Split)
def train_model(df):
    """Fit a ``LinearRegression`` on the first 80% of the rows of *df*.

    Args:
        df: Encoded DataFrame with the feature columns and a "price" column.

    Returns:
        The fitted ``LinearRegression`` instance.

    Raises:
        ValueError: If the 80% training slice contains no rows.
    """
    X = df[FEATURE_COLUMNS].values
    y = df["price"].values

    # Manual 80-20 split: only the leading 80% is used for fitting; the
    # trailing 20% is held out and not used by this function.
    split_index = int(TRAIN_FRACTION * len(X))
    if split_index == 0:
        raise ValueError("Not enough rows in the dataset to train the model.")

    model = LinearRegression()
    model.fit(X[:split_index], y[:split_index])
    return model


@st.cache_resource
def _load_artifacts():
    """Load, encode and fit once instead of on every script rerun.

    Streamlit re-executes the whole script on each widget interaction, so the
    encoded frame, the encoders and the fitted model are cached together.
    """
    encoded_df, encoders = preprocess_data(load_data())
    return encoded_df, encoders, train_model(encoded_df)


df, label_encoders, model = _load_artifacts()

# Streamlit UI
st.title("Car Price Prediction App")

st.sidebar.header("Enter Car Details")

year_min = int(df["year"].min())
year_max = int(df["year"].max())
# Keep the slider's initial value inside the dataset's year range; a default
# outside [min, max] is rejected by the slider widget.
default_year = min(max(DEFAULT_YEAR, year_min), year_max)
year = st.sidebar.slider("Year", year_min, year_max, default_year)

mileage = st.sidebar.number_input(
    "Mileage (in km)", min_value=0, max_value=500000, value=50000
)

brand = st.sidebar.selectbox("Brand", label_encoders["brand"].classes_)
brand_encoded = label_encoders["brand"].transform([brand])[0]

# Offer only the models that occur together with the chosen brand.
filtered_models = df.loc[df["brand"] == brand_encoded, "model"].unique()
model_name = st.sidebar.selectbox(
    "Model", label_encoders["model"].inverse_transform(filtered_models)
)

fuel_type = st.sidebar.selectbox("Fuel Type", label_encoders["fuel_type"].classes_)

if st.sidebar.button("Predict Price"):
    model_encoded = label_encoders["model"].transform([model_name])[0]
    fuel_encoded = label_encoders["fuel_type"].transform([fuel_type])[0]

    # Column order must match FEATURE_COLUMNS used when fitting.
    input_data = np.array(
        [[year, mileage, brand_encoded, model_encoded, fuel_encoded]]
    )
    predicted_price = model.predict(input_data)[0]

    st.write(f"### Predicted Car Price: ${predicted_price:,.2f}")