Spaces:

Teery
/

streamiltLogReg

Sleeping

File size: 3,027 Bytes

06145ba

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
from sklearn.preprocessing import StandardScaler


class LogisticRegression():
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to every gradient update.
    n_inputs : int, default 500
        Number of gradient-descent iterations run by ``fit``. Despite its
        name this is an iteration count, not a number of input features;
        the name is kept so existing callers keep working.

    Attributes
    ----------
    coef_ : numpy.ndarray of shape (n_features,) or None
        Learned feature weights; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Learned bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, n_inputs=500):
        self.learning_rate = learning_rate
        self.n_inputs = n_inputs
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X, y):
        """Estimate ``coef_`` and ``intercept_`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric feature matrix (list of rows, ndarray or DataFrame).
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Target values; flattened to one dimension before use.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, contains no samples, or its
            number of rows differs from the length of ``y``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column vector or a list does not broadcast the
        # residual into an (n_samples, n_samples) matrix.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional, got {X.ndim} dimension(s)"
            )
        n_samp, n_feat = X.shape
        if n_samp == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samp:
            raise ValueError(
                f"X has {n_samp} samples but y has {y.shape[0]}"
            )

        self.coef_ = np.zeros(n_feat)
        self.intercept_ = 0.0
        for _ in range(self.n_inputs):
            lin_pred = X @ self.coef_ + self.intercept_
            # sigmoid(z) == exp(-log(1 + exp(-z))); logaddexp keeps this
            # finite for large |z| instead of overflowing inside exp.
            predictions = np.exp(-np.logaddexp(0.0, -lin_pred))

            di = predictions - y

            # X.T @ di / n equals the column-wise mean of X * di[:, None]
            # without materialising the (n_samples, n_features) product.
            self.coef_ -= self.learning_rate * (X.T @ di) / n_samp
            self.intercept_ -= self.learning_rate * di.mean()

# Chart-type labels shown in the UI; also used to dispatch the drawing code.
_SCATTER = 'Диаграмма рассеяния'
_BAR = 'Столбчатая диаграмма'
_LINE = 'Линейная диаграмма'


def _draw_chart(data, x_col, y_col, kind):
    """Build a matplotlib figure of ``y_col`` against ``x_col``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding both columns.
    x_col, y_col : str
        Column names plotted on the horizontal / vertical axis.
    kind : str
        One of the chart-type labels offered by the chart selectbox.

    Returns
    -------
    matplotlib.figure.Figure
        The new figure; the caller is responsible for closing it.
    """
    fig, ax = plt.subplots(figsize=(20, 15))
    if kind == _SCATTER:
        ax.scatter(data[x_col], data[y_col])
    elif kind in (_BAR, _LINE):
        # Sort whole rows by x so every y value stays paired with its own
        # x value; sorting the two columns separately would scramble them.
        ordered = data.sort_values(x_col)
        if kind == _BAR:
            ax.bar(ordered[x_col], ordered[y_col])
        else:
            ax.plot(ordered[x_col], ordered[y_col])
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.grid(True)
    return fig


st.title('Выберите файл')
uploaded_file = st.file_uploader(label='', label_visibility='collapsed', type=["csv"])
if uploaded_file:
    # 'Unnamed: 0' is the index column pandas writes by default; ignore its
    # absence so CSVs saved without an index do not raise KeyError.
    df = pd.read_csv(uploaded_file).drop('Unnamed: 0', axis=1, errors='ignore')
    st.write(df.head(3))
    columns = list(df.columns)
    target = st.selectbox('Выберите столбец, который будет целью обучения', columns)
    features = st.multiselect('Выберите признаки', columns)
    if features:
        # Standardize only the selected feature columns, on a copy.
        new_df = df.copy()
        ss = StandardScaler()
        new_df[features] = ss.fit_transform(new_df[features])

        logreg = LogisticRegression(0.01, 5000)
        # Read the target from the unscaled frame so the labels are not
        # standardized when the target is also selected as a feature.
        logreg.fit(new_df[features], df[target].to_numpy())

        # One-row table: learned weight for each selected feature.
        outdict = pd.DataFrame(logreg.coef_.reshape(1, -1), columns=features)
        st.write(outdict)

        st.write('# Графики :chart_with_upwards_trend:')
        x = st.selectbox("Выберите X", columns)
        y = st.selectbox('Выберите Y', columns)
        if x and y:
            choose = st.selectbox('Выберите график', [_SCATTER, _BAR, _LINE])
            fig = _draw_chart(new_df, x, y, choose)
            st.pyplot(fig)
            # The script reruns on every interaction; close the figure so
            # they do not accumulate in memory.
            plt.close(fig)