Upload 2 files
Browse files- TrainImprove.py +57 -0
- ml-st1.py +45 -0
TrainImprove.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
from sklearn.model_selection import train_test_split
|
4 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
from sklearn.metrics import accuracy_score
|
6 |
+
from tensorflow.keras.models import Sequential
|
7 |
+
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
|
8 |
+
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
|
9 |
+
from tensorflow.keras.regularizers import l2
|
10 |
+
from joblib import dump
|
11 |
+
|
12 |
+
# Train a character n-gram TF-IDF + dense neural network classifier that
# predicts gender (1 = male, 0 = female) from a name, then save the trained
# model and the fitted vectorizer to disk and report hold-out accuracy.

# 1. Read data
data = pd.read_excel('gender.xlsx')

# 2. Preprocess data
# Any label other than 'M'/'F' maps to NaN. Drop those rows (and rows with a
# missing name) so that no NaN label reaches the loss function.
data['Gender'] = data['Gender'].map({'M': 1, 'F': 0})
data = data.dropna(subset=['Name', 'Gender'])
names = data['Name'].astype(str).values  # The vectorizer requires string input
y = data['Gender'].astype('int32').values  # Labels: 1 for Male, 0 for Female

# 3. Split the raw names into training and testing sets
# The split happens BEFORE vectorizing so that the TF-IDF vocabulary and IDF
# weights are learned from the training names only (no test-set leakage).
names_train, names_test, y_train, y_test = train_test_split(
    names, y, test_size=0.2, random_state=42
)

# 4. Convert text data into numerical data using TF-IDF
tfidf = TfidfVectorizer(analyzer='char', ngram_range=(1, 3))
X_train = tfidf.fit_transform(names_train).toarray()  # Fit on training names only
X_test = tfidf.transform(names_test).toarray()  # Reuse the fitted vocabulary

# 5. Build the neural network model
model = Sequential()
model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.01),
                input_shape=(X_train.shape[1],)))  # L2 regularization
model.add(BatchNormalization())  # Batch normalization
model.add(Dropout(0.5))  # Dropout to prevent overfitting
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))  # L2 regularization
model.add(BatchNormalization())  # Batch normalization
model.add(Dropout(0.5))  # Dropout to prevent overfitting
model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid for binary classification

# 6. Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 7. Define callbacks
# Stop when validation loss has not improved for 5 epochs and keep the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Multiply the learning rate by 0.2 after 3 epochs without validation-loss improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)

# 8. Train the model with epochs and callbacks
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2,
          callbacks=[early_stopping, reduce_lr])

# 9. Save the model after training
model.save('gender_prediction_model_Improve.h5')

# 10. Save the TF-IDF vectorizer
dump(tfidf, 'tfidf_vectorizer_Improve.joblib')

# 11. Evaluate the model
# predict() returns one column of probabilities; flatten it so the predictions
# have the same 1-D shape as y_test before scoring.
y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
|
ml-st1.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
import pandas as pd
|
4 |
+
from tensorflow.keras.models import load_model
|
5 |
+
from joblib import load
|
6 |
+
|
7 |
+
# Configure the browser tab title and icon, and use the centered page layout.
# NOTE(review): the icon below consists of two separate code points; if a
# single "student" emoji was intended, confirm the zero-width joiner between
# them was not lost.
st.set_page_config(
    page_title="Gender Prediction",
    page_icon="🧑🎓",
    layout="centered",
)
|
9 |
+
|
10 |
+
# Load the pre-trained model
@st.cache_resource
def load_prediction_model():
    """Load the trained Keras gender-prediction model from disk.

    The result is cached with ``st.cache_resource``. If the model file is
    missing, an error is shown in the app and the script run is stopped,
    matching how the vectorizer loader reports a missing file.

    Returns:
        The model object returned by ``load_model``.
    """
    # NOTE(review): verify this file name against the training script that
    # produces the model; confirm which artifact this app is meant to serve.
    model_file = 'gender_prediction_model.h5'
    if not os.path.exists(model_file):
        st.error(f"❌ {model_file} not found. Please ensure the file exists in the current directory.")
        st.stop()
    return load_model(model_file)
|
14 |
+
|
15 |
+
# Load the TF-IDF vectorizer
@st.cache_resource
def load_vectorizer():
    """Load the fitted TF-IDF vectorizer from its joblib file.

    The result is cached with ``st.cache_resource``. When the file does not
    exist, an error message is shown in the app and the script run is stopped.

    Returns:
        The object deserialized by ``joblib.load``.
    """
    vectorizer_path = 'tfidf_vectorizer.joblib'
    file_is_missing = not os.path.exists(vectorizer_path)
    if file_is_missing:
        message = f"❌ {vectorizer_path} not found. Please ensure the file exists in the current directory."
        st.error(message)
        st.stop()
    # joblib.load unpickles the file, so only trusted artifacts should be placed here.
    vectorizer = load(vectorizer_path)
    return vectorizer
|
23 |
+
|
24 |
+
# Prediction function
def predict_gender(name, model, tfidf, threshold=0.5):
    """Predict 'Male' or 'Female' for a single name.

    Args:
        name: The name to classify. Surrounding whitespace is ignored.
        model: Object whose ``predict(features)`` returns a 2-D result with
            the predicted probability at ``[0][0]``.
        tfidf: Fitted vectorizer whose ``transform([...]).toarray()`` yields
            the feature matrix expected by ``model``.
        threshold: Probability strictly above which the result is 'Male'.
            Defaults to 0.5.

    Returns:
        'Male' if the predicted probability exceeds ``threshold``,
        otherwise 'Female'.
    """
    # Leading/trailing spaces would otherwise add spurious character n-grams.
    cleaned_name = name.strip()
    vectorized_name = tfidf.transform([cleaned_name]).toarray()  # Transform name into feature vector
    # Pull out the single scalar probability before comparing, rather than
    # comparing a boolean array element against the integer 1.
    probability = float(model.predict(vectorized_name)[0][0])
    return 'Male' if probability > threshold else 'Female'
|
29 |
+
|
30 |
+
# Load model and vectorizer (both loaders are decorated with st.cache_resource)
model = load_prediction_model()
tfidf = load_vectorizer()

# Streamlit UI
st.title("Gender Prediction from Name")
st.write("Enter a name to predict the gender using the pre-trained model.")

# Input form
name = st.text_input("Enter a name:")
if st.button("Predict"):
    # Strip surrounding whitespace so an input made only of spaces is treated
    # as empty instead of being sent to the model.
    cleaned_name = name.strip()
    if cleaned_name:
        predicted_gender = predict_gender(cleaned_name, model, tfidf)
        st.success(f"The predicted gender for '{cleaned_name}' is: **{predicted_gender}**")
    else:
        st.warning("Please enter a valid name.")
|