neelnsoni13 committed on
Commit
32e93f1
·
verified ·
1 Parent(s): d885946

Upload 2 files

Browse files
Files changed (2) hide show
  1. TrainImprove.py +57 -0
  2. ml-st1.py +45 -0
TrainImprove.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.metrics import accuracy_score
6
+ from tensorflow.keras.models import Sequential
7
+ from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
8
+ from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
9
+ from tensorflow.keras.regularizers import l2
10
+ from joblib import dump
11
+
12
# Training script: fits a character n-gram TF-IDF vectorizer and a small dense
# Keras classifier on names from 'gender.xlsx', saves both artefacts and
# prints the hold-out accuracy.

# 1. Read Data
data = pd.read_excel('gender.xlsx')

# 2. Preprocess Data
# Normalise the raw labels before mapping so stray whitespace or lowercase
# values are not silently turned into NaN; anything else still maps to NaN.
data['Gender'] = (
    data['Gender'].astype(str).str.strip().str.upper().map({'M': 1, 'F': 0})
)
# Rows without a usable name or label would otherwise feed NaN into the
# vectorizer or the loss, so drop them explicitly.
data = data.dropna(subset=['Name', 'Gender'])
names = data['Name'].astype(str).str.strip()
y = data['Gender'].astype('int32').values  # Labels: 1 for Male, 0 for Female

# 3. Split the raw names into training and testing sets
# The split happens BEFORE vectorisation so the TF-IDF vocabulary and IDF
# weights are learned from training names only (no test-set leakage).
names_train, names_test, y_train, y_test = train_test_split(
    names, y, test_size=0.2, random_state=42
)

# 4. Convert text data into numerical data using TF-IDF
tfidf = TfidfVectorizer(analyzer='char', ngram_range=(1, 3))
X_train = tfidf.fit_transform(names_train).toarray()  # Fit on training names only
X_test = tfidf.transform(names_test).toarray()  # Reuse the fitted vocabulary

# 5. Build the Neural Network Model
model = Sequential()
model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.01),
                input_shape=(X_train.shape[1],)))  # L2 regularization
model.add(BatchNormalization())  # Batch normalization
model.add(Dropout(0.5))  # Dropout to prevent overfitting
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))  # L2 regularization
model.add(BatchNormalization())  # Batch normalization
model.add(Dropout(0.5))  # Dropout to prevent overfitting
model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid for binary classification

# 6. Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 7. Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)  # Early stopping
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)  # Learning rate reduction

# 8. Train the model with epochs and callbacks
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2,
          callbacks=[early_stopping, reduce_lr])

# 9. Save the model after training
# NOTE(review): the Streamlit app in this commit loads
# 'gender_prediction_model.h5' / 'tfidf_vectorizer.joblib' (no '_Improve'
# suffix) - confirm which artefacts the app is meant to use.
model.save('gender_prediction_model_Improve.h5')

# 10. Save the TF-IDF vectorizer
dump(tfidf, 'tfidf_vectorizer_Improve.joblib')

# 11. Evaluate the model
# Flatten the (n, 1) prediction column so it has the same shape as y_test.
y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
ml-st1.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import pandas as pd
4
+ from tensorflow.keras.models import load_model
5
+ from joblib import load
6
+
7
# Configure the Streamlit page: title, icon and a centered layout
st.set_page_config(
    page_title="Gender Prediction",
    page_icon="🧑‍🎓",
    layout="centered",
)
9
+
10
+ # Load the pre-trained model
11
@st.cache_resource
def load_prediction_model():
    """Load the pre-trained Keras model from 'gender_prediction_model.h5'.

    The result is wrapped in ``st.cache_resource`` so the model file is not
    re-read on every Streamlit rerun.

    If the file is missing, an error is shown in the app and the script run
    is stopped, mirroring the handling used for the TF-IDF vectorizer file.

    Returns:
        The model object returned by ``tensorflow.keras.models.load_model``.
    """
    # NOTE(review): the training script in this commit saves
    # 'gender_prediction_model_Improve.h5' - confirm this is the intended file.
    model_file = 'gender_prediction_model.h5'
    if not os.path.exists(model_file):
        st.error(f"❌ {model_file} not found. Please ensure the file exists in the current directory.")
        st.stop()
    return load_model(model_file)
14
+
15
+ # Load the TF-IDF vectorizer
16
@st.cache_resource
def load_vectorizer():
    """Load the fitted TF-IDF vectorizer from 'tfidf_vectorizer.joblib'.

    When the file exists it is deserialised with ``joblib.load`` and returned.
    Otherwise an error message is shown in the app and ``st.stop()`` ends the
    current script run.
    """
    vectorizer_path = 'tfidf_vectorizer.joblib'
    if os.path.exists(vectorizer_path):
        return load(vectorizer_path)

    # Missing artefact: report it in the UI and halt instead of raising.
    st.error(f"❌ {vectorizer_path} not found. Please ensure the file exists in the current directory.")
    st.stop()
    return load(vectorizer_path)
23
+
24
+ # Prediction function
25
def predict_gender(name, model, tfidf):
    """Predict 'Male' or 'Female' for a single name.

    Args:
        name: The name to classify. Leading and trailing whitespace is
            removed first so it does not alter the character n-grams.
        model: Object whose ``predict`` returns an array-like indexable as
            ``[0][0]`` holding the score for the single input row.
        tfidf: Fitted vectorizer exposing ``transform`` whose result has a
            ``toarray`` method.

    Returns:
        'Male' when the score is strictly greater than 0.5, else 'Female'.
    """
    cleaned_name = name.strip()
    features = tfidf.transform([cleaned_name]).toarray()  # Name -> feature vector
    probability = float(model.predict(features)[0][0])  # Single row, single output
    return 'Male' if probability > 0.5 else 'Female'
29
+
30
# Load model and vectorizer
model = load_prediction_model()
tfidf = load_vectorizer()

# Streamlit UI
st.title("Gender Prediction from Name")
st.write("Enter a name to predict the gender using the pre-trained model.")

# Input form
name = st.text_input("Enter a name:")
if st.button("Predict"):
    # Strip first so whitespace-only input is rejected instead of being
    # sent to the model as if it were a name.
    cleaned_name = name.strip()
    if cleaned_name:
        predicted_gender = predict_gender(cleaned_name, model, tfidf)
        st.success(f"The predicted gender for '{cleaned_name}' is: **{predicted_gender}**")
    else:
        st.warning("Please enter a valid name.")