import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
import pickle
import spacy
from tqdm import tqdm
import gc
import os

# Download the SpaCy model
os.system("python -m spacy download en_core_web_lg")

# Load the four ensemble members
model_1 = tf.keras.models.load_model("model_1.h5")
model_2 = tf.keras.models.load_model("model_2.h5")
model_3 = tf.keras.models.load_model("model_3.h5")
model_4 = tf.keras.models.load_model("model_4.h5")

# Load the word/lemma index dictionaries built during training
with open('word_dict.pkl', 'rb') as f:
    word_dict = pickle.load(f)
with open('lemma_dict.pkl', 'rb') as f:
    lemma_dict = pickle.load(f)

# Load SpaCy NLP model
nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
# Register a custom IS_STOP flag on the vocab so stop words are marked consistently
nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)


def preprocess_text(text):
    """Preprocess the input text using SpaCy and return word indices."""
    docs = nlp.pipe([text], n_process=1)
    word_seq = []
    for doc in docs:
        for token in doc:
            # Note: with the tagger disabled, token.pos_ may be empty,
            # so this punctuation filter may have no effect.
            if token.pos_ != "PUNCT":
                if token.text not in word_dict:
                    word_dict[token.text] = 0  # OOV_INDEX
                word_seq.append(word_dict[token.text])
    return word_seq


def classify_question(text):
    # Convert the question into a padded index sequence
    seq = preprocess_text(text)
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences([seq], maxlen=55)  # Adjust maxlen if needed
    BATCH_SIZE = 512

    # Get each model's weighted prediction (ensemble weights sum to 1.0)
    pred1 = 0.15 * np.squeeze(model_1.predict(padded_seq, batch_size=BATCH_SIZE, verbose=2))
    pred2 = 0.35 * np.squeeze(model_2.predict(padded_seq, batch_size=BATCH_SIZE, verbose=2))
    pred3 = 0.15 * np.squeeze(model_3.predict(padded_seq, batch_size=BATCH_SIZE, verbose=2))
    pred4 = 0.35 * np.squeeze(model_4.predict(padded_seq, batch_size=BATCH_SIZE, verbose=2))

    # Combine the weighted predictions and apply the decision threshold
    avg_pred = pred1 + pred2 + pred3 + pred4
    label = "Insincere" if avg_pred > 0.35 else "Sincere"

    # Report the ensemble score and each model's weighted contribution
    probs = {
        "Probability": float(avg_pred),
        "Model Probabilities": {
            "Model 1": float(pred1),
            "Model 2": float(pred2),
            "Model 3": float(pred3),
            "Model 4": float(pred4),
        },
        "Sequence": seq,
    }
    return label, probs


# Example questions
examples = [
    "How do you train a pigeon to send messages?",
    "Is USA a shithole country owing to a shithole president?",
    "Why is Indian education a total bullshit?",
    "Which person has given the least f**ks and still turned out successful?",
]

# Gradio Interface
interface = gr.Interface(
    fn=classify_question,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your question here..."),
    ],
    outputs=[
        "text",  # Output for label
        "json",  # Output for probabilities
    ],
    title="Quora Insincere Questions Classifier",
    examples=examples,
    description="Enter your question to classify it as sincere or insincere. Select an example question below.",
)

interface.launch()