|
import gradio as gr |
|
import string |
|
import re |
|
import pickle |
|
import huggingface_hub |
|
|
|
import numpy as np |
|
import nltk |
|
# Fetch the NLTK corpora used by the text-cleaning step, in a fixed order.
for _nltk_resource in ('stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(_nltk_resource)
|
from nltk.corpus import stopwords |
|
|
|
|
|
|
|
def clean_review(review):
    """Normalise a raw review string into space-separated, lemmatised tokens.

    Steps, in order: lowercase; strip URL-like and HTML-tag-like spans; turn
    full stops into spaces; delete the remaining ASCII punctuation; drop
    English stopwords; lemmatise every token with WordNet using POS tag 'r'.

    Args:
        review: The raw review text.

    Returns:
        The processed tokens joined by single spaces. The string may carry
        leading/trailing spaces (from empty tokens at the edges), so callers
        that need a token list should use ``.split()``.
    """
    review = review.lower()

    # NOTE(review): the '.' after 'www' is unescaped, so it matches any
    # character. Left untouched so preprocessing stays identical to what the
    # stored model parameters were presumably built with -- confirm before
    # tightening it.
    review = re.sub(r"http\S+|www.\S+", "", review)
    review = re.sub(r"<[^>]*>", "", review)

    # Full stops become spaces first so "end.Start" does not fuse into one token
    # when the rest of the punctuation is deleted below.
    review = review.replace(".", " ")
    review = review.translate(str.maketrans("", "", string.punctuation))

    # Build the stopword set once per call; evaluating stopwords.words() inside
    # the comprehension rebuilt and linearly scanned the list for every token.
    stop_words = set(stopwords.words('english'))
    review = " ".join(
        word for word in re.split(r'\W+', review) if word not in stop_words
    )

    wn = nltk.WordNetLemmatizer()
    return " ".join(wn.lemmatize(word, 'r') for word in re.split(r'\W+', review))
|
|
|
def find_occurrence(frequency, word, label):
    """Return the stored count for ``(word, label)``, or 0 if the pair is absent.

    Args:
        frequency: Mapping keyed by ``(word, label)`` tuples.
        word: The token to look up.
        label: The class label paired with the token.

    Returns:
        ``frequency[(word, label)]`` when that key exists, otherwise ``0``.
    """
    # Single lookup with a default instead of a membership test plus indexing.
    return frequency.get((word, label), 0)
|
|
|
def classify_text(freqs, logprior, text):
    """Classify ``text`` with a Naive Bayes log-likelihood-ratio score.

    Args:
        freqs: Mapping of ``(word, label)`` tuples to counts. Entries with
            ``label > 0`` are tallied as the positive class, all others as
            the negative class.
        logprior: Starting value of the score, added before any word
            contributes.
        text: Raw input text; it is normalised with ``clean_review`` and then
            split on whitespace.

    Returns:
        ``1`` if the accumulated score is strictly greater than zero,
        otherwise ``0``.
    """
    # Number of distinct words; used as the add-one smoothing term below.
    vocab_size = len({word for word, _ in freqs})

    # Total word counts per class.
    num_pos = num_neg = 0
    for (_, label), count in freqs.items():
        if label > 0:
            num_pos += count
        else:
            num_neg += count

    score = logprior
    for word in clean_review(text).split():
        freq_pos = find_occurrence(freqs, word, 1)
        freq_neg = find_occurrence(freqs, word, 0)

        # Words never seen in either class carry no evidence. Checking this
        # before dividing also avoids a ZeroDivisionError when ``freqs`` is
        # empty (both denominators would be zero).
        if freq_pos + freq_neg > 0:
            p_w_pos = (freq_pos + 1) / (num_pos + vocab_size)
            p_w_neg = (freq_neg + 1) / (num_neg + vocab_size)
            score += np.log(p_w_pos / p_w_neg)

    return 1 if score > 0 else 0
|
|
|
# Fetch the serialized model parameters from the Hugging Face Hub.
model_path = huggingface_hub.hf_hub_download(
    "ajaykarthick/naive-bayes-review-classify-model",
    "naive-bayes-text-classifier-model",
)

# SECURITY: unpickling can execute arbitrary code. This is only acceptable
# while the Hub repository above is trusted; do not point it at
# user-supplied or unverified artifacts.
# The context manager closes the file handle, which the previous
# pickle.load(open(...)) form never did.
with open(model_path, mode='rb') as model_file:
    model_params = pickle.load(model_file)

# Word/label counts and the log prior consumed by classify_text.
freqs = model_params['freqs_dict']
logprior = model_params['logprior']
|
|
|
|
|
def greet(name):
    """Gradio callback: classify the submitted text and return a label string.

    Args:
        name: The text entered in the interface.

    Returns:
        ``'POSITIVE'`` when ``classify_text`` yields ``0``, otherwise
        ``'NEGATIVE'``.
    """
    predicted = classify_text(freqs, logprior, name)

    # Echo the input and the raw prediction to stdout.
    print(name, str(predicted))

    # NOTE(review): classify_text counts label > 0 as its "pos" class and
    # returns 1 for a positive score, yet 1 is mapped to 'NEGATIVE' here.
    # This may be intentional if the stored model encodes labels the other
    # way round -- confirm against the training data before changing it.
    if predicted == 0:
        return 'POSITIVE'
    return 'NEGATIVE'
|
|
|
# Single text box in, single text box out, backed by the greet callback.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)

# Start serving the interface.
iface.launch()