# Hugging Face Space (page listed status: "Runtime error") — source recovered
# from a Spaces page scrape; markdown-table wrapping removed.
import pickle
import re
import string

import gradio as gr
import huggingface_hub
import nltk
import numpy as np

nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.corpus import stopwords
# Compiled once at import time so per-call work stays linear in review length.
_URL_RE = re.compile(r"http\S+|www\.\S+")
_TAG_RE = re.compile(r"<[^>]*>")
_NONWORD_RE = re.compile(r"\W+")


def clean_review(review):
    """Normalize a raw review string for the Naive Bayes classifier.

    Pipeline: lowercase, strip URLs and HTML tags, replace dots with
    spaces, drop punctuation, remove English stopwords, lemmatize.

    Args:
        review: Raw review text.

    Returns:
        A single space-joined string of cleaned tokens.
    """
    review = review.lower()
    # BUG FIX: the original pattern "www.\S+" left the dot unescaped, so any
    # character after "www" matched (e.g. "wwwx..." was also stripped).
    review = _URL_RE.sub("", review)
    review = _TAG_RE.sub("", review)
    review = review.replace(".", " ")
    review = "".join(c for c in review if c not in string.punctuation)
    # Build the stopword set once: the original called
    # stopwords.words('english') (a fresh list build + linear scan) per token.
    stop_words = set(stopwords.words('english'))
    review = " ".join(
        word for word in _NONWORD_RE.split(review) if word not in stop_words
    )
    wn = nltk.WordNetLemmatizer()
    # NOTE(review): pos='r' lemmatizes every token as an ADVERB, which looks
    # unintended ('n' or 'v' is typical). Kept as-is because the published
    # model's frequency dict was built with this exact preprocessing —
    # confirm against training code before changing.
    review = " ".join(wn.lemmatize(word, 'r') for word in _NONWORD_RE.split(review))
    return review
def find_occurrence(frequency, word, label):
    """Return the training count stored for a (word, label) pair.

    Args:
        frequency: Dict mapping (word, label) tuples to counts.
        word: Token to look up.
        label: Class label (1 = positive, 0 = negative).

    Returns:
        The stored count, or 0 when the pair is absent.
    """
    # Single dict lookup with a default, instead of membership test + index.
    return frequency.get((word, label), 0)
def classify_text(freqs, logprior, text):
    """Classify *text* with a trained Naive Bayes model.

    Args:
        freqs: Dict mapping (word, label) -> training count
               (label 1 = positive, 0 = negative).
        logprior: log(P(pos) / P(neg)) estimated at training time.
        text: Raw review text; cleaned internally via clean_review.

    Returns:
        1 if the accumulated log-odds are positive (predicted positive),
        otherwise 0.
    """
    # V = vocabulary size, used for Laplace (+1) smoothing below.
    vocab = {word for word, _ in freqs}
    V = len(vocab)

    # Total token counts per class over the whole training corpus.
    num_pos = num_neg = 0
    for (word, label), count in freqs.items():
        if label > 0:
            num_pos += count
        else:
            num_neg += count

    word_l = clean_review(text).split()

    # Accumulate log-odds, starting from the class prior.
    total_prob = logprior
    for word in word_l:
        freq_pos = find_occurrence(freqs, word, 1)
        freq_neg = find_occurrence(freqs, word, 0)
        # Words never seen in training contribute no evidence. The original
        # also skipped them — it merely stored a dead '' placeholder (a str
        # in an otherwise-float dict) that was never read; that and the
        # per-word p_w_pos/p_w_neg dicts are dropped here.
        if freq_pos + freq_neg > 0:
            p_w_pos = (freq_pos + 1) / (num_pos + V)
            p_w_neg = (freq_neg + 1) / (num_neg + V)
            total_prob += np.log(p_w_pos / p_w_neg)

    # Collapse the score to a hard 0/1 decision, exactly as the original did.
    return 1 if total_prob > 0 else 0
# Fetch the trained Naive Bayes parameters from the Hugging Face Hub.
model_path = huggingface_hub.hf_hub_download(
    "ajaykarthick/naive-bayes-review-classify-model",
    "naive-bayes-text-classifier-model",
)
# SECURITY NOTE: pickle.load can execute arbitrary code from the file — only
# acceptable because the model repo is trusted first-party content.
# The `with` block also fixes the original's leaked file handle
# (pickle.load(open(...)) never closed the file).
with open(model_path, mode='rb') as f:
    model_params = pickle.load(f)
freqs = model_params['freqs_dict']
logprior = model_params['logprior']
def greet(name):
    """Gradio handler: classify the input text and return a sentiment label.

    Args:
        name: Review text typed into the Gradio textbox.

    Returns:
        'POSITIVE' or 'NEGATIVE'.
    """
    total_prob = classify_text(freqs, logprior, name)
    print(name, str(total_prob))
    # BUG FIX: classify_text returns 1 when the positive-class log-odds win
    # (label 1 accumulates the positive counts), but the original mapped
    # 0 -> 'POSITIVE', inverting every prediction. Confirm against the
    # training-side label convention.
    return 'POSITIVE' if total_prob == 1 else 'NEGATIVE'
# Wire the classifier into a minimal Gradio UI: one text input, one text output.
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# Start the web server (blocks until shut down).
iface.launch()