# Hugging Face Space: Vipul-Chauhan/news-classification-model (status: Sleeping)
# File size: 1,861 Bytes

import pickle as pkl

import re
import numpy as np

import nltk
from nltk.stem import PorterStemmer
from textblob import Word
from nltk.corpus import stopwords

# Fetch the NLTK data sets at start-up; this runs every time the module is
# imported. 'stopwords' backs stopwords.words('english') in process_row;
# 'wordnet' is presumably what the TextBlob lemmatizer used there relies on.
nltk.download('stopwords')
nltk.download('wordnet')

def process_row(row):
    """Normalise a raw document into a space-separated string of stemmed tokens.

    The steps run in this order: strip e-mail addresses, delete punctuation
    and digit characters, lower-case, drop English stop words, lemmatize each
    token (TextBlob ``Word.lemmatize``), stem it (NLTK ``PorterStemmer``) and
    finally discard tokens of two characters or fewer.

    NOTE(review): the pickled vectorizer loaded in ``predict_class`` was
    presumably fitted on text cleaned exactly this way, so the output of this
    function must not change without refitting it.

    Args:
        row: Raw document text.

    Returns:
        The surviving tokens joined by single spaces; ``''`` if none survive.
    """
    from string import punctuation

    # Raw strings: '\S' and '\s' inside a plain literal are invalid escape
    # sequences, which newer Python versions warn about.
    row = re.sub(r'(\S+@\S+)(com|\s+com)', ' ', row)
    row = re.sub(r'(\S+@\S+)', ' ', row)

    # These characters are deleted outright (not replaced by a space), so
    # text on both sides of a removed character is fused into one token.
    unwanted = punctuation + '\n' + '—“,”‘-’' + '0123456789'
    row = row.translate(str.maketrans('', '', unwanted))
    row = row.lower()

    # A set gives O(1) membership tests instead of scanning the list per token.
    stop = set(stopwords.words('english'))
    ps = PorterStemmer()

    tokens = (word for word in row.split() if word not in stop)
    tokens = (Word(word).lemmatize() for word in tokens)
    tokens = (ps.stem(word) for word in tokens)
    # The length filter is applied last, i.e. to the stemmed form.
    return " ".join(word for word in tokens if len(word) > 2)

# Unpickled artifacts keyed by file path, so each file is read only once per
# process instead of once per prediction request.
_ARTIFACT_CACHE = {}


def _load_artifact(path):
    """Unpickle ``path`` on first use and return the cached object afterwards."""
    if path not in _ARTIFACT_CACHE:
        # SECURITY: unpickling can execute arbitrary code. These must be
        # trusted files shipped with the app, never user-supplied uploads.
        with open(path, "rb") as fh:  # closes the handle the original leaked
            _ARTIFACT_CACHE[path] = pkl.load(fh)
    return _ARTIFACT_CACHE[path]


def predict_class(doc):
    """Predict the class label for a single raw document.

    The text is cleaned with ``process_row``, transformed by the object
    pickled in ``tfidf_vectorizer.pk`` and passed to the ``predict`` method of
    the object pickled in ``logistic_model.pk`` (presumably a fitted TF-IDF
    vectorizer and a logistic-regression classifier, judging by the file
    names). Both files are resolved relative to the current working directory.

    Args:
        doc: Raw document text.

    Returns:
        The first (and only) element of the model's prediction array.

    Raises:
        FileNotFoundError: If either pickle file is missing.
    """
    model = _load_artifact("logistic_model.pk")
    vectorizer = _load_artifact("tfidf_vectorizer.pk")
    clean_doc = process_row(doc)
    # Densify the one-row transform output into a plain ndarray for predict().
    vector = np.array(vectorizer.transform([clean_doc]).todense())
    class_pred = model.predict(vector)
    return class_pred[0]

# predict_class(" Barack Obama is seeking for a conference to be conducted in USA")

import gradio as gr
# Web UI: one free-text input box wired to predict_class, one output box
# showing the predicted label. The components are created in the same order
# as before (input first, then output).
document_box = gr.Textbox(type="text", label="Enter Your Document")
prediction_box = gr.Textbox(type="text", value=". . . ", label="Predicted Class")

iface = gr.Interface(
    fn=predict_class,
    inputs=document_box,
    outputs=[prediction_box],
    title="News Class Predictor",
    description="Predicts whether the News belongs to 'Politics'class or 'Sports' class",
)

# Start the server as soon as the script runs, with inline display off and
# debug mode on.
iface.launch(inline=False, debug=True)