import re
import string
import pickle
import random
import gc
import warnings

import pandas as pd
import torch
from torch.utils.data import TensorDataset, SequentialSampler, DataLoader
from transformers import AutoTokenizer

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

import gradio as gr

warnings.filterwarnings("ignore")

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Load the fine-tuned fake-news classifier that was pickled during training.
model = pickle.load(open("/content/fakenewsdetection/fakenews.sav", "rb"))

# --- Settings assumed to match the training notebook; adjust if yours differ. ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed tokenizer checkpoint
max_seq_len = 128   # assumed maximum sequence length used at training time
batch_size = 32     # assumed inference batch size
seed_val = 42       # assumed random seed

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))


def wordpre(text):
    # Assumed cleaning step: strip URLs, punctuation and digits.
    text = re.sub(r"http\S+|www\.\S+", " ", text)
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = re.sub(r"\d+", " ", text)
    return text


def lower_and_tokenize(series):
    # Assumed tokenisation step: lower-case, word-tokenize and drop stop words.
    return series.apply(
        lambda t: [w for w in word_tokenize(t.lower()) if w not in stop_words]
    )


def predict(text):
    # Apply the same preprocessing pipeline that was used on the training data.
    text = pd.DataFrame([text], columns=["text"])["text"]
    text = text.apply(wordpre)
    text = lower_and_tokenize(text)
    text = text.apply(lambda x: [lemmatizer.lemmatize(word) for word in x])
    text = text.apply(lambda x: ' '.join(x))

    # Tokenize and encode the input sequence for the transformer model.
    tokens_text = tokenizer.batch_encode_plus(
        text.tolist(),
        max_length=max_seq_len,
        padding="max_length",
        truncation=True,
        return_token_type_ids=True,
        add_special_tokens=True,
    )

    text_seq = torch.tensor(tokens_text['input_ids'])
    text_mask = torch.tensor(tokens_text['attention_mask'])
    text_y = torch.tensor([0])  # dummy label; only the predicted class is used

    # Wrap tensors in a DataLoader with sequential sampling for inference.
    text_data = TensorDataset(text_seq, text_mask, text_y)
    text_sampler = SequentialSampler(text_data)
    text_dataloader = DataLoader(text_data, sampler=text_sampler, batch_size=batch_size)

    random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    # Run the loaded model in evaluation mode and collect predictions.
    model.to(device)
    model.eval()

    y_pred = []
    for batch in text_dataloader:
        input_ids = batch[0].to(device)
        input_mask = batch[1].to(device)
        labels = batch[2].to(device)

        with torch.no_grad():
            out = model(input_ids, input_mask, labels=labels)

        logits = out.logits.detach()
        pred = torch.argmax(logits, dim=1)
        y_pred.extend(pred.flatten().tolist())

        del input_ids, input_mask, labels, out, logits, pred
        gc.collect()

    # Map the predicted class to a human-readable message.
    if y_pred and y_pred[0] == 1:
        result = "This may be fake news."
    else:
        result = "This may be real news."
    return result


demo = gr.Interface(
    fn=predict,
    inputs=[gr.Textbox(label="Text", lines=3)],
    outputs=[gr.Textbox(label="Predict", lines=1)],
)

if __name__ == "__main__":
    demo.launch(share=True)