Spaces:

demomern
/

Apps-Reviews-for-Requirements-Elicitation

Sleeping

App Files Files Community

Apps-Reviews-for-Requirements-Elicitation / app.py

demomern

Create app.py

3ca505b almost 2 years ago

raw

history blame

2.87 kB

	import re
	import emoji
	import spacy
	import joblib
	from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
	from sklearn.neural_network import MLPClassifier
	from sklearn.preprocessing import LabelEncoder
	from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
	import gradio as gr

	# spaCy English pipeline; clean_review_text() uses it for stop-word,
	# punctuation and whitespace filtering and for lemmatisation.
	nlp = spacy.load("en_core_web_sm")


	# Load the TF-IDF vectorizer from a file. It is only ever used through
	# cv.transform(), so the pickle must contain an already-fitted vectorizer.
	# NOTE(review): joblib.load unpickles its input -- only load trusted files.
	cv = joblib.load('tfidf_vectorizer.pkl')

	# Load the MLP classifier from a file; its predict_proba output is mapped
	# onto the names in `label`.
	mlp_label = joblib.load('mlpLabel.pkl')

	# Load the MLP aspect classifier from a file; its predict_proba output is
	# mapped onto the names in `aspect_label`.
	mlp_aspect_label = joblib.load('mlpAspectLabel.pkl')


	def remove_html(text):
	    """Strip HTML-style tags from *text*.

	    Everything from a ``<`` up to and including the next ``>`` is deleted;
	    the text between tags is kept unchanged.

	    Args:
	        text: Input string, possibly containing markup.

	    Returns:
	        The string with every ``<...>`` span removed.
	    """
	    # A negated character class is used instead of a lazy dot because the
	    # dot does not match newlines, which would leave tags that are broken
	    # across several lines in place.
	    patt_html = r"<[^>]*>"
	    return re.sub(patt_html, "", text)

	def remove_url(text):
	    """Delete web addresses from *text*.

	    Two forms are removed: anything starting with ``http://`` or
	    ``https://``, and anything starting with ``www.``. Each match extends
	    up to the next whitespace character.

	    Args:
	        text: Input string, possibly containing URLs.

	    Returns:
	        The string with every matched URL removed; surrounding whitespace
	        is left untouched.
	    """
	    # The bar must be a plain (unescaped) alternation. Escaping it makes
	    # the regex look for a literal "|" between the two forms, so ordinary
	    # URLs would never match.
	    patt_url = r"https?://\S+|www\.\S+"
	    return re.sub(patt_url, "", text)

	def emoji_to_text(text):
	    """Spell out the emoji found in *text*.

	    The string is examined one character at a time. A character that
	    ``emoji.is_emoji`` recognises is replaced by the result of
	    ``emoji.demojize`` for that character, padded with a single space on
	    each side; any other character is copied through unchanged.
	    """
	    pieces = [
	        f" {emoji.demojize(char)} " if emoji.is_emoji(char) else char
	        for char in text
	    ]
	    return "".join(pieces)

	def clean_review_text(text):
	    """Normalise a raw review into a space-separated string of lemmas.

	    Steps, in order: strip HTML tags, strip URLs, spell out emoji,
	    lower-case, then run the spaCy pipeline and keep the lemma of every
	    token that is not a stop word, punctuation or whitespace.
	    """
	    # Text-level clean-up: HTML tags first, then URLs, then emoji.
	    for step in (remove_html, remove_url, emoji_to_text):
	        text = step(text)

	    # Lower-casing happens after emoji conversion, so the emoji names are
	    # lower-cased as well.
	    doc = nlp(text.lower())

	    # Token filter:
	    #   token.is_stop  -> stop word (is, am, are, a, an, the, ...)
	    #   token.is_punct -> punctuation (. ! , : ; ...)
	    #   token.is_space -> whitespace token (tab, space, ...)
	    # token.lemma_ is the root form (go | went | gone | going => go).
	    lemmas = []
	    for token in doc:
	        if token.is_stop or token.is_punct or token.is_space:
	            continue
	        lemmas.append(token.lemma_)

	    return " ".join(lemmas)



	# Display names for the sentiment classes. return_label_aspect() pairs
	# entry i with element i of mlp_label.predict_proba(...).
	# NOTE(review): presumably this order matches the class encoding used when
	# the model was trained -- confirm against the training code.
	label = ['negative', 'neutral', 'positive']
	# Display names for the aspect classes, paired by position with the output
	# of mlp_aspect_label.predict_proba(...) in the same way.
	aspect_label = ['Card Decks and Challenges', 'Card Play and Board Games',
	'Fun and Coin Collecting', 'Game Scores and Features',
	'Game Updates and User Desires', 'Gameplay and App Experience',
	'Gameplay and Trading', 'Gameplay and User Experience',
	'Property and Land Management', 'Subway Adventures']


	def return_label_aspect(Review):
	    """Classify one review and return per-class confidences.

	    Args:
	        Review: Raw review text. The capitalised parameter name is part of
	            the public interface and is kept as-is.

	    Returns:
	        A ``(sentiment, aspect)`` tuple of dicts. Each dict maps a class
	        name from ``label`` / ``aspect_label`` to the predicted probability
	        for that class, rounded to two decimals.
	    """
	    # Both classifiers consume the same TF-IDF features, so the review is
	    # cleaned and vectorised only once.
	    review_vec = cv.transform([clean_review_text(Review)])
	    sentiment_probs = mlp_label.predict_proba(review_vec)[0]
	    aspect_probs = mlp_aspect_label.predict_proba(review_vec)[0]

	    # Pair names and probabilities by position rather than through
	    # hard-coded class counts, so the name lists are the single source of
	    # truth. float() yields a plain Python float whatever scalar type
	    # predict_proba returned.
	    pred_label = {
	        name: round(float(prob), 2)
	        for name, prob in zip(label, sentiment_probs)
	    }
	    pred_aspect = {
	        name: round(float(prob), 2)
	        for name, prob in zip(aspect_label, aspect_probs)
	    }
	    return pred_label, pred_aspect



	# Gradio UI: one free-text input and two Label outputs. The outputs
	# receive, in order, the two dicts returned by return_label_aspect
	# (sentiment confidences first, aspect confidences second).
	iface = gr.Interface(fn=return_label_aspect, inputs="text", outputs=[gr.Label(), gr.Label()])
	# Start the web app as soon as the script is executed.
	iface.launch(inline = False)