Spaces:

WebraftAI
/

Text-Completion

Paused

Text-Completion / app.py

DHRUV SHEKHAWAT

Create app.py

5ba996d over 2 years ago

4.03 kB

	import streamlit as st
	import tensorflow as tf
	from keras.layers import Input, Dense, Embedding, MultiHeadAttention
	from keras.layers import Dropout, LayerNormalization
	from keras.models import Model
	from keras.utils import pad_sequences
	import numpy as np

	class TransformerChatbot(Model):
	    """Single-block Transformer that scores next-token candidates.

	    Forward pass: token embedding -> multi-head self-attention added back
	    to its input -> layer norm -> dropout -> two dense layers -> layer
	    norm -> dropout -> softmax over ``vocab_size``.

	    Args:
	        vocab_size: Size of the embedding table and of the softmax output.
	        max_len: Stored as ``self.max_len``; the layers defined here do
	            not read it.
	        d_model: Embedding width, attention ``key_dim`` and width of the
	            second dense layer.
	        n_head: Number of attention heads.
	        ff_dim: Width of the first (ReLU) dense layer.
	        dropout_rate: Rate shared by both dropout layers.
	    """

	    def __init__(self, vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate):
	        super().__init__()
	        self.embedding = Embedding(vocab_size, d_model)
	        self.attention = MultiHeadAttention(num_heads=n_head, key_dim=d_model)
	        self.norm1 = LayerNormalization(epsilon=1e-6)
	        self.dropout1 = Dropout(dropout_rate)
	        self.dense1 = Dense(ff_dim, activation="relu")
	        self.dense2 = Dense(d_model)
	        self.norm2 = LayerNormalization(epsilon=1e-6)
	        self.dropout2 = Dropout(dropout_rate)
	        # Not used by call(); retained so the set of layer attributes on
	        # the model is unchanged.
	        self.flatten = tf.keras.layers.Flatten()
	        self.fc = Dense(vocab_size, activation="softmax")
	        self.max_len = max_len

	    def call(self, inputs):
	        """Return softmax scores over the vocabulary for each input position.

	        Args:
	            inputs: Integer token ids; id 0 is treated as padding when the
	                attention mask is built.
	        """
	        x = self.embedding(inputs)
	        # Keep attention away from padding positions.
	        mask = self.create_padding_mask(inputs)
	        attn_output = self.attention(x, x, x, attention_mask=mask)
	        x = x + attn_output
	        x = self.norm1(x)
	        x = self.dropout1(x)
	        x = self.dense1(x)
	        x = self.dense2(x)
	        x = self.norm2(x)
	        x = self.dropout2(x)
	        return self.fc(x)

	    def create_padding_mask(self, seq):
	        """Return a float mask: 1.0 for real tokens, 0.0 for padding (id 0).

	        Keras ``MultiHeadAttention`` reads ``attention_mask`` as
	        "1 = may attend, 0 = masked out", so padding must map to 0.

	        Two singleton axes are inserted after the first axis so the mask
	        broadcasts against the attention scores.
	        """
	        # NOTE(review): the mask used to be 1.0 on padding, i.e. inverted.
	        # Weights trained under that mask may behave differently on padded
	        # inputs longer than one token - confirm before raising max_len.
	        mask = tf.cast(tf.math.not_equal(seq, 0), tf.float32)
	        return mask[:, tf.newaxis, tf.newaxis, :]
	# ---- Page header and user inputs -------------------------------------
	st.title("UniGLM TEXT completion Model")
	st.subheader("Next Word Prediction AI Model by Webraft-AI")
	# Model picker. The options must be a real sequence: a bare parenthesised
	# string is just a string, and selectbox would offer its characters as
	# separate choices, so the comparison below could never succeed.
	option = st.selectbox('Model', ('12M Param',))
	# Prompt entered by the user.
	st.subheader("Enter the text you'd like to analyze.")
	text = st.text_input('Enter word: ')

	# Default result, so the final st.write() still has a value when the
	# prompt is empty or the word is not in the vocabulary.
	out = ""

	if option == '12M Param':
	    # NOTE(review): allow_pickle=True unpickles arbitrary objects; this is
	    # only safe while the .npz file is a trusted artefact shipped with the app.
	    loaded_dict = np.load("dict_predict3.bin.npz", allow_pickle=True)
	    word_to_num = loaded_dict["word_to_num"].item()
	    num_to_word = loaded_dict["num_to_word"].item()

	    # Hyper-parameters passed to the model constructor below.
	    vocab_size = 100000
	    max_len = 1
	    d_model = 64  # 64 , 1024
	    n_head = 4  # 8 , 16
	    ff_dim = 256  # 256 , 2048
	    dropout_rate = 0.1  # 0.5 , 0.2

	    chatbot = TransformerChatbot(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate)
	    chatbot.load_weights("predict3")
	    chatbot.build(input_shape=(None, max_len))  # Build the model
	    chatbot.compile(optimizer="adam", loss="sparse_categorical_crossentropy")

	    other_text1 = text.lower()
	    other_words1 = other_text1.split()
	    if len(other_words1) > 1:
	        st.write("Error: Found more than 1 word . There should not be more than one word in the prompt ")

	    for word in other_words1:
	        if word not in word_to_num:
	            st.write("Error: The word ` ",word," ` doesn't exist in the vocabulary and hence the model wasn't train on that. ")
	            continue

	        # The context window is handled as a list of token ids below
	        # (slicing, append, pad_sequences). Accept both a plain id and
	        # an id sequence as the vocabulary value.
	        other_num1 = word_to_num[word]
	        if isinstance(other_num1, (list, tuple, np.ndarray)):
	            given_X1 = list(other_num1)
	        else:
	            given_X1 = [other_num1]

	        input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
	        output_sentence = other_text1

	        # Greedy decoding: at most 16 tokens, stopping early after ".".
	        for _ in range(16):
	            predicted_token = np.argmax(chatbot.predict(input_sequence1), axis=-1).item()
	            if predicted_token not in num_to_word:
	                # Predicted id has no vocabulary entry; stop instead of
	                # failing with KeyError.
	                break
	            next_word = num_to_word[predicted_token]
	            output_sentence += " " + next_word
	            if next_word == ".":
	                break
	            # Slide the context window: drop the oldest id, add the new one.
	            given_X1 = given_X1[1:]
	            given_X1.append(predicted_token)
	            input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')

	        out = output_sentence
	else:
	    out = "Wrong Model"

	st.write("Predicted Text: ")
	st.write(out)