|
|
|
"""Copy of english model testing.ipynb |
|
|
|
Automatically generated by Colab. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/13LT1keMRDkMSrOYjvzkneI_PaRnLQWl0 |
|
""" |
|
|
|
|
|
|
|
from nltk.corpus import stopwords |
|
import pandas as pd |
|
|
|
import numpy as np |
|
import tensorflow as tf |
|
from tensorflow.keras.preprocessing.text import Tokenizer |
|
from tensorflow.keras.preprocessing.sequence import pad_sequences |
|
from tensorflow.keras.models import Sequential, Model |
|
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Input, GRU |
|
|
|
from nltk.corpus import stopwords |
|
from nltk.stem import WordNetLemmatizer |
|
import nltk |
|
import requests |
|
# Fetch the NLTK corpora/models required by the preprocessing imports above.
for resource in ('stopwords', 'wordnet', 'punkt', 'averaged_perceptron_tagger'):
    nltk.download(resource)
|
|
|
# Download the raw English poem corpus (one poem line per text line).
eurl = 'https://raw.githubusercontent.com/sofiagiaccotto/newengpoemdatasetNLP/main/poems.txt'
ans = requests.get(eurl)
edf = ans.text

# Case-insensitive vocabulary: lowercase everything, then split on newlines
# so each poem line becomes one training sentence.
tokenizer = Tokenizer()
corpus = edf.lower().split("\n")
tokenizer.fit_on_texts(corpus)

# +1 reserves index 0, which Keras uses as the padding index.
total_words = len(tokenizer.word_index) + 1

print(tokenizer.word_index)
print(total_words)
|
|
|
# Build n-gram prefixes: for each tokenized line, every prefix of length
# 2..len becomes one training example (all-but-last tokens -> last token).
input_sequences = []
for line in corpus:
    token_ids = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(token_ids[:end + 1] for end in range(1, len(token_ids)))

# Left-pad every prefix to the length of the longest one so they stack
# into a single rectangular array.
max_sequence_len = max(len(seq) for seq in input_sequences)
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

# Features are all tokens but the last; the label is the final token,
# one-hot encoded over the full vocabulary.
xs, labels = input_sequences[:, :-1], input_sequences[:, -1]
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)
|
|
|
import requests

# Pretrained English poem-generation model hosted on GitHub.
url = 'https://github.com/Obai33/NLP_PoemGenerationDatasets/raw/main/modeleng1.h5'
local_filename = 'modeleng1.h5'

# Download the weights file. raise_for_status() makes a failed download
# (404/500) fail loudly here, instead of silently writing an HTML error
# page to disk and producing a confusing load_model failure later.
response = requests.get(url)
response.raise_for_status()
with open(local_filename, 'wb') as f:
    f.write(response.content)

model = tf.keras.models.load_model(local_filename)
|
|
|
def generate_english_text(seed_text, next_words=50):
    """Greedily extend *seed_text* by predicting one word at a time.

    Parameters
    ----------
    seed_text : str
        Starting prompt; tokenized with the module-level corpus tokenizer.
    next_words : int, optional
        Maximum number of words to append (default 50).

    Returns
    -------
    str
        The seed text followed by the generated words.
    """
    generated_text = seed_text
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([generated_text])[0]
        # The model was trained on prefixes of length max_sequence_len - 1.
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        # Extract the scalar class id; verbose=0 silences the per-step
        # progress bar that model.predict prints by default.
        predicted = int(np.argmax(model.predict(token_list, verbose=0), axis=-1)[0])
        # O(1) reverse lookup via index_word instead of scanning the whole
        # vocabulary. Index 0 is the padding index and maps to no word, so
        # stop generating instead of appending dangling spaces.
        output_word = tokenizer.index_word.get(predicted, "")
        if not output_word:
            break
        generated_text += " " + output_word
    return generated_text
|
|
|
import gradio as gr

# Minimal web UI: a single text box in, the generated poem text out.
iface = gr.Interface(
    generate_english_text,
    inputs="text",
    outputs="text",
    title="English Poetry Generation",
    description="Enter English text to generate a small poem.",
    theme="compact",
)

iface.launch()