# (Hugging Face page residue from the original upload — kept as comments so the file parses)
# Obai33's picture
# Update app.py
# bd83e53 verified
# -*- coding: utf-8 -*-
"""Copy of english model testing.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/13LT1keMRDkMSrOYjvzkneI_PaRnLQWl0
"""
#!pip install gradio
from nltk.corpus import stopwords
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Input, GRU
from nltk.corpus import stopwords  # NOTE(review): duplicate of the import on the first line above — harmless, could be removed
from nltk.stem import WordNetLemmatizer
import nltk
import requests
# Fetch the NLTK data packages at startup so corpus/lemmatizer lookups work at runtime.
# NOTE(review): stopwords/WordNetLemmatizer are imported but never used in this file —
# presumably leftovers from the notebook's preprocessing experiments.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
# ---- Download the English poem corpus and fit the word tokenizer ----
eurl = 'https://raw.githubusercontent.com/sofiagiaccotto/newengpoemdatasetNLP/main/poems.txt'
ans = requests.get(eurl)
# Fail fast on a bad download instead of silently tokenizing an error page.
ans.raise_for_status()
edf = ans.text
tokenizer = Tokenizer()
# One training sample per poem line; lower-casing keeps the vocabulary small.
corpus = edf.lower().split("\n")
tokenizer.fit_on_texts(corpus)
# +1 because Keras reserves index 0 for padding (word indices start at 1).
total_words = len(tokenizer.word_index) + 1
print(tokenizer.word_index)
print(total_words)
# ---- Turn each poem line into n-gram training pairs ----
# Every prefix of length >= 2 of an encoded line becomes one sample:
# the first k tokens are the predictors, the (k+1)-th is the label.
input_sequences = []
for line in corpus:
    encoded = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(encoded[:k + 1] for k in range(1, len(encoded)))

# Left-pad so the label (last token) sits in a fixed column.
max_sequence_len = max(len(seq) for seq in input_sequences)
padded = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
input_sequences = np.array(padded)

# Predictors are every token but the last; the label is the final token,
# one-hot encoded over the full vocabulary.
xs = input_sequences[:, :-1]
labels = input_sequences[:, -1]
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)
# ---- Download and load the pre-trained English poem model ----
# (`requests` is already imported at the top of the file.)
# URL of the model weights
url = 'https://github.com/Obai33/NLP_PoemGenerationDatasets/raw/main/modeleng1.h5'
# Local file path to save the model
local_filename = 'modeleng1.h5'
# Download the model file; fail fast on an HTTP error so we never write
# an HTML error page into the .h5 file (which would make load_model
# fail with a confusing format error later).
response = requests.get(url)
response.raise_for_status()
with open(local_filename, 'wb') as f:
    f.write(response.content)
# Load the pre-trained model
model = tf.keras.models.load_model(local_filename)
def generate_english_text(seed_text, next_words=50):
    """Generate a short poem continuation from *seed_text*.

    Repeatedly predicts the most likely next word with the loaded model
    and appends it to the running text, up to ``next_words`` times.

    Args:
        seed_text: Starting English text.
        next_words: Maximum number of words to append (default 50).

    Returns:
        The seed text followed by the generated words.
    """
    generated_text = seed_text
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([generated_text])[0]
        # Left-pad/truncate to the model's training input length
        # (max_sequence_len - 1 predictors, the last slot was the label).
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        # verbose=0 silences the per-step Keras progress bar in the web app.
        predicted = int(np.argmax(model.predict(token_list, verbose=0), axis=-1)[0])
        # O(1) reverse lookup via the Tokenizer's built-in index->word map,
        # instead of scanning the whole vocabulary per generated word.
        output_word = tokenizer.index_word.get(predicted)
        if output_word is None:
            # Index 0 (padding) or an unknown index has no word — the
            # original code would append bare spaces here; stop instead.
            break
        generated_text += " " + output_word
    return generated_text
import gradio as gr

# Wire the generator into a minimal web UI: one text box in, the
# generated poem text out.
demo = gr.Interface(
    fn=generate_english_text,
    inputs="text",
    outputs="text",
    title="English Poetry Generation",
    description="Enter English text to generate a small poem.",
    theme="compact",
)

# Start the Gradio server.
demo.launch()