# -*- coding: utf-8 -*-
"""Copy of english model testing.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/13LT1keMRDkMSrOYjvzkneI_PaRnLQWl0
"""

#!pip install gradio

import numpy as np
import requests
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Download the English poem dataset.
eurl = 'https://raw.githubusercontent.com/sofiagiaccotto/newengpoemdatasetNLP/main/poems.txt'
ans = requests.get(eurl)
ans.raise_for_status()
edf = ans.text

# Fit a word-level tokenizer on the corpus, one poem line per entry.
tokenizer = Tokenizer()
corpus = edf.lower().split("\n")
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1  # +1 for the reserved padding index 0
print(tokenizer.word_index)
print(total_words)

# Build n-gram prefixes for every line, e.g. "my heart is full" yields
# [my, heart], [my, heart, is], [my, heart, is, full] as token-id lists.
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        input_sequences.append(token_list[:i + 1])

# Pad all sequences on the left to the length of the longest one.
max_sequence_len = max(len(x) for x in input_sequences)
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

# Create predictors and label: the last token of each sequence is the target.
# (These training tensors are kept for reference; the script loads a model
# that was already trained on this data.)
xs, labels = input_sequences[:, :-1], input_sequences[:, -1]
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

# URL of the pre-trained model
url = 'https://github.com/Obai33/NLP_PoemGenerationDatasets/raw/main/modeleng1.h5'

# Local file path to save the model
local_filename = 'modeleng1.h5'

# Download the model file
response = requests.get(url)
response.raise_for_status()
with open(local_filename, 'wb') as f:
    f.write(response.content)

# Load the pre-trained model
model = tf.keras.models.load_model(local_filename)


def generate_english_text(seed_text, next_words=50):
    """Greedily extend seed_text one word at a time with the language model."""
    generated_text = seed_text
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([generated_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        predicted = int(np.argmax(model.predict(token_list, verbose=0), axis=-1)[0])
        # index_word maps a token id back to its word; id 0 is padding and has no word.
        output_word = tokenizer.index_word.get(predicted, "")
        generated_text += " " + output_word
    return generated_text


import gradio as gr

# Gradio interface for the English poem generator
iface = gr.Interface(
    fn=generate_english_text,
    inputs="text",
    outputs="text",
    title="English Poetry Generation",
    description="Enter English text to generate a small poem.",
)

# Run the interface
iface.launch()
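# Optional sanity check, a minimal sketch not in the original notebook: the
# seed phrase below is an arbitrary example. Uncomment to try the generator
# directly (note that iface.launch() above blocks until the UI is closed).
# print(generate_english_text("the rose is red", next_words=10))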