Obai33 committed on
Commit fefdc0e · verified
1 Parent(s): 4ba1016
Files changed (1)
  1. app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
+ # -*- coding: utf-8 -*-
+ """Copy of english model testing.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+ https://colab.research.google.com/drive/13LT1keMRDkMSrOYjvzkneI_PaRnLQWl0
+ """
+
+ # Colab-only shell command; in a plain app.py, install gradio via requirements.txt instead.
+ # !pip install gradio
+
+ import pandas as pd
+ import numpy as np
+ import tensorflow as tf
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from tensorflow.keras.models import Sequential, Model
+ from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Input, GRU
+
+ import nltk
+ import requests
+ from nltk.corpus import stopwords
+ from nltk.stem import WordNetLemmatizer
+ nltk.download('stopwords')
+ nltk.download('wordnet')
+ nltk.download('punkt')
+ nltk.download('averaged_perceptron_tagger')
+
+ # Fetch the English poem corpus (one poem line per text line).
+ eurl = 'https://raw.githubusercontent.com/sofiagiaccotto/newengpoemdatasetNLP/main/poems.txt'
+ ans = requests.get(eurl)
+ edf = ans.text
+
+ tokenizer = Tokenizer()
+
+ corpus = edf.lower().split("\n")
+
+ # Build the vocabulary; the +1 reserves index 0 for padding.
+ tokenizer.fit_on_texts(corpus)
+ total_words = len(tokenizer.word_index) + 1
+
+ print(tokenizer.word_index)
+ print(total_words)
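+ # Illustrative only: word_index maps each word to a 1-based id ordered by
+ # frequency, e.g. {'the': 1, 'and': 2, ...}; id 0 is reserved for padding.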
+
+ # Expand every line into n-gram prefixes for next-word prediction.
+ input_sequences = []
+ for line in corpus:
+     token_list = tokenizer.texts_to_sequences([line])[0]
+     for i in range(1, len(token_list)):
+         n_gram_sequence = token_list[:i+1]
+         input_sequences.append(n_gram_sequence)
+
+ # Pad sequences to a uniform length (zeros on the left).
+ max_sequence_len = max([len(x) for x in input_sequences])
+ input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
+
+ # Create predictors and label: all tokens but the last are the input, the last is the target.
+ xs, labels = input_sequences[:, :-1], input_sequences[:, -1]
+
+ ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)
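+
+ # Worked example (illustrative ids, e.g. 'a' -> 4, 'rose' -> 25, 'is' -> 7, 'red' -> 61):
+ # the line "a rose is red" expands into n-grams [4, 25], [4, 25, 7], [4, 25, 7, 61];
+ # after pre-padding, each row splits into predictors (all ids but the last)
+ # and a one-hot label (the last id).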
+
+ # URL of the pre-trained English model
+ url = 'https://github.com/Obai33/NLP_PoemGenerationDatasets/raw/main/modeleng1.h5'
+ # Local file path to save the model
+ local_filename = 'modeleng1.h5'
+
+ # Download the model file
+ response = requests.get(url)
+ with open(local_filename, 'wb') as f:
+     f.write(response.content)
+
+ # Load the pre-trained model
+ model = tf.keras.models.load_model(local_filename)
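+
+ # Sketch of an equivalent download with caching (same url/filename as above);
+ # tf.keras.utils.get_file stores the file under ~/.keras and skips the
+ # download on later runs:
+ # local_filename = tf.keras.utils.get_file('modeleng1.h5', origin=url)
+ # model = tf.keras.models.load_model(local_filename)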
+
+ def generate_english_text(seed_text, next_words=50):
+     """Greedily extend seed_text one predicted word at a time."""
+     generated_text = seed_text
+     for _ in range(next_words):
+         token_list = tokenizer.texts_to_sequences([generated_text])[0]
+         token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
+         predicted = np.argmax(model.predict(token_list, verbose=0), axis=-1)[0]
+         output_word = ""
+         for word, index in tokenizer.word_index.items():
+             if index == predicted:
+                 output_word = word
+                 break
+         generated_text += " " + output_word
+     return generated_text
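+
+ # Example call (hypothetical seed text; output depends on the trained weights):
+ # print(generate_english_text("the moon above", next_words=20))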
+
+ import gradio as gr
+
+ # Gradio interface for English poem generation
+ iface = gr.Interface(
+     fn=generate_english_text,
+     inputs="text",
+     outputs="text",
+     title="English Poetry Generation",
+     description="Enter English text to generate a small poem.",
+     theme="compact"
+ )
+ # Run the interface
+ iface.launch()
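+
+ # Note: inside a notebook, iface.launch(share=True) exposes a temporary public
+ # URL; on a hosted Space the plain launch() above is sufficient.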