Alyaboelnasr committed on
Commit
31af933
·
verified ·
1 Parent(s): 83b11d0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +250 -0
app.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
4
+ import random
5
+ from gtts import gTTS
6
+ import re
7
+
8
# Load the BLIP model for generating captions.
# NOTE: all three pretrained models below are fetched/loaded at import time,
# so the first start of the app can be slow and requires network access.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


# Load GPT-2 model for story generation (used by generate_story_from_images).
story_generator = pipeline("text-generation", model="gpt2")

# Load translation model (English to Arabic), applied only when the user
# selects Arabic output.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ar")
18
+
19
# Default characters and settings.
# Fallback pools used when the user leaves the character/setting inputs blank:
# a character is drawn with random.choice(default_characters).
# NOTE(review): "Omar" and "Rami" each appear twice below, doubling their odds
# under random.choice — confirm whether that is intentional.
default_characters = [
    "Ali", "Fatima", "Omar", "Amina", "Zaid", "Layla", "Hassan", "Sara",
    "Yusuf", "Noura", "Khalid", "Rania", "Amir", "Jasmine", "Farah", "Sami",
    "Aisha", "Rami", "Zayn", "Dalia", "Bilal", "Ibtisam", "Mansour", "Afnan",
    "Jamal", "Asma", "Khadija", "Hadi", "Maya", "Samir", "Nabil", "Lina",
    "Tariq", "Yara", "Munir", "Ranya", "Firas", "Nadia", "Alaa", "Nida",
    "Omar", "Dina", "Zein", "Rami", "Yasmin", "Salma", "Jamil", "Khaled"
]

# NOTE(review): default_settings is not referenced anywhere in this file —
# the blank-setting fallback uses generate_relevant_setting() instead.
# Kept for possible external use; verify before removing.
default_settings = [
    "a mystical forest", "a bustling city", "an ancient castle", "a snowy mountain village",
    "a sunny beach", "a dark cave", "a magical kingdom", "a quiet library",
    "a colorful carnival", "a haunted house", "a space station", "a serene garden",
    "a busy marketplace", "a futuristic city", "a pirate ship", "a wild savannah",
    "a snowy tundra", "a tropical island", "an underwater city", "a hidden valley",
    "a dragon's lair", "an enchanted meadow", "a witch's cottage", "a cozy cabin",
    "a bustling train station", "a giant's castle", "a fairy-tale village",
    "a mysterious island", "a historical battlefield", "an abandoned factory",
    "a magical forest glen", "a secret garden", "a royal palace", "a shimmering lagoon",
    "a giant treehouse", "a whimsical tree-lined street", "a rugged mountain range",
    "a starry night sky", "a bright sunny day", "a lively zoo", "an exciting amusement park"
]
42
+
43
+
44
def preprocess_image(image, size=(256, 256)):
    """Normalize an input image for BLIP captioning.

    Args:
        image: A PIL-style image (anything exposing ``convert`` and ``resize``).
        size: Target ``(width, height)`` tuple. Defaults to the previously
            hard-coded 256x256, so existing callers are unaffected.

    Returns:
        The image converted to RGB and resized to ``size``.
    """
    return image.convert("RGB").resize(size)
46
+
47
+
48
def get_captions(images):
    """Caption every image with BLIP and join the captions with spaces.

    Args:
        images: Iterable of PIL images.

    Returns:
        A single space-separated string containing one caption per image.
    """
    def caption_one(img):
        # Preprocess, run generation, and decode the first (only) sequence.
        batch = blip_processor(images=preprocess_image(img), return_tensors="pt")
        token_ids = blip_model.generate(**batch)
        return blip_processor.decode(token_ids[0], skip_special_tokens=True)

    return " ".join(caption_one(img) for img in images)
57
+
58
+
59
def clean_up_caption(caption):
    """Tidy a raw BLIP caption for use inside story text.

    Strips whitespace, capitalizes the first letter, and removes generic
    "meta" words ("scene", "view", "image", "photo", "picture") that describe
    the picture rather than its content, then collapses leftover runs of
    spaces.

    Fixes vs. the original implementation:
    - meta-words are removed case-insensitively (previously ``capitalize()``
      could turn a leading "scene" into "Scene", which the case-sensitive
      ``replace`` then missed);
    - two dead replacements were dropped ("a snowy scene" could never occur
      once "scene" had already been stripped, and replacing "log cabin" with
      itself was a no-op);
    - internal double spaces left behind by word removal are collapsed.

    Args:
        caption: Raw caption text from the captioning model.

    Returns:
        The cleaned caption string.
    """
    caption = caption.strip().capitalize()
    meta_words = ("scene", "view", "image", "photo", "picture")
    for word in meta_words:
        # re.escape is defensive; these words contain no regex metacharacters.
        caption = re.sub(re.escape(word), "", caption, flags=re.IGNORECASE).strip()
    # Removing a word mid-sentence leaves "a  of"-style gaps; squeeze them.
    return re.sub(r"\s{2,}", " ", caption)
69
+
70
+
71
def generate_relevant_setting(captions):
    """Pick a story setting whose keyword appears in the captions.

    Keywords are checked in declaration order (case-insensitively) and the
    first match wins; "a mysterious place" is the fallback when none match.

    Args:
        captions: Combined caption text for the uploaded image(s).

    Returns:
        A human-readable setting phrase.
    """
    lowered = captions.lower()
    keyword_settings = (
        ("castle", "an ancient castle"),
        ("forest", "a mystical forest"),
        ("mountain", "a snowy mountain village"),
        ("beach", "a sunny beach"),
    )
    return next(
        (setting for keyword, setting in keyword_settings if keyword in lowered),
        "a mysterious place",
    )
83
+
84
+
85
def integrate_caption_into_story(caption, character, setting):
    """Turn a cleaned caption into a single opening sentence for *character*.

    The caption is cleaned, then matched (first hit wins) against a series of
    keyword pools that select a sentence template.

    NOTE(review): the ``setting`` parameter is currently unused — kept for
    interface compatibility with callers; confirm whether it should feed into
    any of the templates below.

    Args:
        caption: Raw combined caption text.
        character: The protagonist's name.
        setting: Unused (see note above).

    Returns:
        A story sentence embedding the cleaned caption.
    """
    text = clean_up_caption(caption)

    # Keyword pools that select which sentence template is used.
    animal_words = ["cat", "dog", "bear", "lion", "eagle", "bird", "rabbit",
                    "tiger", "elephant", "fish", "horse", "wolf", "deer"]
    being_words = ["girl", "boy", "man", "woman", "child", "hero", "princess",
                   "prince", "wizard", "witch", "monster", "creature"]
    item_words = ["book", "sword", "fruit", "food", "technology", "map", "key",
                  "potion", "gem", "tool", "lantern", "shield"]

    # Guard-clause chain: most specific combinations are checked first.
    if "cabin" in text and "snow" in text:
        return f"In a cozy log cabin on a snowy mountain, there lived a brave character named {character}."
    if "mountain" in text:
        return f"High up in the mountains, {character} embarked on an adventure."
    if any(word in text for word in animal_words):
        return f"One day, {character} encountered a {text} that changed everything."
    if any(word in text for word in being_words):
        return f"A {text} approached {character} with a sense of wonder."
    if "treasure" in text:
        return f"{character} stumbled upon a treasure in the {text}, which held many secrets."
    if any(word in text for word in item_words):
        return f"{character} picked up a {text}, which turned out to be very special."
    return f"{character} was surrounded by {text} during their adventure."
109
+
110
+
111
def trim_story(story, max_lines, max_words=None):
    """Cut a generated story down to whole sentences.

    Sentences (split on '.', '!' or '?' followed by spaces) are kept in order
    until either ``max_lines`` sentences have been taken or, when
    ``max_words`` is given, adding the next sentence would push the running
    word total past it.

    Args:
        story: The raw generated text.
        max_lines: Maximum number of sentences to keep.
        max_words: Optional hard cap on the total word count.

    Returns:
        The kept sentences joined by single spaces.
    """
    # Lookbehind split keeps the terminal punctuation attached to its sentence.
    pieces = re.split(r'(?<=[.!?]) +', story.strip())

    kept = []
    words_used = 0
    for piece in pieces:
        if len(kept) >= max_lines:
            break
        piece_words = len(piece.split())
        # Stop before a sentence that would blow the word budget.
        if max_words is not None and words_used + piece_words > max_words:
            break
        kept.append(piece.strip())
        words_used += piece_words

    return ' '.join(kept)
135
+
136
+
137
def generate_story_from_images(images, story_length, character, setting, tone, language):
    """Gradio handler: caption the uploaded image(s) and generate a story.

    Args:
        images: A single PIL image, a numpy array, or a list of either
            (single inputs are wrapped into a list below).
        story_length: "Short", "Medium", or anything else (treated as Long).
        character: Optional character name; blank picks a random default.
        setting: Optional setting; blank derives one from the captions.
        tone: "Kids Story" selects the light prompt; anything else the drama one.
        language: "Arabic" translates the final story; otherwise English.

    Returns:
        A ``(captions, story)`` tuple of strings. On any failure the first
        element is the exception text and the second a fixed error message,
        so the UI shows the problem instead of crashing.
    """
    try:
        # Normalize to a list of PIL images (Gradio may hand over a single
        # image or numpy arrays depending on the input component).
        if isinstance(images, Image.Image):
            images = [images]

        images = [Image.fromarray(image) if not isinstance(image, Image.Image) else image for image in images]
        combined_captions = get_captions(images)

        if not character.strip():
            character = random.choice(default_characters)  # Random character
        if not setting.strip():
            setting = generate_relevant_setting(combined_captions)

        # Set the maximum tokens and lines based on story length
        if story_length == "Short":
            max_new_tokens = 50
            max_lines = 5
        elif story_length == "Medium":
            max_new_tokens = 100
            max_lines = 15
        else:  # Long
            max_new_tokens = 300
            max_lines = 90

        caption_in_story = integrate_caption_into_story(combined_captions, character, setting)

        # Build the GPT-2 prompt; tone selects between the two templates.
        story_prompt = (
            f"Once upon a time, in {setting}, there was a kind and brave character named {character}. "
            f"{caption_in_story} They had many adventures filled with fun and wonder."
        ) if tone == "Kids Story" else (
            f"In {setting}, a character named {character} faced challenges and deep emotions. "
            f"{caption_in_story} Their journey was filled with tension, suspense, and moments of heartfelt struggle."
        )

        # Check length and truncate if necessary.
        # NOTE(review): this caps CHARACTERS, not tokens — confirm it is a
        # sufficient guard for GPT-2's context window.
        if len(story_prompt) > 1024:  # Limit input length to prevent exceeding model limits
            story_prompt = story_prompt[:1024]

        # Generate the story
        story = story_generator(
            story_prompt,
            max_new_tokens=max_new_tokens,
            truncation=True,
            pad_token_id=story_generator.tokenizer.eos_token_id,
            num_return_sequences=1
        )
        generated_story = story[0]['generated_text']
        trimmed_story = trim_story(generated_story, max_lines)

        if language == "Arabic":
            trimmed_story = translator(trimmed_story)[0]['translation_text']

        return combined_captions, trimmed_story

    except Exception as e:
        # NOTE(review): deliberately broad — any failure is surfaced in the UI
        # as text rather than a stack trace. Consider logging the traceback.
        return str(e), "Error generating story."
193
+
194
+
195
def text_to_speech(story_text, language):
    """Synthesize the story to "story.mp3" with gTTS and return the file path.

    Args:
        story_text: The story to read aloud.
        language: "Arabic" selects the Arabic voice, anything else English.

    Returns:
        Path to the saved MP3 file ("story.mp3" in the working directory).
    """
    voice = gTTS(text=story_text, lang='ar' if language == "Arabic" else 'en')
    output_path = "story.mp3"
    voice.save(output_path)
    return output_path
201
+
202
+
203
# Create a Gradio interface
with gr.Blocks() as interface:
    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Images")

    # NOTE(review): this Row is created but never entered with `with`, so the
    # components below are NOT laid out inside it and the variable is unused —
    # confirm whether a `with character_setting_language:` block was intended.
    character_setting_language = gr.Row()
    character_input = gr.Textbox(label="Character Name (Optional)",
                                 placeholder="Enter character name...")
    setting_input = gr.Textbox(label="Setting (Optional)", placeholder="Enter story setting...")
    story_length = gr.Radio(
        choices=["Short", "Medium", "Long"],
        label="Select Story Length",
        value="Medium"  # Default value
    )

    language = gr.Radio(
        choices=["English", "Arabic"],
        label="Select Story Language",
        value="English"  # Default value
    )

    tone = gr.Radio(
        choices=["Kids Story", "Drama"],
        label="Select Story Style/Tone",
        value="Kids Story"  # Default value
    )

    # Read-only output boxes for the captions and the generated story.
    image_descriptions = gr.Textbox(label="Image Descriptions", interactive=False)
    generated_story = gr.Textbox(label="Generated Story", interactive=False)

    submit_button = gr.Button("Generate Story")
    listen_button = gr.Button("Listen to Story")

    # Wire the buttons to the backend handlers defined above.
    submit_button.click(
        fn=generate_story_from_images,
        inputs=[image_input, story_length, character_input, setting_input, tone, language],
        outputs=[image_descriptions, generated_story]
    )

    listen_button.click(
        fn=text_to_speech,
        inputs=[generated_story, language],
        outputs=gr.Audio(label="Story Audio", type="filepath")  # Output audio file
    )

# Launch the app
if __name__ == "__main__":
    interface.launch()