openfree committed · Commit 75c3863 (verified) · 1 Parent(s): 604df90

Update app-backup1.py

Files changed (1): app-backup1.py (+472 -79)
app-backup1.py CHANGED
@@ -55,24 +55,68 @@ hf_token = os.getenv("HF_TOKEN")
55
  openai_api_key = os.getenv("OPENAI_API_KEY")
56
  client = OpenAI(api_key=openai_api_key)
57
 
58
- # Initialize translation pipeline
59
- translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
60
 
61
  # Korean text detection function
62
  def contains_korean(text):
63
  korean_pattern = re.compile('[γ„±-γ…Žγ…-γ…£κ°€-힣]')
64
  return bool(korean_pattern.search(text))
65
 
66
- def translate_korean_prompt(prompt):
67
  """
68
  Translate Korean prompt to English if Korean text is detected
 
69
  """
70
- if contains_korean(prompt):
71
- translated = translator(prompt)[0]['translation_text']
72
  print(f"Original Korean prompt: {prompt}")
73
- print(f"Translated English prompt: {translated}")
74
- return translated
75
- return prompt
76
 
77
  def enhance_prompt(prompt, type="t2v"):
78
  system_prompt = system_prompt_t2v if type == "t2v" else system_prompt_i2v
@@ -195,58 +239,55 @@ pipeline = XoraVideoPipeline(
195
  vae=vae,
196
  ).to(device)
197
 
198
  # Preset options for resolution and frame configuration
199
  # Convert frames to seconds assuming 25 FPS
200
  preset_options = [
201
- {"label": "1216x704, 1.6초", "width": 1216, "height": 704, "num_frames": 41},
202
- {"label": "1088x704, 2.0초", "width": 1088, "height": 704, "num_frames": 49},
203
- {"label": "1056x640, 2.3초", "width": 1056, "height": 640, "num_frames": 57},
204
- {"label": "992x608, 2.6초", "width": 992, "height": 608, "num_frames": 65},
205
- {"label": "896x608, 2.9초", "width": 896, "height": 608, "num_frames": 73},
206
- {"label": "896x544, 3.2초", "width": 896, "height": 544, "num_frames": 81},
207
- {"label": "832x544, 3.6초", "width": 832, "height": 544, "num_frames": 89},
208
- {"label": "800x512, 3.9초", "width": 800, "height": 512, "num_frames": 97},
209
- {"label": "768x512, 3.9초", "width": 768, "height": 512, "num_frames": 97},
210
- {"label": "800x480, 4.2초", "width": 800, "height": 480, "num_frames": 105},
211
- {"label": "736x480, 4.5초", "width": 736, "height": 480, "num_frames": 113},
212
- {"label": "704x480, 4.8초", "width": 704, "height": 480, "num_frames": 121},
213
- {"label": "704x448, 5.2초", "width": 704, "height": 448, "num_frames": 129},
214
- {"label": "672x448, 5.5초", "width": 672, "height": 448, "num_frames": 137},
215
- {"label": "640x416, 6.1초", "width": 640, "height": 416, "num_frames": 153},
216
- {"label": "672x384, 6.4초", "width": 672, "height": 384, "num_frames": 161},
217
- {"label": "640x384, 6.8초", "width": 640, "height": 384, "num_frames": 169},
218
- {"label": "608x384, 7.1초", "width": 608, "height": 384, "num_frames": 177},
219
- {"label": "576x384, 7.4초", "width": 576, "height": 384, "num_frames": 185},
220
- {"label": "608x352, 7.7초", "width": 608, "height": 352, "num_frames": 193},
221
- {"label": "576x352, 8.0초", "width": 576, "height": 352, "num_frames": 201},
222
- {"label": "544x352, 8.4초", "width": 544, "height": 352, "num_frames": 209},
223
- {"label": "512x352, 9.3초", "width": 512, "height": 352, "num_frames": 233},
224
- {"label": "544x320, 9.6초", "width": 544, "height": 320, "num_frames": 241},
225
- {"label": "512x320, 10.3초", "width": 512, "height": 320, "num_frames": 257},
226
  ]
227
 
228
  def preset_changed(preset):
229
- if preset != "Custom":
230
- selected = next(item for item in preset_options if item["label"] == preset)
231
- # height, width, num_frames 값을 global λ³€μˆ˜λ‘œ μ—…λ°μ΄νŠΈ
232
- return (
233
- selected["height"],
234
- selected["width"],
235
- selected["num_frames"],
236
- gr.update(visible=False),
237
- gr.update(visible=False),
238
- gr.update(visible=False),
239
- )
240
- else:
241
- return (
242
- None,
243
- None,
244
- None,
245
- gr.update(visible=True),
246
- gr.update(visible=True),
247
- gr.update(visible=True),
248
- )
249
-
250
 
251
  def generate_video_from_text(
252
  prompt="",
@@ -256,8 +297,8 @@ def generate_video_from_text(
256
  seed=171198,
257
  num_inference_steps=41,
258
  guidance_scale=4,
259
- height=512,
260
- width=320,
261
  num_frames=257,
262
  progress=gr.Progress(),
263
  ):
@@ -335,11 +376,11 @@ def generate_video_from_image(
335
  negative_prompt="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
336
  frame_rate=25,
337
  seed=171198,
338
- num_inference_steps=50,
339
  guidance_scale=4,
340
- height=512,
341
- width=768,
342
- num_frames=121,
343
  progress=gr.Progress(),
344
  ):
345
  print("Height: ", height)
@@ -432,7 +473,7 @@ def create_advanced_options():
432
  minimum=1,
433
  maximum=50,
434
  step=1,
435
- value=50,
436
  visible=False
437
  )
438
  guidance_scale = gr.Slider(
@@ -448,7 +489,7 @@ def create_advanced_options():
448
  minimum=256,
449
  maximum=1024,
450
  step=64,
451
- value=512,
452
  visible=False,
453
  )
454
  width_slider = gr.Slider(
@@ -456,7 +497,7 @@ def create_advanced_options():
456
  minimum=256,
457
  maximum=1024,
458
  step=64,
459
- value=768,
460
  visible=False,
461
  )
462
  num_frames_slider = gr.Slider(
@@ -464,7 +505,7 @@ def create_advanced_options():
464
  minimum=1,
465
  maximum=200,
466
  step=1,
467
- value=121,
468
  visible=False,
469
  )
470
 
@@ -477,6 +518,180 @@ def create_advanced_options():
477
  num_frames_slider,
478
  ]
479
 
480
  # Gradio Interface Definition
481
  with gr.Blocks(theme=gr.themes.Soft()) as iface:
482
  with gr.Tabs():
@@ -504,14 +719,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
504
  visible=False
505
  )
506
 
507
- # ν˜„μž¬ μ„ νƒλœ 값듀을 μ €μž₯ν•  μƒνƒœ λ³€μˆ˜λ“€
508
- txt2vid_current_height = gr.State(value=512)
509
- txt2vid_current_width = gr.State(value=320)
510
- txt2vid_current_num_frames = gr.State(value=257)
511
-
512
  txt2vid_preset = gr.Dropdown(
513
  choices=[p["label"] for p in preset_options],
514
- value="512x320, 10.3초",
515
  label="Step 2: 해상도 프리셋 선택",
516
  )
517
 
@@ -562,14 +772,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
562
  visible=False
563
  )
564
 
565
- # ν˜„μž¬ μ„ νƒλœ 값듀을 μ €μž₯ν•  μƒνƒœ λ³€μˆ˜λ“€
566
- img2vid_current_height = gr.State(value=512)
567
- img2vid_current_width = gr.State(value=768)
568
- img2vid_current_num_frames = gr.State(value=97)
569
-
570
  img2vid_preset = gr.Dropdown(
571
  choices=[p["label"] for p in preset_options],
572
- value="512x320, 10.3초",
573
  label="Step 3: 해상도 프리셋 선택",
574
  )
575
 
@@ -592,7 +797,101 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
592
  with gr.Column():
593
  img2vid_output = gr.Video(label="μƒμ„±λœ λΉ„λ””μ˜€")
594
 
595
- # Event handlers
596
  txt2vid_preset.change(
597
  fn=preset_changed,
598
  inputs=[txt2vid_preset],
@@ -617,7 +916,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
617
  txt2vid_enhance_toggle,
618
  txt2vid_negative_prompt,
619
  txt2vid_frame_rate,
620
- *txt2vid_advanced[:3], # seed, inference_steps, guidance_scale
621
  txt2vid_current_height,
622
  txt2vid_current_width,
623
  txt2vid_current_num_frames,
@@ -653,7 +952,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
653
  img2vid_enhance_toggle,
654
  img2vid_negative_prompt,
655
  img2vid_frame_rate,
656
- *img2vid_advanced[:3], # seed, inference_steps, guidance_scale
657
  img2vid_current_height,
658
  img2vid_current_width,
659
  img2vid_current_num_frames,
@@ -664,6 +963,100 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
664
  queue=True,
665
  )
666
 
667
  if __name__ == "__main__":
668
  iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(
669
  share=True, show_api=False
 
55
  openai_api_key = os.getenv("OPENAI_API_KEY")
56
  client = OpenAI(api_key=openai_api_key)
57
 
58
+ # Initialize translation pipeline with device and clean_up settings
59
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
60
+ translator = pipeline(
61
+ "translation",
62
+ model="Helsinki-NLP/opus-mt-ko-en",
63
+ device=device,
64
+ clean_up_tokenization_spaces=True
65
+ )
66
 
67
  # Korean text detection function
68
  def contains_korean(text):
69
  korean_pattern = re.compile('[γ„±-γ…Žγ…-γ…£κ°€-힣]')
70
  return bool(korean_pattern.search(text))
71
 
72
+ def translate_korean_prompt(prompt, max_length=450):
73
  """
74
  Translate Korean prompt to English if Korean text is detected
75
+ Split long text into chunks if necessary
76
  """
77
+ if not contains_korean(prompt):
78
+ return prompt
79
+
80
+ # Split long text into chunks
81
+ def split_text(text, max_length):
82
+ words = text.split()
83
+ chunks = []
84
+ current_chunk = []
85
+ current_length = 0
86
+
87
+ for word in words:
88
+ if current_length + len(word) + 1 > max_length:
89
+ chunks.append(' '.join(current_chunk))
90
+ current_chunk = [word]
91
+ current_length = len(word)
92
+ else:
93
+ current_chunk.append(word)
94
+ current_length += len(word) + 1
95
+
96
+ if current_chunk:
97
+ chunks.append(' '.join(current_chunk))
98
+ return chunks
99
+
100
+ try:
101
+ if len(prompt) > max_length:
102
+ chunks = split_text(prompt, max_length)
103
+ translated_chunks = []
104
+
105
+ for chunk in chunks:
106
+ translated = translator(chunk, max_length=512)[0]['translation_text']
107
+ translated_chunks.append(translated)
108
+
109
+ final_translation = ' '.join(translated_chunks)
110
+ else:
111
+ final_translation = translator(prompt, max_length=512)[0]['translation_text']
112
+
113
  print(f"Original Korean prompt: {prompt}")
114
+ print(f"Translated English prompt: {final_translation}")
115
+ return final_translation
116
+
117
+ except Exception as e:
118
+ print(f"Translation error: {e}")
119
+ return prompt # Return original prompt if translation fails
120
 
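A minimal usage sketch for the chunked translation path added above (assumptions: the Helsinki-NLP/opus-mt-ko-en weights are downloadable and translator / translate_korean_prompt are defined as in this hunk; the sample prompts are illustrative):

    # Short prompt: Korean is detected and translated in a single pass.
    print(translate_korean_prompt("ν•΄λ³€μ—μ„œ λ›°μ–΄λ…ΈλŠ” 강아지"))

    # Prompt longer than max_length characters: split_text() breaks it into
    # whitespace-delimited chunks of at most 450 characters, each chunk is
    # translated with max_length=512, and the pieces are re-joined with spaces.
    long_prompt = " ".join(["λ°”λ‹€ μœ„λ‘œ 해가 λ– μ˜€λ₯΄λŠ” μž₯λ©΄"] * 40)
    print(translate_korean_prompt(long_prompt, max_length=450))

    # English input is returned unchanged because contains_korean() is False.
    print(translate_korean_prompt("a dog running on the beach"))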
121
  def enhance_prompt(prompt, type="t2v"):
122
  system_prompt = system_prompt_t2v if type == "t2v" else system_prompt_i2v
 
239
  vae=vae,
240
  ).to(device)
241
 
242
+ # State λ³€μˆ˜λ“€μ˜ μ΄ˆκΈ°ν™” μˆ˜μ •
243
+ txt2vid_current_height = gr.State(value=320)
244
+ txt2vid_current_width = gr.State(value=512)
245
+ txt2vid_current_num_frames = gr.State(value=257)
246
+
247
+ img2vid_current_height = gr.State(value=320)
248
+ img2vid_current_width = gr.State(value=512)
249
+ img2vid_current_num_frames = gr.State(value=257)
250
+
251
  # Preset options for resolution and frame configuration
252
  # Convert frames to seconds assuming 25 FPS
253
  preset_options = [
254
+ {"label": "[16:9 HD] 1216x704, 1.6초", "width": 1216, "height": 704, "num_frames": 41},
255
+ {"label": "[16:9] 1088x704, 2.0초", "width": 1088, "height": 704, "num_frames": 49},
256
+ {"label": "[16:9] 1056x640, 2.3초", "width": 1056, "height": 640, "num_frames": 57},
257
+ {"label": "[16:9] 992x608, 2.6초", "width": 992, "height": 608, "num_frames": 65},
258
+ {"label": "[16:9] 896x608, 2.9초", "width": 896, "height": 608, "num_frames": 73},
259
+ {"label": "[16:9] 896x544, 3.2초", "width": 896, "height": 544, "num_frames": 81},
260
+ {"label": "[16:9] 832x544, 3.6초", "width": 832, "height": 544, "num_frames": 89},
261
+ {"label": "[16:9] 800x512, 3.9초", "width": 800, "height": 512, "num_frames": 97},
262
+ {"label": "[16:9] 768x512, 3.9초", "width": 768, "height": 512, "num_frames": 97},
263
+ {"label": "[16:9] 800x480, 4.2초", "width": 800, "height": 480, "num_frames": 105},
264
+ {"label": "[16:9] 736x480, 4.5초", "width": 736, "height": 480, "num_frames": 113},
265
+ {"label": "[3:2] 704x480, 4.8초", "width": 704, "height": 480, "num_frames": 121},
266
+ {"label": "[16:9] 704x448, 5.2초", "width": 704, "height": 448, "num_frames": 129},
267
+ {"label": "[16:9] 672x448, 5.5초", "width": 672, "height": 448, "num_frames": 137},
268
+ {"label": "[16:9] 640x416, 6.1초", "width": 640, "height": 416, "num_frames": 153},
269
+ {"label": "[16:9] 672x384, 6.4초", "width": 672, "height": 384, "num_frames": 161},
270
+ {"label": "[16:9] 640x384, 6.8초", "width": 640, "height": 384, "num_frames": 169},
271
+ {"label": "[16:9] 608x384, 7.1초", "width": 608, "height": 384, "num_frames": 177},
272
+ {"label": "[16:9] 576x384, 7.4초", "width": 576, "height": 384, "num_frames": 185},
273
+ {"label": "[16:9] 608x352, 7.7초", "width": 608, "height": 352, "num_frames": 193},
274
+ {"label": "[16:9] 576x352, 8.0초", "width": 576, "height": 352, "num_frames": 201},
275
+ {"label": "[16:9] 544x352, 8.4초", "width": 544, "height": 352, "num_frames": 209},
276
+ {"label": "[3:2] 512x352, 9.3초", "width": 512, "height": 352, "num_frames": 233},
277
+ {"label": "[16:9] 544x320, 9.6초", "width": 544, "height": 320, "num_frames": 241},
278
+ {"label": "[16:9] 512x320, 10.3초", "width": 512, "height": 320, "num_frames": 257},
279
  ]
280
 
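The durations in the new labels track num_frames / 25 FPS rounded to one decimal (41 frames gives 1.6초, 257 frames gives 10.3초); a small self-check that rebuilds the numeric part of each label from its width/height/num_frames (the aspect-ratio tag is taken from the existing label, not recomputed):

    for p in preset_options:
        expected_suffix = f"{p['width']}x{p['height']}, {p['num_frames'] / 25:.1f}초"
        assert p["label"].endswith(expected_suffix), p["label"]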
281
  def preset_changed(preset):
282
+ selected = next(item for item in preset_options if item["label"] == preset)
283
+ return [
284
+ selected["height"],
285
+ selected["width"],
286
+ selected["num_frames"],
287
+ gr.update(visible=False),
288
+ gr.update(visible=False),
289
+ gr.update(visible=False),
290
+ ]
291
 
292
  def generate_video_from_text(
293
  prompt="",
 
297
  seed=171198,
298
  num_inference_steps=41,
299
  guidance_scale=4,
300
+ height=320,
301
+ width=512,
302
  num_frames=257,
303
  progress=gr.Progress(),
304
  ):
 
376
  negative_prompt="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
377
  frame_rate=25,
378
  seed=171198,
379
+ num_inference_steps=41,
380
  guidance_scale=4,
381
+ height=320,
382
+ width=512,
383
+ num_frames=257,
384
  progress=gr.Progress(),
385
  ):
386
  print("Height: ", height)
 
473
  minimum=1,
474
  maximum=50,
475
  step=1,
476
+ value=41,
477
  visible=False
478
  )
479
  guidance_scale = gr.Slider(
 
489
  minimum=256,
490
  maximum=1024,
491
  step=64,
492
+ value=320,
493
  visible=False,
494
  )
495
  width_slider = gr.Slider(
 
497
  minimum=256,
498
  maximum=1024,
499
  step=64,
500
+ value=512,
501
  visible=False,
502
  )
503
  num_frames_slider = gr.Slider(
 
505
  minimum=1,
506
  maximum=200,
507
  step=1,
508
+ value=257,
509
  visible=False,
510
  )
511
 
 
518
  num_frames_slider,
519
  ]
520
 
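The click handlers later in this diff pass *txt2vid_advanced[:3] (and *img2vid_advanced[:3]) for seed, inference steps and guidance scale, the ordering stated in the comment this commit removes, while height, width and num_frames now come from the gr.State values added above. A hedged guard for that ordering assumption, using the names from this hunk:

    # Assumed order of the list returned by create_advanced_options():
    # [seed, inference_steps, guidance_scale, height_slider, width_slider, num_frames_slider]
    txt2vid_advanced = create_advanced_options()
    assert len(txt2vid_advanced) == 6, "expected six advanced components"
    seed, inference_steps, guidance_scale = txt2vid_advanced[:3]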
521
+ system_prompt_scenario = """당신은 μ˜μƒ μŠ€ν¬λ¦½νŠΈμ— λ§žλŠ” λ°°κ²½ μ˜μƒμ„ μƒμ„±ν•˜κΈ° μœ„ν•œ ν”„λ‘¬ν”„νŠΈ μ „λ¬Έκ°€μž…λ‹ˆλ‹€.
522
+ 주어진 슀크립트의 λΆ„μœ„κΈ°μ™€ λ§₯락을 μ‹œκ°μ  배경으둜 ν‘œν˜„ν•˜λ˜, λ‹€μŒ 원칙을 λ°˜λ“œμ‹œ μ€€μˆ˜ν•˜μ„Έμš”:
523
+
524
+ 1. μ œν’ˆμ΄λ‚˜ μ„œλΉ„μŠ€λ₯Ό μ§μ ‘μ μœΌλ‘œ λ¬˜μ‚¬ν•˜μ§€ 말 것
525
+ 2. 슀크립트의 감성과 ν†€μ•€λ§€λ„ˆλ₯Ό ν‘œν˜„ν•˜λŠ” λ°°κ²½ μ˜μƒμ— 집쀑할 것
526
+ 3. 5개 μ„Ήμ…˜μ΄ ν•˜λ‚˜μ˜ μ΄μ•ΌκΈ°μ²˜λŸΌ μžμ—°μŠ€λŸ½κ²Œ μ—°κ²°λ˜λ„λ‘ ν•  것
527
+ 4. 좔상적이고 μ€μœ μ μΈ μ‹œκ° ν‘œν˜„μ„ ν™œμš©ν•  것
528
+
529
+ 각 μ„Ήμ…˜λ³„ ν”„λ‘¬ν”„νŠΈ μž‘μ„± κ°€μ΄λ“œ:
530
+ 1. λ°°κ²½ 및 ν•„μš”μ„±: 주제의 μ „λ°˜μ μΈ λΆ„μœ„κΈ°λ₯Ό ν‘œν˜„ν•˜λŠ” λ°°κ²½ 씬
531
+ 2. 문제 제기: κΈ΄μž₯κ°μ΄λ‚˜ κ°ˆλ“±μ„ μ•”μ‹œν•˜λŠ” λΆ„μœ„κΈ° μžˆλŠ” λ°°κ²½
532
+ 3. ν•΄κ²°μ±… μ œμ‹œ: 희망적이고 밝은 ν†€μ˜ λ°°κ²½ μ „ν™˜
533
+ 4. λ³Έλ‘ : μ•ˆμ •κ° 있고 신뒰도λ₯Ό λ†’μ΄λŠ” λ°°κ²½
534
+ 5. κ²°λ‘ : μž„νŒ©νŠΈ μžˆλŠ” 마무리λ₯Ό μœ„ν•œ 역동적인 λ°°κ²½
535
+
536
+ λͺ¨λ“  μ„Ήμ…˜μ΄ μΌκ΄€λœ μŠ€νƒ€μΌκ³Ό 톀을 μœ μ§€ν•˜λ©΄μ„œλ„ μžμ—°μŠ€λŸ½κ²Œ 이어지도둝 κ΅¬μ„±ν•˜μ„Έμš”.
537
+
538
+ 각 μ„Ήμ…˜μ˜ ν”„λ‘¬ν”„νŠΈ μž‘μ„±μ‹œ λ°˜λ“œμ‹œ λ‹€μŒ ꡬ쑰에 맞게 κ°œμ„ ν•΄μ£Όμ„Έμš”:
539
+ 1. μ£Όμš” λ™μž‘μ„ λͺ…ν™•ν•œ ν•œ λ¬Έμž₯으둜 μ‹œμž‘
540
+ 2. ꡬ체적인 λ™μž‘κ³Ό 제슀처λ₯Ό μ‹œκ°„ μˆœμ„œλŒ€λ‘œ μ„€λͺ…
541
+ 3. 캐릭터/객체의 μ™Έλͺ¨λ₯Ό μƒμ„Ένžˆ λ¬˜μ‚¬
542
+ 4. λ°°κ²½κ³Ό ν™˜κ²½ μ„ΈλΆ€ 사항을 ꡬ체적으둜 포함
543
+ 5. 카메라 각도와 μ›€μ§μž„μ„ λͺ…μ‹œ
544
+ 6. μ‘°λͺ…κ³Ό 색상을 μžμ„Ένžˆ μ„€λͺ…
545
+ 7. λ³€ν™”λ‚˜ κ°‘μž‘μŠ€λŸ¬μš΄ 사건을 μžμ—°μŠ€λŸ½κ²Œ 포함
546
+ λͺ¨λ“  μ„€λͺ…은 ν•˜λ‚˜μ˜ μžμ—°μŠ€λŸ¬μš΄ λ¬Έλ‹¨μœΌλ‘œ μž‘μ„±ν•˜κ³ ,
547
+ 촬영 감독이 촬영 λͺ©λ‘μ„ μ„€λͺ…ν•˜λŠ” κ²ƒμ²˜λŸΌ ꡬ체적이고 μ‹œκ°μ μœΌλ‘œ μž‘μ„±ν•˜μ„Έμš”.
548
+ 200단어λ₯Ό λ„˜μ§€ μ•Šλ„λ‘ ν•˜λ˜, μ΅œλŒ€ν•œ μƒμ„Έν•˜κ²Œ μž‘μ„±ν•˜μ„Έμš”.
549
+
550
+ """
551
+
552
+
553
+ def analyze_scenario(scenario):
554
+ """μ‹œλ‚˜λ¦¬μ˜€λ₯Ό λΆ„μ„ν•˜μ—¬ λ°°κ²½ μ˜μƒμš© ν”„λ‘¬ν”„νŠΈ 생성"""
555
+ messages = [
556
+ {"role": "system", "content": system_prompt_scenario},
557
+ {"role": "user", "content": f"""
558
+ λ‹€μŒ 슀크립트의 λΆ„μœ„κΈ°μ™€ 감성을 ν‘œν˜„ν•  수 μžˆλŠ” λ°°κ²½ μ˜μƒ ν”„λ‘¬ν”„νŠΈλ₯Ό μƒμ„±ν•΄μ£Όμ„Έμš”:
559
+
560
+ {scenario}
561
+
562
+ 각 μ„Ήμ…˜λ³„λ‘œ 직접적인 μ œν’ˆ λ¬˜μ‚¬λŠ” ν”Όν•˜κ³ , 슀크립트의 감성을 ν‘œν˜„ν•˜λŠ” λ°°κ²½ μ˜μƒμ— μ§‘μ€‘ν•΄μ£Όμ„Έμš”."""},
563
+ ]
564
+
565
+ try:
566
+ response = client.chat.completions.create(
567
+ model="gpt-4-1106-preview",
568
+ messages=messages,
569
+ max_tokens=2000,
570
+ )
571
+ prompts = response.choices[0].message.content.strip().split("\n\n")
572
+
573
+ # ν”„λ‘¬ν”„νŠΈ 처리 λ‘œμ§μ€ 동일
574
+ section_prompts = []
575
+ current_section = ""
576
+ for line in prompts:
577
+ if line.strip():
578
+ if any(section in line for section in ["1.", "2.", "3.", "4.", "5."]):
579
+ if current_section:
580
+ section_prompts.append(current_section)
581
+ current_section = line
582
+ else:
583
+ current_section += "\n" + line
584
+ if current_section:
585
+ section_prompts.append(current_section)
586
+
587
+ while len(section_prompts) < 5:
588
+ section_prompts.append("μΆ”κ°€ μ„Ήμ…˜μ΄ ν•„μš”ν•©λ‹ˆλ‹€.")
589
+ return section_prompts[:5]
590
+ except Exception as e:
591
+ print(f"Error during scenario analysis: {e}")
592
+ return ["Error occurred during analysis"] * 5
593
+
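A quick offline walk-through of the section-splitting loop above, on a hypothetical three-paragraph model reply (no API call involved); analyze_scenario() would then pad the result out to five entries:

    sample = ("1. λ°°κ²½: 고요한 μƒˆλ²½ λ„μ‹œ 전경\nλΆ€λ“œλŸ¬μš΄ μ•ˆκ°œ\n\n"
              "2. 문제 제기: μ–΄λ‘μš΄ 골λͺ©\n\n"
              "3. ν•΄κ²°μ±…: 밝아지λŠ” ν•˜λŠ˜")
    section_prompts = []
    current_section = ""
    for line in sample.strip().split("\n\n"):
        if line.strip():
            if any(section in line for section in ["1.", "2.", "3.", "4.", "5."]):
                if current_section:
                    section_prompts.append(current_section)
                current_section = line
            else:
                current_section += "\n" + line
    if current_section:
        section_prompts.append(current_section)
    print(section_prompts)  # three entries, one per numbered section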
594
+ def generate_section_video(prompt, preset, section_number=1, base_seed=171198, progress=gr.Progress()):
595
+ """각 μ„Ήμ…˜μ˜ λΉ„λ””μ˜€ 생성 - μ—λŸ¬ 처리 μΆ”κ°€"""
596
+ try:
597
+ if not prompt or len(prompt.strip()) < 50:
598
+ raise gr.Error("ν”„λ‘¬ν”„νŠΈλŠ” μ΅œμ†Œ 50자 이상이어야 ν•©λ‹ˆλ‹€.")
599
+
600
+ selected = next(item for item in preset_options if item["label"] == preset)
601
+ section_seed = base_seed + section_number
602
+
603
+ return generate_video_from_text(
604
+ prompt=prompt,
605
+ height=selected["height"],
606
+ width=selected["width"],
607
+ num_frames=selected["num_frames"],
608
+ seed=section_seed,
609
+ progress=progress
610
+ )
611
+ except Exception as e:
612
+ print(f"Error in section {section_number}: {e}")
613
+ raise gr.Error(f"μ„Ήμ…˜ {section_number} 생성 쀑 였λ₯˜: {str(e)}")
614
+
615
+
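A sketch of driving all five sections in one pass with the function above (assumes the five prompts are already filled in; generate_section_video offsets the seed by section_number internally, so sections stay deterministic but distinct):

    def generate_all_sections(prompts, preset, base_seed=171198):
        # Hypothetical batch helper: one clip per section prompt.
        return [
            generate_section_video(prompt, preset, section_number=i, base_seed=base_seed)
            for i, prompt in enumerate(prompts, start=1)
        ]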
616
+ # κ°œλ³„ μ„Ήμ…˜ ν”„λ‘¬ν”„νŠΈ 생성 ν•¨μˆ˜ μΆ”κ°€
617
+ def generate_single_section_prompt(scenario, section_number):
618
+ """κ°œλ³„ μ„Ήμ…˜μ— λŒ€ν•œ ν”„λ‘¬ν”„νŠΈ 생성"""
619
+ section_descriptions = {
620
+ 1: "λ°°κ²½ 및 ν•„μš”μ„±: 주제의 μ „λ°˜μ μΈ λΆ„μœ„κΈ°λ₯Ό ν‘œν˜„ν•˜λŠ” λ°°κ²½ 씬",
621
+ 2: "ν₯λ―Έ 유발: ν₯λ―Έλ₯Ό μœ λ°œν•˜κ³  κΈ°λŒ€κ°μ„ μ¦ν­μ‹œν‚€λŠ” λ°°κ²½",
622
+ 3: "ν•΄κ²°μ±… μ œμ‹œ: 희망적이고 밝은 ν†€μ˜ λ°°κ²½ μ „ν™˜",
623
+ 4: "λ³Έλ‘ : μ•ˆμ •κ° 있고 신뒰도λ₯Ό λ†’μ΄λŠ” λ°°κ²½",
624
+ 5: "κ²°λ‘ : μž„νŒ©νŠΈ μžˆλŠ” 마무리λ₯Ό μœ„ν•œ 역동적인 λ°°κ²½"
625
+ }
626
+
627
+ messages = [
628
+ {"role": "system", "content": system_prompt_scenario},
629
+ {"role": "user", "content": f"""
630
+ λ‹€μŒ 슀크립트의 {section_number}번째 μ„Ήμ…˜({section_descriptions[section_number]})에 λŒ€ν•œ
631
+ λ°°κ²½ μ˜μƒ ν”„λ‘¬ν”„νŠΈλ§Œμ„ μƒμ„±ν•΄μ£Όμ„Έμš”:
632
+
633
+ {scenario}
634
+
635
+ 직접적인 μ œν’ˆ λ¬˜μ‚¬λŠ” ν”Όν•˜κ³ , 슀크립트의 μ£Όμ œμ™€ 감성을 ν‘œν˜„ν•˜λŠ” 핡심 ν‚€μ›Œλ“œλ₯Ό λ°˜μ˜ν•œ λ°°κ²½ μ˜μƒμ— μ§‘μ€‘ν•΄μ£Όμ„Έμš”."""}
636
+ ]
637
+
638
+ try:
639
+ response = client.chat.completions.create(
640
+ model="gpt-4-1106-preview",
641
+ messages=messages,
642
+ max_tokens=500,
643
+ )
644
+ return response.choices[0].message.content.strip()
645
+ except Exception as e:
646
+ print(f"Error during prompt generation: {e}")
647
+ return "Error occurred during prompt generation"
648
+
649
+
650
+ # λΉ„λ””μ˜€ κ²°ν•© ν•¨μˆ˜ μΆ”κ°€
651
+ def combine_videos(video_paths, output_path):
652
+ """μ—¬λŸ¬ λΉ„λ””μ˜€λ₯Ό ν•˜λ‚˜λ‘œ κ²°ν•©"""
653
+ if not all(video_paths):
654
+ raise gr.Error("λͺ¨λ“  μ„Ήμ…˜μ˜ μ˜μƒμ΄ μƒμ„±λ˜μ–΄μ•Ό ν•©λ‹ˆλ‹€.")
655
+
656
+ try:
657
+ # 첫 번째 λΉ„λ””μ˜€μ˜ 속성 κ°€μ Έμ˜€κΈ°
658
+ cap = cv2.VideoCapture(video_paths[0])
659
+ fps = int(cap.get(cv2.CAP_PROP_FPS))
660
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
661
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
662
+ cap.release()
663
+
664
+ # 좜λ ₯ λΉ„λ””μ˜€ μ„€μ •
665
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
666
+ out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
667
+
668
+ # 각 λΉ„λ””μ˜€ 순차적으둜 κ²°ν•©
669
+ for video_path in video_paths:
670
+ if video_path and os.path.exists(video_path):
671
+ cap = cv2.VideoCapture(video_path)
672
+ while True:
673
+ ret, frame = cap.read()
674
+ if not ret:
675
+ break
676
+ out.write(frame)
677
+ cap.release()
678
+
679
+ out.release()
680
+ return output_path
681
+ except Exception as e:
682
+ raise gr.Error(f"λΉ„λ””μ˜€ κ²°ν•© 쀑 였λ₯˜ λ°œμƒ: {e}")
683
+
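combine_videos() above sizes the VideoWriter from the first clip, so a section rendered at a different preset would be written with mismatched dimensions; a hedged variant of the inner read/write loop that resizes stray frames first (same cv2 API, audio is dropped either way):

    def append_frames(out, cap, width, height):
        # Copy every frame from cap into out, resizing to (width, height) when needed.
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame.shape[1] != width or frame.shape[0] != height:
                frame = cv2.resize(frame, (width, height))
            out.write(frame)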
684
+ def merge_section_videos(section1, section2, section3, section4, section5):
685
+ """μ„Ήμ…˜ λΉ„λ””μ˜€λ“€μ„ ν•˜λ‚˜λ‘œ κ²°ν•©"""
686
+ videos = [section1, section2, section3, section4, section5]
687
+
688
+ if not all(videos):
689
+ raise gr.Error("λͺ¨λ“  μ„Ήμ…˜μ˜ μ˜μƒμ΄ λ¨Όμ € μƒμ„±λ˜μ–΄μ•Ό ν•©λ‹ˆλ‹€.")
690
+
691
+ output_path = tempfile.mktemp(suffix=".mp4")
692
+ return combine_videos(videos, output_path)
693
+
694
+
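merge_section_videos() builds its output path with tempfile.mktemp(), which the standard library documents as race-prone; if that matters here, a drop-in sketch using NamedTemporaryFile would be:

    import tempfile

    def make_output_path(suffix=".mp4"):
        # Create the file up front and return its path; the caller still owns cleanup.
        tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
        tmp.close()
        return tmp.name

output_path = make_output_path() would then replace the tempfile.mktemp(suffix=".mp4") call above.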
695
  # Gradio Interface Definition
696
  with gr.Blocks(theme=gr.themes.Soft()) as iface:
697
  with gr.Tabs():
 
719
  visible=False
720
  )
721
 
722
  txt2vid_preset = gr.Dropdown(
723
  choices=[p["label"] for p in preset_options],
724
+ value="[16:9] 512x320, 10.3초",
725
  label="Step 2: 해상도 프리셋 선택",
726
  )
727
 
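This commit renames every preset label (adding the [16:9] / [3:2] prefixes) and updates the Dropdown defaults here, in the img2vid tab and in the scenario tab to match; a small guard that would catch the default drifting away from the table again:

    preset_labels = [p["label"] for p in preset_options]
    assert "[16:9] 512x320, 10.3초" in preset_labels, "dropdown default is not a preset label"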
 
772
  visible=False
773
  )
774
 
775
  img2vid_preset = gr.Dropdown(
776
  choices=[p["label"] for p in preset_options],
777
+ value="[16:9] 512x320, 10.3초",
778
  label="Step 3: 해상도 프리셋 선택",
779
  )
780
 
 
797
  with gr.Column():
798
  img2vid_output = gr.Video(label="μƒμ„±λœ λΉ„λ””μ˜€")
799
 
800
+
801
+ # Scenario to Video Tab (Modified)
802
+ with gr.TabItem("μ‹œλ‚˜λ¦¬μ˜€λ‘œ λΉ„λ””μ˜€ λ§Œλ“€κΈ°(숏폼)"):
803
+ with gr.Row():
804
+ with gr.Column(scale=1):
805
+ scenario_input = gr.Textbox(
806
+ label="μ˜μƒ 슀크립트 μž…λ ₯",
807
+ placeholder="전체 μ‹œλ‚˜λ¦¬μ˜€λ₯Ό μž…λ ₯ν•˜μ„Έμš”...",
808
+ lines=10
809
+ )
810
+ scenario_preset = gr.Dropdown(
811
+ choices=[p["label"] for p in preset_options],
812
+ value="[16:9] 512x320, 10.3초",
813
+ label="ν™”λ©΄ 크기 선택"
814
+ )
815
+ analyze_btn = gr.Button("μ‹œλ‚˜λ¦¬μ˜€ 뢄석 및 ν”„λ‘¬ν”„νŠΈ 생성", variant="primary")
816
+
817
+ with gr.Column(scale=2):
818
+ with gr.Row():
819
+ # μ„Ήμ…˜ 1
820
+ with gr.Column():
821
+ section1_prompt = gr.Textbox(
822
+ label="1. λ°°κ²½ 및 ν•„μš”μ„±",
823
+ lines=4
824
+ )
825
+ with gr.Row():
826
+ section1_regenerate = gr.Button("πŸ”„ ν”„λ‘¬ν”„νŠΈ 생성")
827
+ section1_generate = gr.Button("πŸ”„ μ˜μƒ 생성")
828
+ section1_video = gr.Video(label="μ„Ήμ…˜ 1 μ˜μƒ")
829
+
830
+ # μ„Ήμ…˜ 2
831
+ with gr.Column():
832
+ section2_prompt = gr.Textbox(
833
+ label="2. ν₯λ―Έ 유발",
834
+ lines=4
835
+ )
836
+ with gr.Row():
837
+ section2_regenerate = gr.Button("πŸ”„ ν”„λ‘¬ν”„νŠΈ 생성")
838
+ section2_generate = gr.Button("πŸ”„ μ˜μƒ 생성")
839
+ section2_video = gr.Video(label="μ„Ήμ…˜ 2 μ˜μƒ")
840
+
841
+ with gr.Row():
842
+ # μ„Ήμ…˜ 3
843
+ with gr.Column():
844
+ section3_prompt = gr.Textbox(
845
+ label="3. ν•΄κ²°μ±… μ œμ‹œ",
846
+ lines=4
847
+ )
848
+ with gr.Row():
849
+ section3_regenerate = gr.Button("πŸ”„ ν”„λ‘¬ν”„νŠΈ 생성")
850
+ section3_generate = gr.Button("πŸ”„ μ˜μƒ 생성")
851
+ section3_video = gr.Video(label="μ„Ήμ…˜ 3 μ˜μƒ")
852
+
853
+ # μ„Ήμ…˜ 4
854
+ with gr.Column():
855
+ section4_prompt = gr.Textbox(
856
+ label="4. λ³Έλ‘ ",
857
+ lines=4
858
+ )
859
+ with gr.Row():
860
+ section4_regenerate = gr.Button("πŸ”„ ν”„λ‘¬ν”„νŠΈ 생성")
861
+ section4_generate = gr.Button("πŸ”„ μ˜μƒ 생성")
862
+ section4_video = gr.Video(label="μ„Ήμ…˜ 4 μ˜μƒ")
863
+
864
+ with gr.Row():
865
+ # μ„Ήμ…˜ 5
866
+ with gr.Column():
867
+ section5_prompt = gr.Textbox(
868
+ label="5. κ²°λ‘  및 κ°•μ‘°",
869
+ lines=4
870
+ )
871
+ with gr.Row():
872
+ section5_regenerate = gr.Button("πŸ”„ ν”„λ‘¬ν”„νŠΈ 생성")
873
+ section5_generate = gr.Button("πŸ”„ μ˜μƒ 생성")
874
+ section5_video = gr.Video(label="μ„Ήμ…˜ 5 μ˜μƒ")
875
+
876
+
877
+
878
+ # 톡합 μ˜μƒ μ„Ήμ…˜ μΆ”κ°€
879
+ with gr.Row():
880
+ with gr.Column(scale=1):
881
+ # 기쑴의 scenario_inputκ³Ό analyze_btn μœ μ§€
882
+ merge_videos_btn = gr.Button("톡합 μ˜μƒ 생성", variant="primary", size="lg")
883
+
884
+ with gr.Column(scale=2):
885
+ # 기쑴의 μ„Ήμ…˜ 1-5 μœ μ§€
886
+
887
+ # 톡합 μ˜μƒ 좜λ ₯ μ„Ήμ…˜ μΆ”κ°€
888
+ with gr.Row():
889
+ merged_video_output = gr.Video(label="톡합 μ˜μƒ")
890
+
891
+
892
+
893
+
894
+ # Event handlers
895
  txt2vid_preset.change(
896
  fn=preset_changed,
897
  inputs=[txt2vid_preset],
 
916
  txt2vid_enhance_toggle,
917
  txt2vid_negative_prompt,
918
  txt2vid_frame_rate,
919
+ *txt2vid_advanced[:3],
920
  txt2vid_current_height,
921
  txt2vid_current_width,
922
  txt2vid_current_num_frames,
 
952
  img2vid_enhance_toggle,
953
  img2vid_negative_prompt,
954
  img2vid_frame_rate,
955
+ *img2vid_advanced[:3],
956
  img2vid_current_height,
957
  img2vid_current_width,
958
  img2vid_current_num_frames,
 
963
  queue=True,
964
  )
965
 
966
+ # Scenario tab event handlers
967
+ analyze_btn.click(
968
+ fn=analyze_scenario,
969
+ inputs=[scenario_input],
970
+ outputs=[
971
+ section1_prompt, section2_prompt, section3_prompt,
972
+ section4_prompt, section5_prompt
973
+ ]
974
+ )
975
+
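The per-section "πŸ”„ ν”„λ‘¬ν”„νŠΈ 생성" buttons (section1_regenerate through section5_regenerate) are created in the tab above, but no .click wiring for them appears in this diff; if they are meant to call generate_single_section_prompt, one plausible wiring would be:

    for idx, (btn, box) in enumerate(
        [(section1_regenerate, section1_prompt),
         (section2_regenerate, section2_prompt),
         (section3_regenerate, section3_prompt),
         (section4_regenerate, section4_prompt),
         (section5_regenerate, section5_prompt)],
        start=1,
    ):
        btn.click(
            fn=lambda scenario, n=idx: generate_single_section_prompt(scenario, n),
            inputs=[scenario_input],
            outputs=box,
        )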
976
+ # μ„Ήμ…˜ 생성 이벀트 ν•Έλ“€λŸ¬
977
+ section1_generate.click(
978
+ fn=generate_section_video,
979
+ inputs=[section1_prompt, scenario_preset],
980
+ outputs=section1_video,
981
+ api_name=f"generate_section1"
982
+ )
983
+
984
+ section2_generate.click(
985
+ fn=lambda p, pr: generate_section_video(p, pr, 2),
986
+ inputs=[section2_prompt, scenario_preset],
987
+ outputs=section2_video,
988
+ api_name=f"generate_section2"
989
+ )
990
+
991
+ section3_generate.click(
992
+ fn=lambda p, pr: generate_section_video(p, pr, 3),
993
+ inputs=[section3_prompt, scenario_preset],
994
+ outputs=section3_video,
995
+ api_name=f"generate_section3"
996
+ )
997
+
998
+ section4_generate.click(
999
+ fn=lambda p, pr: generate_section_video(p, pr, 4),
1000
+ inputs=[section4_prompt, scenario_preset],
1001
+ outputs=section4_video,
1002
+ api_name=f"generate_section4"
1003
+ )
1004
+
1005
+ section5_generate.click(
1006
+ fn=lambda p, pr: generate_section_video(p, pr, 5),
1007
+ inputs=[section5_prompt, scenario_preset],
1008
+ outputs=section5_video,
1009
+ api_name=f"generate_section5"
1010
+ )
1011
+
1012
+
1013
+
1014
+ # μ„Ήμ…˜ 생성 이벀트 ν•Έλ“€λŸ¬
1015
+ section1_generate.click(
1016
+ fn=lambda p, pr: generate_section_video(p, pr, 1),
1017
+ inputs=[section1_prompt, scenario_preset],
1018
+ outputs=section1_video
1019
+ )
1020
+
1021
+ section2_generate.click(
1022
+ fn=lambda p, pr: generate_section_video(p, pr, 2),
1023
+ inputs=[section2_prompt, scenario_preset],
1024
+ outputs=section2_video
1025
+ )
1026
+
1027
+ section3_generate.click(
1028
+ fn=lambda p, pr: generate_section_video(p, pr, 3),
1029
+ inputs=[section3_prompt, scenario_preset],
1030
+ outputs=section3_video
1031
+ )
1032
+
1033
+ section4_generate.click(
1034
+ fn=lambda p, pr: generate_section_video(p, pr, 4),
1035
+ inputs=[section4_prompt, scenario_preset],
1036
+ outputs=section4_video
1037
+ )
1038
+
1039
+ section5_generate.click(
1040
+ fn=lambda p, pr: generate_section_video(p, pr, 5),
1041
+ inputs=[section5_prompt, scenario_preset],
1042
+ outputs=section5_video
1043
+ )
1044
+
1045
+
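As committed, each sectionN_generate button is registered twice (once in the block with api_name=..., once in the block just above), so one click will trigger both handlers; if a single run per click is the intent, a consolidated registration could look like:

    for n, (btn, prompt_box, video_out) in enumerate(
        [(section1_generate, section1_prompt, section1_video),
         (section2_generate, section2_prompt, section2_video),
         (section3_generate, section3_prompt, section3_video),
         (section4_generate, section4_prompt, section4_video),
         (section5_generate, section5_prompt, section5_video)],
        start=1,
    ):
        btn.click(
            fn=lambda p, pr, n=n: generate_section_video(p, pr, n),
            inputs=[prompt_box, scenario_preset],
            outputs=video_out,
            api_name=f"generate_section{n}",
        )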
1046
+ # 이벀트 ν•Έλ“€λŸ¬ μΆ”κ°€
1047
+ merge_videos_btn.click(
1048
+ fn=merge_section_videos,
1049
+ inputs=[
1050
+ section1_video,
1051
+ section2_video,
1052
+ section3_video,
1053
+ section4_video,
1054
+ section5_video
1055
+ ],
1056
+ outputs=merged_video_output
1057
+ )
1058
+
1059
+
1060
  if __name__ == "__main__":
1061
  iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(
1062
  share=True, show_api=False