openfree committed on
Commit
e406956
·
verified ·
1 Parent(s): 6ee08fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -73,28 +73,26 @@ def process_sonic(image, audio, dynamic_scale):
73
  raise gr.Error("Please upload an image")
74
  if audio is None:
75
  raise gr.Error("Please upload an audio file")
76
-
77
- img_md5 = get_md5(np.array(image))
78
- audio_md5 = get_md5(audio[1])
79
- print(f"Processing with image hash: {img_md5}, audio hash: {audio_md5}")
80
-
81
  sampling_rate, arr = audio[:2]
82
  if len(arr.shape) == 1:
83
  arr = arr[:, None]
84
-
85
- # numpy array -> AudioSegment 변환
86
  audio_segment = AudioSegment(
87
  arr.tobytes(),
88
  frame_rate=sampling_rate,
89
  sample_width=arr.dtype.itemsize,
90
  channels=arr.shape[1]
91
  )
92
- audio_segment = audio_segment.set_frame_rate(sampling_rate)
93
-
94
- # 오디오 길이 제한 확인 (최대 60초)
95
- MAX_DURATION_MS = 60000 # 60초
 
 
 
96
  if len(audio_segment) > MAX_DURATION_MS:
97
- print(f"Audio longer than 60 seconds ({len(audio_segment)/1000:.2f}s). Truncating to 60 seconds.")
98
  audio_segment = audio_segment[:MAX_DURATION_MS]
99
 
100
  # 파일 경로 생성
 
73
  raise gr.Error("Please upload an image")
74
  if audio is None:
75
  raise gr.Error("Please upload an audio file")
76
+
77
+ # audio -> AudioSegment
 
 
 
78
  sampling_rate, arr = audio[:2]
79
  if len(arr.shape) == 1:
80
  arr = arr[:, None]
81
+
 
82
  audio_segment = AudioSegment(
83
  arr.tobytes(),
84
  frame_rate=sampling_rate,
85
  sample_width=arr.dtype.itemsize,
86
  channels=arr.shape[1]
87
  )
88
+
89
+ # (중요) Whisper 호환을 위해 mono/16kHz 변환
90
+ audio_segment = audio_segment.set_channels(1)
91
+ audio_segment = audio_segment.set_frame_rate(16000)
92
+
93
+ # 최대 60초 제한
94
+ MAX_DURATION_MS = 60000
95
  if len(audio_segment) > MAX_DURATION_MS:
 
96
  audio_segment = audio_segment[:MAX_DURATION_MS]
97
 
98
  # 파일 경로 생성