Spaces:
Running
on
T4
Running
on
T4
Update app.py
Browse files
app.py
CHANGED
@@ -18,7 +18,9 @@ except ImportError:
|
|
18 |
import yt_dlp # Added import for yt-dlp
|
19 |
|
20 |
MODEL_NAME = "NbAiLab/nb-whisper-large"
|
21 |
-
|
|
|
|
|
22 |
|
23 |
share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
|
24 |
auth_token = os.environ.get("AUTH_TOKEN") or True
|
@@ -26,7 +28,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
|
26 |
print(f"Bruker enhet: {device}")
|
27 |
|
28 |
@spaces.GPU(duration=60 * 2)
|
29 |
-
def pipe(file, return_timestamps=False,
|
30 |
asr = pipeline(
|
31 |
task="automatic-speech-recognition",
|
32 |
model=MODEL_NAME,
|
@@ -44,20 +46,24 @@ def pipe(file, return_timestamps=False, lang="no"):
|
|
44 |
return asr(file, return_timestamps=return_timestamps, batch_size=24, generate_kwargs={'task': 'transcribe', 'language': lang})
|
45 |
|
46 |
def format_output(text):
|
|
|
|
|
|
|
47 |
text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '<br>', text)
|
48 |
return text
|
49 |
|
50 |
-
def transcribe(file, return_timestamps=False,
|
51 |
|
52 |
waveform, sample_rate = torchaudio.load(file)
|
53 |
audio_duration = waveform.size(1) / sample_rate
|
54 |
-
warning_message
|
55 |
|
56 |
if audio_duration > max_audio_length:
|
57 |
warning_message = (
|
58 |
"<b style='color:red;'>⚠️ Advarsel:</b> "
|
59 |
"Lydfilen er lengre enn 30 minutter. Kun de første 30 minuttene vil bli transkribert."
|
60 |
)
|
|
|
61 |
waveform = waveform[:, :int(max_audio_length * sample_rate)]
|
62 |
truncated_file = "truncated_audio.wav"
|
63 |
torchaudio.save(truncated_file, waveform, sample_rate)
|
@@ -67,6 +73,7 @@ def transcribe(file, return_timestamps=False, lang_nn=False):
|
|
67 |
file_to_transcribe = file
|
68 |
truncated = False
|
69 |
|
|
|
70 |
if not lang_nn:
|
71 |
if not return_timestamps:
|
72 |
text = pipe(file_to_transcribe)["text"]
|
@@ -82,10 +89,10 @@ def transcribe(file, return_timestamps=False, lang_nn=False):
|
|
82 |
formatted_text = "<br>".join(text)
|
83 |
else:
|
84 |
if not return_timestamps:
|
85 |
-
text = pipe(file_to_transcribe,
|
86 |
formatted_text = format_output(text)
|
87 |
else:
|
88 |
-
chunks = pipe(file_to_transcribe, return_timestamps=True,
|
89 |
text = []
|
90 |
for chunk in chunks:
|
91 |
start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
|
@@ -146,47 +153,57 @@ def yt_transcribe(yt_url, return_timestamps=False):
|
|
146 |
demo = gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.red))
|
147 |
|
148 |
with demo:
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
gr.Markdown(
|
155 |
"""
|
156 |
-
<h1 style="font-size: 3em;
|
157 |
-
"""
|
158 |
)
|
159 |
|
160 |
-
# Description moved to the bottom
|
161 |
-
with gr.Row():
|
162 |
-
gr.Markdown(
|
163 |
-
"""
|
164 |
-
Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk!
|
165 |
-
Demoen bruker den fintunede modellen NbAiLab/nb-whisper-large og 🤗 Transformers til å transkribere lydfiler opp til 30 minutter.
|
166 |
-
"""
|
167 |
-
)
|
168 |
-
|
169 |
mf_transcribe = gr.Interface(
|
170 |
fn=transcribe,
|
171 |
inputs=[
|
172 |
gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
|
173 |
-
gr.components.Checkbox(label="Inkluder tidskoder"),
|
174 |
gr.components.Checkbox(label="Nynorsk"),
|
175 |
],
|
176 |
|
177 |
outputs=[
|
178 |
gr.HTML(label="Varsel"),
|
179 |
gr.HTML(label="text"),
|
180 |
-
gr.File(label="Last ned transkripsjon")
|
181 |
],
|
|
|
|
|
182 |
description=(
|
183 |
-
"
|
184 |
-
"
|
185 |
),
|
186 |
allow_flagging="never",
|
|
|
187 |
)
|
188 |
|
189 |
-
#
|
190 |
-
|
191 |
-
|
192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
import yt_dlp # Added import for yt-dlp
|
19 |
|
20 |
MODEL_NAME = "NbAiLab/nb-whisper-large"
|
21 |
+
#lang = "no"
|
22 |
+
|
23 |
+
max_audio_length= 30 * 60
|
24 |
|
25 |
# SHARE counts as enabled when its first character is t, y or 1 (case-insensitive),
# e.g. "true", "Yes", "1". Slicing with [:1] instead of indexing with [0] keeps an
# empty SHARE value from raising IndexError, and tuple membership is used because
# the empty string is a substring of every string. A disabled flag becomes None,
# not False.
share = (os.environ.get("SHARE", "False")[:1].lower() in ("t", "y", "1")) or None
|
26 |
auth_token = os.environ.get("AUTH_TOKEN") or True
|
|
|
28 |
print(f"Bruker enhet: {device}")
|
29 |
|
30 |
@spaces.GPU(duration=60 * 2)
|
31 |
+
def pipe(file, return_timestamps=False,lang="no"):
|
32 |
asr = pipeline(
|
33 |
task="automatic-speech-recognition",
|
34 |
model=MODEL_NAME,
|
|
|
46 |
return asr(file, return_timestamps=return_timestamps, batch_size=24, generate_kwargs={'task': 'transcribe', 'language': lang})
|
47 |
|
48 |
def format_output(text):
    """Insert an HTML ``<br>`` after each sentence-ending punctuation mark.

    A break is added after ``.``, ``!``, ``:``, ``?`` or an ellipsis of three
    or more dots, but only when the mark (plus any closing quote or bracket
    directly after it) is followed by whitespace or the end of the text.
    Punctuation inside a token, such as ``12.30`` or ``12:30``, is therefore
    left intact, and a run like ``?!`` yields a single break.

    Args:
        text: Plain transcription text.

    Returns:
        The text with ``<br>`` inserted after each sentence end.
    """
    return re.sub(
        # Punctuation, then optional closing quotes/brackets, then a lookahead
        # so that the following whitespace itself is not consumed.
        r'(?:\.{3,}|[.!:?])["\'»”)\]]*(?=\s|$)',
        lambda m: m.group() + '<br>',
        text,
    )
|
54 |
|
55 |
+
def transcribe(file, return_timestamps=False,lang_nn=False):
|
56 |
|
57 |
waveform, sample_rate = torchaudio.load(file)
|
58 |
audio_duration = waveform.size(1) / sample_rate
|
59 |
+
warning_message=None
|
60 |
|
61 |
if audio_duration > max_audio_length:
|
62 |
warning_message = (
|
63 |
"<b style='color:red;'>⚠️ Advarsel:</b> "
|
64 |
"Lydfilen er lengre enn 30 minutter. Kun de første 30 minuttene vil bli transkribert."
|
65 |
)
|
66 |
+
# Trim the waveform to the first 30 minutes
|
67 |
waveform = waveform[:, :int(max_audio_length * sample_rate)]
|
68 |
truncated_file = "truncated_audio.wav"
|
69 |
torchaudio.save(truncated_file, waveform, sample_rate)
|
|
|
73 |
file_to_transcribe = file
|
74 |
truncated = False
|
75 |
|
76 |
+
|
77 |
if not lang_nn:
|
78 |
if not return_timestamps:
|
79 |
text = pipe(file_to_transcribe)["text"]
|
|
|
89 |
formatted_text = "<br>".join(text)
|
90 |
else:
|
91 |
if not return_timestamps:
|
92 |
+
text = pipe(file_to_transcribe,lang="nn")["text"]
|
93 |
formatted_text = format_output(text)
|
94 |
else:
|
95 |
+
chunks = pipe(file_to_transcribe, return_timestamps=True,lang="nn")["chunks"]
|
96 |
text = []
|
97 |
for chunk in chunks:
|
98 |
start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
|
|
|
153 |
demo = gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.red))
|
154 |
|
155 |
with demo:
|
156 |
+
|
157 |
+
with gr.Column():
|
158 |
+
gr.HTML(f"<img src='file/Logonew.png' style='width:200px;'>")
|
159 |
+
with gr.Column(scale=8):
|
160 |
+
# Use Markdown for title and description
|
161 |
gr.Markdown(
|
162 |
"""
|
163 |
+
<h1 style="font-size: 3em;">NB-Whisper Demo</h1>
|
164 |
+
"""
|
165 |
)
|
166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
mf_transcribe = gr.Interface(
|
168 |
fn=transcribe,
|
169 |
inputs=[
|
170 |
gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
|
171 |
+
gr.components.Checkbox(label="Inkluder tidskoder"),
|
172 |
gr.components.Checkbox(label="Nynorsk"),
|
173 |
],
|
174 |
|
175 |
outputs=[
|
176 |
gr.HTML(label="Varsel"),
|
177 |
gr.HTML(label="text"),
|
178 |
+
gr.File(label="Last ned transkripsjon")
|
179 |
],
|
180 |
+
#outputs="text",
|
181 |
+
|
182 |
description=(
|
183 |
+
"Demoen bruker"
|
184 |
+
f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) til å transkribere lydfiler opp til 30 minutter."
|
185 |
),
|
186 |
allow_flagging="never",
|
187 |
+
#show_submit_button=False,
|
188 |
)
|
189 |
|
190 |
+
# Uncomment to add the YouTube transcription interface if needed
|
191 |
+
# yt_transcribe_interface = gr.Interface(
|
192 |
+
# fn=yt_transcribe,
|
193 |
+
# inputs=[
|
194 |
+
# gr.components.Textbox(lines=1, placeholder="Lim inn URL til en YouTube-video her", label="YouTube URL"),
|
195 |
+
# gr.components.Checkbox(label="Inkluder tidsstempler"),
|
196 |
+
# ],
|
197 |
+
# examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
|
198 |
+
# outputs=["html", "text"],
|
199 |
+
# title="Whisper Demo: Transkriber YouTube",
|
200 |
+
# description=(
|
201 |
+
# "Transkriber lange YouTube-videoer med et enkelt klikk! Demoen bruker den fintunede modellen:"
|
202 |
+
# f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler av"
|
203 |
+
# " vilkårlig lengde."
|
204 |
+
# ),
|
205 |
+
# allow_flagging="never",
|
206 |
+
# )
|
207 |
+
|
208 |
+
# Start demoen uten faner
|
209 |
+
# queue() must be called on the Blocks object before launch(): chaining it after
# launch() applies it to launch()'s return value instead, so the queue was never
# configured.
demo.queue().launch(share=share, show_api=False, allowed_paths=["Logonew.png"])
|