gdnartea committed on
Commit
0866bfe
·
verified ·
1 Parent(s): 4230f6b

Update app.py

Files changed (1)
  1. app.py +18 -128
app.py CHANGED
@@ -1,130 +1,20 @@
-import gradio as gr
-import json
-import librosa
-import os
-import soundfile as sf
-import tempfile
-import uuid
-
-import torch
-
-from nemo.collections.asr.models import ASRModel
-from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchMultiTaskAED
-from nemo.collections.asr.parts.utils.transcribe_utils import get_buffered_pred_feat_multitaskAED
-
-SAMPLE_RATE = 16000  # Hz
-MAX_AUDIO_MINUTES = 10  # won't try to transcribe if longer than this
-
-model = ASRModel.from_pretrained("nvidia/canary-1b")
-model.eval()
-
-# make sure beam size always 1 for consistency
-model.change_decoding_strategy(None)
-decoding_cfg = model.cfg.decoding
-decoding_cfg.beam.beam_size = 1
-model.change_decoding_strategy(decoding_cfg)
-
-# setup for buffered inference
-model.cfg.preprocessor.dither = 0.0
-model.cfg.preprocessor.pad_to = 0
-
-feature_stride = model.cfg.preprocessor['window_stride']
-model_stride_in_secs = feature_stride * 8  # 8 = model stride, which is 8 for FastConformer
-
-frame_asr = FrameBatchMultiTaskAED(
-    asr_model=model,
-    frame_len=40.0,
-    total_buffer=40.0,
-    batch_size=16,
 )
 
-amp_dtype = torch.float16
-
-
-def convert_audio(audio_filepath, tmpdir, utt_id):
-    """
-    Convert all files to monochannel 16 kHz wav files.
-    Do not convert and raise error if audio too long.
-    Returns output filename and duration.
-    """
-
-    data, sr = librosa.load(audio_filepath, sr=None, mono=True)
-
-    duration = librosa.get_duration(y=data, sr=sr)
-
-    if duration / 60.0 > MAX_AUDIO_MINUTES:
-        raise gr.Error(
-            f"This demo can transcribe up to {MAX_AUDIO_MINUTES} minutes of audio. "
-            "If you wish, you may trim the audio using the Audio viewer in Step 1 "
-            "(click on the scissors icon to start trimming audio)."
-        )
-
-    if sr != SAMPLE_RATE:
-        data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)
-
-    out_filename = os.path.join(tmpdir, utt_id + '.wav')
-
-    # save output audio
-    sf.write(out_filename, data, SAMPLE_RATE)
-
-    return out_filename, duration
-
-def transcribe(audio_filepath):
-
-    if audio_filepath is None:
-        raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
-
-    utt_id = uuid.uuid4()
-    with tempfile.TemporaryDirectory() as tmpdir:
-        converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
-
-        # make manifest file and save
-        manifest_data = {
-            "audio_filepath": converted_audio_filepath,
-            "source_lang": "en",
-            "target_lang": "en",
-            "taskname": "asr",
-            "pnc": "no",
-            "answer": "predict",
-            "duration": str(duration),
-        }
-
-        manifest_filepath = os.path.join(tmpdir, f'{utt_id}.json')
-
-        with open(manifest_filepath, 'w') as fout:
-            line = json.dumps(manifest_data)
-            fout.write(line + '\n')
-
-        # call transcribe, passing in manifest filepath
-        if duration < 40:
-            output_text = model.transcribe(manifest_filepath)[0]
-        else:  # do buffered inference
-            with torch.cuda.amp.autocast(dtype=amp_dtype):  # TODO: make it work if no cuda
-                with torch.no_grad():
-                    hyps = get_buffered_pred_feat_multitaskAED(
-                        frame_asr,
-                        model.cfg.preprocessor,
-                        model_stride_in_secs,
-                        model.device,
-                        manifest=manifest_filepath,
-                        filepaths=None,
-                    )
-
-            output_text = hyps[0].text
-
-    return output_text
-
-
-iface = gr.Interface(
-    fn=transcribe,
-    inputs=gr.Audio(sources="microphone", type="filepath"),
-    outputs="text")
-
-iface.queue()
-iface.launch()
-
-
-
-
-
-
+from transformers import pipeline
+from gradio import Interface, inputs, outputs
+
+# Initialize the text generation pipeline
+generator = pipeline('text-generation', model='microsoft/Phi-3-mini-4k-instruct-gguf')
+
+def generate_text(prompt):
+    # Generate text
+    output = generator(prompt, max_length=100)
+    return output[0]['generated_text']
+
+# Create a Gradio interface
+iface = Interface(
+    fn=generate_text,
+    inputs=inputs.Textbox(lines=5, placeholder="Enter your prompt here..."),
+    outputs=outputs.Textbox()
 )
 
+# Launch the interface
+iface.launch()
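Note that the new script as committed targets the Gradio 3.x API: the `gradio.inputs` and `gradio.outputs` modules it imports were deprecated in Gradio 3 and removed in 4.x, and `transformers.pipeline` generally cannot load a GGUF-only repository such as `microsoft/Phi-3-mini-4k-instruct-gguf` without extra handling. A minimal sketch of the same app against the current Gradio component API, assuming the standard `microsoft/Phi-3-mini-4k-instruct` checkpoint is substituted for the GGUF repo (an assumption, not what the commit ships):

import gradio as gr
from transformers import pipeline

# Assumption: load the regular (non-GGUF) Phi-3 checkpoint, since
# pipeline() cannot load a GGUF-only repo directly.
generator = pipeline(
    "text-generation",
    model="microsoft/Phi-3-mini-4k-instruct",
    trust_remote_code=True,  # may be required on older transformers releases
)

def generate_text(prompt):
    # max_new_tokens bounds only the generated continuation; max_length
    # also counts the prompt tokens, so long prompts can starve the output
    output = generator(prompt, max_new_tokens=100)
    return output[0]["generated_text"]

# In Gradio 4.x, components live directly on the gr namespace
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(),
)

iface.launch()

The sketch keeps the commit's structure (pipeline at import time, a single prompt-to-text function, one Textbox in and out) and only swaps the removed module-level components for their `gr.*` equivalents.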