KingNish committed on
Commit
ceea111
·
verified ·
1 Parent(s): 23a2ead

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -30
app.py CHANGED
@@ -32,7 +32,7 @@ pipe = pipeline(
32
  model=model,
33
  tokenizer=tokenizer,
34
  feature_extractor=processor.feature_extractor,
35
- max_new_tokens=25,
36
  torch_dtype=torch_dtype,
37
  device=device,
38
  )
@@ -43,36 +43,10 @@ def transcribe(inputs, previous_transcription):
43
  try:
44
  filename = f"{uuid.uuid4().hex}.wav"
45
  sample_rate, audio_data = inputs
46
-
47
- # Check the duration of the audio
48
- duration = len(audio_data) / sample_rate # Duration in seconds
49
-
50
- if duration > 5:
51
- # Split audio into chunks of 5 seconds
52
- chunk_size = 5 * sample_rate # Number of samples for 5 seconds
53
- num_chunks = int(np.ceil(len(audio_data) / chunk_size))
54
- transcriptions = []
55
 
56
- for i in range(num_chunks):
57
- start_index = i * chunk_size
58
- end_index = min(start_index + chunk_size, len(audio_data))
59
- chunk_data = audio_data[start_index:end_index]
60
-
61
- # Write chunk to a temporary file
62
- chunk_filename = f"{uuid.uuid4().hex}_chunk.wav"
63
- scipy.io.wavfile.write(chunk_filename, sample_rate, chunk_data)
64
-
65
- # Transcribe the chunk
66
- transcription = pipe(chunk_filename)["text"]
67
- transcriptions.append(transcription)
68
-
69
- # Combine all transcriptions
70
- previous_transcription += " ".join(transcriptions)
71
- else:
72
- # Write the original audio file if it's 5 seconds or less
73
- scipy.io.wavfile.write(filename, sample_rate, audio_data)
74
- transcription = pipe(filename)["text"]
75
- previous_transcription += transcription
76
 
77
  end_time = time.time()
78
  latency = end_time - start_time
 
32
  model=model,
33
  tokenizer=tokenizer,
34
  feature_extractor=processor.feature_extractor,
35
+ chunk_length_s=10,
36
  torch_dtype=torch_dtype,
37
  device=device,
38
  )
 
43
  try:
44
  filename = f"{uuid.uuid4().hex}.wav"
45
  sample_rate, audio_data = inputs
46
+ scipy.io.wavfile.write(filename, sample_rate, audio_data)
 
 
 
 
 
 
 
 
47
 
48
+ transcription = pipe(filename)["text"]
49
+ previous_transcription += transcription
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  end_time = time.time()
52
  latency = end_time - start_time