Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -70,9 +70,9 @@ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
|
|
| 70 |
# Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
|
| 71 |
# python summarize.py ./local/file_on_your/system --api_name <API_name>`
|
| 72 |
#
|
| 73 |
-
#
|
| 74 |
-
#
|
| 75 |
-
#
|
| 76 |
#
|
| 77 |
###
|
| 78 |
|
|
@@ -172,7 +172,7 @@ print(r"""
|
|
| 172 |
\__,_||_| \__,_||_| |_| \__| \_/\_/ \__,_| \__| \___||_| |_|
|
| 173 |
""")
|
| 174 |
|
| 175 |
-
|
| 176 |
# System Checks
|
| 177 |
#
|
| 178 |
#
|
|
@@ -234,12 +234,14 @@ def check_ffmpeg():
|
|
| 234 |
else:
|
| 235 |
logging.debug("ffmpeg not installed on the local system/in local PATH")
|
| 236 |
print(
|
| 237 |
-
"ffmpeg is not installed.\n\n You can either install it manually, or through your package manager of
|
|
|
|
| 238 |
if userOS == "Windows":
|
| 239 |
download_ffmpeg()
|
| 240 |
elif userOS == "Linux":
|
| 241 |
print(
|
| 242 |
-
"You should install ffmpeg using your platform's appropriate package manager, 'apt install ffmpeg',
|
|
|
|
| 243 |
else:
|
| 244 |
logging.debug("running an unsupported OS")
|
| 245 |
print("You're running an unsupported/Un-tested OS")
|
|
@@ -298,10 +300,10 @@ def download_ffmpeg():
|
|
| 298 |
|
| 299 |
#
|
| 300 |
#
|
| 301 |
-
|
| 302 |
|
| 303 |
|
| 304 |
-
|
| 305 |
# Processing Paths and local file handling
|
| 306 |
#
|
| 307 |
#
|
|
@@ -352,16 +354,17 @@ def process_local_file(file_path):
|
|
| 352 |
|
| 353 |
#
|
| 354 |
#
|
| 355 |
-
|
| 356 |
|
| 357 |
|
| 358 |
-
|
| 359 |
# Video Download/Handling
|
| 360 |
#
|
| 361 |
|
| 362 |
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
|
| 363 |
download_video, download_audio, chunk_size):
|
| 364 |
video_file_path = None
|
|
|
|
| 365 |
try:
|
| 366 |
results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
|
| 367 |
whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
|
|
@@ -534,10 +537,10 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
|
|
| 534 |
|
| 535 |
#
|
| 536 |
#
|
| 537 |
-
|
| 538 |
|
| 539 |
|
| 540 |
-
|
| 541 |
# Audio Transcription
|
| 542 |
#
|
| 543 |
# Convert video .m4a into .wav using ffmpeg
|
|
@@ -546,7 +549,13 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
|
|
| 546 |
#
|
| 547 |
|
| 548 |
# os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
|
| 549 |
-
def convert_to_wav(video_file_path, offset=0):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 550 |
print("Starting conversion process of .m4a to .WAV")
|
| 551 |
out_path = os.path.splitext(video_file_path)[0] + ".wav"
|
| 552 |
|
|
@@ -641,10 +650,10 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
|
|
| 641 |
|
| 642 |
#
|
| 643 |
#
|
| 644 |
-
|
| 645 |
|
| 646 |
|
| 647 |
-
|
| 648 |
# Diarization
|
| 649 |
#
|
| 650 |
# TODO: https://huggingface.co/pyannote/speaker-diarization-3.1
|
|
@@ -666,7 +675,8 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
|
|
| 666 |
# import tqdm
|
| 667 |
# import wave
|
| 668 |
#
|
| 669 |
-
#
|
|
|
|
| 670 |
#
|
| 671 |
#
|
| 672 |
# _,file_ending = os.path.splitext(f'{video_file_path}')
|
|
@@ -761,10 +771,10 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
|
|
| 761 |
# raise RuntimeError("Error Running inference with local model", e)
|
| 762 |
#
|
| 763 |
#
|
| 764 |
-
|
| 765 |
|
| 766 |
|
| 767 |
-
|
| 768 |
# Summarizers
|
| 769 |
#
|
| 770 |
#
|
|
@@ -1055,18 +1065,20 @@ def summarize_with_llama(api_url, file_path, token, custom_prompt):
|
|
| 1055 |
|
| 1056 |
|
| 1057 |
# https://lite.koboldai.net/koboldcpp_api#/api%2Fv1/post_api_v1_generate
|
| 1058 |
-
def summarize_with_kobold(
|
| 1059 |
try:
|
| 1060 |
logging.debug("kobold: Loading JSON data")
|
| 1061 |
-
with open(
|
| 1062 |
segments = json.load(file)
|
| 1063 |
|
| 1064 |
logging.debug(f"kobold: Extracting text from segments file")
|
| 1065 |
text = extract_text_from_segments(segments)
|
| 1066 |
|
|
|
|
| 1067 |
headers = {
|
| 1068 |
'accept': 'application/json',
|
| 1069 |
'content-type': 'application/json',
|
|
|
|
| 1070 |
}
|
| 1071 |
|
| 1072 |
kobold_prompt = f"{text} \n\n\n\n{custom_prompt}"
|
|
@@ -1082,7 +1094,7 @@ def summarize_with_kobold(api_url, file_path, custom_prompt):
|
|
| 1082 |
|
| 1083 |
logging.debug("kobold: Submitting request to API endpoint")
|
| 1084 |
print("kobold: Submitting request to API endpoint")
|
| 1085 |
-
response = requests.post(
|
| 1086 |
response_data = response.json()
|
| 1087 |
logging.debug("kobold: API Response Data: %s", response_data)
|
| 1088 |
|
|
@@ -1105,24 +1117,26 @@ def summarize_with_kobold(api_url, file_path, custom_prompt):
|
|
| 1105 |
|
| 1106 |
|
| 1107 |
# https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
|
| 1108 |
-
def summarize_with_oobabooga(
|
| 1109 |
try:
|
| 1110 |
logging.debug("ooba: Loading JSON data")
|
| 1111 |
-
with open(
|
| 1112 |
segments = json.load(file)
|
| 1113 |
|
| 1114 |
logging.debug(f"ooba: Extracting text from segments file\n\n\n")
|
| 1115 |
text = extract_text_from_segments(segments)
|
| 1116 |
logging.debug(f"ooba: Finished extracting text from segments file")
|
| 1117 |
|
|
|
|
| 1118 |
headers = {
|
| 1119 |
'accept': 'application/json',
|
| 1120 |
'content-type': 'application/json',
|
| 1121 |
}
|
| 1122 |
|
| 1123 |
-
# prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a French bakery baking cakes. It
|
| 1124 |
-
#
|
| 1125 |
-
|
|
|
|
| 1126 |
logging.debug("ooba: Prompt being sent is {ooba_prompt}")
|
| 1127 |
|
| 1128 |
data = {
|
|
@@ -1133,7 +1147,7 @@ def summarize_with_oobabooga(api_url, file_path, custom_prompt):
|
|
| 1133 |
|
| 1134 |
logging.debug("ooba: Submitting request to API endpoint")
|
| 1135 |
print("ooba: Submitting request to API endpoint")
|
| 1136 |
-
response = requests.post(
|
| 1137 |
logging.debug("ooba: API Response Data: %s", response)
|
| 1138 |
|
| 1139 |
if response.status_code == 200:
|
|
@@ -1161,28 +1175,28 @@ def save_summary_to_file(summary, file_path):
|
|
| 1161 |
|
| 1162 |
#
|
| 1163 |
#
|
| 1164 |
-
|
| 1165 |
|
| 1166 |
|
| 1167 |
-
|
| 1168 |
# Gradio UI
|
| 1169 |
#
|
| 1170 |
|
| 1171 |
# Only to be used when configured with Gradio for HF Space
|
| 1172 |
-
def summarize_with_huggingface(
|
| 1173 |
logging.debug(f"huggingface: Summarization process starting...")
|
| 1174 |
|
| 1175 |
model = "microsoft/Phi-3-mini-128k-instruct"
|
| 1176 |
API_URL = f"https://api-inference.huggingface.co/models/{model}"
|
| 1177 |
-
headers = {"Authorization": f"Bearer {
|
| 1178 |
|
| 1179 |
-
with open(
|
| 1180 |
segments = json.load(file)
|
| 1181 |
text = ''.join([segment['text'] for segment in segments])
|
| 1182 |
|
| 1183 |
# FIXME adjust max_length and min_length as needed
|
| 1184 |
data = {
|
| 1185 |
-
"inputs": text,
|
| 1186 |
"parameters": {"max_length": 4096, "min_length": 100}
|
| 1187 |
}
|
| 1188 |
|
|
@@ -1197,24 +1211,26 @@ def summarize_with_huggingface(api_key, file_path):
|
|
| 1197 |
response_data = response.json()
|
| 1198 |
wait_time = response_data.get('estimated_time', 10)
|
| 1199 |
return None, f"Model is loading, retrying in {int(wait_time)} seconds..."
|
|
|
|
| 1200 |
# Sleep before retrying....
|
| 1201 |
-
time.sleep(wait_time)
|
| 1202 |
|
| 1203 |
-
if
|
| 1204 |
-
api_key = os.
|
| 1205 |
-
logging.debug("HUGGINGFACE API KEY CHECK: " +
|
| 1206 |
try:
|
| 1207 |
logging.debug("huggingface: Loading json data for summarization")
|
| 1208 |
-
with open(
|
| 1209 |
segments = json.load(file)
|
| 1210 |
|
| 1211 |
logging.debug("huggingface: Extracting text from the segments")
|
| 1212 |
text = ' '.join([segment['text'] for segment in segments])
|
| 1213 |
|
| 1214 |
-
api_key = os.
|
| 1215 |
-
logging.debug("HUGGINGFACE API KEY CHECK #2: " +
|
| 1216 |
|
| 1217 |
logging.debug("huggingface: Submitting request...")
|
|
|
|
| 1218 |
response = requests.post(API_URL, headers=headers, json=data)
|
| 1219 |
|
| 1220 |
if response.status_code == 200:
|
|
@@ -1230,8 +1246,11 @@ def summarize_with_huggingface(api_key, file_path):
|
|
| 1230 |
print(f"Error occurred while processing summary with huggingface: {str(e)}")
|
| 1231 |
return None
|
| 1232 |
|
| 1233 |
-
|
| 1234 |
-
|
|
|
|
|
|
|
|
|
|
| 1235 |
|
| 1236 |
|
| 1237 |
def format_transcription(transcription_result):
|
|
@@ -1242,19 +1261,6 @@ def format_transcription(transcription_result):
|
|
| 1242 |
return ""
|
| 1243 |
|
| 1244 |
|
| 1245 |
-
def process_text(api_key, text_file):
|
| 1246 |
-
summary, message = summarize_with_huggingface(api_key, text_file)
|
| 1247 |
-
if summary:
|
| 1248 |
-
# Show summary on success
|
| 1249 |
-
return "Summary:", summary
|
| 1250 |
-
else:
|
| 1251 |
-
# Inform user about load/wait time
|
| 1252 |
-
return "Notice:", message
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
|
| 1258 |
def format_file_path(file_path):
|
| 1259 |
# Helper function to check file existence and return an appropriate path or message
|
| 1260 |
return file_path if file_path and os.path.exists(file_path) else None
|
|
@@ -1294,7 +1300,9 @@ def launch_ui(demo_mode=False):
|
|
| 1294 |
visible=False)
|
| 1295 |
custom_prompt_input = gr.Textbox(
|
| 1296 |
label="Custom Prompt (Customize your summary, or ask a different question)",
|
| 1297 |
-
placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the
|
|
|
|
|
|
|
| 1298 |
lines=3, visible=True)
|
| 1299 |
offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
|
| 1300 |
visible=False)
|
|
@@ -1347,8 +1355,9 @@ def launch_ui(demo_mode=False):
|
|
| 1347 |
fn=process_url,
|
| 1348 |
inputs=all_inputs,
|
| 1349 |
outputs=outputs,
|
| 1350 |
-
title="Video Transcription and Summarization",
|
| 1351 |
-
description="Submit a video URL for transcription and summarization. Ensure you input all necessary
|
|
|
|
| 1352 |
)
|
| 1353 |
|
| 1354 |
with gr.Tab("Transcription & Summarization History"):
|
|
@@ -1371,7 +1380,7 @@ def launch_ui(demo_mode=False):
|
|
| 1371 |
#
|
| 1372 |
|
| 1373 |
def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False,
|
| 1374 |
-
download_video_flag=False, demo_mode=False, custom_prompt=None):
|
| 1375 |
if input_path is None and args.user_interface:
|
| 1376 |
return []
|
| 1377 |
start_time = time.monotonic()
|
|
@@ -1385,7 +1394,10 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
| 1385 |
elif (info_dict := get_youtube(input_path)) and 'entries' in info_dict:
|
| 1386 |
logging.debug("MAIN: YouTube playlist detected")
|
| 1387 |
print(
|
| 1388 |
-
"\n\nSorry, but playlists aren't currently supported. You can run the following command to generate a
|
|
|
|
|
|
|
|
|
|
| 1389 |
return
|
| 1390 |
else:
|
| 1391 |
paths = [input_path]
|
|
@@ -1399,8 +1411,7 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
| 1399 |
if info_dict:
|
| 1400 |
logging.debug("MAIN: Creating path for video file...")
|
| 1401 |
download_path = create_download_directory(info_dict['title'])
|
| 1402 |
-
logging.debug("MAIN: Path created successfully")
|
| 1403 |
-
logging.debug("MAIN: Downloading video from yt_dlp...")
|
| 1404 |
try:
|
| 1405 |
video_path = download_video(path, download_path, info_dict, download_video_flag)
|
| 1406 |
except RuntimeError as e:
|
|
@@ -1431,6 +1442,17 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
| 1431 |
logging.info(f"Transcription complete: {audio_file}")
|
| 1432 |
|
| 1433 |
# Perform summarization based on the specified API
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1434 |
if api_name and api_key:
|
| 1435 |
logging.debug(f"MAIN: Summarization being performed by {api_name}")
|
| 1436 |
json_file_path = audio_file.replace('.wav', '.segments.json')
|
|
@@ -1441,6 +1463,15 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
| 1441 |
summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt)
|
| 1442 |
except requests.exceptions.ConnectionError:
|
| 1443 |
requests.status_code = "Connection: "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1444 |
elif api_name.lower() == "anthropic":
|
| 1445 |
anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', fallback=None)
|
| 1446 |
try:
|
|
@@ -1486,16 +1517,6 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
| 1486 |
summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt)
|
| 1487 |
except requests.exceptions.ConnectionError:
|
| 1488 |
requests.status_code = "Connection: "
|
| 1489 |
-
elif api_name.lower() == "huggingface":
|
| 1490 |
-
huggingface_api_key = os.environ.get(HF_TOKEN)
|
| 1491 |
-
if (huggingface_api_key is None):
|
| 1492 |
-
huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
|
| 1493 |
-
try:
|
| 1494 |
-
logging.debug(f"MAIN: Trying to summarize with huggingface")
|
| 1495 |
-
summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
|
| 1496 |
-
except requests.exceptions.ConnectionError:
|
| 1497 |
-
requests.status_code = "Connection: "
|
| 1498 |
-
|
| 1499 |
else:
|
| 1500 |
logging.warning(f"Unsupported API: {api_name}")
|
| 1501 |
summary = None
|
|
@@ -1507,10 +1528,11 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
| 1507 |
else:
|
| 1508 |
logging.warning(f"Failed to generate summary using {api_name} API")
|
| 1509 |
else:
|
| 1510 |
-
logging.info("No API specified. Summarization will not be performed")
|
| 1511 |
except Exception as e:
|
| 1512 |
logging.error(f"Error processing path: {path}")
|
| 1513 |
logging.error(str(e))
|
|
|
|
| 1514 |
# end_time = time.monotonic()
|
| 1515 |
# print("Total program execution time: " + timedelta(seconds=end_time - start_time))
|
| 1516 |
|
|
@@ -1522,6 +1544,7 @@ if __name__ == "__main__":
|
|
| 1522 |
parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?')
|
| 1523 |
parser.add_argument('-v', '--video', action='store_true', help='Download the video instead of just the audio')
|
| 1524 |
parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)')
|
|
|
|
| 1525 |
parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)')
|
| 1526 |
parser.add_argument('-wm', '--whisper_model', type=str, default='small.en',
|
| 1527 |
help='Whisper model (default: small.en)')
|
|
@@ -1575,7 +1598,7 @@ if __name__ == "__main__":
|
|
| 1575 |
logging.info(f'API: {args.api_name}')
|
| 1576 |
logging.info('Summarization will be performed.')
|
| 1577 |
else:
|
| 1578 |
-
logging.info('No API specified. Summarization will not be performed.')
|
| 1579 |
|
| 1580 |
logging.debug("Platform check being performed...")
|
| 1581 |
platform_check()
|
|
@@ -1590,7 +1613,7 @@ if __name__ == "__main__":
|
|
| 1590 |
try:
|
| 1591 |
results = main(args.input_path, api_name=args.api_name, api_key=args.api_key,
|
| 1592 |
num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset,
|
| 1593 |
-
vad_filter=args.vad_filter, download_video_flag=args.video)
|
| 1594 |
logging.info('Transcription process completed.')
|
| 1595 |
except Exception as e:
|
| 1596 |
logging.error('An error occurred during the transcription process.')
|
|
|
|
| 70 |
# Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
|
| 71 |
# python summarize.py ./local/file_on_your/system --api_name <API_name>`
|
| 72 |
#
|
| 73 |
+
# Run it as a WebApp: `python summarize.py -gui` - This requires you to either stuff your API keys into the
|
| 74 |
+
# `config.txt` file, or pass them into the app every time you want to use it. Can be helpful when you want to set up a shared
|
| 75 |
+
# instance but don't want people to perform inference on your server.
|
| 76 |
#
|
| 77 |
###
|
| 78 |
|
|
|
|
| 172 |
\__,_||_| \__,_||_| |_| \__| \_/\_/ \__,_| \__| \___||_| |_|
|
| 173 |
""")
|
| 174 |
|
| 175 |
+
#######################################################################################################################
|
| 176 |
# System Checks
|
| 177 |
#
|
| 178 |
#
|
|
|
|
| 234 |
else:
|
| 235 |
logging.debug("ffmpeg not installed on the local system/in local PATH")
|
| 236 |
print(
|
| 237 |
+
"ffmpeg is not installed.\n\n You can either install it manually, or through your package manager of "
|
| 238 |
+
"choice.\n Windows users, builds are here: https://www.gyan.dev/ffmpeg/builds/")
|
| 239 |
if userOS == "Windows":
|
| 240 |
download_ffmpeg()
|
| 241 |
elif userOS == "Linux":
|
| 242 |
print(
|
| 243 |
+
"You should install ffmpeg using your platform's appropriate package manager, 'apt install ffmpeg',"
|
| 244 |
+
"'dnf install ffmpeg' or 'pacman', etc.")
|
| 245 |
else:
|
| 246 |
logging.debug("running an unsupported OS")
|
| 247 |
print("You're running an unsupported/Un-tested OS")
|
|
|
|
| 300 |
|
| 301 |
#
|
| 302 |
#
|
| 303 |
+
########################################################################################################################
|
| 304 |
|
| 305 |
|
| 306 |
+
#######################################################################################################################
|
| 307 |
# Processing Paths and local file handling
|
| 308 |
#
|
| 309 |
#
|
|
|
|
| 354 |
|
| 355 |
#
|
| 356 |
#
|
| 357 |
+
########################################################################################################################
|
| 358 |
|
| 359 |
|
| 360 |
+
#######################################################################################################################
|
| 361 |
# Video Download/Handling
|
| 362 |
#
|
| 363 |
|
| 364 |
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
|
| 365 |
download_video, download_audio, chunk_size):
|
| 366 |
video_file_path = None
|
| 367 |
+
print("API Name received:", api_name) # Debugging line
|
| 368 |
try:
|
| 369 |
results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
|
| 370 |
whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
|
|
|
|
| 537 |
|
| 538 |
#
|
| 539 |
#
|
| 540 |
+
#######################################################################################################################
|
| 541 |
|
| 542 |
|
| 543 |
+
######################################################################################################################
|
| 544 |
# Audio Transcription
|
| 545 |
#
|
| 546 |
# Convert video .m4a into .wav using ffmpeg
|
|
|
|
| 549 |
#
|
| 550 |
|
| 551 |
# os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
|
| 552 |
+
def convert_to_wav(video_file_path, offset=0, overwrite=False):
|
| 553 |
+
out_path = os.path.splitext(video_file_path)[0] + ".wav"
|
| 554 |
+
|
| 555 |
+
if os.path.exists(out_path) and not overwrite:
|
| 556 |
+
print(f"File '{out_path}' already exists. Skipping conversion.")
|
| 557 |
+
logging.info(f"Skipping conversion as file already exists: {out_path}")
|
| 558 |
+
return out_path
|
| 559 |
print("Starting conversion process of .m4a to .WAV")
|
| 560 |
out_path = os.path.splitext(video_file_path)[0] + ".wav"
|
| 561 |
|
|
|
|
| 650 |
|
| 651 |
#
|
| 652 |
#
|
| 653 |
+
######################################################################################################################
|
| 654 |
|
| 655 |
|
| 656 |
+
#######################################################################################################################
|
| 657 |
# Diarization
|
| 658 |
#
|
| 659 |
# TODO: https://huggingface.co/pyannote/speaker-diarization-3.1
|
|
|
|
| 675 |
# import tqdm
|
| 676 |
# import wave
|
| 677 |
#
|
| 678 |
+
# embedding_model = PretrainedSpeakerEmbedding( embedding_model, device=torch.device("cuda" if
|
| 679 |
+
# torch.cuda.is_available() else "cpu"))
|
| 680 |
#
|
| 681 |
#
|
| 682 |
# _,file_ending = os.path.splitext(f'{video_file_path}')
|
|
|
|
| 771 |
# raise RuntimeError("Error Running inference with local model", e)
|
| 772 |
#
|
| 773 |
#
|
| 774 |
+
######################################################################################################################
|
| 775 |
|
| 776 |
|
| 777 |
+
#######################################################################################################################
|
| 778 |
# Summarizers
|
| 779 |
#
|
| 780 |
#
|
|
|
|
| 1065 |
|
| 1066 |
|
| 1067 |
# https://lite.koboldai.net/koboldcpp_api#/api%2Fv1/post_api_v1_generate
|
| 1068 |
+
def summarize_with_kobold(kobold_ip, json_file_path, kobold_token, custom_prompt):
|
| 1069 |
try:
|
| 1070 |
logging.debug("kobold: Loading JSON data")
|
| 1071 |
+
with open(json_file_path, 'r') as file:
|
| 1072 |
segments = json.load(file)
|
| 1073 |
|
| 1074 |
logging.debug(f"kobold: Extracting text from segments file")
|
| 1075 |
text = extract_text_from_segments(segments)
|
| 1076 |
|
| 1077 |
+
# FIXME - API Key generated from copilot...kobold.cpp doesn't mention the header for it either...
|
| 1078 |
headers = {
|
| 1079 |
'accept': 'application/json',
|
| 1080 |
'content-type': 'application/json',
|
| 1081 |
+
'X_API_KEY': kobold_token
|
| 1082 |
}
|
| 1083 |
|
| 1084 |
kobold_prompt = f"{text} \n\n\n\n{custom_prompt}"
|
|
|
|
| 1094 |
|
| 1095 |
logging.debug("kobold: Submitting request to API endpoint")
|
| 1096 |
print("kobold: Submitting request to API endpoint")
|
| 1097 |
+
response = requests.post(kobold_ip, headers=headers, json=data)
|
| 1098 |
response_data = response.json()
|
| 1099 |
logging.debug("kobold: API Response Data: %s", response_data)
|
| 1100 |
|
|
|
|
| 1117 |
|
| 1118 |
|
| 1119 |
# https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
|
| 1120 |
+
def summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt):
|
| 1121 |
try:
|
| 1122 |
logging.debug("ooba: Loading JSON data")
|
| 1123 |
+
with open(json_file_path, 'r') as file:
|
| 1124 |
segments = json.load(file)
|
| 1125 |
|
| 1126 |
logging.debug(f"ooba: Extracting text from segments file\n\n\n")
|
| 1127 |
text = extract_text_from_segments(segments)
|
| 1128 |
logging.debug(f"ooba: Finished extracting text from segments file")
|
| 1129 |
|
| 1130 |
+
# FIXME - Add headers for ooba auth
|
| 1131 |
headers = {
|
| 1132 |
'accept': 'application/json',
|
| 1133 |
'content-type': 'application/json',
|
| 1134 |
}
|
| 1135 |
|
| 1136 |
+
# prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a French bakery baking cakes. It
|
| 1137 |
+
# is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are
|
| 1138 |
+
# my favorite." prompt_text += f"\n\n{text}" # Uncomment this line if you want to include the text variable
|
| 1139 |
+
ooba_prompt = f"{text}\n\n\n\n{custom_prompt}"
|
| 1140 |
logging.debug("ooba: Prompt being sent is {ooba_prompt}")
|
| 1141 |
|
| 1142 |
data = {
|
|
|
|
| 1147 |
|
| 1148 |
logging.debug("ooba: Submitting request to API endpoint")
|
| 1149 |
print("ooba: Submitting request to API endpoint")
|
| 1150 |
+
response = requests.post(ooba_ip, headers=headers, json=data, verify=False)
|
| 1151 |
logging.debug("ooba: API Response Data: %s", response)
|
| 1152 |
|
| 1153 |
if response.status_code == 200:
|
|
|
|
| 1175 |
|
| 1176 |
#
|
| 1177 |
#
|
| 1178 |
+
########################################################################################################################
|
| 1179 |
|
| 1180 |
|
| 1181 |
+
#######################################################################################################################
|
| 1182 |
# Gradio UI
|
| 1183 |
#
|
| 1184 |
|
| 1185 |
# Only to be used when configured with Gradio for HF Space
|
| 1186 |
+
def summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt):
|
| 1187 |
logging.debug(f"huggingface: Summarization process starting...")
|
| 1188 |
|
| 1189 |
model = "microsoft/Phi-3-mini-128k-instruct"
|
| 1190 |
API_URL = f"https://api-inference.huggingface.co/models/{model}"
|
| 1191 |
+
headers = {"Authorization": f"Bearer {huggingface_api_key}"}
|
| 1192 |
|
| 1193 |
+
with open(json_file_path, 'r') as file:
|
| 1194 |
segments = json.load(file)
|
| 1195 |
text = ''.join([segment['text'] for segment in segments])
|
| 1196 |
|
| 1197 |
# FIXME adjust max_length and min_length as needed
|
| 1198 |
data = {
|
| 1199 |
+
"inputs": text + "\n\n\n\n" + custom_prompt,
|
| 1200 |
"parameters": {"max_length": 4096, "min_length": 100}
|
| 1201 |
}
|
| 1202 |
|
|
|
|
| 1211 |
response_data = response.json()
|
| 1212 |
wait_time = response_data.get('estimated_time', 10)
|
| 1213 |
return None, f"Model is loading, retrying in {int(wait_time)} seconds..."
|
| 1214 |
+
# FIXME : This is a hack, should be done better
|
| 1215 |
# Sleep before retrying....
|
| 1216 |
+
# time.sleep(wait_time)
|
| 1217 |
|
| 1218 |
+
if huggingface_api_key == "":
|
| 1219 |
+
api_key = os.getenv(HF_TOKEN)
|
| 1220 |
+
logging.debug("HUGGINGFACE API KEY CHECK: " + huggingface_api_key)
|
| 1221 |
try:
|
| 1222 |
logging.debug("huggingface: Loading json data for summarization")
|
| 1223 |
+
with open(json_file_path, 'r') as file:
|
| 1224 |
segments = json.load(file)
|
| 1225 |
|
| 1226 |
logging.debug("huggingface: Extracting text from the segments")
|
| 1227 |
text = ' '.join([segment['text'] for segment in segments])
|
| 1228 |
|
| 1229 |
+
#api_key = os.getenv('HF_TOKEN').replace('"', '')
|
| 1230 |
+
logging.debug("HUGGINGFACE API KEY CHECK #2: " + huggingface_api_key)
|
| 1231 |
|
| 1232 |
logging.debug("huggingface: Submitting request...")
|
| 1233 |
+
logging.debug("huggingface: Printing request headers: %s", headers)
|
| 1234 |
response = requests.post(API_URL, headers=headers, json=data)
|
| 1235 |
|
| 1236 |
if response.status_code == 200:
|
|
|
|
| 1246 |
print(f"Error occurred while processing summary with huggingface: {str(e)}")
|
| 1247 |
return None
|
| 1248 |
|
| 1249 |
+
# FIXME
|
| 1250 |
+
# This is here for gradio authentication
|
| 1251 |
+
# It's just not set up.
|
| 1252 |
+
#def same_auth(username, password):
|
| 1253 |
+
# return username == password
|
| 1254 |
|
| 1255 |
|
| 1256 |
def format_transcription(transcription_result):
|
|
|
|
| 1261 |
return ""
|
| 1262 |
|
| 1263 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1264 |
def format_file_path(file_path):
|
| 1265 |
# Helper function to check file existence and return an appropriate path or message
|
| 1266 |
return file_path if file_path and os.path.exists(file_path) else None
|
|
|
|
| 1300 |
visible=False)
|
| 1301 |
custom_prompt_input = gr.Textbox(
|
| 1302 |
label="Custom Prompt (Customize your summary, or ask a different question)",
|
| 1303 |
+
placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the "
|
| 1304 |
+
"provided text.\nA: Here is a detailed, bulleted list of the key points made in the "
|
| 1305 |
+
"transcribed video and supporting arguments:",
|
| 1306 |
lines=3, visible=True)
|
| 1307 |
offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
|
| 1308 |
visible=False)
|
|
|
|
| 1355 |
fn=process_url,
|
| 1356 |
inputs=all_inputs,
|
| 1357 |
outputs=outputs,
|
| 1358 |
+
title="TL/DW: Video Transcription and Summarization with Custom Prompt Support",
|
| 1359 |
+
description="Submit a video URL for transcription and summarization. Ensure you input all necessary "
|
| 1360 |
+
"information including API keys."
|
| 1361 |
)
|
| 1362 |
|
| 1363 |
with gr.Tab("Transcription & Summarization History"):
|
|
|
|
| 1380 |
#
|
| 1381 |
|
| 1382 |
def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False,
|
| 1383 |
+
download_video_flag=False, demo_mode=False, custom_prompt=None, overwrite=False):
|
| 1384 |
if input_path is None and args.user_interface:
|
| 1385 |
return []
|
| 1386 |
start_time = time.monotonic()
|
|
|
|
| 1394 |
elif (info_dict := get_youtube(input_path)) and 'entries' in info_dict:
|
| 1395 |
logging.debug("MAIN: YouTube playlist detected")
|
| 1396 |
print(
|
| 1397 |
+
"\n\nSorry, but playlists aren't currently supported. You can run the following command to generate a "
|
| 1398 |
+
"text file that you can then pass into this script though! (It may not work... playlist support seems "
|
| 1399 |
+
"spotty)" + """\n\n\tpython Get_Playlist_URLs.py <Youtube Playlist URL>\n\n\tThen,\n\n\tpython
|
| 1400 |
+
diarizer.py <playlist text file name>\n\n""")
|
| 1401 |
return
|
| 1402 |
else:
|
| 1403 |
paths = [input_path]
|
|
|
|
| 1411 |
if info_dict:
|
| 1412 |
logging.debug("MAIN: Creating path for video file...")
|
| 1413 |
download_path = create_download_directory(info_dict['title'])
|
| 1414 |
+
logging.debug("MAIN: Path created successfully\n MAIN: Now Downloading video from yt_dlp...")
|
|
|
|
| 1415 |
try:
|
| 1416 |
video_path = download_video(path, download_path, info_dict, download_video_flag)
|
| 1417 |
except RuntimeError as e:
|
|
|
|
| 1442 |
logging.info(f"Transcription complete: {audio_file}")
|
| 1443 |
|
| 1444 |
# Perform summarization based on the specified API
|
| 1445 |
+
logging.debug(f"MAIN: HF: Summarization being performed by HuggingFace")
|
| 1446 |
+
json_file_path = audio_file.replace('.wav', '.segments.json')
|
| 1447 |
+
if api_name == "huggingface":
|
| 1448 |
+
huggingface_api_key = os.getenv('HF_TOKEN').replace('"', '')
|
| 1449 |
+
if huggingface_api_key is None:
|
| 1450 |
+
huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
|
| 1451 |
+
try:
|
| 1452 |
+
logging.debug(f"MAIN: Trying to summarize with huggingface")
|
| 1453 |
+
summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
|
| 1454 |
+
except requests.exceptions.ConnectionError:
|
| 1455 |
+
requests.status_code = "Connection: "
|
| 1456 |
if api_name and api_key:
|
| 1457 |
logging.debug(f"MAIN: Summarization being performed by {api_name}")
|
| 1458 |
json_file_path = audio_file.replace('.wav', '.segments.json')
|
|
|
|
| 1463 |
summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt)
|
| 1464 |
except requests.exceptions.ConnectionError:
|
| 1465 |
requests.status_code = "Connection: "
|
| 1466 |
+
elif api_name.lower() == "huggingface":
|
| 1467 |
+
huggingface_api_key = os.getenv(HF_TOKEN)
|
| 1468 |
+
if huggingface_api_key is None:
|
| 1469 |
+
huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
|
| 1470 |
+
try:
|
| 1471 |
+
logging.debug(f"MAIN: Trying to summarize with huggingface")
|
| 1472 |
+
summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
|
| 1473 |
+
except requests.exceptions.ConnectionError:
|
| 1474 |
+
requests.status_code = "Connection: "
|
| 1475 |
elif api_name.lower() == "anthropic":
|
| 1476 |
anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', fallback=None)
|
| 1477 |
try:
|
|
|
|
| 1517 |
summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt)
|
| 1518 |
except requests.exceptions.ConnectionError:
|
| 1519 |
requests.status_code = "Connection: "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1520 |
else:
|
| 1521 |
logging.warning(f"Unsupported API: {api_name}")
|
| 1522 |
summary = None
|
|
|
|
| 1528 |
else:
|
| 1529 |
logging.warning(f"Failed to generate summary using {api_name} API")
|
| 1530 |
else:
|
| 1531 |
+
logging.info("MAIN: #2 - No API specified. Summarization will not be performed")
|
| 1532 |
except Exception as e:
|
| 1533 |
logging.error(f"Error processing path: {path}")
|
| 1534 |
logging.error(str(e))
|
| 1535 |
+
continue
|
| 1536 |
# end_time = time.monotonic()
|
| 1537 |
# print("Total program execution time: " + timedelta(seconds=end_time - start_time))
|
| 1538 |
|
|
|
|
| 1544 |
parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?')
|
| 1545 |
parser.add_argument('-v', '--video', action='store_true', help='Download the video instead of just the audio')
|
| 1546 |
parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)')
|
| 1547 |
+
parser.add_argument('--overwrite', action='store_true', help='Overwrite existing audio files')
|
| 1548 |
parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)')
|
| 1549 |
parser.add_argument('-wm', '--whisper_model', type=str, default='small.en',
|
| 1550 |
help='Whisper model (default: small.en)')
|
|
|
|
| 1598 |
logging.info(f'API: {args.api_name}')
|
| 1599 |
logging.info('Summarization will be performed.')
|
| 1600 |
else:
|
| 1601 |
+
logging.info('MAIN: #1 No API specified. Summarization will not be performed.')
|
| 1602 |
|
| 1603 |
logging.debug("Platform check being performed...")
|
| 1604 |
platform_check()
|
|
|
|
| 1613 |
try:
|
| 1614 |
results = main(args.input_path, api_name=args.api_name, api_key=args.api_key,
|
| 1615 |
num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset,
|
| 1616 |
+
vad_filter=args.vad_filter, download_video_flag=args.video, overwrite=args.overwrite)
|
| 1617 |
logging.info('Transcription process completed.')
|
| 1618 |
except Exception as e:
|
| 1619 |
logging.error('An error occurred during the transcription process.')
|