Pijush2023 committed
Commit f8395c3 · verified · Parent: ffe372d

Update app.py

Files changed (1)
  1. app.py +11 -721
app.py CHANGED
@@ -694,718 +694,6 @@
694
  # demo.launch(share=True)
695
 
696
 
697
- # import gradio as gr
698
- # import requests
699
- # import os
700
- # import time
701
- # import re
702
- # import logging
703
- # import tempfile
704
- # import folium
705
- # import concurrent.futures
706
- # import torch
707
- # from PIL import Image
708
- # from datetime import datetime
709
- # from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
710
- # from googlemaps import Client as GoogleMapsClient
711
- # from gtts import gTTS
712
- # from diffusers import StableDiffusionPipeline
713
- # from langchain_openai import OpenAIEmbeddings, ChatOpenAI
714
- # from langchain_pinecone import PineconeVectorStore
715
- # from langchain.prompts import PromptTemplate
716
- # from langchain.chains import RetrievalQA
717
- # from langchain.chains.conversation.memory import ConversationBufferWindowMemory
718
- # from langchain.agents import Tool, initialize_agent
719
- # from huggingface_hub import login
720
- # from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
721
- # from parler_tts import ParlerTTSForConditionalGeneration
722
- # from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
723
- # from scipy.io.wavfile import write as write_wav
724
- # from pydub import AudioSegment
725
- # from string import punctuation
726
- # import librosa
727
- # from pathlib import Path
728
- # import torchaudio
729
- # import numpy as np
730
-
731
- # # Check if the token is already set in the environment variables
732
- # hf_token = os.getenv("HF_TOKEN")
733
- # if hf_token is None:
734
- # print("Please set your Hugging Face token in the environment variables.")
735
- # else:
736
- # login(token=hf_token)
737
-
738
- # logging.basicConfig(level=logging.DEBUG)
739
-
740
- # embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
741
-
742
- # from pinecone import Pinecone
743
- # pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
744
-
745
- # index_name = "birmingham-dataset"
746
- # vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
747
- # retriever = vectorstore.as_retriever(search_kwargs={'k': 5})
748
-
749
- # chat_model = ChatOpenAI(api_key=os.environ['OPENAI_API_KEY'], temperature=0, model='gpt-4o')
750
-
751
- # conversational_memory = ConversationBufferWindowMemory(
752
- # memory_key='chat_history',
753
- # k=10,
754
- # return_messages=True
755
- # )
756
-
757
- # def get_current_time_and_date():
758
- # now = datetime.now()
759
- # return now.strftime("%Y-%m-%d %H:%M:%S")
760
-
761
- # current_time_and_date = get_current_time_and_date()
762
-
763
- # def fetch_local_events():
764
- # api_key = os.environ['SERP_API']
765
- # url = f'https://serpapi.com/search.json?engine=google_events&q=Events+in+Birmingham&hl=en&gl=us&api_key={api_key}'
766
- # response = requests.get(url)
767
- # if response.status_code == 200:
768
- # events_results = response.json().get("events_results", [])
769
- # events_html = """
770
- # <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Local Events</h2>
771
- # <style>
772
- # table {
773
- # font-family: 'Verdana', sans-serif;
774
- # color: #333;
775
- # border-collapse: collapse;
776
- # width: 100%;
777
- # }
778
- # th, td {
779
- # border: 1px solid #fff !important;
780
- # padding: 8px;
781
- # }
782
- # th {
783
- # background-color: #f2f2f2;
784
- # color: #333;
785
- # text-align: left;
786
- # }
787
- # tr:hover {
788
- # background-color: #f5f5f5;
789
- # }
790
- # .event-link {
791
- # color: #1E90FF;
792
- # text-decoration: none;
793
- # }
794
- # .event-link:hover {
795
- # text-decoration: underline;
796
- # }
797
- # </style>
798
- # <table>
799
- # <tr>
800
- # <th>Title</th>
801
- # <th>Date and Time</th>
802
- # <th>Location</th>
803
- # </tr>
804
- # """
805
- # for event in events_results:
806
- # title = event.get("title", "No title")
807
- # date_info = event.get("date", {})
808
- # date = f"{date_info.get('start_date', '')} {date_info.get('when', '')}".replace("{", "").replace("}", "")
809
- # location = event.get("address", "No location")
810
- # if isinstance(location, list):
811
- # location = " ".join(location)
812
- # location = location.replace("[", "").replace("]", "")
813
- # link = event.get("link", "#")
814
- # events_html += f"""
815
- # <tr>
816
- # <td><a class='event-link' href='{link}' target='_blank'>{title}</a></td>
817
- # <td>{date}</td>
818
- # <td>{location}</td>
819
- # </tr>
820
- # """
821
- # events_html += "</table>"
822
- # return events_html
823
- # else:
824
- # return "<p>Failed to fetch local events</p>"
825
-
826
- # def fetch_local_weather():
827
- # try:
828
- # api_key = os.environ['WEATHER_API']
829
- # url = f'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/birmingham?unitGroup=metric&include=events%2Calerts%2Chours%2Cdays%2Ccurrent&key={api_key}'
830
- # response = requests.get(url)
831
- # response.raise_for_status()
832
- # jsonData = response.json()
833
-
834
- # current_conditions = jsonData.get("currentConditions", {})
835
- # temp_celsius = current_conditions.get("temp", "N/A")
836
-
837
- # if temp_celsius != "N/A":
838
- # temp_fahrenheit = int((temp_celsius * 9/5) + 32)
839
- # else:
840
- # temp_fahrenheit = "N/A"
841
-
842
- # condition = current_conditions.get("conditions", "N/A")
843
- # humidity = current_conditions.get("humidity", "N/A")
844
-
845
- # weather_html = f"""
846
- # <div class="weather-theme">
847
- # <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Local Weather</h2>
848
- # <div class="weather-content">
849
- # <div class="weather-icon">
850
- # <img src="https://www.weatherbit.io/static/img/icons/{get_weather_icon(condition)}.png" alt="{condition}" style="width: 100px; height: 100px;">
851
- # </div>
852
- # <div class="weather-details">
853
- # <p style="font-family: 'Verdana', sans-serif; color: #333; font-size: 1.2em;">Temperature: {temp_fahrenheit}°F</p>
854
- # <p style="font-family: 'Verdana', sans-serif; color: #333; font-size: 1.2em;">Condition: {condition}</p>
855
- # <p style="font-family: 'Verdana', sans-serif; color: #333; font-size: 1.2em;">Humidity: {humidity}%</p>
856
- # </div>
857
- # </div>
858
- # </div>
859
- # <style>
860
- # .weather-theme {{
861
- # animation: backgroundAnimation 10s infinite alternate;
862
- # border-radius: 10px;
863
- # padding: 10px;
864
- # margin-bottom: 15px;
865
- # background: linear-gradient(45deg, #ffcc33, #ff6666, #ffcc33, #ff6666);
866
- # background-size: 400% 400%;
867
- # box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
868
- # transition: box-shadow 0.3s ease, background-color 0.3s ease;
869
- # }}
870
- # .weather-theme:hover {{
871
- # box-shadow: 0 8px 16px rgba(0, 0, 0, 0.2);
872
- # background-position: 100% 100%;
873
- # }}
874
- # @keyframes backgroundAnimation {{
875
- # 0% {{ background-position: 0% 50%; }}
876
- # 100% {{ background-position: 100% 50%; }}
877
- # }}
878
- # .weather-content {{
879
- # display: flex;
880
- # align-items: center;
881
- # }}
882
- # .weather-icon {{
883
- # flex: 1;
884
- # }}
885
- # .weather-details {{
886
- # flex 3;
887
- # }}
888
- # </style>
889
- # """
890
- # return weather_html
891
- # except requests.exceptions.RequestException as e:
892
- # return f"<p>Failed to fetch local weather: {e}</p>"
893
-
894
- # def get_weather_icon(condition):
895
- # condition_map = {
896
- # "Clear": "c01d",
897
- # "Partly Cloudy": "c02d",
898
- # "Cloudy": "c03d",
899
- # "Overcast": "c04d",
900
- # "Mist": "a01d",
901
- # "Patchy rain possible": "r01d",
902
- # "Light rain": "r02d",
903
- # "Moderate rain": "r03d",
904
- # "Heavy rain": "r04d",
905
- # "Snow": "s01d",
906
- # "Thunderstorm": "t01d",
907
- # "Fog": "a05d",
908
- # }
909
- # return condition_map.get(condition, "c04d")
910
-
911
- # template1 = """You are an expert concierge who is helpful and a renowned guide for Birmingham,Alabama. Based on weather being a sunny bright day and the today's date is 1st july 2024, use the following pieces of context,
912
- # memory, and message history, along with your knowledge of perennial events in Birmingham,Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
913
- # Use fifteen sentences maximum. Keep the answer as detailed as possible. Always include the address, time, date, and
914
- # event type and description.And also add this Birmingham,AL at the end of each address, Always say "It was my pleasure!" at the end of the answer.
915
- # {context}
916
- # Question: {question}
917
- # Helpful Answer:"""
918
-
919
- # template2 = """You are an expert concierge who is helpful and a renowned guide for Birmingham,Alabama. Based on today's weather being a sunny bright day and today's date is 16th july 2024, take the location or address but don't show the location or address on the output prompts. Use the following pieces of context,
920
- # memory, and message history, along with your knowledge of perennial events in Birmingham,Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
921
- # Keep the answer short ,sweet and crisp and in one shot. Always say "It was my pleasure!" at the end of the answer.
922
- # {context}
923
- # Question: {question}
924
- # Helpful Answer:"""
925
-
926
- # QA_CHAIN_PROMPT_1 = PromptTemplate(input_variables=["context", "question"], template=template1)
927
- # QA_CHAIN_PROMPT_2 = PromptTemplate(input_variables=["context", "question"], template=template2)
928
-
929
- # def build_qa_chain(prompt_template):
930
- # qa_chain = RetrievalQA.from_chain_type(
931
- # llm=chat_model,
932
- # chain_type="stuff",
933
- # retriever=retriever,
934
- # chain_type_kwargs={"prompt": prompt_template}
935
- # )
936
- # tools = [
937
- # Tool(
938
- # name='Knowledge Base',
939
- # func=qa_chain,
940
- # description='Use this tool when answering general knowledge queries to get more information about the topic'
941
- # )
942
- # ]
943
- # return qa_chain, tools
944
-
945
- # def initialize_agent_with_prompt(prompt_template):
946
- # qa_chain, tools = build_qa_chain(prompt_template)
947
- # agent = initialize_agent(
948
- # agent='chat-conversational-react-description',
949
- # tools=tools,
950
- # llm=chat_model,
951
- # verbose=False,
952
- # max_iteration=5,
953
- # early_stopping_method='generate',
954
- # memory=conversational_memory
955
- # )
956
- # return agent
957
-
958
- # def generate_answer(message, choice):
959
- # logging.debug(f"generate_answer called with prompt_choice: {choice}")
960
-
961
- # if choice == "Details":
962
- # agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_1)
963
- # elif choice == "Conversational":
964
- # agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_2)
965
- # else:
966
- # logging.error(f"Invalid prompt_choice: {choice}. Defaulting to 'Conversational'")
967
- # agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_2)
968
- # response = agent(message)
969
-
970
- # addresses = extract_addresses(response['output'])
971
- # return response['output'], addresses
972
-
973
- # def bot(history, choice, tts_choice, state):
974
- # if not history:
975
- # return history
976
- # response, addresses = generate_answer(history[-1][0], choice)
977
- # history[-1][1] = ""
978
-
979
- # with concurrent.futures.ThreadPoolExecutor() as executor:
980
- # if tts_choice == "Alpha":
981
- # audio_future = executor.submit(generate_audio_elevenlabs, response)
982
- # elif tts_choice == "Beta":
983
- # audio_future = executor.submit(generate_audio_parler_tts, response)
984
- # elif tts_choice == "Gamma":
985
- # audio_future = executor.submit(generate_audio_mars5, response)
986
-
987
- # for character in response:
988
- # history[-1][1] += character
989
- # time.sleep(0.05)
990
- # yield history, None
991
-
992
- # audio_path = audio_future.result()
993
- # yield history, audio_path
994
-
995
- # def add_message(history, message):
996
- # history.append((message, None))
997
- # return history, gr.Textbox(value="", interactive=True, placeholder="Enter message or upload file...", show_label=False)
998
-
999
- # def print_like_dislike(x: gr.LikeData):
1000
- # print(x.index, x.value, x.liked)
1001
-
1002
- # def extract_addresses(response):
1003
- # if not isinstance(response, str):
1004
- # response = str(response)
1005
- # address_patterns = [
1006
- # r'([A-Z].*,\sBirmingham,\sAL\s\d{5})',
1007
- # r'(\d{4}\s.*,\sBirmingham,\sAL\s\d{5})',
1008
- # r'([A-Z].*,\sAL\s\d{5})',
1009
- # r'([A-Z].*,.*\sSt,\sBirmingham,\sAL\s\d{5})',
1010
- # r'([A-Z].*,.*\sStreets,\sBirmingham,\sAL\s\d{5})',
1011
- # r'(\d{2}.*\sStreets)',
1012
- # r'([A-Z].*\s\d{2},\sBirmingham,\sAL\s\d{5})',
1013
- # r'([a-zA-Z]\s Birmingham)',
1014
- # r'([a-zA-Z].*,\sBirmingham,\sAL)',
1015
- # r'(^Birmingham,AL$)'
1016
- # ]
1017
- # addresses = []
1018
- # for pattern in address_patterns:
1019
- # addresses.extend(re.findall(pattern, response))
1020
- # return addresses
1021
-
1022
- # all_addresses = []
1023
-
1024
- # def generate_map(location_names):
1025
- # global all_addresses
1026
- # all_addresses.extend(location_names)
1027
-
1028
- # api_key = os.environ['GOOGLEMAPS_API_KEY']
1029
- # gmaps = GoogleMapsClient(key=api_key)
1030
-
1031
- # m = folium.Map(location=[33.5175, -86.809444], zoom_start=12)
1032
-
1033
- # for location_name in all_addresses:
1034
- # geocode_result = gmaps.geocode(location_name)
1035
- # if geocode_result:
1036
- # location = geocode_result[0]['geometry']['location']
1037
- # folium.Marker(
1038
- # [location['lat'], location['lng']],
1039
- # tooltip=f"{geocode_result[0]['formatted_address']}"
1040
- # ).add_to(m)
1041
-
1042
- # map_html = m._repr_html_()
1043
- # return map_html
1044
-
1045
- # def fetch_local_news():
1046
- # api_key = os.environ['SERP_API']
1047
- # url = f'https://serpapi.com/search.json?engine=google_news&q=birmingham headline&api_key={api_key}'
1048
- # response = requests.get(url)
1049
- # if response.status_code == 200:
1050
- # results = response.json().get("news_results", [])
1051
- # news_html = """
1052
- # <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Birmingham Today</h2>
1053
- # <style>
1054
- # .news-item {
1055
- # font-family: 'Verdana', sans-serif;
1056
- # color: #333;
1057
- # background-color: #f0f8ff;
1058
- # margin-bottom: 15px;
1059
- # padding: 10px;
1060
- # border-radius: 5px;
1061
- # transition: box-shadow 0.3s ease, background-color 0.3s ease;
1062
- # font-weight: bold;
1063
- # }
1064
- # .news-item:hover {
1065
- # box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
1066
- # background-color: #e6f7ff;
1067
- # }
1068
- # .news-item a {
1069
- # color: #1E90FF;
1070
- # text-decoration: none;
1071
- # font-weight: bold;
1072
- # }
1073
- # .news-item a:hover {
1074
- # text-decoration: underline;
1075
- # }
1076
- # .news-preview {
1077
- # position: absolute;
1078
- # display: none;
1079
- # border: 1px solid #ccc;
1080
- # border-radius: 5px;
1081
- # box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
1082
- # background-color: white;
1083
- # z-index: 1000;
1084
- # max-width: 300px;
1085
- # padding: 10px;
1086
- # font-family: 'Verdana', sans-serif;
1087
- # color: #333;
1088
- # }
1089
- # </style>
1090
- # <script>
1091
- # function showPreview(event, previewContent) {
1092
- # var previewBox = document.getElementById('news-preview');
1093
- # previewBox.innerHTML = previewContent;
1094
- # previewBox.style.left = event.pageX + 'px';
1095
- # previewBox.style.top = event.pageY + 'px';
1096
- # previewBox.style.display = 'block';
1097
- # }
1098
- # function hidePreview() {
1099
- # var previewBox = document.getElementById('news-preview');
1100
- # previewBox.style.display = 'none';
1101
- # }
1102
- # </script>
1103
- # <div id="news-preview" class="news-preview"></div>
1104
- # """
1105
- # for index, result in enumerate(results[:7]):
1106
- # title = result.get("title", "No title")
1107
- # link = result.get("link", "#")
1108
- # snippet = result.get("snippet", "")
1109
- # news_html += f"""
1110
- # <div class="news-item" onmouseover="showPreview(event, '{snippet}')" onmouseout="hidePreview()">
1111
- # <a href='{link}' target='_blank'>{index + 1}. {title}</a>
1112
- # <p>{snippet}</p>
1113
- # </div>
1114
- # """
1115
- # return news_html
1116
- # else:
1117
- # return "<p>Failed to fetch local news</p>"
1118
-
1119
- # import numpy as np
1120
- # import torch
1121
- # from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
1122
-
1123
- # model_id = 'openai/whisper-large-v3'
1124
- # device = "cuda:0" if torch.cuda.is_available() else "cpu"
1125
- # torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
1126
- # model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
1127
- # processor = AutoProcessor.from_pretrained(model_id)
1128
-
1129
- # pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, max_new_tokens=128, chunk_length_s=15, batch_size=16, torch_dtype=torch_dtype, device=device, return_timestamps=True)
1130
-
1131
- # base_audio_drive = "/data/audio"
1132
-
1133
- # def transcribe_function(stream, new_chunk):
1134
- # try:
1135
- # sr, y = new_chunk[0], new_chunk[1]
1136
- # except TypeError:
1137
- # print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
1138
- # return stream, "", None
1139
-
1140
- # y = y.astype(np.float32) / np.max(np.abs(y))
1141
-
1142
- # if stream is not None:
1143
- # stream = np.concatenate([stream, y])
1144
- # else:
1145
- # stream = y
1146
-
1147
- # result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
1148
-
1149
- # full_text = result.get("text","")
1150
-
1151
- # return stream, full_text, result
1152
-
1153
- # def update_map_with_response(history):
1154
- # if not history:
1155
- # return ""
1156
- # response = history[-1][1]
1157
- # addresses = extract_addresses(response)
1158
- # return generate_map(addresses)
1159
-
1160
- # def clear_textbox():
1161
- # return ""
1162
-
1163
- # def show_map_if_details(history, choice):
1164
- # if choice in ["Details", "Conversational"]:
1165
- # return gr.update(visible=True), update_map_with_response(history)
1166
- # else:
1167
- # return gr.update(visible=False), ""
1168
-
1169
- # def generate_audio_elevenlabs(text):
1170
- # XI_API_KEY = os.environ['ELEVENLABS_API']
1171
- # VOICE_ID = 'd9MIrwLnvDeH7aZb61E9'
1172
- # tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
1173
- # headers = {
1174
- # "Accept": "application/json",
1175
- # "xi-api-key": XI_API_KEY
1176
- # }
1177
- # data = {
1178
- # "text": str(text),
1179
- # "model_id": "eleven_multilingual_v2",
1180
- # "voice_settings": {
1181
- # "stability": 1.0,
1182
- # "similarity_boost": 0.0,
1183
- # "style": 0.60,
1184
- # "use_speaker_boost": False
1185
- # }
1186
- # }
1187
- # response = requests.post(tts_url, headers=headers, json=data, stream=True)
1188
- # if response.ok:
1189
- # with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
1190
- # for chunk in response.iter_content(chunk_size=1024):
1191
- # f.write(chunk)
1192
- # temp_audio_path = f.name
1193
- # logging.debug(f"Audio saved to {temp_audio_path}")
1194
- # return temp_audio_path
1195
- # else:
1196
- # logging.error(f"Error generating audio: {response.text}")
1197
- # return None
1198
-
1199
- # repo_id = "parler-tts/parler-tts-mini-expresso"
1200
-
1201
- # parler_model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
1202
- # parler_tokenizer = AutoTokenizer.from_pretrained(repo_id)
1203
- # parler_feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
1204
-
1205
- # SAMPLE_RATE = parler_feature_extractor.sampling_rate
1206
- # SEED = 42
1207
-
1208
- # def preprocess(text):
1209
- # number_normalizer = EnglishNumberNormalizer()
1210
- # text = number_normalizer(text).strip()
1211
- # if text[-1] not in punctuation:
1212
- # text = f"{text}."
1213
-
1214
- # abbreviations_pattern = r'\b[A-Z][A-Z\.]+\b'
1215
-
1216
- # def separate_abb(chunk):
1217
- # chunk = chunk.replace(".", "")
1218
- # return " ".join(chunk)
1219
-
1220
- # abbreviations = re.findall(abbreviations_pattern, text)
1221
- # for abv in abbreviations:
1222
- # if abv in text:
1223
- # text = text.replace(abv, separate_abb(abv))
1224
- # return text
1225
-
1226
- # def chunk_text(text, max_length=250):
1227
- # words = text.split()
1228
- # chunks = []
1229
- # current_chunk = []
1230
- # current_length = 0
1231
-
1232
- # for word in words:
1233
- # if current_length + len(word) + 1 <= max_length:
1234
- # current_chunk.append(word)
1235
- # current_length += len(word) + 1
1236
- # else:
1237
- # chunks.append(' '.join(current_chunk))
1238
- # current_chunk = [word]
1239
- # current_length = len(word) + 1
1240
-
1241
- # if current_chunk:
1242
- # chunks.append(' '.join(current_chunk))
1243
-
1244
- # return chunks
1245
-
1246
- # def generate_audio_parler_tts(text):
1247
- # description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
1248
- # chunks = chunk_text(preprocess(text))
1249
- # audio_segments = []
1250
-
1251
- # for chunk in chunks:
1252
- # inputs = parler_tokenizer(description, return_tensors="pt").to(device)
1253
- # prompt = parler_tokenizer(chunk, return_tensors="pt").to(device)
1254
-
1255
- # set_seed(SEED)
1256
- # generation = parler_model.generate(input_ids=inputs.input_ids, prompt_input_ids=prompt.input_ids)
1257
- # audio_arr = generation.cpu().numpy().squeeze()
1258
-
1259
- # temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_audio_{len(audio_segments)}.wav")
1260
- # write_wav(temp_audio_path, SAMPLE_RATE, audio_arr)
1261
- # audio_segments.append(AudioSegment.from_wav(temp_audio_path))
1262
-
1263
- # combined_audio = sum(audio_segments)
1264
- # combined_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio.wav")
1265
- # combined_audio.export(combined_audio_path, format="wav")
1266
-
1267
- # logging.debug(f"Audio saved to {combined_audio_path}")
1268
- # return combined_audio_path
1269
-
1270
- # # Load the MARS5 model
1271
- # mars5, config_class = torch.hub.load('Camb-ai/mars5-tts', 'mars5_english', trust_repo=True)
1272
-
1273
- # def generate_audio_mars5(text):
1274
- # description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
1275
- # kwargs_dict = {
1276
- # 'temperature': 0.2,
1277
- # 'top_k': -1,
1278
- # 'top_p': 0.2,
1279
- # 'typical_p': 1.0,
1280
- # 'freq_penalty': 2.6,
1281
- # 'presence_penalty': 0.4,
1282
- # 'rep_penalty_window': 100,
1283
- # 'max_prompt_phones': 360,
1284
- # 'deep_clone': True,
1285
- # 'nar_guidance_w': 3
1286
- # }
1287
-
1288
- # chunks = chunk_text(preprocess(text))
1289
- # audio_segments = []
1290
-
1291
- # for chunk in chunks:
1292
- # wav = torch.zeros(1, mars5.sr) # Use a placeholder silent audio for the reference
1293
- # cfg = config_class(**{k: kwargs_dict[k] for k in kwargs_dict if k in config_class.__dataclass_fields__})
1294
- # ar_codes, wav_out = mars5.tts(chunk, wav, "", cfg=cfg)
1295
-
1296
- # temp_audio_path = os.path.join(tempfile.gettempdir(), f"mars5_audio_{len(audio_segments)}.wav")
1297
- # torchaudio.save(temp_audio_path, wav_out.unsqueeze(0), mars5.sr)
1298
- # audio_segments.append(AudioSegment.from_wav(temp_audio_path))
1299
-
1300
- # combined_audio = sum(audio_segments)
1301
- # combined_audio_path = os.path.join(tempfile.gettempdir(), "mars5_combined_audio.wav")
1302
- # combined_audio.export(combined_audio_path, format="wav")
1303
-
1304
- # logging.debug(f"Audio saved to {combined_audio_path}")
1305
- # return combined_audio_path
1306
-
1307
- # pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", torch_dtype=torch.float16)
1308
- # pipe.to(device)
1309
-
1310
- # def generate_image(prompt):
1311
- # with torch.cuda.amp.autocast():
1312
- # image = pipe(
1313
- # prompt,
1314
- # num_inference_steps=28,
1315
- # guidance_scale=3.0,
1316
- # ).images[0]
1317
- # return image
1318
-
1319
- # hardcoded_prompt_1 = "Give a high quality photograph of a great looking red 2026 Toyota coupe against a skyline setting in the night, michael mann style in omaha enticing the consumer to buy this product"
1320
- # hardcoded_prompt_2 = "A vibrant and dynamic football game scene in the style of Peter Paul Rubens, showcasing the intense match between Alabama and Nebraska. The players are depicted with the dramatic, muscular physiques and expressive faces typical of Rubens' style. The Alabama team is wearing their iconic crimson and white uniforms, while the Nebraska team is in their classic red and white attire. The scene is filled with action, with players in mid-motion, tackling, running, and catching the ball. The background features a grand stadium filled with cheering fans, banners, and the natural landscape in the distance. The colors are rich and vibrant, with a strong use of light and shadow to create depth and drama. The overall atmosphere captures the intensity and excitement of the game, infused with the grandeur and dynamism characteristic of Rubens' work."
1321
- # hardcoded_prompt_3 = "Create a high-energy scene of a DJ performing on a large stage with vibrant lights, colorful lasers, a lively dancing crowd, and various electronic equipment in the background."
1322
-
1323
- # def update_images():
1324
- # image_1 = generate_image(hardcoded_prompt_1)
1325
- # image_2 = generate_image(hardcoded_prompt_2)
1326
- # image_3 = generate_image(hardcoded_prompt_3)
1327
- # return image_1, image_2, image_3
1328
-
1329
- # def clear_state_and_textbox():
1330
- # conversational_memory.clear()
1331
- # return ""
1332
-
1333
- # def transcribe_and_update_textbox(audio, chat_input):
1334
- # transcribed_text = transcribe(audio)
1335
- # # return "",transcribed_text
1336
- # return transcribed_text
1337
-
1338
- # def transcribe_function_whisper(audio):
1339
- # sr, y = audio
1340
- # y = y.astype(np.float32)
1341
- # y /= np.max(np.abs(y))
1342
- # result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
1343
- # full_text = result.get("text", "")
1344
- # return full_text
1345
-
1346
- # with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
1347
- # with gr.Row():
1348
- # with gr.Column():
1349
- # state = gr.State()
1350
-
1351
- # chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
1352
- # choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
1353
-
1354
- # gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
1355
-
1356
- # chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!", placeholder="After Prompt, click Retriever Only")
1357
- # chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input], api_name="voice_query")
1358
- # tts_choice = gr.Radio(label="Select TTS System", choices=["Alpha", "Beta", "Gamma"], value="Alpha")
1359
- # retriever_button = gr.Button("Retriever")
1360
-
1361
- # gr.Markdown("<h1 style='color: red;'>Radar Map</h1>", elem_id="Map-Radar")
1362
- # location_output = gr.HTML()
1363
- # retriever_button.click(fn=add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input]).then(
1364
- # fn=bot, inputs=[chatbot, choice, tts_choice, state], outputs=[chatbot, gr.Audio(interactive=False, autoplay=True)], api_name="Ask_Retriever").then(
1365
- # fn=show_map_if_details, inputs=[chatbot, choice], outputs=[location_output, location_output], api_name="map_finder").then(
1366
- # fn=clear_state_and_textbox, inputs=[], outputs=[chat_input]
1367
- # )
1368
-
1369
- # bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot], api_name="generate_voice_response")
1370
- # bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
1371
- # chatbot.like(print_like_dislike, None, None)
1372
- # clear_button = gr.Button("Clear")
1373
- # clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
1374
-
1375
- # # Recorder section
1376
-
1377
- # gr.Markdown("<h2>Hey Radar</h2>")
1378
- # audio_input = gr.Audio(sources=["microphone"], type='numpy')
1379
- # transcribe_button = gr.Button("Transcribe")
1380
- # transcribe_button.click(fn=transcribe_and_update_textbox, inputs=[audio_input], outputs=[chat_input],api_name="voice_to_text")
1381
-
1382
-
1383
-
1384
- # # Streaming ASR component
1385
- # gr.Markdown("<h2>Hey Radar ASR</h2>")
1386
- # stream_audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=True)
1387
- # stream_transcription = gr.State(None) # Initialize stream state
1388
- # stream_audio_input.change(transcribe_function, inputs=[stream_transcription, stream_audio_input], outputs=[stream_transcription, chat_input])
1389
-
1390
- # with gr.Column():
1391
- # weather_output = gr.HTML(value=fetch_local_weather())
1392
- # news_output = gr.HTML(value=fetch_local_news())
1393
- # news_output = gr.HTML(value=fetch_local_events())
1394
-
1395
- # with gr.Column():
1396
- # image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
1397
- # image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
1398
- # image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
1399
-
1400
- # refresh_button = gr.Button("Refresh Images")
1401
- # refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3])
1402
- # # location_output = gr.HTML()
1403
- # bot_msg.then(show_map_if_details, [chatbot, choice], [location_output, location_output], api_name="map_finder")
1404
-
1405
- # demo.queue()
1406
- # demo.launch(share=True)
1407
-
1408
-
1409
  import gradio as gr
1410
  import requests
1411
  import os
@@ -1595,7 +883,7 @@ def fetch_local_weather():
1595
  flex: 1;
1596
  }}
1597
  .weather-details {{
1598
- flex: 3;
886
+ flex 3;
1599
  }}
1600
  </style>
1601
  """
@@ -1936,7 +1224,7 @@ def preprocess(text):
1936
  return text
1937
 
1938
  def chunk_text(text, max_length=250):
1939
- words = text split()
1227
+ words = text.split()
1940
  chunks = []
1941
  current_chunk = []
1942
  current_length = 0
@@ -2082,7 +1370,7 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
2082
  bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
2083
  chatbot.like(print_like_dislike, None, None)
2084
  clear_button = gr.Button("Clear")
2085
- clear_button.click(lambda: [None, ""], outputs=[stream_transcription, chat_input])
1373
+ clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
2086
 
2087
  # Recorder section
2088
 
@@ -2091,18 +1379,18 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
2091
  transcribe_button = gr.Button("Transcribe")
2092
  transcribe_button.click(fn=transcribe_and_update_textbox, inputs=[audio_input], outputs=[chat_input],api_name="voice_to_text")
2093
 
2094
-
1382
+
2095
 
2096
  # Streaming ASR component
2097
- gr.Markdown("<h2>Streaming ASR</h2>")
1385
+ gr.Markdown("<h2>Hey Radar ASR</h2>")
2098
  stream_audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=True)
2099
  stream_transcription = gr.State(None) # Initialize stream state
2100
  stream_audio_input.change(transcribe_function, inputs=[stream_transcription, stream_audio_input], outputs=[stream_transcription, chat_input])
2101
 
2102
- with gr.Column():
2103
- weather_output = gr.HTML(value=fetch_local_weather())
2104
- news_output = gr.HTML(value=fetch_local_news())
2105
- news_output = gr.HTML(value=fetch_local_events())
1390
+ # with gr.Column():
1391
+ # weather_output = gr.HTML(value=fetch_local_weather())
1392
+ # news_output = gr.HTML(value=fetch_local_news())
1393
+ # news_output = gr.HTML(value=fetch_local_events())
2106
 
2107
  with gr.Column():
2108
  image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
@@ -2118,3 +1406,5 @@ demo.queue()
2118
  demo.launch(share=True)
2119
 
2120

1409
+
1410
+
 
 
 