|  | import os | 
					
						
						|  | import gradio as gr | 
					
						
						|  | from sentence_transformers import SentenceTransformer, CrossEncoder, util | 
					
						
						|  | from transformers import pipeline | 
					
						
						|  | import torch | 
					
						
						|  | import pickle | 
					
						
						|  | import pandas as pd | 
					
						
						|  | import gradio as gr | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h") | 
					
						
						|  | def speech_to_text(speech): | 
					
						
						|  | text = asr(speech)["text"] | 
					
						
						|  | return text | 
					
						
						|  |  | 
					
						
						|  | bi_encoder = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1") | 
					
						
						|  | cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2") | 
					
						
						|  | corpus=pd.read_pickle("corpus.pkl") | 
					
						
						|  | corpus_embeddings=pd.read_pickle("corpus_embeddings_cpu.pkl") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def search(query,top_k=100): | 
					
						
						|  | print("Top 3 Answer by the NSE:") | 
					
						
						|  | print() | 
					
						
						|  | ans=[] | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | question_embedding = bi_encoder.encode(query, convert_to_tensor=True) | 
					
						
						|  | hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=top_k) | 
					
						
						|  | hits = hits[0] | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | cross_inp = [[query, corpus[hit['corpus_id']]] for hit in hits] | 
					
						
						|  | cross_scores = cross_encoder.predict(cross_inp) | 
					
						
						|  |  | 
					
						
						|  | for idx in range(len(cross_scores)): | 
					
						
						|  | hits[idx]['cross-score'] = cross_scores[idx] | 
					
						
						|  | hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True) | 
					
						
						|  |  | 
					
						
						|  | for idx, hit in enumerate(hits[0:3]): | 
					
						
						|  | ans.append(corpus[hit['corpus_id']]) | 
					
						
						|  | return ans[0],ans[1],ans[2] | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | demo = gr.Blocks() | 
					
						
						|  | with demo: | 
					
						
						|  | audio_file = gr.inputs.Audio(source="microphone", type="filepath") | 
					
						
						|  | b1 = gr.Button("Recognize Speech") | 
					
						
						|  | text = gr.Textbox() | 
					
						
						|  | b1.click(speech_to_text, inputs=audio_file, outputs=text) | 
					
						
						|  | b2 = gr.Button("Ask Wiki") | 
					
						
						|  | print(text) | 
					
						
						|  | out1 = gr.Textbox() | 
					
						
						|  | out2 = gr.Textbox() | 
					
						
						|  | out3 = gr.Textbox() | 
					
						
						|  | b2.click(search, inputs=text, outputs=[out1,out2,out3]) | 
					
						
						|  | demo.launch(debug=True) |