"""Kap Notes: a Streamlit viewer for meeting records kept in a GCS bucket.

Blob names are treated as ``<client>/<date>/<meeting>/<file>`` where the files
read by this script are ``summary.txt``, ``transcription.txt`` and
``audio.wav``.  The script shows a login/selection screen first and, once
signed in, a dashboard with the parsed summary, comments, the transcript,
speaker talk-time shares and a smoothed sentiment curve.
"""
import hmac
import html
import json
import os
import re
from datetime import datetime, timedelta

import plotly.graph_objects as go
import streamlit as st
from google.cloud import storage
from google.oauth2 import service_account
from scipy.ndimage import gaussian_filter1d
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

gcp_credentials = os.getenv('GCP_CREDENTIALS')
if not gcp_credentials:
    # json.loads(None) would otherwise fail with an unhelpful TypeError.
    raise RuntimeError(
        "The GCP_CREDENTIALS environment variable is not set; it must contain "
        "the service-account JSON."
    )
credentials_dict = json.loads(gcp_credentials)
creds = service_account.Credentials.from_service_account_info(credentials_dict)
client = storage.Client(credentials=creds)
bucket_name = "kapnotes"
bucket = client.bucket(bucket_name)

# NOTE(review): the password used to be a literal inside login().  It can now
# be overridden through the environment; the previous value remains the
# default only so existing deployments keep working and should be rotated.
APP_PASSWORD = os.getenv("KAPNOTES_PASSWORD") or "kapnotes12345"

st.set_page_config(page_title="Kap Notes", layout="wide")


def _meeting_blob_names(client_name, date, meeting):
    """Return the (summary, transcription, audio) blob names of one meeting."""
    base = f"{client_name}/{date}/{meeting}"
    return f"{base}/summary.txt", f"{base}/transcription.txt", f"{base}/audio.wav"


def _distinct_segments(prefix, index):
    """Return the sorted distinct non-empty path segment at *index* of every blob under *prefix*."""
    found = set()
    for blob in bucket.list_blobs(prefix=prefix):
        parts = blob.name.split("/")
        if len(parts) > index and parts[index]:
            found.add(parts[index])
    return sorted(found)


def _password_matches(candidate):
    """Compare *candidate* with the configured password in constant time."""
    # Encode first: compare_digest rejects non-ASCII str arguments.
    return hmac.compare_digest(candidate.encode("utf-8"), APP_PASSWORD.encode("utf-8"))


def get_client_names():
    """Return the sorted distinct first path segment of every blob in the bucket."""
    return sorted({blob.name.split("/")[0] for blob in bucket.list_blobs(prefix="")})


def validate_data(client_name, date, meeting):
    """Return True only if the summary, transcription and audio blobs all exist."""
    return all(
        bucket.blob(name).exists()
        for name in _meeting_blob_names(client_name, date, meeting)
    )


def get_meetings_for_date(client_name, date):
    """Return the sorted meeting names found under ``<client_name>/<date>/``."""
    return _distinct_segments(f"{client_name}/{date}/", 2)


def get_dates_for_client(client_name):
    """Return the sorted date segments found under ``<client_name>/``."""
    return _distinct_segments(f"{client_name}/", 1)


def parse_summary(summary_content):
    """Split the summary text into (summary, key_points, action_items).

    The text is expected to contain ``Summary:``, ``Key Points:`` and
    ``Action Items:`` sections in that order, with ``- `` bullets in the last
    two.  A section that cannot be located yields a placeholder message.
    """
    summary_match = re.search(r"Summary:\s*(.*?)(?=\nKey Points:)", summary_content, re.DOTALL)
    summary = summary_match.group(1).strip() if summary_match else "Summary not found."

    key_points_match = re.search(r"Key Points:\s*(.*?)(?=\nAction Items:)", summary_content, re.DOTALL)
    if key_points_match:
        # The captured section stops *before* the newline that precedes
        # "Action Items:", so the final bullet usually has no trailing "\n".
        # Accept end-of-text as a terminator too, otherwise it is dropped.
        key_points = re.findall(r"- (.*?)(?:\n|$)", key_points_match.group(1))
    else:
        key_points = ["Key points not found."]

    action_items_match = re.search(r"Action Items:\s*(.*)", summary_content, re.DOTALL)
    if action_items_match:
        action_items = re.findall(r"- (.*?)(?=\n- |$)", action_items_match.group(1), re.DOTALL)
    else:
        action_items = ["Action items not found."]

    return summary, key_points, action_items


def summarize_speakers(meeting_data):
    """Aggregate per-speaker talk time, text and word counts from the transcript.

    Each entry of *meeting_data* is read through its ``speaker``, ``start``,
    ``end`` and ``text`` keys.  The result maps speaker -> dict with
    ``talktime``, ``text``, ``words``, ``word_per_minute`` and
    ``talktime_percentage``.
    """
    speaker_data = {}
    total_talktime = 0
    for entry in meeting_data:
        duration = entry["end"] - entry["start"]
        text = entry["text"]
        total_talktime += duration
        stats = speaker_data.setdefault(entry["speaker"], {"talktime": 0, "text": "", "words": 0})
        stats["talktime"] += duration
        stats["text"] += " " + text
        stats["words"] += len(text.split())

    for stats in speaker_data.values():
        talktime = stats["talktime"]
        # Guard both ratios: zero-length utterances or an empty transcript
        # would otherwise raise ZeroDivisionError.
        # The "* 60" treats the transcript timestamps as seconds.
        stats["word_per_minute"] = round((stats["words"] / talktime * 60), 2) if talktime else 0.0
        stats["talktime_percentage"] = round((talktime / total_talktime * 100), 2) if total_talktime else 0.0
    return speaker_data


def smoothed_sentiment(speaker_data):
    """Return Gaussian-smoothed VADER compound scores, one per '.'-separated sentence.

    NOTE(review): the text is concatenated speaker by speaker, so the sequence
    follows speaker grouping rather than the order in which things were said.
    """
    combined_text = " ".join(stats["text"] for stats in speaker_data.values())
    analyzer = SentimentIntensityAnalyzer()
    scores = [
        analyzer.polarity_scores(sentence)["compound"]
        for sentence in combined_text.split('.')
        if sentence.strip()
    ]
    if not scores:
        # Nothing to smooth; avoid handing an empty sequence to the filter.
        return []
    return gaussian_filter1d(scores, sigma=2)


def add_comment(comment):
    """Append *comment*, stamped with today's date, to the session's comment list."""
    st.session_state.comments.append(
        {"name": "", "comment": comment, "date": datetime.now().strftime("%d, %b %Y")}
    )


def login():
    """Render the client/date/meeting pickers and the password prompt.

    On a correct password for a meeting whose three blobs exist, the selection
    is stored in ``st.session_state``, ``logged_in`` is set and the script is
    rerun; otherwise an error message is shown.
    """
    st.markdown(" ", unsafe_allow_html=True)
    if 'password' not in st.session_state:
        st.session_state.password = ""
    st.markdown("\n\nKAP NOTES\n\n", unsafe_allow_html=True)

    client_names = get_client_names()
    client_name = st.selectbox("Select Client", client_names)

    # Pre-bind so an empty client/date list cannot leave these names undefined
    # when the sign-in button is handled below.
    selected_date = None
    selected_meeting = None
    if client_name:
        available_dates = get_dates_for_client(client_name)
        selected_date = st.selectbox(f"Available Dates for {client_name}", available_dates)
        if selected_date:
            available_meetings = get_meetings_for_date(client_name, selected_date)
            selected_meeting = st.selectbox(f"Available Meetings for {selected_date}", available_meetings)

    password = st.text_input("Enter Password", type="password", value=st.session_state.password)
    sign_in_button = st.button("Sign In", key="sign_in")
    if not sign_in_button:
        return

    if _password_matches(password):
        if selected_meeting and validate_data(client_name, selected_date, selected_meeting):
            st.session_state.client_name = client_name
            st.session_state.date = selected_date
            st.session_state.meeting = selected_meeting
            st.session_state.logged_in = True
            st.session_state.password = password
            st.rerun()
        else:
            st.error(f"No records available for {client_name} on {selected_date}. Please select another option.")
    elif not password:
        st.error("Please enter password.")
    else:
        st.error("Incorrect Password. Please try again.")
    st.session_state.password = password


def _render_summary(summary, key_points, action_items):
    """Show the summary, highlights and action items in the main column."""
    # Everything interpolated below is escaped because the markdown is
    # rendered with unsafe_allow_html=True.
    st.title("Kap Notes - Unveiling the story behind your meeting")
    st.markdown(f"### Summary\n\n{html.escape(summary)}\n", unsafe_allow_html=True)

    st.markdown("### Meeting Highlights")
    highlights = "\n".join(f"• {html.escape(point)}" for point in key_points)
    st.markdown("\n" + highlights + "\n", unsafe_allow_html=True)

    st.markdown("### Actionable Items")
    actions = "\n".join(f"• {html.escape(item)}" for item in action_items)
    st.markdown("\n" + actions + "\n", unsafe_allow_html=True)


def _render_comments():
    """Show the session's comments and a form for adding a new one."""
    st.markdown("### Comments")
    if 'comments' not in st.session_state:
        st.session_state.comments = []
    if 'name' not in st.session_state:
        st.session_state.name = ""
    if 'comment' not in st.session_state:
        st.session_state.comment = ""

    for comment in st.session_state.comments:
        # Comment text is user input: escape it before it reaches raw HTML.
        body = html.escape(comment['comment'])
        st.markdown(f"\nAdmin {comment['date']}\n{body}\n", unsafe_allow_html=True)

    with st.form(key="comment_form"):
        st.markdown('\n', unsafe_allow_html=True)
        comment_input = st.text_area("Your Comment", height=100, value=st.session_state.comment)
        submit_button = st.form_submit_button("Submit")
        st.markdown('\n', unsafe_allow_html=True)

    if submit_button:
        if not comment_input.strip():
            st.error("Enter your comment")
        else:
            add_comment(comment_input)
            st.session_state.comment = ""
            st.rerun()


def _render_sidebar(meeting_data, speaker_data, smoothed_polarity, audio_url):
    """Fill the sidebar with the audio player, transcript and the two charts."""
    with st.sidebar:
        speaker_names = list(speaker_data)
        talk_time_percentages = [stats["talktime_percentage"] for stats in speaker_data.values()]
        color_palette = ["#A3BFF1", "#F4A7B9", "#C4F1D2", "#D6A7F2", "#FFD5A6",
                         "#9BE1E6", "#F4A3C0", "#C1E7B4", "#F1D0FF", "#F9E9A6"]
        # Colors repeat once there are more speakers than palette entries.
        speaker_colors = {
            speaker: color_palette[i % len(color_palette)]
            for i, speaker in enumerate(speaker_names)
        }

        st.markdown("\n\nListen to the Meeting Audio\n\n", unsafe_allow_html=True)
        # The signed URL was generated but never handed to any widget.
        st.audio(audio_url, format="audio/wav")

        st.title("Chat Conversation")
        with st.expander("Click to view the chat conversation", expanded=False):
            chat_lines = []
            for entry in meeting_data:
                speaker = html.escape(str(entry["speaker"]))
                text = html.escape(entry["text"])
                talk_time = entry["end"] - entry["start"]
                # Labelled in seconds: the words-per-minute computation treats
                # these same timestamps as seconds, so "mins" was inconsistent.
                chat_lines.append(f"\n{speaker} {talk_time:.2f} secs\n{text}\n")
            st.markdown("".join(chat_lines), unsafe_allow_html=True)

        fig = go.Figure(data=[go.Pie(
            labels=speaker_names,
            values=talk_time_percentages,
            marker=dict(colors=list(speaker_colors.values())),
            hole=0.3,
        )])
        fig.update_layout(
            title="Speaker Analytics",
            showlegend=True,
            legend=dict(orientation="h", yanchor="top", y=-0.2, xanchor="center", x=0.5),
        )
        st.plotly_chart(fig)

        st.markdown("### Sentiment Analysis of the Meeting")
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=list(range(len(smoothed_polarity))),
            y=smoothed_polarity,
            mode='lines',
            name='Sentiment',
            line=dict(color='blue'),
        ))
        # NOTE(review): the x values are sentence indices, not seconds; the
        # axis title is kept as-is pending a decision on the intended axis.
        fig.update_layout(
            xaxis=dict(title="Time (in seconds)"),
            yaxis=dict(title="Sentiment Score", range=[-1, 1]),
        )
        st.plotly_chart(fig)


def render_dashboard():
    """Load the selected meeting from the bucket and render the whole dashboard."""
    client_name = st.session_state.client_name
    date = st.session_state.date
    meeting = st.session_state.meeting

    if st.sidebar.button("Back"):
        st.session_state.logged_in = False
        st.rerun()

    st.sidebar.markdown(f"\n{html.escape(client_name)}\n", unsafe_allow_html=True)
    css = ' '
    st.markdown(css, unsafe_allow_html=True)

    summary_blob_name, transcription_blob_name, audio_blob_name = _meeting_blob_names(
        client_name, date, meeting
    )
    summary_content = bucket.blob(summary_blob_name).download_as_text()
    audio_url = bucket.blob(audio_blob_name).generate_signed_url(
        expiration=timedelta(hours=1), method='GET'
    )
    summary, key_points, action_items = parse_summary(summary_content)

    # transcription.txt is parsed as JSON: a sequence of utterance records.
    with bucket.blob(transcription_blob_name).open("r") as file:
        meeting_data = json.load(file)

    speaker_data = summarize_speakers(meeting_data)
    smoothed_polarity = smoothed_sentiment(speaker_data)

    _render_summary(summary, key_points, action_items)
    _render_comments()
    _render_sidebar(meeting_data, speaker_data, smoothed_polarity, audio_url)


if 'logged_in' not in st.session_state:
    st.session_state.logged_in = False

if not st.session_state.logged_in:
    login()
else:
    render_dashboard()