import os
import json
import re
import html
import streamlit as st
import plotly.graph_objects as go
from google.cloud import storage
from google.oauth2 import service_account
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from scipy.ndimage import gaussian_filter1d
from datetime import timedelta, datetime
# --- Google Cloud Storage and page setup -----------------------------------
# The service-account key is read, as a JSON document, from the
# GCP_CREDENTIALS environment variable.
gcp_credentials = os.getenv('GCP_CREDENTIALS')
if not gcp_credentials:
    # Fail with an actionable message instead of the opaque TypeError that
    # json.loads(None) would raise.
    raise RuntimeError(
        "The GCP_CREDENTIALS environment variable is not set; it must contain "
        "the service-account key as a JSON document."
    )
credentials_dict = json.loads(gcp_credentials)
creds = service_account.Credentials.from_service_account_info(credentials_dict)
client = storage.Client(credentials=creds)

# Every object this app reads lives in this single bucket.
bucket_name = "kapnotes"
bucket = client.bucket(bucket_name)

st.set_page_config(page_title="Kap Notes", layout="wide")
def get_client_names():
    """Return the sorted, de-duplicated top-level folder names in the bucket.

    Every object in the module-level ``bucket`` is listed and the text before
    the first "/" of its name is collected.

    Returns:
        A sorted list of distinct first path segments.
    """
    client_names = set()
    for blob in bucket.list_blobs(prefix=""):
        folder, separator, _ = blob.name.partition("/")
        # An object stored at the bucket root has no "/" in its name and is
        # not a client folder; skipping it (and empty segments) matches the
        # non-empty-segment checks done by the date and meeting listings.
        if separator and folder:
            client_names.add(folder)
    return sorted(client_names)
def validate_data(client_name, date, meeting):
    """Report whether a meeting folder contains all three required objects.

    Args:
        client_name: First path segment of the meeting folder.
        date: Second path segment of the meeting folder.
        meeting: Third path segment of the meeting folder.

    Returns:
        True only if ``summary.txt``, ``transcription.txt`` and ``audio.wav``
        all exist under ``{client_name}/{date}/{meeting}/`` in the
        module-level ``bucket``. The checks run in that order and stop at the
        first missing object.
    """
    folder = f"{client_name}/{date}/{meeting}"
    required_files = ("summary.txt", "transcription.txt", "audio.wav")
    return all(
        bucket.blob(f"{folder}/{file_name}").exists()
        for file_name in required_files
    )
def get_meetings_for_date(client_name, date):
    """Return the sorted meeting folder names found under a client and date.

    Args:
        client_name: First path segment to search under.
        date: Second path segment to search under.

    Returns:
        A sorted list of the distinct, non-empty third path segments of every
        object whose name starts with ``{client_name}/{date}/``.
    """
    listing = bucket.list_blobs(prefix=f"{client_name}/{date}/")
    split_names = (blob.name.split("/") for blob in listing)
    meeting_names = {
        segments[2]
        for segments in split_names
        if len(segments) > 2 and segments[2]
    }
    return sorted(meeting_names)
def get_dates_for_client(client_name):
    """Return the sorted date folder names found under a client.

    Args:
        client_name: First path segment to search under.

    Returns:
        A sorted list of the distinct, non-empty second path segments of every
        object whose name starts with ``{client_name}/``.
    """
    found_dates = set()
    for blob in bucket.list_blobs(prefix=f"{client_name}/"):
        segments = blob.name.split("/")
        # Ignore names that have no second segment or an empty one.
        if len(segments) <= 1 or not segments[1]:
            continue
        found_dates.add(segments[1])
    return sorted(found_dates)
def login():
    """Render the sign-in screen and record a successful selection.

    The user picks a client, then a date, then a meeting, and enters the
    shared password. When "Sign In" is pressed with the correct password and
    ``validate_data`` confirms the meeting's objects exist, the selection is
    stored in ``st.session_state`` (``client_name``, ``date``, ``meeting``,
    ``logged_in``, ``password``) and the script is rerun. Otherwise an error
    message is shown.
    """
    import hmac  # local import: only needed for the constant-time comparison

    # NOTE(review): this call renders a string holding only a newline; it
    # looks like the remnant of a styling block - confirm whether markup is
    # missing here.
    st.markdown("\n", unsafe_allow_html=True)

    if 'password' not in st.session_state:
        st.session_state.password = ""

    # NOTE(review): in the reviewed text this literal was split across two
    # physical lines (a syntax error); the same characters are kept here with
    # an escaped newline. Any heading markup it once carried is not visible.
    st.markdown("\nKAP NOTES ", unsafe_allow_html=True)

    # Default to None so the sign-in handler can detect an incomplete
    # selection instead of failing on an unbound name.
    selected_date = None
    selected_meeting = None

    client_names = get_client_names()
    client_name = st.selectbox("Select Client", client_names)
    if client_name:
        available_dates = get_dates_for_client(client_name)
        selected_date = st.selectbox(f"Available Dates for {client_name}", available_dates)
        if selected_date:
            available_meetings = get_meetings_for_date(client_name, selected_date)
            selected_meeting = st.selectbox(f"Available Meetings for {selected_date}", available_meetings)

    password = st.text_input("Enter Password", type="password", value=st.session_state.password)
    sign_in_button = st.button("Sign In", key="sign_in")

    if sign_in_button:
        # SECURITY: the fallback below is a shared secret committed to source
        # control. Set KAPNOTES_PASSWORD in the environment to override it,
        # and remove the fallback once every deployment does so.
        expected_password = os.getenv("KAPNOTES_PASSWORD", "kapnotes12345")
        # compare_digest avoids leaking, through timing, how much of the
        # password matched; bytes are used so non-ASCII input is accepted.
        password_ok = hmac.compare_digest(
            password.encode("utf-8"), expected_password.encode("utf-8")
        )

        if not password:
            st.error("Please enter password.")
        elif not password_ok:
            st.error("Incorrect Password. Please try again.")
        elif not (client_name and selected_date and selected_meeting):
            st.error("Please select a client, date and meeting.")
        elif validate_data(client_name, selected_date, selected_meeting):
            st.session_state.client_name = client_name
            st.session_state.date = selected_date
            st.session_state.meeting = selected_meeting
            st.session_state.logged_in = True
            st.session_state.password = password
            st.rerun()
        else:
            st.error(f"No records available for {client_name} on {selected_date}. Please select another option.")

    # Keep what was typed so the field is pre-filled on the next rerun.
    st.session_state.password = password
def _parse_summary_sections(summary_content):
    """Split the stored summary text into its three sections.

    Args:
        summary_content: Text expected to contain "Summary:", "Key Points:"
            and "Action Items:" headings, in that order.

    Returns:
        A ``(summary, key_points, action_items)`` tuple: the summary string
        and two lists of strings. A placeholder message is substituted for
        any section whose heading pattern is not found.
    """
    summary_match = re.search(r"Summary:\s*(.*?)(?=\nKey Points:)", summary_content, re.DOTALL)
    summary = summary_match.group(1).strip() if summary_match else "Summary not found."

    key_points_match = re.search(r"Key Points:\s*(.*?)(?=\nAction Items:)", summary_content, re.DOTALL)
    if key_points_match:
        # The lookahead above stops before the newline that precedes
        # "Action Items:", so the captured text has no trailing newline.
        # Each bullet must therefore be allowed to end at end-of-text too,
        # otherwise the last key point is silently dropped.
        key_points = re.findall(r"- (.*?)(?:\n|$)", key_points_match.group(1))
    else:
        key_points = ["Key points not found."]

    action_items_match = re.search(r"Action Items:\s*(.*)", summary_content, re.DOTALL)
    if action_items_match:
        action_items = re.findall(r"- (.*?)(?=\n- |$)", action_items_match.group(1), re.DOTALL)
    else:
        action_items = ["Action items not found."]

    return summary, key_points, action_items


def _summarise_speakers(meeting_data):
    """Aggregate talk time, text and word counts per speaker.

    Args:
        meeting_data: Iterable of transcript entries, each a mapping with
            "speaker", "start", "end" and "text" keys.

    Returns:
        A dict keyed by speaker. Each value holds "talktime", "text",
        "words", "word_per_minute" and "talktime_percentage". Both derived
        figures are 0.0 when their denominator is zero.
    """
    speaker_data = {}
    total_talktime = 0
    for entry in meeting_data:
        duration = entry["end"] - entry["start"]
        text = entry["text"]
        total_talktime += duration
        stats = speaker_data.setdefault(entry["speaker"], {"talktime": 0, "text": "", "words": 0})
        stats["talktime"] += duration
        stats["text"] += " " + text
        stats["words"] += len(text.split())

    for stats in speaker_data.values():
        talktime = stats["talktime"]
        # Guard both divisions: a zero-length segment or an empty transcript
        # must not crash the page.
        stats["word_per_minute"] = round(stats["words"] / talktime * 60, 2) if talktime else 0.0
        stats["talktime_percentage"] = round(talktime / total_talktime * 100, 2) if total_talktime else 0.0
    return speaker_data


def _bullet_lines(items):
    """Return *items* as HTML-escaped "• item" entries separated by <br>."""
    return "<br>".join(f"• {html.escape(str(item), quote=False)}" for item in items)


if 'logged_in' not in st.session_state:
    st.session_state.logged_in = False

if not st.session_state.logged_in:
    login()
else:
    client_name = st.session_state.client_name
    date = st.session_state.date
    meeting = st.session_state.meeting
    password = st.session_state.password  # kept from the original; not used below

    if st.sidebar.button("Back"):
        st.session_state.logged_in = False
        st.rerun()

    # NOTE(review): the next two markdown calls render strings holding only a
    # newline; they look like remnants of styling blocks - confirm whether
    # markup is missing.
    st.sidebar.markdown("\n", unsafe_allow_html=True)
    css = "\n"
    st.markdown(css, unsafe_allow_html=True)

    # --- Load the meeting's objects from the module-level bucket -----------
    summary_blob_name = f"{client_name}/{date}/{meeting}/summary.txt"
    transcription_blob_name = f"{client_name}/{date}/{meeting}/transcription.txt"
    audio_blob_name = f"{client_name}/{date}/{meeting}/audio.wav"

    summary_blob = bucket.blob(summary_blob_name)
    summary_content = summary_blob.download_as_text()

    audio_blob = bucket.blob(audio_blob_name)
    # Signed URL lets the browser fetch the audio directly for one hour.
    audio_url = audio_blob.generate_signed_url(expiration=timedelta(hours=1), method='GET')

    summary, key_points, action_items = _parse_summary_sections(summary_content)

    transcription_blob = bucket.blob(transcription_blob_name)
    with transcription_blob.open("r") as file:
        meeting_data = json.load(file)

    speaker_data = _summarise_speakers(meeting_data)

    # --- Sentiment over the course of the meeting --------------------------
    # Join the text in transcript order. Joining the per-speaker text instead
    # would group every sentence by speaker and scramble the timeline that
    # the chart below plots.
    combined_text = " ".join(entry["text"] for entry in meeting_data)
    analyzer = SentimentIntensityAnalyzer()
    sentences = combined_text.split('.')
    sentiment_polarity = [analyzer.polarity_scores(sentence)["compound"] for sentence in sentences if sentence.strip()]
    # Nothing to smooth when the transcript has no sentences.
    smoothed_polarity = gaussian_filter1d(sentiment_polarity, sigma=2) if sentiment_polarity else []

    # --- Main column --------------------------------------------------------
    # Text taken from stored files or typed by users is HTML-escaped before
    # being passed to markdown with unsafe_allow_html=True.
    # NOTE(review): several literals in this section were split across
    # physical lines in the reviewed text (syntax errors) and carry no
    # markup; they are rebuilt here with minimal markup. Restore the intended
    # styling if it exists elsewhere.
    st.title("Kap Notes - Unveiling the story behind your meeting")
    st.markdown(f"### Summary\n{html.escape(summary, quote=False)}\n", unsafe_allow_html=True)

    st.markdown("### Meeting Highlights")
    st.markdown(_bullet_lines(key_points) + "\n", unsafe_allow_html=True)

    st.markdown("### Actionable Items")
    st.markdown(_bullet_lines(action_items) + "\n", unsafe_allow_html=True)

    st.markdown("### Comments")
    if 'comments' not in st.session_state:
        st.session_state.comments = []

    def add_comment(comment):
        """Append *comment* to the session's comment list, stamped with today's date."""
        st.session_state.comments.append({"name": "", "comment": comment, "date": datetime.now().strftime("%d, %b %Y")})

    if 'name' not in st.session_state:
        st.session_state.name = ""
    if 'comment' not in st.session_state:
        st.session_state.comment = ""

    # Show each stored comment; previously the loop rendered a blank string
    # per comment, so submitted comments were never visible.
    for comment in st.session_state.comments:
        comment_date = html.escape(str(comment["date"]), quote=False)
        comment_text = html.escape(str(comment["comment"]), quote=False)
        st.markdown(f"\n<b>{comment_date}</b><br>{comment_text}\n", unsafe_allow_html=True)

    with st.form(key="comment_form"):
        # NOTE(review): these two markdown calls render empty strings; they
        # look like the remnants of an opening and closing wrapper - confirm.
        st.markdown('', unsafe_allow_html=True)
        comment_input = st.text_area("Your Comment", height=100, value=st.session_state.comment)
        submit_button = st.form_submit_button("Submit")
        st.markdown("\n", unsafe_allow_html=True)

    if submit_button:
        if not comment_input:
            st.error("Enter your comment")
        else:
            add_comment(comment_input)
            st.session_state.comment = ""
            st.rerun()

    # --- Sidebar ------------------------------------------------------------
    # NOTE(review): the reviewed text had no indentation; everything below is
    # assumed to belong inside the sidebar - confirm the intended layout.
    with st.sidebar:
        speaker_names = list(speaker_data.keys())
        talk_time_percentages = [data["talktime_percentage"] for data in speaker_data.values()]
        color_palette = ["#A3BFF1", "#F4A7B9", "#C4F1D2", "#D6A7F2", "#FFD5A6", "#9BE1E6", "#F4A3C0", "#C1E7B4", "#F1D0FF", "#F9E9A6"]
        # Colours repeat once there are more speakers than palette entries.
        speaker_colors = {speaker: color_palette[i % len(color_palette)] for i, speaker in enumerate(speaker_names)}

        # The signed URL was computed but never referenced, so no player
        # could load the recording; wire it into an <audio> element.
        safe_audio_url = html.escape(audio_url, quote=True)
        st.markdown(
            "\nListen to the Meeting Audio\n"
            f'<audio controls src="{safe_audio_url}">'
            "Your browser does not support the audio element."
            "</audio>\n",
            unsafe_allow_html=True,
        )

        st.title("Chat Conversation")
        with st.expander("Click to view the chat conversation", expanded=False):
            chat_entries = []
            for entry in meeting_data:
                speaker = entry["speaker"]
                talk_time = entry["end"] - entry["start"]
                speaker_label = html.escape(str(speaker), quote=False)
                entry_text = html.escape(str(entry["text"]), quote=False)
                # Tag the speaker with the same colour used in the pie chart.
                # NOTE(review): the raw end-start difference is labelled
                # "mins" here, while the words-per-minute figure treats the
                # same difference as seconds - confirm the unit.
                chat_entries.append(
                    f'\n<span style="background-color: {speaker_colors[speaker]}; padding: 2px 6px;">{speaker_label}</span>\n'
                    f"{talk_time:.2f} mins\n"
                    f"{entry_text}\n"
                )
            chat_conversation = "".join(chat_entries)
            st.markdown(chat_conversation, unsafe_allow_html=True)

        fig = go.Figure(data=[go.Pie(labels=speaker_names, values=talk_time_percentages, marker=dict(colors=list(speaker_colors.values())), hole=0.3)])
        fig.update_layout(
            title="Speaker Analytics",
            showlegend=True,
            # Horizontal legend placed below the chart.
            legend=dict(
                orientation="h",
                yanchor="top",
                y=-0.2,
                xanchor="center",
                x=0.5
            )
        )
        st.plotly_chart(fig)

        st.markdown("### Sentiment Analysis of the Meeting")
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=list(range(len(smoothed_polarity))), y=smoothed_polarity, mode='lines', name='Sentiment', line=dict(color='blue')))
        fig.update_layout(
            # The x values are sentence indices, not timestamps, so the axis
            # is labelled accordingly.
            xaxis=dict(title="Sentence number"),
            yaxis=dict(title="Sentiment Score", range=[-1, 1]),
        )
        st.plotly_chart(fig)