Spaces:

Akshayram1
/

data_visualization

Running

App Files Files Community

Akshayram1 committed on Jan 24

Commit

9f9b802

verified ·

1 Parent(s): a07d7fa

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -34

app.py CHANGED Viewed

@@ -3,39 +3,51 @@ import pandas as pd
 import matplotlib.pyplot as plt
 def process_data(df):
-    # Clean and transform data
     df = df[df['Project Category'].notna()]
-    # Create Week buckets
-    df['Start Date'] = pd.to_datetime(df['Date'].str.split(' to ').str[0], format='%d/%b/%y')
-    df['Week'] = df['Start Date'].apply(lambda x: 1 if x <= pd.Timestamp('2025-01-05') else 2)
-    # Aggregate utilization data
-    utilization = df.groupby(['Week', 'Project Category'])['Logged'].sum().unstack(fill_value=0)
-    # Calculate percentages
     total_hours = utilization.sum(axis=1)
     utilization_percent = utilization.div(total_hours, axis=0) * 100
-    # Select relevant categories
-    utilization_percent = utilization_percent[['Fixed Bid Projects - Billable',
-                                              'Non-Billable',
-                                              'Leaves']].rename(columns={
-        'Fixed Bid Projects - Billable': 'Billable',
-        'Non-Billable': 'Non-Billable',
-        'Leaves': 'Leaves'
-    })
     return utilization_percent
 def create_utilization_chart(week_data, week_number):
-    fig, ax = plt.subplots()
     wedges, texts, autotexts = ax.pie(
-        week_data.values,
-        labels=week_data.index,
         autopct='%1.1f%%',
-        colors=['#4CAF50', '#FFC107', '#9E9E9E']
     )
     plt.setp(autotexts, size=10, weight="bold", color='white')
     ax.set_title(f'Week {week_number} Utilization', pad=20)
     return fig
@@ -46,20 +58,34 @@ def main():
     uploaded_file = st.file_uploader("Upload Tempo Timesheet", type=['xls', 'xlsx'])
     if uploaded_file:
-        df = pd.read_excel(uploaded_file, sheet_name='Report')
-        utilization_percent = process_data(df)
-        # Page 4 Visualization
-        st.header("Bi-Weekly Utilization Report")
-        col1, col2 = st.columns(2)
-        with col1:
-            week1 = utilization_percent.loc[1]
-            st.pyplot(create_utilization_chart(week1, 1))
-        with col2:
-            week2 = utilization_percent.loc[2]
-            st.pyplot(create_utilization_chart(week2, 2))
 if __name__ == "__main__":
     main()

 import matplotlib.pyplot as plt
 def process_data(df, week1_end='2025-01-05'):
     """Compute per-week utilization percentages from a timesheet frame.

     Reads the 'Project Category', 'Date' and 'Logged' columns of *df*.
     Rows without a project category, or whose 'Date' start (the text
     before ' to ') does not parse as ``%d/%b/%y``, are ignored.

     Args:
         df: Timesheet DataFrame. It is not modified.
         week1_end: Last day counted as week 1; any later start date is
             assigned to week 2. Anything ``pd.Timestamp`` accepts.

     Returns:
         DataFrame indexed by week number (1 or 2) with the columns
         'Billable', 'Non-Billable' and 'Leaves'. Each row holds the share
         (0-100) of that week's hours across those three categories; a
         week with no hours in any of them is all zeros.
     """
     categorized = df[df['Project Category'].notna()]

     # 'Date' holds a range such as '01/Jan/25 to 05/Jan/25'; only the start
     # of the range decides the week. Unparseable values become NaT.
     start_dates = pd.to_datetime(
         categorized['Date'].str.split(' to ').str[0],
         format='%d/%b/%y',
         errors='coerce',
     )
     has_date = start_dates.notna()
     cutoff = pd.Timestamp(week1_end)

     # Build a fresh frame instead of assigning columns onto a filtered
     # slice, which would trigger pandas' chained-assignment warning.
     tidy = pd.DataFrame({
         'Week': (start_dates[has_date] > cutoff).astype(int) + 1,
         'Category': categorized.loc[has_date, 'Project Category'].map(
             _consolidate_category
         ),
         'Logged': categorized.loc[has_date, 'Logged'],
     })

     utilization = (
         tidy.groupby(['Week', 'Category'])['Logged'].sum().unstack(fill_value=0)
     )
     categories = ['Billable', 'Non-Billable', 'Leaves']
     utilization = utilization.reindex(categories, axis=1, fill_value=0)

     # Mask zero totals so 0/0 does not leak NaN into the percentages.
     total_hours = utilization.sum(axis=1)
     safe_totals = total_hours.where(total_hours != 0)
     return utilization.div(safe_totals, axis=0).fillna(0) * 100


 def _consolidate_category(category):
     """Map a raw project category onto its reporting bucket.

     'Non-Billable' must be tested first: it contains the substring
     'Billable', so a plain ``'Billable' in ...`` check would misfile it.
     Categories matching neither are returned unchanged.
     """
     label = str(category)
     if 'Non-Billable' in label:
         return 'Non-Billable'
     if 'Billable' in label:
         return 'Billable'
     return category
 def create_utilization_chart(week_data, week_number):
     """Draw a pie chart of one week's utilization split.

     Args:
         week_data: Series of values keyed by category label; entries that
             are not greater than zero are left out of the pie.
         week_number: Week number shown in the chart title.

     Returns:
         The matplotlib figure containing the chart.
     """
     fig, ax = plt.subplots(figsize=(6, 6))

     palette = ['#4CAF50', '#FFC107', '#9E9E9E']
     # Colours are keyed by category so a slice keeps its colour even when
     # another category is dropped for being zero.
     category_colors = {
         'Billable': palette[0],
         'Non-Billable': palette[1],
         'Leaves': palette[2],
     }

     sizes = week_data[week_data > 0]
     if len(sizes) == 0:
         # Nothing to plot: say so instead of rendering a blank pie.
         ax.text(0.5, 0.5, 'No logged hours', ha='center', va='center',
                 transform=ax.transAxes)
         ax.axis('off')
     else:
         colors = [
             # Unknown labels fall back to the palette by position.
             category_colors.get(label, palette[position % len(palette)])
             for position, label in enumerate(sizes.index)
         ]
         wedges, texts, autotexts = ax.pie(
             sizes,
             labels=sizes.index,
             autopct='%1.1f%%',
             colors=colors,
             startangle=90,
         )
         plt.setp(autotexts, size=10, weight="bold", color='white')

     ax.set_title(f'Week {week_number} Utilization', pad=20)
     return fig
     uploaded_file = st.file_uploader("Upload Tempo Timesheet", type=['xls', 'xlsx'])
     if uploaded_file:
+        try:
+            df = pd.read_excel(uploaded_file, sheet_name='Report')
+            utilization_percent = process_data(df)
+            # Page 4 Visualization
+            st.header("Bi-Weekly Utilization Report")
+            col1, col2 = st.columns(2)
+            with col1:
+                if 1 in utilization_percent.index:
+                    week1 = utilization_percent.loc[1]
+                    st.pyplot(create_utilization_chart(week1, 1))
+                else:
+                    st.warning("No data for Week 1")
+            with col2:
+                if 2 in utilization_percent.index:
+                    week2 = utilization_percent.loc[2]
+                    st.pyplot(create_utilization_chart(week2, 2))
+                else:
+                    st.warning("No data for Week 2")
+            # Show raw data for verification
+            st.subheader("Processed Data Preview")
+            st.dataframe(utilization_percent)
+        except Exception as e:
+            st.error(f"Error processing file: {str(e)}")
 if __name__ == "__main__":
     main()