Spaces:
Running
Running
File size: 5,107 Bytes
a0a4a7f f9ab266 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import pandas as pd
import numpy as np
df = pd.read_csv('receiverByStateTown.csv')
agg = df.groupby(['state','town']).agg({
'signal_strength': 'mean',
'dropped_call': 'mean'
}).reset_index()
# Define classification logic
def classify_receiver(row):
avg_signal = row['signal_strength']
drop_rate = row['dropped_call']
if avg_signal >= 2.0 and drop_rate <= 0.25:
return 'Green'
elif avg_signal >= 1.3 and drop_rate <= 0.19:
return 'Yellow'
else:
return 'Red'
agg['receiver_status'] = agg.apply(classify_receiver, axis=1)
def classify_receiver_mean(row):
avg_signal = row['avg_signal_strength']
drop_rate = row['call_drop_rate']
if avg_signal >= 2.0 and drop_rate <= 0.25:
return 'Green'
elif avg_signal >= 1.3 and drop_rate <= 0.19:
return 'Yellow'
else:
return 'Red'
new_data = agg.merge(df, left_on= ["state","town"], right_on= ["state", "town"], how="inner")
final_df = new_data[['state','town','device_id','receiver_latitude','receiver_longitude','receiver_status']]
def analyze_telecom_data():
results = {}
# 1. State with the most connected devices, group by state and then find sort by max device ids count
device_count_by_state = df.groupby('state')['device_id'].nunique().sort_values(ascending=False)
#get the state with maximum number of connected devices
results['max_connected_state'] = device_count_by_state.idxmax()
#get the number of devices connected for that state
results['max_connected_state_count'] = device_count_by_state.max()
# 2. State with best average signal strength
signal_strength_by_state = df.groupby('state')['signal_strength'].mean().sort_values(ascending=False)
# get the state with the best signal strength
results['best_signal_state'] = signal_strength_by_state.idxmax()
#get the best averge signal strength for that state
results['best_signal_avg'] = round(signal_strength_by_state.max(), 2)
# 3. State with worst average call drop rate
call_drop_by_state = df.groupby('state')['dropped_call'].mean().sort_values(ascending=False)
# get the state with max call drop call
results['worst_call_drop_state'] = call_drop_by_state.idxmax()
# get the worst drop rate for that state
results['worst_call_drop_rate'] = round(call_drop_by_state.max(), 2)
# 4. Town-level summary (per state and town)
town_summary = (
df.groupby(['state', 'town'])
.agg(
device_count=('device_id', 'nunique'), # find device count aggregated
avg_signal_strength=('signal_strength', 'mean'), # find average signal strength
call_drop_rate=('dropped_call', 'mean'), # find average drop calls
avg_call_duration=('call_duration', 'mean') # find average call duration
)
.reset_index()
.sort_values(by='device_count', ascending=False)
)
town_summary['receiver_status'] = town_summary.apply(classify_receiver_mean, axis=1)
location_df = (
df.groupby(['state', 'town'])
.agg(
receiver_latitude=('receiver_latitude', 'first'),
receiver_longitude=('receiver_longitude', 'first')
)
.reset_index()
)
town_summary = town_summary.merge(location_df, on=['state', 'town'], how='left')
town_summary = town_summary.round(2)
# 5. Top towns by different metrics
#get town by most connected devices
results['top_town_by_device'] = town_summary.sort_values('device_count', ascending=False).head(1).to_dict('records')[0]
#get town by best average signal
results['top_town_by_signal'] = town_summary.sort_values('avg_signal_strength', ascending=False).head(1).to_dict('records')[0]
#get town with most call drop rate
results['worst_town_by_call_drop'] = town_summary.sort_values('call_drop_rate', ascending=False).head(1).to_dict('records')[0]
# 6. Full breakdowns
results['state_device_count'] = device_count_by_state.to_dict()
results['state_signal_strength'] = signal_strength_by_state.round(2).to_dict()
results['state_call_drop_rate'] = call_drop_by_state.round(2).to_dict()
results['town_summary'] = town_summary.round(2).to_dict('records')
return results
def get_summary_by_state(state_name):
filtered = final_df[final_df['state'] == state_name]
coord_counts = (
filtered.groupby(['town', 'receiver_latitude', 'receiver_longitude'])
.size()
.reset_index(name='count')
)
dominant_coords = (
coord_counts.sort_values(['town', 'count'], ascending=[True, False])
.groupby('town')
.head(1)
.drop(columns='count')
)
summary = (
filtered.merge(dominant_coords, on=['town', 'receiver_latitude', 'receiver_longitude'])
.groupby(['town', 'receiver_latitude', 'receiver_longitude'])
.agg(
device_count=('device_id', 'nunique'),
receiver_status=('receiver_status', lambda x: x.mode().iloc[0])
)
.reset_index())
return summary |