TelcomReceiverStatus / getReceiverData.py
SoumyaJ's picture
Update getReceiverData.py
a0a4a7f verified
import pandas as pd
import numpy as np
df = pd.read_csv('receiverByStateTown.csv')
agg = df.groupby(['state','town']).agg({
'signal_strength': 'mean',
'dropped_call': 'mean'
}).reset_index()
# Define classification logic
def classify_receiver(row):
avg_signal = row['signal_strength']
drop_rate = row['dropped_call']
if avg_signal >= 2.0 and drop_rate <= 0.25:
return 'Green'
elif avg_signal >= 1.3 and drop_rate <= 0.19:
return 'Yellow'
else:
return 'Red'
agg['receiver_status'] = agg.apply(classify_receiver, axis=1)
def classify_receiver_mean(row):
avg_signal = row['avg_signal_strength']
drop_rate = row['call_drop_rate']
if avg_signal >= 2.0 and drop_rate <= 0.25:
return 'Green'
elif avg_signal >= 1.3 and drop_rate <= 0.19:
return 'Yellow'
else:
return 'Red'
new_data = agg.merge(df, left_on= ["state","town"], right_on= ["state", "town"], how="inner")
final_df = new_data[['state','town','device_id','receiver_latitude','receiver_longitude','receiver_status']]
def analyze_telecom_data():
results = {}
# 1. State with the most connected devices, group by state and then find sort by max device ids count
device_count_by_state = df.groupby('state')['device_id'].nunique().sort_values(ascending=False)
#get the state with maximum number of connected devices
results['max_connected_state'] = device_count_by_state.idxmax()
#get the number of devices connected for that state
results['max_connected_state_count'] = device_count_by_state.max()
# 2. State with best average signal strength
signal_strength_by_state = df.groupby('state')['signal_strength'].mean().sort_values(ascending=False)
# get the state with the best signal strength
results['best_signal_state'] = signal_strength_by_state.idxmax()
#get the best averge signal strength for that state
results['best_signal_avg'] = round(signal_strength_by_state.max(), 2)
# 3. State with worst average call drop rate
call_drop_by_state = df.groupby('state')['dropped_call'].mean().sort_values(ascending=False)
# get the state with max call drop call
results['worst_call_drop_state'] = call_drop_by_state.idxmax()
# get the worst drop rate for that state
results['worst_call_drop_rate'] = round(call_drop_by_state.max(), 2)
# 4. Town-level summary (per state and town)
town_summary = (
df.groupby(['state', 'town'])
.agg(
device_count=('device_id', 'nunique'), # find device count aggregated
avg_signal_strength=('signal_strength', 'mean'), # find average signal strength
call_drop_rate=('dropped_call', 'mean'), # find average drop calls
avg_call_duration=('call_duration', 'mean') # find average call duration
)
.reset_index()
.sort_values(by='device_count', ascending=False)
)
town_summary['receiver_status'] = town_summary.apply(classify_receiver_mean, axis=1)
location_df = (
df.groupby(['state', 'town'])
.agg(
receiver_latitude=('receiver_latitude', 'first'),
receiver_longitude=('receiver_longitude', 'first')
)
.reset_index()
)
town_summary = town_summary.merge(location_df, on=['state', 'town'], how='left')
town_summary = town_summary.round(2)
# 5. Top towns by different metrics
#get town by most connected devices
results['top_town_by_device'] = town_summary.sort_values('device_count', ascending=False).head(1).to_dict('records')[0]
#get town by best average signal
results['top_town_by_signal'] = town_summary.sort_values('avg_signal_strength', ascending=False).head(1).to_dict('records')[0]
#get town with most call drop rate
results['worst_town_by_call_drop'] = town_summary.sort_values('call_drop_rate', ascending=False).head(1).to_dict('records')[0]
# 6. Full breakdowns
results['state_device_count'] = device_count_by_state.to_dict()
results['state_signal_strength'] = signal_strength_by_state.round(2).to_dict()
results['state_call_drop_rate'] = call_drop_by_state.round(2).to_dict()
results['town_summary'] = town_summary.round(2).to_dict('records')
return results
def get_summary_by_state(state_name):
filtered = final_df[final_df['state'] == state_name]
coord_counts = (
filtered.groupby(['town', 'receiver_latitude', 'receiver_longitude'])
.size()
.reset_index(name='count')
)
dominant_coords = (
coord_counts.sort_values(['town', 'count'], ascending=[True, False])
.groupby('town')
.head(1)
.drop(columns='count')
)
summary = (
filtered.merge(dominant_coords, on=['town', 'receiver_latitude', 'receiver_longitude'])
.groupby(['town', 'receiver_latitude', 'receiver_longitude'])
.agg(
device_count=('device_id', 'nunique'),
receiver_status=('receiver_status', lambda x: x.mode().iloc[0])
)
.reset_index())
return summary