Spaces:
Running
Running
Update getReceiverData.py
Browse files- getReceiverData.py +138 -114
getReceiverData.py
CHANGED
@@ -1,115 +1,139 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
|
4 |
-
# Load the raw receiver records from the CSV file.
df = pd.read_csv('receiverByStateTown.csv')

# Mean signal strength and mean dropped-call value for every (state, town).
agg = (
    df.groupby(['state', 'town'])
    .agg(
        signal_strength=('signal_strength', 'mean'),
        dropped_call=('dropped_call', 'mean'),
    )
    .reset_index()
)
|
10 |
-
|
11 |
-
# Define classification logic
|
12 |
-
def classify_receiver(row, green=(2.0, 0.25), yellow=(1.3, 0.19)):
    """Classify one aggregated receiver row as 'Green', 'Yellow' or 'Red'.

    Args:
        row: Mapping-like object (for example a DataFrame row) that exposes
            ``'signal_strength'`` and ``'dropped_call'``.
        green: ``(min_signal, max_drop_rate)`` limits for the 'Green' tier.
        yellow: ``(min_signal, max_drop_rate)`` limits for the 'Yellow'
            tier, consulted only when the 'Green' limits are not met.
            NOTE(review): the default 'Yellow' drop limit (0.19) is stricter
            than the 'Green' one (0.25) -- confirm this is intended.

    Returns:
        str: 'Green', 'Yellow' or 'Red'.
    """
    avg_signal = row['signal_strength']
    drop_rate = row['dropped_call']

    # Tiers are tested best-first; the first tier whose two limits hold wins.
    for label, (min_signal, max_drop) in (('Green', green), ('Yellow', yellow)):
        if avg_signal >= min_signal and drop_rate <= max_drop:
            return label

    # NaN compares False against every limit, so missing values end up here.
    return 'Red'
|
22 |
-
|
23 |
-
# Label every (state, town) aggregate with its Green/Yellow/Red status.
agg['receiver_status'] = agg.apply(classify_receiver, axis='columns')
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
)
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
)
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
)
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
return summary
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
# Load the raw receiver records from the CSV file.
df = pd.read_csv('receiverByStateTown.csv')

# Mean signal strength and mean dropped-call value for every (state, town).
agg = (
    df.groupby(['state', 'town'])
    .agg(
        signal_strength=('signal_strength', 'mean'),
        dropped_call=('dropped_call', 'mean'),
    )
    .reset_index()
)
|
10 |
+
|
11 |
+
# Define classification logic
|
12 |
+
def classify_receiver(row, green=(2.0, 0.25), yellow=(1.3, 0.19)):
    """Classify one aggregated receiver row as 'Green', 'Yellow' or 'Red'.

    Args:
        row: Mapping-like object (for example a DataFrame row) that exposes
            ``'signal_strength'`` and ``'dropped_call'``.
        green: ``(min_signal, max_drop_rate)`` limits for the 'Green' tier.
        yellow: ``(min_signal, max_drop_rate)`` limits for the 'Yellow'
            tier, consulted only when the 'Green' limits are not met.
            NOTE(review): the default 'Yellow' drop limit (0.19) is stricter
            than the 'Green' one (0.25) -- confirm this is intended.

    Returns:
        str: 'Green', 'Yellow' or 'Red'.
    """
    avg_signal = row['signal_strength']
    drop_rate = row['dropped_call']

    # Tiers are tested best-first; the first tier whose two limits hold wins.
    for label, (min_signal, max_drop) in (('Green', green), ('Yellow', yellow)):
        if avg_signal >= min_signal and drop_rate <= max_drop:
            return label

    # NaN compares False against every limit, so missing values end up here.
    return 'Red'
|
22 |
+
|
23 |
+
# Label every (state, town) aggregate with its Green/Yellow/Red status.
agg['receiver_status'] = agg.apply(classify_receiver, axis='columns')
|
24 |
+
|
25 |
+
def classify_receiver_mean(row, green=(2.0, 0.25), yellow=(1.3, 0.19)):
    """Classify one town-summary row as 'Green', 'Yellow' or 'Red'.

    Applies the same tier rules as a plain receiver classification, but reads
    the summary column names ``'avg_signal_strength'`` and
    ``'call_drop_rate'``.

    Args:
        row: Mapping-like object (for example a DataFrame row) that exposes
            ``'avg_signal_strength'`` and ``'call_drop_rate'``.
        green: ``(min_signal, max_drop_rate)`` limits for the 'Green' tier.
        yellow: ``(min_signal, max_drop_rate)`` limits for the 'Yellow'
            tier, consulted only when the 'Green' limits are not met.
            NOTE(review): the default 'Yellow' drop limit (0.19) is stricter
            than the 'Green' one (0.25) -- confirm this is intended.

    Returns:
        str: 'Green', 'Yellow' or 'Red'.
    """
    avg_signal = row['avg_signal_strength']
    drop_rate = row['call_drop_rate']

    # Tiers are tested best-first; the first tier whose two limits hold wins.
    for label, (min_signal, max_drop) in (('Green', green), ('Yellow', yellow)):
        if avg_signal >= min_signal and drop_rate <= max_drop:
            return label

    # NaN compares False against every limit, so missing values end up here.
    return 'Red'
|
35 |
+
|
36 |
+
# Join the per-town aggregates (including receiver_status) back onto the raw
# rows; both frames share the key column names, so a plain ``on`` suffices.
new_data = agg.merge(df, on=["state", "town"], how="inner")

# Per-record view used by get_summary_by_state.
final_df = new_data.loc[
    :,
    [
        'state',
        'town',
        'device_id',
        'receiver_latitude',
        'receiver_longitude',
        'receiver_status',
    ],
]
|
39 |
+
|
40 |
+
def analyze_telecom_data():
    """Build state-level and town-level statistics from the module-level ``df``.

    Returns:
        dict: Headline values (``max_connected_state``,
        ``max_connected_state_count``, ``best_signal_state``,
        ``best_signal_avg``, ``worst_call_drop_state``,
        ``worst_call_drop_rate``), the leading town rows
        (``top_town_by_device``, ``top_town_by_signal``,
        ``worst_town_by_call_drop``), the per-state breakdowns
        (``state_device_count``, ``state_signal_strength``,
        ``state_call_drop_rate``) and ``town_summary``, a list with one
        record per (state, town).
    """
    results = {}

    # 1. Distinct device ids per state, largest first.
    device_count_by_state = (
        df.groupby('state')['device_id'].nunique().sort_values(ascending=False)
    )
    results['max_connected_state'] = device_count_by_state.idxmax()
    results['max_connected_state_count'] = device_count_by_state.max()

    # 2. Mean signal strength per state, best first.
    signal_strength_by_state = (
        df.groupby('state')['signal_strength'].mean().sort_values(ascending=False)
    )
    results['best_signal_state'] = signal_strength_by_state.idxmax()
    results['best_signal_avg'] = round(signal_strength_by_state.max(), 2)

    # 3. Mean dropped-call value per state, worst (highest) first.
    call_drop_by_state = (
        df.groupby('state')['dropped_call'].mean().sort_values(ascending=False)
    )
    results['worst_call_drop_state'] = call_drop_by_state.idxmax()
    results['worst_call_drop_rate'] = round(call_drop_by_state.max(), 2)

    # 4. Town-level summary (one row per state and town).
    town_summary = (
        df.groupby(['state', 'town'])
        .agg(
            device_count=('device_id', 'nunique'),            # distinct devices
            avg_signal_strength=('signal_strength', 'mean'),  # average signal strength
            call_drop_rate=('dropped_call', 'mean'),          # average dropped-call value
            avg_call_duration=('call_duration', 'mean'),      # average call duration
        )
        .reset_index()
        .sort_values(by='device_count', ascending=False)
    )
    # The status is derived from the unrounded averages.
    town_summary['receiver_status'] = town_summary.apply(classify_receiver_mean, axis=1)

    # Attach one coordinate pair per town (the 'first' value in each group).
    location_df = (
        df.groupby(['state', 'town'])
        .agg(
            receiver_latitude=('receiver_latitude', 'first'),
            receiver_longitude=('receiver_longitude', 'first'),
        )
        .reset_index()
    )
    town_summary = town_summary.merge(location_df, on=['state', 'town'], how='left')

    # Round only the averaged metrics. Rounding the whole frame would also cut
    # latitude/longitude to two decimals and move the receivers on a map.
    metric_columns = ['avg_signal_strength', 'call_drop_rate', 'avg_call_duration']
    town_summary[metric_columns] = town_summary[metric_columns].round(2)

    def _top_row(column):
        """Return the town row holding the largest value of *column* as a dict."""
        ranked = town_summary.sort_values(column, ascending=False)
        return ranked.head(1).to_dict('records')[0]

    # 5. Top towns by different metrics.
    results['top_town_by_device'] = _top_row('device_count')
    results['top_town_by_signal'] = _top_row('avg_signal_strength')
    results['worst_town_by_call_drop'] = _top_row('call_drop_rate')

    # 6. Full breakdowns.
    results['state_device_count'] = device_count_by_state.to_dict()
    results['state_signal_strength'] = signal_strength_by_state.round(2).to_dict()
    results['state_call_drop_rate'] = call_drop_by_state.round(2).to_dict()
    results['town_summary'] = town_summary.to_dict('records')

    return results
|
114 |
+
|
115 |
+
|
116 |
+
def get_summary_by_state(state_name):
    """Summarise the towns of one state at their most frequent receiver position.

    For every town in ``final_df`` whose ``state`` equals *state_name*, the
    coordinate pair that occurs on the most rows is selected, and only rows at
    that position are counted.

    Args:
        state_name: Value compared for equality with ``final_df['state']``.

    Returns:
        pandas.DataFrame: At most one row per town with the columns ``town``,
        ``receiver_latitude``, ``receiver_longitude``, ``device_count``
        (distinct device ids at that position) and ``receiver_status`` (most
        common status at that position). An empty frame with these columns is
        returned when no row matches *state_name*.
    """
    coord_keys = ['town', 'receiver_latitude', 'receiver_longitude']
    filtered = final_df[final_df['state'] == state_name]

    # Unknown state: answer explicitly instead of running the status lambda
    # (``mode().iloc[0]``) through an empty group-by.
    if filtered.empty:
        return pd.DataFrame(columns=coord_keys + ['device_count', 'receiver_status'])

    # Number of rows reported at each (town, latitude, longitude).
    coord_counts = filtered.groupby(coord_keys).size().reset_index(name='count')

    # Keep the highest-count coordinate pair of every town.
    dominant_coords = (
        coord_counts.sort_values(['town', 'count'], ascending=[True, False])
        .groupby('town')
        .head(1)
        .drop(columns='count')
    )

    # Restrict to rows at the dominant position, then aggregate per town.
    summary = (
        filtered.merge(dominant_coords, on=coord_keys)
        .groupby(coord_keys)
        .agg(
            device_count=('device_id', 'nunique'),
            receiver_status=('receiver_status', lambda x: x.mode().iloc[0]),
        )
        .reset_index()
    )

    return summary
|