SoumyaJ committed on
Commit
a0a4a7f
·
verified ·
1 Parent(s): 55095e0

Update getReceiverData.py

Browse files
Files changed (1) hide show
  1. getReceiverData.py +138 -114
getReceiverData.py CHANGED
@@ -1,115 +1,139 @@
1
- import pandas as pd
2
- import numpy as np
3
-
4
# Load the raw per-record receiver data that every later step reads from.
df = pd.read_csv('receiverByStateTown.csv')

# Mean signal strength and mean dropped-call value for each (state, town).
agg = (
    df.groupby(['state', 'town'])
    .agg(
        signal_strength=('signal_strength', 'mean'),
        dropped_call=('dropped_call', 'mean'),
    )
    .reset_index()
)
10
-
11
- # Define classification logic
12
def classify_receiver(row):
    """Map one aggregated row to a 'Green' / 'Yellow' / 'Red' status.

    Args:
        row: Any mapping-like object supporting ``row['signal_strength']``
            and ``row['dropped_call']`` (the per-town means).

    Returns:
        'Green' when signal >= 2.0 and drop rate <= 0.25, otherwise
        'Yellow' when signal >= 1.3 and drop rate <= 0.19, otherwise 'Red'.

    NOTE(review): the Yellow drop-rate ceiling (0.19) is stricter than the
    Green one (0.25); kept exactly as written -- confirm this is intended.
    """
    signal = row['signal_strength']
    drops = row['dropped_call']

    if signal >= 2.0 and drops <= 0.25:
        return 'Green'
    if signal >= 1.3 and drops <= 0.19:
        return 'Yellow'
    return 'Red'
22
-
23
# Label every (state, town) aggregate with its receiver status.
agg['receiver_status'] = agg.apply(classify_receiver, axis=1)

# Join the town-level status back onto each raw record of that town.
new_data = agg.merge(df, on=["state", "town"], how="inner")

# Keep only the columns the per-state summary reads.
final_df = new_data[
    [
        'state',
        'town',
        'device_id',
        'receiver_latitude',
        'receiver_longitude',
        'receiver_status',
    ]
]
28
-
29
def analyze_telecom_data():
    """Build state-level and town-level statistics from the module-level ``df``.

    Returns:
        dict with, in this order: the state having the most distinct
        ``device_id`` values and that count; the state with the highest mean
        ``signal_strength`` and that mean (2 decimals); the state with the
        highest mean ``dropped_call`` and that mean (2 decimals); the single
        top town record by device count, by mean signal and by drop rate;
        the full per-state breakdowns; and the rounded per-town summary as a
        list of records.
    """
    by_state = df.groupby('state')

    # 1. Distinct devices per state, largest first.
    device_count_by_state = by_state['device_id'].nunique().sort_values(ascending=False)
    results = {
        'max_connected_state': device_count_by_state.idxmax(),
        'max_connected_state_count': device_count_by_state.max(),
    }

    # 2. Mean signal strength per state, best first.
    signal_strength_by_state = by_state['signal_strength'].mean().sort_values(ascending=False)
    results['best_signal_state'] = signal_strength_by_state.idxmax()
    results['best_signal_avg'] = round(signal_strength_by_state.max(), 2)

    # 3. Mean dropped-call value per state, worst (highest) first.
    call_drop_by_state = by_state['dropped_call'].mean().sort_values(ascending=False)
    results['worst_call_drop_state'] = call_drop_by_state.idxmax()
    results['worst_call_drop_rate'] = round(call_drop_by_state.max(), 2)

    # 4. One row per (state, town), ordered by device count descending.
    per_town = df.groupby(['state', 'town']).agg(
        device_count=('device_id', 'nunique'),
        avg_signal_strength=('signal_strength', 'mean'),
        call_drop_rate=('dropped_call', 'mean'),
        avg_call_duration=('call_duration', 'mean'),
    )
    town_summary = per_town.reset_index().sort_values(by='device_count', ascending=False)

    def leading_town(metric):
        # Record (as a dict) of the town with the largest value of `metric`.
        ranked = town_summary.sort_values(metric, ascending=False)
        return ranked.head(1).to_dict('records')[0]

    # 5. Top towns by each metric.
    results['top_town_by_device'] = leading_town('device_count')
    results['top_town_by_signal'] = leading_town('avg_signal_strength')
    results['worst_town_by_call_drop'] = leading_town('call_drop_rate')

    # 6. Full breakdowns.
    results['state_device_count'] = device_count_by_state.to_dict()
    results['state_signal_strength'] = signal_strength_by_state.round(2).to_dict()
    results['state_call_drop_rate'] = call_drop_by_state.round(2).to_dict()
    results['town_summary'] = town_summary.round(2).to_dict('records')

    return results
90
-
91
-
92
def get_summary_by_state(state_name):
    """Summarise each town of one state at its most frequent receiver coordinates.

    Args:
        state_name: Value matched exactly against ``final_df['state']``.

    Returns:
        DataFrame with one row per town: ``town``, ``receiver_latitude``,
        ``receiver_longitude`` (the coordinate pair occurring most often in
        that town's rows), ``device_count`` (distinct ``device_id`` values at
        that pair) and ``receiver_status`` (most common status there).
    """
    coord_keys = ['town', 'receiver_latitude', 'receiver_longitude']
    state_rows = final_df[final_df['state'] == state_name]

    # How many rows each (town, lat, lon) combination has.
    occurrences = state_rows.groupby(coord_keys).size().reset_index(name='count')

    # Within each town put the most frequent coordinates first and keep only that row.
    ranked = occurrences.sort_values(['town', 'count'], ascending=[True, False])
    dominant_coords = ranked.groupby('town').head(1).drop(columns='count')

    # Restrict the state's rows to the dominant coordinates, then aggregate.
    at_dominant = state_rows.merge(dominant_coords, on=coord_keys)
    summary = (
        at_dominant.groupby(coord_keys)
        .agg(
            device_count=('device_id', 'nunique'),
            receiver_status=('receiver_status', lambda statuses: statuses.mode().iloc[0]),
        )
        .reset_index()
    )

    return summary
 
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
# Raw receiver records; all aggregations below are derived from this frame.
df = pd.read_csv('receiverByStateTown.csv')

# Per-(state, town) means of signal strength and dropped-call value,
# with the grouping keys kept as ordinary columns.
agg = df.groupby(['state', 'town'], as_index=False).agg(
    signal_strength=('signal_strength', 'mean'),
    dropped_call=('dropped_call', 'mean'),
)
10
+
11
+ # Define classification logic
12
def classify_receiver(row):
    """Classify a row of per-town means as 'Green', 'Yellow' or 'Red'.

    Args:
        row: Object indexable by ``'signal_strength'`` and ``'dropped_call'``.

    Returns:
        'Green' if signal >= 2.0 and drop rate <= 0.25; else 'Yellow' if
        signal >= 1.3 and drop rate <= 0.19; else 'Red'.

    NOTE(review): Yellow's drop-rate limit (0.19) is tighter than Green's
    (0.25); preserved as-is -- confirm with the author.
    """
    signal, drop_rate = row['signal_strength'], row['dropped_call']

    is_green = signal >= 2.0 and drop_rate <= 0.25
    is_yellow = signal >= 1.3 and drop_rate <= 0.19

    return 'Green' if is_green else 'Yellow' if is_yellow else 'Red'
22
+
23
# Add a 'receiver_status' column: classify_receiver is applied row by row
# (axis=1) to the per-(state, town) means held in `agg`.
agg['receiver_status'] = agg.apply(classify_receiver, axis=1)
24
+
25
def classify_receiver_mean(row):
    """Classify a town-summary row as 'Green', 'Yellow' or 'Red'.

    Same thresholds as ``classify_receiver`` but reads the town-summary
    column names.

    Args:
        row: Object indexable by ``'avg_signal_strength'`` and
            ``'call_drop_rate'``.

    Returns:
        The label of the first tier whose minimum signal and maximum drop
        rate are both satisfied, or 'Red' when none is.
    """
    signal = row['avg_signal_strength']
    drops = row['call_drop_rate']

    # (label, minimum signal, maximum drop rate), checked in this order.
    tiers = (
        ('Green', 2.0, 0.25),
        ('Yellow', 1.3, 0.19),
    )
    for label, min_signal, max_drop in tiers:
        if signal >= min_signal and drops <= max_drop:
            return label
    return 'Red'
35
+
36
# Inner-join the per-town aggregates (including receiver_status) onto the
# raw records so each record carries its town's status.
new_data = pd.merge(agg, df, how="inner", on=["state", "town"])

# Columns consumed by get_summary_by_state.
final_df = new_data[
    [
        'state',
        'town',
        'device_id',
        'receiver_latitude',
        'receiver_longitude',
        'receiver_status',
    ]
]
39
+
40
def analyze_telecom_data():
    """Build state-level and town-level statistics from the module-level ``df``.

    Returns:
        dict with, in this order:

        * ``max_connected_state`` / ``max_connected_state_count``: state with
          the most distinct ``device_id`` values, and that count.
        * ``best_signal_state`` / ``best_signal_avg``: state with the highest
          mean ``signal_strength``, and that mean rounded to 2 decimals.
        * ``worst_call_drop_state`` / ``worst_call_drop_rate``: state with the
          highest mean ``dropped_call``, and that mean rounded to 2 decimals.
        * ``top_town_by_device`` / ``top_town_by_signal`` /
          ``worst_town_by_call_drop``: the single leading town record for
          each metric.
        * ``state_device_count`` / ``state_signal_strength`` /
          ``state_call_drop_rate``: full per-state breakdowns.
        * ``town_summary``: list of per-(state, town) records.

        Town records carry ``device_count``, the three averaged metrics
        rounded to 2 decimals, ``receiver_status`` and the town's receiver
        coordinates at full precision.
    """
    results = {}

    # 1. State with the most connected (distinct) devices.
    device_count_by_state = df.groupby('state')['device_id'].nunique().sort_values(ascending=False)
    results['max_connected_state'] = device_count_by_state.idxmax()
    results['max_connected_state_count'] = device_count_by_state.max()

    # 2. State with the best average signal strength.
    signal_strength_by_state = df.groupby('state')['signal_strength'].mean().sort_values(ascending=False)
    results['best_signal_state'] = signal_strength_by_state.idxmax()
    results['best_signal_avg'] = round(signal_strength_by_state.max(), 2)

    # 3. State with the worst (highest) average call drop rate.
    call_drop_by_state = df.groupby('state')['dropped_call'].mean().sort_values(ascending=False)
    results['worst_call_drop_state'] = call_drop_by_state.idxmax()
    results['worst_call_drop_rate'] = round(call_drop_by_state.max(), 2)

    # 4. Town-level summary (one row per state and town), most devices first.
    town_summary = (
        df.groupby(['state', 'town'])
        .agg(
            device_count=('device_id', 'nunique'),
            avg_signal_strength=('signal_strength', 'mean'),
            call_drop_rate=('dropped_call', 'mean'),
            avg_call_duration=('call_duration', 'mean'),
        )
        .reset_index()
        .sort_values(by='device_count', ascending=False)
    )
    # Classify on the unrounded means so the thresholds see exact values.
    town_summary['receiver_status'] = town_summary.apply(classify_receiver_mean, axis=1)

    # One coordinate pair per town, taken with pandas' 'first' aggregation.
    location_df = (
        df.groupby(['state', 'town'])
        .agg(
            receiver_latitude=('receiver_latitude', 'first'),
            receiver_longitude=('receiver_longitude', 'first'),
        )
        .reset_index()
    )
    town_summary = town_summary.merge(location_df, on=['state', 'town'], how='left')

    # Round only the averaged metrics. A blanket round(2) on the frame would
    # also truncate latitude/longitude to two decimals, and rounding before
    # ranking would let towns tie on rounded values.
    metric_decimals = {
        'avg_signal_strength': 2,
        'call_drop_rate': 2,
        'avg_call_duration': 2,
    }

    def top_town(column):
        # Rank on exact values, then round the winning record for output.
        best = town_summary.sort_values(column, ascending=False).head(1)
        return best.round(metric_decimals).to_dict('records')[0]

    # 5. Top towns by different metrics.
    results['top_town_by_device'] = top_town('device_count')
    results['top_town_by_signal'] = top_town('avg_signal_strength')
    results['worst_town_by_call_drop'] = top_town('call_drop_rate')

    # 6. Full breakdowns.
    results['state_device_count'] = device_count_by_state.to_dict()
    results['state_signal_strength'] = signal_strength_by_state.round(2).to_dict()
    results['state_call_drop_rate'] = call_drop_by_state.round(2).to_dict()
    results['town_summary'] = town_summary.round(metric_decimals).to_dict('records')

    return results
114
+
115
+
116
def get_summary_by_state(state_name):
    """Return, for each town in a state, a summary at its dominant coordinates.

    Args:
        state_name: Compared for equality against ``final_df['state']``.

    Returns:
        DataFrame with columns ``town``, ``receiver_latitude``,
        ``receiver_longitude``, ``device_count`` and ``receiver_status``.
        Each town contributes the coordinate pair that appears in the most
        rows; ``device_count`` is the number of distinct ``device_id`` values
        at that pair and ``receiver_status`` the most common status there.
    """
    location_cols = ['town', 'receiver_latitude', 'receiver_longitude']

    in_state = final_df['state'] == state_name
    filtered = final_df[in_state]

    # Row count for every (town, latitude, longitude) combination.
    coord_counts = filtered.groupby(location_cols).size()
    coord_counts = coord_counts.reset_index(name='count')

    # Highest count first inside each town; the first row per town wins.
    coord_counts = coord_counts.sort_values(['town', 'count'], ascending=[True, False])
    dominant_coords = coord_counts.groupby('town').head(1)
    dominant_coords = dominant_coords.drop(columns='count')

    # Keep only rows located at a town's dominant coordinates.
    matched = filtered.merge(dominant_coords, on=location_cols)

    grouped = matched.groupby(location_cols)
    summary = grouped.agg(
        device_count=('device_id', 'nunique'),
        receiver_status=('receiver_status', lambda values: values.mode().iloc[0]),
    )
    summary = summary.reset_index()

    return summary