File size: 5,107 Bytes
a0a4a7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9ab266
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import pandas as pd
import numpy as np  

df = pd.read_csv('receiverByStateTown.csv')

agg = df.groupby(['state','town']).agg({
    'signal_strength': 'mean',
    'dropped_call': 'mean'
}).reset_index()

# Define classification logic
def classify_receiver(row):
    avg_signal = row['signal_strength']
    drop_rate = row['dropped_call']

    if avg_signal >= 2.0 and drop_rate <= 0.25:
        return 'Green'
    elif avg_signal >= 1.3 and drop_rate <= 0.19:
        return 'Yellow'
    else:
        return 'Red'

agg['receiver_status'] = agg.apply(classify_receiver, axis=1)

def classify_receiver_mean(row):
    avg_signal = row['avg_signal_strength']
    drop_rate = row['call_drop_rate']

    if avg_signal >= 2.0 and drop_rate <= 0.25:
        return 'Green'
    elif avg_signal >= 1.3 and drop_rate <= 0.19:
        return 'Yellow'
    else:
        return 'Red'

new_data = agg.merge(df, left_on= ["state","town"], right_on= ["state", "town"], how="inner")

final_df = new_data[['state','town','device_id','receiver_latitude','receiver_longitude','receiver_status']]

def analyze_telecom_data():
    results = {}

    # 1. State with the most connected devices, group by state and then find sort by max device ids count
    device_count_by_state = df.groupby('state')['device_id'].nunique().sort_values(ascending=False)

    #get the state with maximum number of connected devices
    results['max_connected_state'] = device_count_by_state.idxmax()
    
    #get the number of devices connected for that state
    results['max_connected_state_count'] = device_count_by_state.max()

    # 2. State with best average signal strength
    signal_strength_by_state = df.groupby('state')['signal_strength'].mean().sort_values(ascending=False)

    # get the state with the best signal strength
    results['best_signal_state'] = signal_strength_by_state.idxmax()

    #get the best averge signal strength for that state
    results['best_signal_avg'] = round(signal_strength_by_state.max(), 2)

    # 3. State with worst average call drop rate
    call_drop_by_state = df.groupby('state')['dropped_call'].mean().sort_values(ascending=False)

    # get the state with max call drop call
    results['worst_call_drop_state'] = call_drop_by_state.idxmax()

    # get the worst drop rate for that state
    results['worst_call_drop_rate'] = round(call_drop_by_state.max(), 2)

    # 4. Town-level summary (per state and town)
    town_summary = (
        df.groupby(['state', 'town'])
        .agg(
            device_count=('device_id', 'nunique'), # find device count aggregated
            avg_signal_strength=('signal_strength', 'mean'), # find average signal strength
            call_drop_rate=('dropped_call', 'mean'), # find average drop calls
            avg_call_duration=('call_duration', 'mean') # find average call duration
        )
        .reset_index()
        .sort_values(by='device_count', ascending=False)
    )
    town_summary['receiver_status'] = town_summary.apply(classify_receiver_mean, axis=1)
    
    location_df = (
    df.groupby(['state', 'town'])
    .agg(
        receiver_latitude=('receiver_latitude', 'first'),
        receiver_longitude=('receiver_longitude', 'first')
    )
    .reset_index()
)
    town_summary = town_summary.merge(location_df, on=['state', 'town'], how='left')
    
    town_summary = town_summary.round(2)

    # 5. Top towns by different metrics

    #get town by most connected devices
    results['top_town_by_device'] = town_summary.sort_values('device_count', ascending=False).head(1).to_dict('records')[0]

    #get town by best average signal 
    results['top_town_by_signal'] = town_summary.sort_values('avg_signal_strength', ascending=False).head(1).to_dict('records')[0]

    #get town with most call drop rate 
    results['worst_town_by_call_drop'] = town_summary.sort_values('call_drop_rate', ascending=False).head(1).to_dict('records')[0]

    # 6. Full breakdowns
    results['state_device_count'] = device_count_by_state.to_dict()
    results['state_signal_strength'] = signal_strength_by_state.round(2).to_dict()
    results['state_call_drop_rate'] = call_drop_by_state.round(2).to_dict()
    results['town_summary'] = town_summary.round(2).to_dict('records')

    return results


def get_summary_by_state(state_name):
    filtered = final_df[final_df['state'] == state_name]

    coord_counts = (
    filtered.groupby(['town', 'receiver_latitude', 'receiver_longitude'])
            .size()
            .reset_index(name='count')
)
    dominant_coords = (
    coord_counts.sort_values(['town', 'count'], ascending=[True, False])
                .groupby('town')
                .head(1)
                .drop(columns='count')
)
    summary = (
    filtered.merge(dominant_coords, on=['town', 'receiver_latitude', 'receiver_longitude'])
            .groupby(['town', 'receiver_latitude', 'receiver_longitude'])
            .agg(
                device_count=('device_id', 'nunique'),
                receiver_status=('receiver_status', lambda x: x.mode().iloc[0])
            )
            .reset_index())
    
    return summary