File size: 4,495 Bytes
f9ab266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import pandas as pd
import numpy as np  

df = pd.read_csv('receiverByStateTown.csv')

agg = df.groupby(['state','town']).agg({
    'signal_strength': 'mean',
    'dropped_call': 'mean'
}).reset_index()

# Define classification logic
def classify_receiver(row):
    avg_signal = row['signal_strength']
    drop_rate = row['dropped_call']

    if avg_signal >= 2.0 and drop_rate <= 0.25:
        return 'Green'
    elif avg_signal >= 1.3 and drop_rate <= 0.19:
        return 'Yellow'
    else:
        return 'Red'

agg['receiver_status'] = agg.apply(classify_receiver, axis=1)

new_data = agg.merge(df, left_on= ["state","town"], right_on= ["state", "town"], how="inner")

final_df = new_data[['state','town','device_id','receiver_latitude','receiver_longitude','receiver_status']]

def analyze_telecom_data():
    results = {}

    # 1. State with the most connected devices, group by state and then find sort by max device ids count
    device_count_by_state = df.groupby('state')['device_id'].nunique().sort_values(ascending=False)

    #get the state with maximum number of connected devices
    results['max_connected_state'] = device_count_by_state.idxmax()
    
    #get the number of devices connected for that state
    results['max_connected_state_count'] = device_count_by_state.max()

    # 2. State with best average signal strength
    signal_strength_by_state = df.groupby('state')['signal_strength'].mean().sort_values(ascending=False)

    # get the state with the best signal strength
    results['best_signal_state'] = signal_strength_by_state.idxmax()

    #get the best averge signal strength for that state
    results['best_signal_avg'] = round(signal_strength_by_state.max(), 2)

    # 3. State with worst average call drop rate
    call_drop_by_state = df.groupby('state')['dropped_call'].mean().sort_values(ascending=False)

    # get the state with max call drop call
    results['worst_call_drop_state'] = call_drop_by_state.idxmax()

    # get the worst drop rate for that state
    results['worst_call_drop_rate'] = round(call_drop_by_state.max(), 2)

    # 4. Town-level summary (per state and town)
    town_summary = (
        df.groupby(['state', 'town'])
        .agg(
            device_count=('device_id', 'nunique'), # find device count aggregated
            avg_signal_strength=('signal_strength', 'mean'), # find average signal strength
            call_drop_rate=('dropped_call', 'mean'), # find average drop calls
            avg_call_duration=('call_duration', 'mean') # find average call duration
        )
        .reset_index()
        .sort_values(by='device_count', ascending=False)
    )

    # 5. Top towns by different metrics

    #get town by most connected devices
    results['top_town_by_device'] = town_summary.sort_values('device_count', ascending=False).head(1).to_dict('records')[0]

    #get town by best average signal 
    results['top_town_by_signal'] = town_summary.sort_values('avg_signal_strength', ascending=False).head(1).to_dict('records')[0]

    #get town with most call drop rate 
    results['worst_town_by_call_drop'] = town_summary.sort_values('call_drop_rate', ascending=False).head(1).to_dict('records')[0]

    # 6. Full breakdowns
    results['state_device_count'] = device_count_by_state.to_dict()
    results['state_signal_strength'] = signal_strength_by_state.round(2).to_dict()
    results['state_call_drop_rate'] = call_drop_by_state.round(2).to_dict()
    results['town_summary'] = town_summary.round(2).to_dict('records')

    return results


def get_summary_by_state(state_name):
    filtered = final_df[final_df['state'] == state_name]

    coord_counts = (
    filtered.groupby(['town', 'receiver_latitude', 'receiver_longitude'])
            .size()
            .reset_index(name='count')
)
    dominant_coords = (
    coord_counts.sort_values(['town', 'count'], ascending=[True, False])
                .groupby('town')
                .head(1)
                .drop(columns='count')
)
    summary = (
    filtered.merge(dominant_coords, on=['town', 'receiver_latitude', 'receiver_longitude'])
            .groupby(['town', 'receiver_latitude', 'receiver_longitude'])
            .agg(
                device_count=('device_id', 'nunique'),
                receiver_status=('receiver_status', lambda x: x.mode().iloc[0])
            )
            .reset_index())
    
    return summary