Update data_preparation.py
Browse files
- data_preparation.py +33 -2
data_preparation.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import pandas as pd
|
2 |
import numpy as np
|
3 |
def data_imp():
|
4 |
-
|
5 |
"CustID": "Unique identifier for each customer.",
|
6 |
"FirstPolYear": "Year when the customer first bought an insurance policy.",
|
7 |
"BirthYear": "Birth year of the customer, used to calculate age.",
|
@@ -17,6 +17,37 @@ def data_imp():
|
|
17 |
"PremLife": "Premium amount for life insurance.",
|
18 |
"PremWork": "Premium amount for work insurance."
|
19 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
insurance_defaults = {
|
21 |
"FirstPolYear": 1999,
|
22 |
"BirthYear": 1980,
|
@@ -62,7 +93,7 @@ def data_imp():
|
|
62 |
"Detergents_Paper": 4000,
|
63 |
"Delicassen": 2000
|
64 |
}
|
65 |
-
return
|
66 |
|
67 |
def preprocess_data(data):
|
68 |
if 'CustID' in data.columns:
|
|
|
1 |
import pandas as pd
|
2 |
import numpy as np
|
3 |
def data_imp():
|
4 |
+
insurance_feature_descriptions = {
|
5 |
"CustID": "Unique identifier for each customer.",
|
6 |
"FirstPolYear": "Year when the customer first bought an insurance policy.",
|
7 |
"BirthYear": "Birth year of the customer, used to calculate age.",
|
|
|
17 |
"PremLife": "Premium amount for life insurance.",
|
18 |
"PremWork": "Premium amount for work insurance."
|
19 |
}
|
20 |
+
retail_feature_descriptions = {
|
21 |
+
"Channel": "Indicates the sales channel through which the customer made purchases.",
|
22 |
+
"Region": "The geographical region where the customer is located.",
|
23 |
+
"Fresh": "Annual spending (in monetary units) on fresh products.",
|
24 |
+
"Milk": "Annual spending (in monetary units) on milk products.",
|
25 |
+
"Grocery": "Annual spending (in monetary units) on grocery items.",
|
26 |
+
"Frozen": "Annual spending (in monetary units) on frozen products.",
|
27 |
+
"Detergents_Paper": "Annual spending (in monetary units) on detergents and paper products.",
|
28 |
+
"Delicassen": "Annual spending (in monetary units) on delicatessen products."
|
29 |
+
}
|
30 |
+
bankng_feature_descriptions = {
|
31 |
+
"CUST_ID": "Unique identifier for each customer.",
|
32 |
+
"BALANCE": "The average balance left in the customer's account.",
|
33 |
+
"BALANCE_FREQUENCY": "Frequency with which the balance is updated.",
|
34 |
+
"PURCHASES": "The total amount of purchases made by the customer.",
|
35 |
+
"ONEOFF_PURCHASES": "The total amount of one-time purchases made by the customer.",
|
36 |
+
"INSTALLMENTS_PURCHASES": "The total amount of purchases made in installments.",
|
37 |
+
"CASH_ADVANCE": "The total amount of cash advances taken by the customer.",
|
38 |
+
"PURCHASES_FREQUENCY": "The frequency of purchases made by the customer.",
|
39 |
+
"ONEOFF_PURCHASES_FREQUENCY": "The frequency of one-time purchases made by the customer.",
|
40 |
+
"PURCHASES_INSTALLMENTS_FREQUENCY": "The frequency of purchases made in installments.",
|
41 |
+
"CASH_ADVANCE_FREQUENCY": "The frequency of cash advances taken by the customer.",
|
42 |
+
"CASH_ADVANCE_TRX": "The number of cash advance transactions made by the customer.",
|
43 |
+
"PURCHASES_TRX": "The number of purchase transactions made by the customer.",
|
44 |
+
"CREDIT_LIMIT": "The credit limit assigned to the customer's account.",
|
45 |
+
"PAYMENTS": "The total amount of payments made by the customer.",
|
46 |
+
"MINIMUM_PAYMENTS": "The minimum amount of payments made by the customer.",
|
47 |
+
"PRC_FULL_PAYMENT": "The percentage of full payments made by the customer.",
|
48 |
+
"TENURE": "The tenure of the customer in months."
|
49 |
+
}
|
50 |
+
|
51 |
insurance_defaults = {
|
52 |
"FirstPolYear": 1999,
|
53 |
"BirthYear": 1980,
|
|
|
93 |
"Detergents_Paper": 4000,
|
94 |
"Delicassen": 2000
|
95 |
}
|
96 |
+
return insurance_feature_descriptions,bankng_feature_descriptions,retail_feature_descriptions,insurance_defaults,banking_defaults,retail_defaults
|
97 |
|
98 |
def preprocess_data(data):
|
99 |
if 'CustID' in data.columns:
|