"""676_252_1434_72

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1FniZJX1OfI1PltPCXhpw50znN1aYMFcP
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# List the files that sit alongside the dataset.  os.walk() silently yields
# nothing when it is handed a file path (the scandir error is ignored by
# default), so walk the directory containing the CSV rather than the CSV
# itself.
for dirname, _, filenames in os.walk(os.path.dirname('/content/world_bank_data_2025.csv')):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Load the dataset into `df`.
df = pd.read_csv('/content/world_bank_data_2025.csv')

# A bare `df.head()` / `df.dtypes` expression only renders as the last
# expression of a notebook cell; when this file runs as a script it shows
# nothing, so the previews are printed explicitly.
print(df.head())

# Quick structural summary: dimensions, column names, per-column null counts
# and column dtypes.
print("Shape of dataset:", df.shape)
print("COlumns:\n", df.columns.tolist())
print("\nMissing values:\n", df.isnull().sum())
print(df.dtypes)

# Indicator columns are every column except the country/year key columns.
indicators = df.columns.difference(['country_name', 'country_id', 'year'])

# Drop only the rows in which *every* indicator is missing; a row with at
# least one indicator value is kept.
df_clean = df.dropna(subset=indicators, how='all')

# The ten country names with the largest mean 'GDP (Current USD)' over all of
# their rows in df_clean.
top_countries = (
    df_clean.groupby('country_name')['GDP (Current USD)']
    .mean()
    .nlargest(10)
    .index
)

# Restrict the data to those countries for plotting.
gdp_plot = df_clean[df_clean['country_name'].isin(top_countries)]

# Line chart of GDP against year, one hue per selected country.
plt.figure(figsize=(12, 6))
sns.lineplot(data=gdp_plot, x='year', y='GDP (Current USD)', hue='country_name')
plt.title('GDP Trends (Top 10 Countries by Avg GDP)')
plt.ylabel('GDP in USD')
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()

# Keep only the numeric columns and leave `year` out of the correlation
# matrix.  errors='ignore' avoids a KeyError when `year` was not parsed as a
# number and is therefore already missing from the numeric selection.
numeric_df = df_clean.select_dtypes(include=['number']).drop(columns=['year'], errors='ignore')

# Annotated heatmap of the pairwise correlations between the remaining columns.
plt.figure(figsize=(10, 8))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Between Economic Indicators')
plt.show()

# All rows whose `year` equals 2020 (every column is retained; only the
# inflation column is plotted below).
inflation_2020 = df_clean[df_clean['year'] == 2020]

# Histogram with a KDE overlay of the non-null 'Inflation (CPI %)' values.
plt.figure(figsize=(12, 5))
sns.histplot(inflation_2020['Inflation (CPI %)'].dropna(), bins=30, kde=True, color='orange')
plt.title('Inflation Rate Distribution - 2020')
plt.xlabel('Inflation (CPI %)')
plt.grid(True)
plt.show()