Spaces:
Sleeping
Sleeping
import pandas as pd | |
import numpy as np | |
from datetime import datetime | |
class Parser:
    """Load epidemic time series and population data from CSV files.

    The three time-series tables (confirmed, deaths, recovered) are stored
    with dates as the row index and country names as columns.  The
    population table is indexed by country name.

    Attributes:
        confirmed: Confirmed-case table (dates x countries).
        deaths: Death table (dates x countries).
        recovered: Recovered table (dates x countries).
        population: Population table indexed by its "Country" column.
        countries: Names that appear both as a column of ``confirmed`` and
            in the index of ``population``.  Only ``confirmed`` is
            consulted; ``deaths`` and ``recovered`` are not checked.
    """

    def __init__(self, filename_confirmed,
                 filename_deaths,
                 filename_recovered,
                 filename_population):
        """Read all four CSV files and compute the list of usable countries."""
        self.confirmed = self.read_csv(filename_confirmed)
        self.deaths = self.read_csv(filename_deaths)
        self.recovered = self.read_csv(filename_recovered)
        self.population = self.read_population(filename_population)
        self.countries = list(np.intersect1d(self.confirmed.columns.values,
                                             self.population.index.values))

    def read_csv(self, filename):
        """Read a time-series CSV into a dates-by-countries dataframe.

        The file must contain a "Country/Region" column; every other column
        header has to be parseable by ``pd.to_datetime`` because the headers
        become the row index after transposing.

        Args:
            filename: Path (or anything ``pd.read_csv`` accepts).

        Returns:
            Dataframe with a ``DatetimeIndex`` as rows and one column per
            "Country/Region" entry.
        """
        data = pd.read_csv(filename)
        # Countries become the columns and the date headers become the rows.
        data = data.set_index("Country/Region")
        data = data.T
        data.index = pd.to_datetime(data.index)
        return data

    def parse_data(self, start_date, end_date, country):
        """Return the infected and removed series for one country.

        ``r`` is deaths + recovered.  ``i`` is the running sum of the daily
        change in confirmed cases minus the daily change in ``r``.  The
        running sum starts at the first row of the data, so ``i`` is measured
        relative to that first row (it is 0 there).
        NOTE(review): if the data starts with non-zero counts, ``i`` is
        offset from ``confirmed - r`` by that initial value -- confirm this
        is intended.

        Args:
            start_date: First date to return, as "YYYY-MM-DD".
            end_date: Last date to return (inclusive), as "YYYY-MM-DD".
            country: Column name; must be in ``self.countries``.

        Returns:
            Tuple ``(i, r)`` of series that share the same date index.

        Raises:
            ValueError: If a date is not "YYYY-MM-DD" or the country is
                unknown.
        """
        self.validate_date(start_date)
        self.validate_date(end_date)
        self.validate_country(country)

        confirmed = self.confirmed.loc[:end_date, country]
        r = (self.deaths.loc[:end_date, country]
             + self.recovered.loc[:end_date, country])

        # The first row has no previous day, so diff() yields NaN there.
        # Treat it as "no change" instead of dropping the row; dropping it
        # made ``i`` one element shorter than ``r`` whenever start_date was
        # on or before the first date in the data.
        delta_i = confirmed.diff().fillna(0).astype(int)
        delta_r = r.diff().fillna(0).astype(int)
        i = (delta_i - delta_r).cumsum()

        return i.loc[start_date:], r.loc[start_date:]

    def read_population(self, filename):
        """Read the population CSV and index it by its "Country" column.

        Args:
            filename: Path (or anything ``pd.read_csv`` accepts).

        Returns:
            Dataframe indexed by country name.
        """
        data = pd.read_csv(filename)
        data = data.set_index("Country")
        return data

    def parse_population(self, country):
        """Return the "Population" entry for ``country``.

        The country is looked up directly in the population table; it is
        not checked against ``self.countries``.
        """
        population = self.population.loc[country, "Population"]
        return population

    def validate_date(self, date_text):
        """Raise ``ValueError`` unless ``date_text`` matches "%Y-%m-%d"."""
        try:
            datetime.strptime(date_text, '%Y-%m-%d')
        except ValueError as err:
            # Keep the parsing error attached as the explicit cause.
            raise ValueError(
                "Incorrect data format, should be YYYY-MM-DD!") from err

    def validate_country(self, country):
        """Raise ``ValueError`` unless ``country`` is in ``self.countries``."""
        if country not in self.countries:
            raise ValueError("Country not in list!")