covidSIR / src /data_parser.py
SnoopKilla's picture
Gradio APP
406ac25
raw
history blame
2.18 kB
import pandas as pd
import numpy as np
from datetime import datetime
class Parser:
    """Load epidemic time series and population figures from CSV files.

    Three CSV files (confirmed, deaths, recovered) are read into data frames
    indexed by date with one column per country, and a fourth CSV provides a
    population table indexed by country name.

    Attributes:
        confirmed: Confirmed-case data frame (dates x countries).
        deaths: Deaths data frame (dates x countries).
        recovered: Recovered data frame (dates x countries).
        population: Population table indexed by the "Country" column.
        countries: Sorted list of the country names available in *all* four
            tables, i.e. the names ``parse_data`` and ``parse_population``
            can actually serve.
    """

    def __init__(self, filename_confirmed,
                 filename_deaths,
                 filename_recovered,
                 filename_population):
        """Read the four CSV sources and compute the usable country list.

        Args:
            filename_confirmed: CSV source with the confirmed counts.
            filename_deaths: CSV source with the death counts.
            filename_recovered: CSV source with the recovered counts.
            filename_population: CSV source with "Country" and "Population"
                columns.

        Each argument is handed unchanged to ``pandas.read_csv``.
        """
        self.confirmed = self.read_csv(filename_confirmed)
        self.deaths = self.read_csv(filename_deaths)
        self.recovered = self.read_csv(filename_recovered)
        self.population = self.read_population(filename_population)

        # A country is only usable when every table knows about it; otherwise
        # validate_country() would accept a name that later fails with a
        # KeyError inside parse_data().
        common = self.population.index.values
        for frame in (self.confirmed, self.deaths, self.recovered):
            common = np.intersect1d(frame.columns.values, common)
        self.countries = list(common)

    def read_csv(self, filename) -> pd.DataFrame:
        """Read one time-series CSV into a dates x countries data frame.

        The file must contain a "Country/Region" column; every other column
        header is converted with ``pandas.to_datetime`` and therefore has to
        be parseable as a date.

        Args:
            filename: CSV source passed to ``pandas.read_csv``.

        Returns:
            Data frame with a datetime row index and country names as columns.
        """
        # Create pandas dataframe from .csv
        data = pd.read_csv(filename)
        # Manipulate the dataframe to have dates as row indices and country
        # names as column names
        data = data.set_index("Country/Region")
        data = data.T
        data.index = pd.to_datetime(data.index)
        return data

    def parse_data(self, start_date: str, end_date: str, country: str):
        """Return the infected and removed series of a country.

        "Removed" is deaths plus recovered; "infected" is confirmed minus
        removed. Both series are computed on the data up to ``end_date`` and
        then cut to start at ``start_date`` (label slicing, both ends
        inclusive), so they always share the same dates.

        Args:
            start_date: First date to return, formatted as YYYY-MM-DD.
            end_date: Last date to return, formatted as YYYY-MM-DD.
            country: Country name; must be one of ``self.countries``.

        Returns:
            Tuple ``(i, r)`` of pandas Series indexed by date: the infected
            counts (cast to int) and the removed counts.

        Raises:
            ValueError: If a date is not in YYYY-MM-DD format or the country
                is not in ``self.countries``.
        """
        self.validate_date(start_date)
        self.validate_date(end_date)
        self.validate_country(country)

        confirmed = self.confirmed.loc[:end_date, country]
        r = (self.deaths.loc[:end_date, country]
             + self.recovered.loc[:end_date, country])

        # Compute the level directly instead of cumulating daily differences:
        # the diff/cumsum round trip drops the first available date and
        # silently subtracts the first-day infected count from every value.
        # dropna() guards the int cast against dates missing in one table.
        i = (confirmed - r).dropna().astype(int)
        return i.loc[start_date:], r.loc[start_date:]

    def read_population(self, filename) -> pd.DataFrame:
        """Read the population CSV into a data frame indexed by country.

        Args:
            filename: CSV source passed to ``pandas.read_csv``; it must
                contain a "Country" column.

        Returns:
            Data frame indexed by the "Country" column.
        """
        # Create pandas dataframe from .csv
        data = pd.read_csv(filename)
        data = data.set_index("Country")
        return data

    def parse_population(self, country: str):
        """Return the "Population" entry of ``country``.

        Args:
            country: Country name as it appears in the population table.

        Returns:
            The value stored in the "Population" column for that country.

        Raises:
            KeyError: If the country is not in the population table.
        """
        population = self.population.loc[country, "Population"]
        return population

    def validate_date(self, date_text: str) -> None:
        """Check that ``date_text`` is a date written as YYYY-MM-DD.

        Raises:
            ValueError: If ``date_text`` does not match the format.
        """
        try:
            datetime.strptime(date_text, '%Y-%m-%d')
        except ValueError as err:
            # Chain explicitly so the strptime detail stays in the traceback.
            raise ValueError(
                "Incorrect data format, should be YYYY-MM-DD!") from err

    def validate_country(self, country: str) -> None:
        """Check that ``country`` is one of ``self.countries``.

        Raises:
            ValueError: If the country is not available.
        """
        if country not in self.countries:
            raise ValueError("Country not in list!")