import pandas as pd
from os import listdir
datasets = sorted([x for x in listdir('./datasets')])
datasets
['cv_ano_imuno.csv',
'cv_uf_2015.csv',
'cv_uf_2016.csv',
'cv_uf_2017.csv',
'cv_uf_2018.csv',
'cv_uf_2019.csv']
def filter_df(data, iterator, year=None):
df = data[iterator]
df.columns = df.iloc[0]
df = df.iloc[1:]
df = df.reset_index()
df = df.rename(columns={"index":"Estado"})
if year is not None:
df['Ano'] = year
return df
filepath = './datasets/'
df = [pd.read_csv(filepath + dataset, encoding='utf8', sep=';', decimal=',') for dataset in datasets]
df_t = [x.transpose() for x in df]
imuno = filter_df(df_t, 0)
imuno = imuno.rename(columns={"Estado": "Ano"})
df_2015 = filter_df(df_t, 1, 2015)
df_2016 = filter_df(df_t, 2, 2016)
df_2017 = filter_df(df_t, 3, 2017)
df_2018 = filter_df(df_t, 4, 2018)
df_2019 = filter_df(df_t, 5, 2019)
df_2015.head(2)
Imuno | Estado | BCG | Hepatite B em crianças até 30 dias | Rotavírus Humano | Meningococo C | Hepatite B | Penta | Pneumocócica | Poliomielite | Febre Amarela | ... | Tríplice Viral D2 | Tetra Viral(SRC+VZ) | DTP | Tríplice Bacteriana(DTP)(1º ref) | Dupla adulto e tríplice acelular gestante | dTpa gestante | Tetravalente (DTP/Hib) (TETRA) | Ignorado | Total | Ano |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | RO | 110.77 | 104.78 | 103.95 | 104.02 | 106.35 | 104.54 | 104.68 | 105.44 | 106.06 | ... | 94.61 | 94.63 | 104.63 | 95.82 | 73.92 | 64.65 | 103.36 | 229.39 | 111.27 | 2015 |
1 | AC | 105.9 | 69.63 | 82.5 | 88.64 | 82.62 | 81.24 | 72.48 | 82.74 | 66.67 | ... | 51.69 | 49.3 | 81.3 | 62.7 | 17.13 | 12.08 | 83.71 | 158.09 | 75.54 | 2015 |
2 rows × 25 columns
bases = [df_2015, df_2016, df_2017, df_2018, df_2019]
result = pd.concat(bases)
result.columns
Index(['Estado', 'BCG', 'Hepatite B em crianças até 30 dias',
'Rotavírus Humano', 'Meningococo C', 'Hepatite B', 'Penta',
'Pneumocócica', 'Poliomielite', 'Febre Amarela', 'Hepatite A',
'Pneumocócica(1º ref)', 'Meningococo C (1º ref)',
'Poliomielite(1º ref)', 'Tríplice Viral D1', 'Tríplice Viral D2',
'Tetra Viral(SRC+VZ)', 'DTP', 'Tríplice Bacteriana(DTP)(1º ref)',
'Dupla adulto e tríplice acelular gestante', 'dTpa gestante',
'Tetravalente (DTP/Hib) (TETRA)', 'Ignorado', 'Total', 'Ano',
'DTP REF (4 e 6 anos)', 'Poliomielite 4 anos'],
dtype='object')
result.groupby(['Estado', 'Ano']).sum()
BCG | Hepatite B em crianças até 30 dias | Rotavírus Humano | Meningococo C | Hepatite B | Penta | Pneumocócica | Poliomielite | Febre Amarela | Hepatite A | ... | Tetra Viral(SRC+VZ) | DTP | Tríplice Bacteriana(DTP)(1º ref) | Dupla adulto e tríplice acelular gestante | dTpa gestante | Tetravalente (DTP/Hib) (TETRA) | Ignorado | Total | DTP REF (4 e 6 anos) | Poliomielite 4 anos | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Estado | Ano | |||||||||||||||||||||
Total | 2015 | 105.08 | 90.93 | 95.35 | 98.19 | 97.74 | 96.30 | 94.23 | 98.29 | 46.31 | 97.07 | ... | 77.37 | 96.90 | 85.78 | 45.57 | 44.97 | 95.49 | 196.58 | 95.07 | 0.00 | 0.00 |
2016 | 95.55 | 81.75 | 88.98 | 91.68 | 105.19 | 89.27 | 95.00 | 84.43 | 44.59 | 71.58 | ... | 79.04 | 89.53 | 64.28 | 31.53 | 33.81 | 5.21 | 16.44 | 50.44 | 2.73 | 0.00 | |
2017 | 97.98 | 85.88 | 85.12 | 87.44 | 84.40 | 84.24 | 92.15 | 84.74 | 47.37 | 78.94 | ... | 35.44 | 0.00 | 72.40 | 34.73 | 42.40 | 0.00 | 0.00 | 72.93 | 66.08 | 62.26 | |
2018 | 99.72 | 88.40 | 91.33 | 88.49 | 88.53 | 88.49 | 95.25 | 89.54 | 59.50 | 82.69 | ... | 33.26 | 0.00 | 73.27 | 44.99 | 60.23 | 0.00 | 0.00 | 77.13 | 68.52 | 63.62 | |
2019 | 86.67 | 78.57 | 85.40 | 87.41 | 70.77 | 70.76 | 89.07 | 84.19 | 62.41 | 85.02 | ... | 34.24 | 0.00 | 57.08 | 45.02 | 63.23 | 0.00 | 0.00 | 73.44 | 53.74 | 68.45 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
TO | 2015 | 107.48 | 94.35 | 97.73 | 98.73 | 100.00 | 98.70 | 93.92 | 97.22 | 83.28 | 94.52 | ... | 52.17 | 98.76 | 83.28 | 15.24 | 16.55 | 96.74 | 231.40 | 92.37 | 0.00 | 0.00 |
2016 | 99.23 | 77.44 | 91.00 | 92.43 | 106.61 | 88.14 | 97.31 | 84.77 | 77.26 | 75.63 | ... | 88.32 | 88.14 | 69.89 | 11.52 | 19.23 | 5.49 | 31.04 | 60.94 | 2.28 | 0.00 | |
2017 | 104.75 | 91.32 | 89.09 | 90.01 | 85.77 | 85.76 | 93.17 | 86.05 | 73.28 | 81.50 | ... | 60.91 | 0.00 | 75.43 | 34.82 | 53.33 | 0.00 | 0.00 | 77.95 | 72.35 | 66.53 | |
2018 | 104.06 | 100.67 | 93.40 | 79.72 | 90.04 | 90.04 | 100.71 | 91.68 | 81.08 | 84.08 | ... | 65.92 | 0.00 | 74.54 | 45.80 | 70.06 | 0.00 | 0.00 | 81.12 | 65.28 | 62.70 | |
2019 | 112.41 | 111.88 | 88.73 | 93.52 | 76.38 | 76.38 | 94.02 | 88.17 | 76.47 | 87.09 | ... | 81.35 | 0.00 | 55.40 | 51.01 | 75.97 | 0.00 | 0.00 | 82.12 | 53.13 | 65.91 |
140 rows × 25 columns
result.to_csv('cv_uf.csv', encoding='utf8', sep=',', decimal='.', index=False)
# !jupyter nbconvert main.ipynb --to markdown --output README.md