Thursday, 9 April 2020

covid visualizations


Covid visualizations
In [1]:
import json
In [2]:
from datetime import timedelta
In [3]:
from urllib.request import urlopen
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
{
    "tags":[
        "hide_input",
    ]
}
Out[3]:
{'tags': ['hide_input']}
In [4]:
# color pallette
cnf = '#393e46' # confirmed - grey
dth = '#ff2e63' # death - red
rec = '#21bf73' # recovered - cyan
act = '#fe9801' # active case - yellow
In [5]:
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
In [6]:
from plotly.offline import plot,iplot,init_notebook_mode
init_notebook_mode(connected=True)
In [7]:
df=pd.read_csv(r'C:\Users\dhr3ban\Documents\python\covid datasets\complete.csv',parse_dates=['Date'])
df['Name of State / UT'] = df['Name of State / UT'].str.replace('Union Territory of ', '')
df.head()
Out[7]:
Date Name of State / UT Total Confirmed cases (Indian National) Total Confirmed cases ( Foreign National ) Cured/Discharged/Migrated Latitude Longitude Death Total Confirmed cases
0 2020-01-30 Kerala 1 0 0 10.8505 76.2711 0 1
1 2020-01-31 Kerala 1 0 0 10.8505 76.2711 0 1
2 2020-02-01 Kerala 2 0 0 10.8505 76.2711 0 2
3 2020-02-02 Kerala 3 0 0 10.8505 76.2711 0 3
4 2020-02-03 Kerala 3 0 0 10.8505 76.2711 0 3
In [8]:
df.columns
Out[8]:
Index(['Date', 'Name of State / UT', 'Total Confirmed cases (Indian National)',
       'Total Confirmed cases ( Foreign National )',
       'Cured/Discharged/Migrated', 'Latitude', 'Longitude', 'Death',
       'Total Confirmed cases'],
      dtype='object')
In [9]:
df = df[['Date', 'Name of State / UT', 'Latitude', 'Longitude', 'Total Confirmed cases', 'Death', 'Cured/Discharged/Migrated']]
df.columns = ['Date', 'State/UT', 'Latitude', 'Longitude', 'Confirmed', 'Deaths', 'Cured']

for i in ['Confirmed', 'Deaths', 'Cured']:
    df[i] = df[i].astype('int')
    
df['Active'] = df['Confirmed'] - df['Deaths'] - df['Cured']
df['Mortality rate'] = df['Deaths']/df['Confirmed']
df['Recovery rate'] = df['Cured']/df['Confirmed']

df = df[['Date', 'State/UT', 'Latitude', 'Longitude', 'Confirmed', 'Active', 'Deaths', 'Mortality rate', 'Cured', 'Recovery rate']]
In [10]:
df.head()
Out[10]:
Date State/UT Latitude Longitude Confirmed Active Deaths Mortality rate Cured Recovery rate
0 2020-01-30 Kerala 10.8505 76.2711 1 1 0 0.0 0 0.0
1 2020-01-31 Kerala 10.8505 76.2711 1 1 0 0.0 0 0.0
2 2020-02-01 Kerala 10.8505 76.2711 2 2 0 0.0 0 0.0
3 2020-02-02 Kerala 10.8505 76.2711 3 3 0 0.0 0 0.0
4 2020-02-03 Kerala 10.8505 76.2711 3 3 0 0.0 0 0.0
In [11]:
df.describe()
Out[11]:
Latitude Longitude Confirmed Active Deaths Mortality rate Cured Recovery rate
count 613.000000 613.000000 613.000000 613.000000 613.000000 612.000000 613.000000 612.000000
mean 22.522748 78.841270 25.254486 22.663948 0.559543 0.023220 2.030995 0.066543
std 7.624310 4.714988 47.120280 42.446752 1.421748 0.066354 5.131348 0.146649
min 10.850500 71.192400 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 15.317300 76.085600 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000
50% 22.986800 77.173400 7.000000 6.000000 0.000000 0.000000 0.000000 0.000000
75% 29.058800 79.740000 28.000000 22.000000 1.000000 0.014657 2.000000 0.062017
max 34.299600 94.727800 335.000000 302.000000 16.000000 0.500000 42.000000 1.000000
In [12]:
latest = df[df['Date']==max(df['Date'])]

# days
latest_day = max(df['Date'])
day_before = latest_day - timedelta(days = 1)

# state and total cases 
latest_day_df = df[df['Date']==latest_day].set_index('State/UT')
day_before_df = df[df['Date']==day_before].set_index('State/UT')

temp = pd.merge(left = latest_day_df, right = day_before_df, on='State/UT', suffixes=('_lat', '_bfr'), how='outer')
latest_day_df['New cases'] = temp['Confirmed_lat'] - temp['Confirmed_bfr']
latest = latest_day_df.reset_index()
latest.fillna(1, inplace=True)
In [13]:
temp = latest[['State/UT', 'Confirmed', 'Active', 'New cases', 'Deaths', 'Mortality rate', 'Cured', 'Recovery rate']]
temp = temp.sort_values('Confirmed', ascending=False).reset_index(drop=True)

temp.style\
    .background_gradient(cmap="Blues", subset=['Confirmed', 'Active', 'New cases'])\
    .background_gradient(cmap="Greens", subset=['Cured', 'Recovery rate'])\
    .background_gradient(cmap="Reds", subset=['Deaths', 'Mortality rate'])
Out[13]:
State/UT Confirmed Active New cases Deaths Mortality rate Cured Recovery rate
0 Maharashtra 335 277 0 16 0.0477612 42 0.125373
1 Tamil Nadu 309 302 75 1 0.00323625 6 0.0194175
2 Kerala 286 257 21 2 0.00699301 27 0.0944056
3 Delhi 219 207 67 4 0.0182648 8 0.0365297
4 Rajasthan 133 130 25 0 0 3 0.0225564
5 Andhra Pradesh 132 130 46 1 0.00757576 1 0.00757576
6 Karnataka 124 111 14 3 0.0241935 10 0.0806452
7 Uttar Pradesh 113 97 0 2 0.0176991 14 0.123894
8 Telengana 107 103 11 3 0.0280374 1 0.00934579
9 Madhya Pradesh 99 93 0 6 0.0606061 0 0
10 Gujarat 87 72 5 7 0.0804598 8 0.091954
11 Jammu and Kashmir 70 65 8 2 0.0285714 3 0.0428571
12 West Bengal 53 47 1 3 0.0566038 3 0.0566038
13 Punjab 46 41 0 4 0.0869565 1 0.0217391
14 Haryana 43 22 0 0 0 21 0.488372
15 Bihar 24 23 1 1 0.0416667 0 0
16 Chandigarh 18 18 2 0 0 0 0
17 Assam 16 16 15 0 0 0 0
18 Ladakh 14 11 1 0 0 3 0.214286
19 Andaman and Nicobar Islands 10 10 0 0 0 0 0
20 Uttarakhand 10 8 3 0 0 2 0.2
21 Chhattisgarh 9 7 0 0 0 2 0.222222
22 Goa 6 6 1 0 0 0 0
23 Himachal Pradesh 6 4 3 1 0.166667 1 0.166667
24 Odisha 5 5 1 0 0 0 0
25 Puducherry 3 2 0 0 0 1 0.333333
26 Manipur 2 2 1 0 0 0 0
27 Jharkhand 2 2 1 0 0 0 0
28 Mizoram 1 1 0 0 0 0 0
29 Arunachal Pradesh 1 1 1 0 0 0 0
In [14]:
fig = make_subplots(rows=1, cols=2, shared_xaxes=False, column_widths=[0.4, 0.6],
                    subplot_titles = ['Latest stats', 'Over the time'],
                    specs=[[{"type": "treemap"}, {"type": "bar"}]])

tm = temp.melt(id_vars="Date", value_vars=['Active', 'Deaths', 'Cured'])
fig_1 = px.treemap(tm, path=["variable"], values="value", height=540, width=800,
                 color_discrete_sequence=[rec, act, dth])
fig_1.data[0].textinfo = 'label+text+value'
fig.add_trace(fig_1['data'][0], row=1, col=1)

# fig.add_trace(go.Treemap(labels = tm['variable'], values = tm['value']),1,1)

temp = df.groupby('Date')['Active', 'Deaths', 'Cured'].sum().reset_index()
fig.add_trace(go.Bar(x=temp['Date'], y=temp['Active'], name='Active', marker_color=act), row=1, col=2)
fig.add_trace(go.Bar(x=temp['Date'], y=temp['Deaths'], name='Deaths', marker_color=dth), row=1, col=2)
fig.add_trace(go.Bar(x=temp['Date'], y=temp['Cured'], name='Cured', marker_color=rec), row=1, col=2)

fig.update_layout(barmode='stack')
fig.update_layout(treemapcolorway = [act, rec, dth])
fig.show()
C:\Program Files\Anaconda3\lib\site-packages\pandas\core\indexing.py:1418: FutureWarning:


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike

In [15]:
temp = latest.sort_values('Confirmed', ascending=False)
state_order = temp['State/UT']

fig = px.bar(temp, 
             x="Confirmed", y="State/UT", color='State/UT', title='Confirmed',
             orientation='h', text='Confirmed', height=len(temp)*35,
             color_discrete_sequence = px.colors.qualitative.Vivid)
fig.show()
In [16]:
temp = latest[latest['Deaths']>0].sort_values('Deaths')
fig_d = px.bar(temp, y="State/UT", x="Deaths", orientation='h', title='Deaths', color_discrete_sequence = ['#ff2e63'], text='Deaths', height=len(temp)*40)

temp = latest[latest['Cured']>0].sort_values('Cured')
fig_r = px.bar(temp, y="State/UT", x="Cured", orientation='h', title='Cured', color_discrete_sequence = ['#2c786c'], text='Cured', height=len(temp)*40)

fig = make_subplots(rows=1, cols=2, shared_xaxes=False, horizontal_spacing=0.2, subplot_titles=("Deaths", "Recovered"))
fig.add_trace(fig_d['data'][0], row=1, col=1)
fig.add_trace(fig_r['data'][0], row=1, col=2)
fig.update_layout(height=800)
fig.show()
In [17]:
fig = px.bar(df.sort_values('Confirmed', ascending=False), x="Date", y="Confirmed", color='State/UT', title='State wise cases over time',
             color_discrete_sequence = px.colors.qualitative.Vivid)
fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
In [18]:
no_of_states = df.groupby('Date')['State/UT'].unique().apply(len).values
dates = df.groupby('Date')['State/UT'].unique().apply(len).index

fig = go.Figure()

fig.add_trace(go.Scatter(x=dates, y=[36 for i in range(len(no_of_states))], 
                         mode='lines', name='Total no. of States+UT', 
                         line = dict(color='#393e46', dash='dash')))

fig.add_trace(go.Scatter(x=dates, y=no_of_states, hoverinfo='x+y',
                         mode='lines', name='No. of affected States+UT', 
                         line = dict(color='#ff2e63')))

fig.update_layout(title='No. of affected States / Union Territory', 
                  xaxis_title='Date', yaxis_title='No. of affected States / Union Territory')
fig.update_traces(textposition='top center')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
In [19]:
px.scatter(latest[latest['Confirmed']>10], x='Confirmed', y='Deaths', color='State/UT', size='Confirmed', 
           text='State/UT', log_x =True, title='Confirmed vs Death')
In [ ]:
 

No comments:

Post a Comment

covid visualizations

Covid visualizations In [1]: import json In [2]: from ...