India COVID-19 Data Analysis and Estimation - version 3

Thanks to DataMeet for collecting the original data.

Data - https://github.com/datameet/covid19/tree/master/data

Code developed by Swanand Mhalagi

For further information, see:

https://www.linkedin.com/in/swanand-mhalagi-8b1813a7/

https://github.com/swanandM

https://medium.com/@swan1991m

In [20]:
from IPython.display import HTML

# Render a submit button whose handler (code_toggle) hides or shows every
# `div.input` element. The handler is also registered on document-ready, so
# the inputs start out hidden (code_show begins as true and is flipped once).
# NOTE(review): the script relies on `$` (jQuery) existing in the page —
# confirm this for the front-end the notebook is viewed in.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[20]:
In [1]:
# Download mohfw.json from the DataMeet covid19 repository (master branch)
# into the working directory, where the notebook opens it by relative path.
!wget -N -q --timestamping https://raw.githubusercontent.com/datameet/covid19/master/data/mohfw.json
In [2]:
import numpy as np # linear algebra
import pandas as pd 
import cufflinks as cf
import plotly.offline
import plotly.graph_objects as go
from pandas.io.json import json_normalize  
import json
from fbprophet import Prophet

# Presumably switches cufflinks/plotly to offline rendering for this session.
cf.go_offline()
# NOTE(review): offline=False is written to the cufflinks config right after
# go_offline() — the two settings look contradictory; confirm which is intended.
cf.set_config_file(offline=False, world_readable=True)

# Show all pandas columns when a DataFrame is displayed.
# "display.max_columns" is the registered option name; the previous spelling
# "display.max.columns" only resolved because pandas matches option keys as
# regular expressions, which can break if a similarly named option appears.
pd.set_option("display.max_columns", None)
In [3]:
# Read mohfw.json and flatten the records under its 'rows' key into a table.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default text encoding.
with open('mohfw.json', encoding='utf-8') as f:
    d = json.load(f)
df = json_normalize(d['rows'])
In [4]:
def data_design(state_cases):
    """Reshape the flattened MoHFW records into a date-indexed case table.

    Parameters
    ----------
    state_cases : pandas.DataFrame
        Flattened records containing the ``id``, ``key`` and ``value.*``
        columns referenced below (report time, state code, confirmed, cured
        and death counts, plus bookkeeping columns that are discarded).

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``date`` (the text before the ``T`` of
        ``value.report_time``) with the count columns renamed to
        ``Confirmed``, ``Recovered`` and ``Death`` and a ``State`` column in
        which known short codes are replaced by full names.

    Raises
    ------
    KeyError
        If one of the expected input columns is missing.

    Notes
    -----
    The input frame is not modified.
    """
    # assign() builds a new frame, so the caller's DataFrame never gains a
    # stray 'date' column.
    tidy = state_cases.assign(
        date=state_cases['value.report_time'].str.split('T').str[0]
    )

    # Drop the columns that are not needed for plotting.
    tidy = tidy.drop(columns=[
        'id', 'key', 'value._id', 'value._rev',
        'value.confirmed_foreign', 'value.confirmed_india',
        'value.source', 'value.type', 'value.report_time',
    ])

    tidy = tidy.rename(columns={
        'value.confirmed': 'Confirmed',
        'value.cured': 'Recovered',
        'value.death': 'Death',
        'value.state': 'State',
    })
    tidy = tidy.set_index('date')

    # Short code -> full state name. Where the old table listed two spellings
    # for one code ('ct', 'py'), the later one always won; only that spelling
    # is kept here.
    # NOTE(review): 'Telengana' is kept as-is because callers may filter on
    # this exact text — confirm before correcting the spelling.
    code_to_name = {
        'ap': 'Andhra Pradesh', 'ar': 'Arunachal Pradesh', 'as': 'Assam',
        'br': 'Bihar', 'ct': 'Chhattisgarh', 'ga': 'Goa', 'gj': 'Gujarat',
        'hr': 'Haryana', 'hp': 'Himachal Pradesh', 'jh': 'Jharkhand',
        'ka': 'Karnataka', 'kl': 'Kerala', 'mp': 'Madhya Pradesh',
        'mh': 'Maharashtra', 'mn': 'Manipur', 'ml': 'Meghalaya',
        'mz': 'Mizoram', 'nl': 'Nagaland', 'or': 'Odisha', 'pb': 'Punjab',
        'rj': 'Rajasthan', 'sk': 'Sikkim', 'tn': 'Tamil Nadu',
        'tg': 'Telengana', 'tr': 'Tripura', 'ut': 'Uttarakhand',
        'up': 'Uttar Pradesh', 'wb': 'West Bengal',
        'an': 'Andaman and Nicobar Islands', 'ch': 'Chandigarh',
        'dn': 'Dadra and Nagar Haveli', 'dd': 'Daman and Diu',
        'dl': 'Delhi', 'jk': 'Jammu and Kashmir', 'la': 'Ladakh',
        'ld': 'Lakshadweep', 'py': 'Puducherry',
    }
    # Translate codes in the State column only, so a value in another column
    # that happens to equal a code is never rewritten.
    tidy = tidy.assign(State=tidy['State'].replace(code_to_name))
    return tidy
In [5]:
# Clean the raw records once; the resulting table is passed to every
# state-level plotting call in this notebook.
state_cases = data_design(df)
In [6]:
def cases_till_today(state_cases, today_date):
    """Plot a bar chart of every state's case counts reported on one date.

    Parameters
    ----------
    state_cases : pandas.DataFrame
        Date-indexed case table that contains a ``State`` column.
    today_date : str
        Index label to select, e.g. ``'2020-04-30'``.

    Returns
    -------
    Whatever ``DataFrame.iplot`` returns for the bar chart.

    Raises
    ------
    KeyError
        If ``today_date`` is not present in the index.
    """
    # A list selector always yields a DataFrame; a scalar label returns a
    # Series when only one row matches, and a Series has no set_index().
    till_today = state_cases.loc[[today_date]]
    # Several reports may exist per state on the same date; keep the last one.
    till_today = till_today.drop_duplicates(subset='State', keep='last').set_index('State')
    return till_today.iplot(kind='bar', xTitle='State', yTitle='Case Count', title='India: State-wise Case Distribution as of '+ today_date)
In [7]:
# Change the date as required; it must be a label present in the date index.
cases_till_today(state_cases,'2020-04-30')
In [8]:
def one_state(state_cases, state_name):
    """Plot the case-count lines for the rows whose State matches ``state_name``.

    Parameters
    ----------
    state_cases : pandas.DataFrame
        Date-indexed case table that contains a ``State`` column.
    state_name : str
        Pattern searched for inside ``State`` (``str.contains`` semantics, so
        a substring or regular expression also matches).

    Returns
    -------
    Whatever ``DataFrame.iplot`` returns for the line chart.
    """
    # na=False: rows with a missing State count as "no match" instead of
    # producing a NaN in the mask, which boolean indexing rejects.
    matches = state_cases['State'].str.contains(state_name, na=False)
    per_day = state_cases[matches]
    # Keep only the last report of each date.
    per_day = per_day[~per_day.index.duplicated(keep='last')]
    per_day = per_day.drop(columns=['State'])
    return per_day.iplot(mode='lines', xTitle='Date', yTitle='Case Count', title='Case Distribution for ' + state_name)
In [9]:
# Line chart of the case-count columns for Maharashtra.
one_state(state_cases,'Maharashtra')
In [16]:
# Line chart of the case-count columns for Delhi.
one_state(state_cases,'Delhi')
In [17]:
def state_rate(state_cases, state_name):
    """Plot the date-over-date change of each case column for one state.

    Parameters
    ----------
    state_cases : pandas.DataFrame
        Date-indexed case table that contains a ``State`` column.
    state_name : str
        Pattern searched for inside ``State`` (``str.contains`` semantics).

    Returns
    -------
    Whatever ``DataFrame.iplot`` returns for the line chart. The first date
    has no predecessor, so its differences are NaN.
    """
    # na=False: rows with a missing State count as "no match" instead of
    # producing a NaN in the mask, which boolean indexing rejects.
    matches = state_cases['State'].str.contains(state_name, na=False)
    per_day = state_cases[matches]
    # Keep only the last report of each date before differencing.
    per_day = per_day[~per_day.index.duplicated(keep='last')]
    per_day = per_day.drop(columns=['State'])
    # Row-wise diff gives the change between consecutive dates directly; no
    # transpose round trip is needed.
    daily_change = per_day.diff()
    return daily_change.iplot(mode='lines', xTitle='Date', yTitle='Case Count', title='Daily New Cases in '+ state_name)
In [18]:
# Date-over-date change of each case column for Maharashtra.
state_rate(state_cases,'Maharashtra')
In [19]:
# Date-over-date change of each case column for Delhi.
state_rate(state_cases,'Delhi')
In [20]:
def india_proj(state_cases, state_name, Days, changepoint_prior_scale=2.5):
    """Fit Prophet on a state's daily new confirmed cases and plot the forecast.

    Parameters
    ----------
    state_cases : pandas.DataFrame
        Date-indexed case table with ``State`` and ``Confirmed`` columns.
    state_name : str
        Pattern searched for inside ``State`` (``str.contains`` semantics).
    Days : int
        Number of future periods appended to the history before predicting.
    changepoint_prior_scale : float, optional
        Passed straight to ``Prophet``; defaults to the value that was
        previously hard-coded (2.5).

    Returns
    -------
    Whatever ``DataFrame.iplot`` returns for the ``yhat`` line chart.
    """
    # na=False: rows with a missing State count as "no match" instead of
    # producing a NaN in the mask, which boolean indexing rejects.
    matches = state_cases['State'].str.contains(state_name, na=False)
    confirmed = state_cases.loc[matches, 'Confirmed']

    # Collapse to the last report of each date BEFORE differencing. Doing it
    # the other way round (diff first, then keep the first row per date)
    # yields the gap between two intra-day reports, not the daily change.
    confirmed = confirmed[~confirmed.index.duplicated(keep='last')]

    # Prophet expects the columns 'ds' (date) and 'y' (value). The first
    # date has no predecessor, so its 'y' is NaN.
    history = confirmed.diff().reset_index()
    history.columns = ['ds', 'y']

    basic = Prophet(changepoint_prior_scale=changepoint_prior_scale)
    basic.fit(history)
    future = basic.make_future_dataframe(periods=Days)
    forecast = basic.predict(future).set_index('ds')
    return forecast[['yhat']].iplot(mode='lines', xTitle='Date', yTitle='Case Count', title= state_name + ':- New Cases Forecast for next '+ str(Days) + ' Days')
In [21]:
# Forecast Maharashtra's new confirmed cases, extended 20 periods past the data.
india_proj(state_cases, "Maharashtra", 20)
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
In [22]:
# Forecast Delhi's new confirmed cases, extended 20 periods past the data.
india_proj(state_cases, "Delhi", 20)
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
In [13]:
# Download all_totals.json from the DataMeet covid19 repository (master
# branch) into the working directory, where it is opened by relative path.
!wget -N -q --timestamping https://raw.githubusercontent.com/datameet/covid19/master/data/all_totals.json
In [14]:
# Read all_totals.json and flatten the records under its 'rows' key into a table.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default text encoding.
with open('all_totals.json', encoding='utf-8') as f:
    d1 = json.load(f)
all_total = json_normalize(d1['rows'])
In [15]:
def India_case(all_total, case_type):
    """Extract one national series as a date-indexed, single-purpose table.

    Parameters
    ----------
    all_total : pandas.DataFrame
        Table with a ``key`` column and a ``value`` column.
        (presumably each key is a ``[timestamp, series_name]`` pair — the
        ``"['"`` stripping below suggests so; verify against the data file)
    case_type : str
        Literal text (not a regex) that a row's ``key`` must contain.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by ``date`` with ``key`` removed and ``value``
        renamed to ``case_type``; when a date occurs more than once the last
        row is kept. ``all_total`` is not modified.
    """
    selected = all_total[all_total['key'].str.contains(case_type, regex=False)]

    # Derive the date from the text form of the key: everything before the
    # first 'T', minus any leading "[" / "'" characters.
    key_text = selected['key'].astype(str)
    dates = key_text.str.split('T').str[0].str.lstrip("['")

    # Build the result with assign() rather than writing columns onto the
    # filtered slice, which is chained assignment (SettingWithCopyWarning).
    ac = (
        selected
        .drop(columns=['key'])
        .assign(date=dates)
        .drop_duplicates(subset='date', keep='last')
        .set_index('date')
        .rename(columns={'value': case_type})
    )
    return ac
In [16]:
# Build the four national series in a fixed order (confirmed, active, cured,
# death), join them on the dates they all share, and plot them together.
w, x, y, z = (
    India_case(all_total, series_name)
    for series_name in ('total_confirmed_cases', 'active_cases', 'cured', 'death')
)
Case_Distri = pd.concat([w, x, y, z], axis=1, join='inner')
Case_Distri.iplot(mode='lines', xTitle='Date', yTitle='Case Count', title='Case Distribution of India')
In [18]:
def India_case_distribution(all_total, case_type):
    """Plot one national series as a bar chart with one bar per date.

    Parameters
    ----------
    all_total : pandas.DataFrame
        Table with a ``key`` column and a ``value`` column.
        (presumably each key is a ``[timestamp, series_name]`` pair — the
        ``"['"`` stripping below suggests so; verify against the data file)
    case_type : str
        Literal text (not a regex) that a row's ``key`` must contain; it is
        also inserted into the chart title.

    Returns
    -------
    Whatever ``DataFrame.iplot`` returns for the bar chart.
    """
    selected = all_total[all_total['key'].str.contains(case_type, regex=False)]

    # Derive the date from the text form of the key: everything before the
    # first 'T', minus any leading "[" / "'" characters.
    key_text = selected['key'].astype(str)
    dates = key_text.str.split('T').str[0].str.lstrip("['")

    # Build the result with assign() rather than writing columns onto the
    # filtered slice, which is chained assignment (SettingWithCopyWarning).
    ac = (
        selected
        .drop(columns=['key'])
        .assign(date=dates)
        .drop_duplicates(subset='date', keep='last')
        .set_index('date')
    )
    return ac.iplot(kind='bar', xTitle='Date', yTitle='Case Count', title= 'COVID-19 ' + case_type + ' distribution all over India')
In [21]:
# Optional: uncomment a line below to draw the per-date bar chart for that series.
#India_case_distribution(all_total, 'active_cases')
#India_case_distribution(all_total, 'cured')
#India_case_distribution(all_total, 'death')
#India_case_distribution(all_total, 'total_confirmed_cases')