## This file contains the R code needed in workshop 2. library(tidyverse) library(dslabs) library(deSolve) library(ggplot2) library(lhs) library(outbreaks) library(optimization) ### Section 1 - estimate the growth rate and R_0 ## Load the outbreaks package and SARS data library(outbreaks) data("sars_canada_2003") ## to display the first few lines head(sars_canada_2003) ### SECTION 2.1 CONTINUING WITH MUMPS IN SAVOONGA ## The SIR model code as in lectures 1 & 2. library(deSolve) sir_model = function (t, y, params) { # label state variables from within the y vector S = y[1] # susceptibles I = y[2] # infectious R = y[3] # recovered N = S + I + R # Pull parameter values from the params vector beta = params["beta"] gamma = params["gamma"] # Define equations dS = -(beta*S*I)/N dI = (beta*S*I)/N - gamma*I dR = gamma * I # return list of gradients res = c(dS, dI, dR) list(res) } # load mumps data for Savoonga # If mumps_sav.Rdata isn't in your working directory you will need to include the path # Alternatively, in RStudio you can load the file from 'Session -> load workspace'. # This line will only work if mumps_sav.Rdata is in your working directory. load(file="mumps_sav.Rdata") ## ----------------------------------------------------------------------------------------------------------- par_init = c(beta=1, gamma=1) # Some fairly arbitrary starting values ## We will hard-code the mumps data into the function # the function sir_model is already defined, so we can refer to that here # mumps_sav is already loaded, so we can use that here too ### NOTE - I have changed a couple of lines since workshop 2, ### - to include init_sav ### - to include the times_sav init_sav = c(S=258, I=1, R=0) times_sav = mumps_sav$time_days opt_fun_sav = function(par){ ## The following lines prevent 'optim_nm' from wandering into negative parameter space if (par[1]<0){ par[1] = -par[1] } if(par[2]<0){ par[2] = -par[2] } ## Run the SIR model with the current parameter values output_sav_try = ode( y=init_sav, times = times_sav, func = sir_model, parms= c( beta = par[1], gamma = par[2] ) ) output_savtry_df = as.data.frame(output_sav_try) # coerce to a data.frame object ## Compute a cumulative I column from the model output output_savtry_df$Icum = 259 - output_savtry_df$S n_obs = nrow(mumps_sav) ## Create a data frame with observed and model prediction data for Savoonga sav_rmse_df = data.frame( time = mumps_sav$time_days, obs = mumps_sav$cum_cases, # the following lines find the cumulative I prediction for each time in the mummps data pred = sapply(1:n_obs, function(i){output_savtry_df$Icum[ output_savtry_df$time == mumps_sav$time_days[i] ]}) ) # Calculate the RMSE for this prediction rmse_try = sqrt((1/n_obs)*(sum((sav_rmse_df$pred - sav_rmse_df$obs)^2))) return(rmse_try) } # Run Nelder-Mead optimisation to find the point in parameter space leading to the smallest RMSE optnm_result=optim_nm(opt_fun_sav,start=c(0.6,0.6),k=2, trace=T) optnm_result ### Run the SIR model at the values found by optim_nm output_sav_nm = ode (y=init_sav, times = times_sav, func = sir_model, parms= c( beta = optnm_result$par[1], gamma = optnm_result$par[2] )) output_savnm_df = as.data.frame(output_sav_nm) # coerce to a data.frame object output_savnm_df$Icum = 259 - output_savnm_df$S # Calculate cumulative I # Reformat to 'long' output_savnm_tidy = pivot_longer(output_savnm_df, cols = c("S", "I", "R", "Icum"), names_to = "Compartment") # Create a data frame with corresponding outbreak data from Savoonga and SIR model data n_obs = nrow(mumps_sav) sav_nm_pred = data.frame( time = mumps_sav$time_days, obs = mumps_sav$cum_cases, pred = sapply(1:n_obs, function(i){output_savnm_df$Icum[ output_savnm_df$time == mumps_sav$time_days[i] ]}) ) # Calculate the RMSE from this model fit rmse = sqrt((1/n_obs)*(sum((sav_nm_pred$pred - sav_nm_pred$obs)^2))) ggplot() + # Plot the observed data in blue (with points and a ribbon) geom_point(data=mumps_sav, aes(x=time_days, y=cum_cases), col="blue") + geom_ribbon(data=mumps_sav, aes(x=time_days, ymin=0, ymax=cum_cases), fill="blue", alpha=0.25) + # Plot cumulative I from the SIR model output as a line geom_line(data=output_savnm_tidy[output_savnm_tidy$Compartment=="Icum",], aes(x=time, y=value)) + # Label axes and title ylab("Cumulative Cases") + xlab("Time (days)") + ggtitle(sprintf("beta = %g, gamma = %g, RMSE = %g", optnm_result$par[1], optnm_result$par[2], rmse)) ## SECTION 3 - more detailed models # Define Q function for seasonality # In this function t is in years Qfun = function(t, a, b, phi, tau, mu){ top = b*(a*cos(2*pi*(t-phi))+1) bottom = tau + mu return(top/bottom) } sir_rsv_seas = function (t, y, params) { # label state variables from within the y vector S = y[1] # susceptibles IS = y[2] # Infectious (from S) IR = y[3] # infectious (from R) R = y[4] # recovered (but still infectious, remember) # Create variable N for total number N = S + IR + IS + R # Pull parameter values from the params vector a = params["a"] phi = params["phi"] mu = params["mu"] alpha = params["alpha"] tau = params["tau"] rho = params["rho"] sigma=params["sigma"] b = params["b"] eta = params["eta"] # Calculate Lambda function, for some value of t and parameter # values as specified # Divide t by 365 to go from years to days lambda = Qfun(t/365, a, b, phi, tau, mu)*(IS + eta*IR)*(tau+mu) # Rates of change for each compartment dS = mu*N - lambda*S/N + (alpha*tau/rho)*(IS+IR+R) - mu*S dIS = lambda*S/N - (tau+mu)*IS dIR = sigma*lambda*R/N - (tau/rho + mu)*IR dR = (1-alpha/rho)*tau*IS + (1-alpha)*(tau/rho)*IR - R*(sigma*lambda/N + alpha*tau/rho + mu) # return list of gradients res = c(dS, dIS, dIR, dR) return(list(res)) } # Set parameter values params_rsv_seas = c( a= 0.5, phi = 0, mu=1/(365*70), alpha = 0.16, tau = 1/6, rho = 0.8, sigma = 0.3, b = 0.5, eta = 0.8 ) # Initial values for differential equations # We will run the model for 2.5 years, to show the seasonal pattern initial_vals_rsv = c (S = 0.999, IS=0.001, IR = 0, R = 0) timepoints_rsv_seas = seq (0, 365*2.5, by=0.1) # Run model and format output output_rsv_seas = ode (y=initial_vals_rsv, times = timepoints_rsv_seas, func = sir_rsv_seas, parms= params_rsv_seas) out_rsv_seas_df = as.data.frame(output_rsv_seas) # coerce to a data.frame object out_rsv_seas_tidy = pivot_longer(out_rsv_seas_df, cols = c("S", "IS", "IR", "R"), names_to = "Compartment") # Create date variable to show progression through year out_rsv_seas_tidy$Date = as.Date("31-12-2019", format = "%d-%m-%Y") + out_rsv_seas_tidy$time # Plot compartment populations ggplot(data=out_rsv_seas_tidy, aes(x=Date, y=value, col=Compartment)) + geom_path() + xlab("Time (days)") + ylab("Proportion of population") + scale_x_date(date_labels = "%B")