## This code is explained in more detail in the Workshop 1 file
## There are some explanatory comments in the bodies of functions

## ----packages, echo=FALSE,message=FALSE,warning=FALSE-----------------------------------
library(tidyverse)
library(dslabs)
library(deSolve)
library(ggplot2)

## ---------------------------------------------------------------------------------------
# Lines like this one, with a hash at the start, are comments
# R will not parse them, they are there to help the human readers


### SECTION 1 - SIR models with difference equations

sir_diff = function(
    S1,           ## Initial #susceptibles
    I1,           ## Initial #infectious
    R1,           ## Initial #removed
    beta,         ## beta parameter value
    gamma,        ## gamma parameter value
    times         ## vector of times to run model (first element taken as initial time)
){
  N=S1+I1+R1                   # total model population
  nt = length(times)           # number of time steps   

  # create a data frame to fill in with updated compartment populations
  # At first this data frame is just full of NA values
  out_df = data.frame(
    Time = times, S = rep(NA, nt), I = rep(NA, nt), R=rep(NA, nt)
  )
  # Put the initial populations in the first row of out_df
  out_df$S[1] = S1
  out_df$I[1] = I1
  out_df$R[1] = R1
  
  # Loop through the times vector from 2 to nt (we've already done 1, above), 
  # using the previous row of out_df to calculate the next
  for (i in 2:nt){
    # find t-1 populations
    S_tminus1 = out_df$S[i-1]
    I_tminus1 = out_df$I[i-1]
    R_tminus1 = out_df$R[i-1]
    # calculate t populations using difference equations
    S_t = S_tminus1 - (beta*I_tminus1*S_tminus1)/N
    I_t = I_tminus1 + (beta*I_tminus1*S_tminus1)/N - gamma*I_tminus1
    R_t = R_tminus1 + gamma*I_tminus1
    # Add newly calculated values to out_df, as row i
    out_df$S[i] = S_t
    out_df$I[i] = I_t
    out_df$R[i] = R_t 
  }
  
  # return the fully populated data frame
  return(out_df)
}

# This function rearranges the output from sir_diff to be `long` rather than `wide`
# This makes it easier to plot with ggplot2, as we can colour the lines by 
# Compartment, rather than plotting each compartment separately.

# The function plot_sirdiff needs one input - a data frame produced by sir_diff
# In the function we refer to this data frame as 'sir_df'

plot_sirdiff = function(sir_df){
  # convert sir_df to 'long' format
  sirdf_tidy = pivot_longer(sir_df, cols = c("S", "I", "R"), names_to = "Compartment")
  # Plot the data, as points (to show the actual points of computation)
  # and lines
  ggplot(data=sirdf_tidy) + 
    geom_point(aes(x=Time, y=value, col=Compartment))+
    geom_line(aes(x=Time, y=value, col=Compartment), size=0.5)+
    ylab("Population")
  
}

# Run this model for the parameters and initial values given in lectures
sir_eg_1000 = sir_diff(999, 1, 0, 1.25, 1/2, 1:60)
# plot the output above
plot_sirdiff(sir_eg_1000)



### SECTION 3 - SIR modesl with ODES

library(deSolve)   # to solve the system of ODEs we need to load the 'deSolve' library

## The function sir_model sets up the system of ODEs we derived in the lectures
## The inputs to sir_model are:
### t - a vector of time points at which output should be returned (used later by 'ode')
### y - the initial compartment populations
### parms - the model parameters. Usually a named vector.

sir_model = function (t, y, parms)
{
  # label state variables from within the y vector of initial values
  S = y[1]        # susceptibles
  I = y[2]        # infectious
  R = y[3]        # recovered
  N = S + I + R
  
  # Pull parameter values from the parms vector
  beta = parms["beta"]
  gamma = parms["gamma"]

  # Define equations (dS etc. are simplified notation for dS/dt)
  dS = -(beta*S*I)/N
  dI = (beta*S*I)/N - gamma*I
  dR = gamma * I

  # return list of gradients   
  res = c(dS, dI, dR)
  list(res)
}


## The vector below says we would like outputs every 0.2 time points for 50 time points.
timepoints = seq (0, 50, by=0.2)
timepoints


## Values for our parameters, as a named vector
params_vec = c(
  beta = 1.25, 
  gamma = 1/2
  )
params_vec


## Initial population values
initial_values = c (S = 999, I = 1, R = 0)


## To run the SIR model, we enter all the components into the 'ode' function
output1 = ode (y=initial_values, times = timepoints, func = sir_model, parms= params_vec)

# 'ode' returns a matrix, but we can turn it into a data frame
output1 = as.data.frame(output1)
head(output1)


## ---------------------------------------------------------------------------------------
# calculate R0:
R0 = params_vec[1]/params_vec[2]
# calculate total number of people from initial values
pop_total = sum(initial_values)
output1$Rn = R0*(1/pop_total)*output1$S

head(output1)



## ---------------------------------------------------------------------------------------
## The function 'plot_sir' reformats the SIR model output and plots it
## Like 'plot_sirdiff' from section 2.


plot_sir = function(
  sir_df, 
  Compartments = c("S", "I", "R")
){
# Collect the values in columns S, I and R (or those given as "Compartments")
#  into one column named 'value', and create an additional column named 
# "Compartment".
    output_tidy = pivot_longer(sir_df, cols = all_of(Compartments), names_to = "Compartment")
  # Plot the values, with a different coloured line for each compartment
    ggplot(data=output_tidy, aes(x=time, y=value, col=Compartment)) + 
      geom_line() + ylab("People")  
  }

## Some example plots:

plot_sir(output1, Compartments= c("Rn")) + ylab(expression(R[n]))

plot_sir(output1, Compartments= c("S", "I", "R"))