## ----setup, include=FALSE-------------------------------------------------------------------------
# Set the default knitr chunk options used by every later chunk:
# echo = TRUE and fig.align = "center".
knitr::opts_chunk$set(
  echo = TRUE,
  fig.align = "center"
)
# Evaluate a tag list wrapping rmarkdown's Font Awesome HTML dependency.
htmltools::tagList(
  rmarkdown::html_dependency_font_awesome()
)


## ----packages, echo=FALSE,message=FALSE,warning=FALSE---------------------------------------------
library(tidyverse)
library(dslabs)
library(ggplot2)


### 2.1 Repeated experiments with BehaviourSpace

# In this workshop, all tasks use this same basic code to view and analyse the results
# of your BehaviourSpace experiments.

## ----nl-bs-plot2, echo=TRUE, fig.cap = "The count of infected turtles over time for 50 ABM runs with the same input values.", fig.height=4, fig.width=8----
library(ggplot2)
## Read the file into R - the filepath will need to be changed.
## Setting your working directory to the folder you're saving the output
## files in will make this simpler.
## Naming your files and objects well can also help as you try to distinguish
## between many similar plots and files.
## skip = 6 skips the first six lines of the file so that the next line is
## read as the column header row.
exp_anywhere_ip6_prob06 <- read.csv(
  "Data/ABM_MA1_p06-table",
  skip = 6,
  header = TRUE
)
## A quick way to check that the data has imported correctly (and to see
## what the columns are) is to use 'summary'
summary(exp_anywhere_ip6_prob06)
## Rename the columns of interest to be easier to work with
## These will likely be:
## - X.run.number
## - X.step
## - count.turtles.with...Status..I
## - count.turtles.with...Status..R
## but may include others.
## NOTE: the renaming below is by column POSITION (1, 9, 10, 11). Check the
## first 'summary' output and adjust the positions if your file has a
## different number of columns before the step/count columns.
names(exp_anywhere_ip6_prob06)[c(1, 9, 10, 11)] <- c(
  "runID", "TimeStep", "CountInfected", "CountRecovered"
)
summary(exp_anywhere_ip6_prob06)
## Plot [in this case] the CountInfected data against time
## the 'group' argument means that the plot shows each runID as a separate line
## but doesn't colour them or make a legend
ggplot(
  data = exp_anywhere_ip6_prob06,
  aes(x = TimeStep, y = CountInfected, group = runID)
) +
  geom_path()


## ----nl-timemaxinf, echo=TRUE, fig.cap = "Histogram of the time-step of the peak of infection for each of the 50 runs.", fig.height=4, fig.width=8----

# Create a vector of the distinct runIDs in the dataset
runIDs <- unique(exp_anywhere_ip6_prob06$runID)
# Create a results vector with one slot per run (same length as runIDs)
times_maxI <- rep(NA, length(runIDs))
# For each position i in runIDs:
# - create a dataframe for just that run
# - find the maximum of the infected values
# - find the time step(s) at which that maximum occurs
# - record the earliest such time step in slot i of times_maxI
# We loop over positions (seq_along) rather than over the run ID values
# themselves, so the results fill slots 1..n even if the run IDs are not
# exactly 1..n (e.g. when only a subset of runs is present).
for (i in seq_along(runIDs)) {
  run_id <- runIDs[i]
  exp_i <- exp_anywhere_ip6_prob06[exp_anywhere_ip6_prob06$runID == run_id, ]
  max_inf_i <- max(exp_i$CountInfected)
  time_maxinf_i <- exp_i$TimeStep[exp_i$CountInfected == max_inf_i]
  # The peak value may be reached at several time steps; keep the first
  times_maxI[i] <- min(time_maxinf_i)
}
# Plot this as a histogram
hist(times_maxI, breaks = 20)