Trellis graphs ggplot2

Line plots

Data

Manipulate the data from wide format to long format. We have 43 horses that have 8 observations each:

dwide <- data.frame(id, group, w0, w2, w4, w6, w8, w10, w12, w14)
dlong <- dwide %>%
   gather(key = week, value = epg, -c(id, group))

dlong$week <- factor(dlong$week, levels = c("w0","w2","w4","w6","w8","w10","w12","w14"), labels = c(0,2,4,6,8,10,12,14))
dlong$epg <- as.numeric(dlong$epg)

ggplot(data = dlong, aes(x = week, y = epg, group = id)) +
   geom_point(pch = 1) +
   geom_line() +
   facet_grid( ~ group) +
   xlab("Week") +
   ylab("Eggs per gram") +
   ylim(0,5000)

\TeX \begin{figure}[h]
\centering
\includegraphics[width=8.01cm]{../images/line_plot_facet_ggplot2.png}
\caption{Line plot showing faecal egg count (expressed as the number of eggs per gram of faeces) as a function of the number of weeks post treatment, conditioned by treatment group.}
\label{fig:line_plot_facet_ggplot2}
\end{figure}

Figure 2: Line plot showing faecal egg count (expressed as the number of eggs per gram of faeces) as a function of the number of weeks post treatment, conditioned by treatment group.

Frequency histograms

Note here the use of strip.custom to manipulate the appearance of the panel strips in a lattice plot.

ename <- c(rep("Enterprise A", times = 200), rep("Enterprise B", times = 200))
ename <- factor(ename, levels = c("Enterprise A", "Enterprise B"))
distance <- c(rnorm(n = 200, mean = 20, sd = 2), rnorm(n = 200, mean = 15, sd = 1))
dat <- data.frame(ename, distance)

ggplot(data = dat, aes(x = distance)) +
   geom_histogram(binwidth = 0.5) +
   scale_x_continuous(breaks = seq(from = 0, to = 40, by = 10), limits = c(0,40), name = "Distance (km)") +
   scale_y_continuous(breaks = seq(from = 0, to = 50, by = 10), limits = c(0,50), name = "Number of poultry-positive land parcels") +
   facet_grid(~ ename)

\TeX \begin{figure}[h]
\centering
\includegraphics[width=8.01cm]{../images/histogram_facet_ggplot2.png}
\caption{Frequency histogram showing the distance separating poultry-positive enterprises, conditioned by enterprise type.}
\label{fig:histogram_facet_ggplot2}
\end{figure}

Figure 3: Frequency histogram showing the distance separating poultry-positive enterprises, conditioned by enterprise type.

Bar charts

herd <- rep(c("A","B","C"), times = 4)
test <- rep(c("MCT", "CCT"), each = 6)
tx <- rep(rep(c("Vaccinated", "Control"), each = 3), times = 2)
val <- c(34.0,34.0,63.0,19.0,15.0,35.0,3.2,0,0,1.6,1.7,1.7)
dat <- data.frame(herd, test, tx, val)

ggplot(data = dat, aes(x = herd, y = val, fill = tx)) +
   geom_bar(stat = "identity", position = position_dodge()) +
   labs(x = "Herd", y = "Percentage of test positives") +
   ylim(0, 100) +
   facet_grid(test ~ .) +
   guides(fill = guide_legend(title = "")) +
   theme(legend.position = c(0.20, 0.85), legend.background = element_rect(fill = "transparent"))

\TeX \begin{figure}[h]
\centering
\includegraphics[width=8.01cm]{../images/bar_chart_facet_ggplot2.png}
\caption{Bar plot showing the percentage of test positives for control and vaccinated animals, conditioned by herd and CCT-MCT group.}
\label{fig:bar_chart_facet_ggplot2}
\end{figure}

Figure 4: Bar plot showing the percentage of test positives for control and vaccinated animals, conditioned by herd and CCT-MCT group.

Dotplots

farm <- c(rep("A", times = 6), rep("B", times = 6))
spore <- c(runif(n = 6, min = 0, max = 1500), runif(n = 6, min = 0, max = 500))
dat <- data.frame(farm, spore)

ggplot(data = dat, aes(x = spore, y = farm)) +
   geom_point() +
   scale_x_continuous(breaks = seq(from = 0, to = 1000, by = 250), limits = c(0,1000), name = "Spore count") +
   scale_y_discrete(breaks = c("A", "B"), name = "Farm") +
   theme(aspect.ratio = 0.7)

Annotated barcharts

mnth <- rep(c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"), times = 4)
dist <- c(rep("Co Do", times = 12), rep("Vinh Thanh", times = 12), rep("Vinh Loi", times = 12), rep("Phuoc Long", times = 12))
n <- c(47,68,50,61,50,42,45,26,20,20,18,18,26,35,20,16,12,8,10,2,4,6,6,8,8,20,13,13,13,13,13,10,8,
8,8,0,29,16,15,11,9,18,41,74,69,60,29,25)
dat <- data.frame(mnth, dist, n)

dat$dist <- factor(dat$dist, levels = c("Vinh Loi", "Phuoc Long", "Co Do", "Vinh Thanh"))
dat$mnth <- factor(dat$mnth, levels = c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"))

Data for plot annotations:

ref <- data.frame(dist = levels(dat$dist), lab = c("n = 70", "n = 50", "n = 20", "n = 60"))

ggplot(data = dat, aes(x = mnth, y = n)) +
   geom_bar(stat = 'identity', position = position_dodge()) +
   scale_x_discrete(name = "Month") +
   scale_y_continuous(breaks = seq(from = 0, to = 100, by = 20), limits = c(0,100), name = "Percentage of households") +
   geom_text(data = ref, aes(x = 2, y = 90, label = lab)) +
   facet_wrap(~ dist, ncol = 2) +
   theme(axis.text.x = element_text(angle = 90, hjust = 1))

\TeX \begin{figure}[h]
\centering
\includegraphics[width=8.01cm]{../images/annotated_bar_chart_facet_ggplot2.png}
\caption{Bar chart showing the number of households visited by calendar month, conditioned by commune.}
\label{fig:annotated_bar_chart_facet_ggplot2}
\end{figure}

Figure 5: Bar chart showing the number of households visited by calendar month, conditioned by commune.

Error bar plots

meetn <- seq(from = 1, to = 365, by = 30)
venue <- rep("Track A", times = length(meetn))
nstart <- round(runif(n = length(meetn), min = 100, max = 300), digits = 0)
ninj <- rbinom(n = length(meetn), size = nstart, prob = 0.03)
tracka <- data.frame(venue, meetn, nstart, ninj)

meetn <- seq(from = 10, to = 365, by = 60)
venue <- rep("Track B", times = length(meetn))
nstart <- round(runif(n = length(meetn), min = 100, max = 100), digits = 0)
ninj <- rbinom(n = length(meetn), size = nstart, prob = 0.01)
trackb <- data.frame(venue, meetn, nstart, ninj)

meetn <- seq(from = 5, to = 365, by = 15)
venue <- rep("Track C", times = length(meetn))
nstart <- round(runif(n = length(meetn), min = 100, max = 400), digits = 0)
ninj <- rbinom(n = length(meetn), size = nstart, prob = 0.005)
trackc <- data.frame(venue, meetn, nstart, ninj)

dat <- data.frame(rbind(tracka, trackb, trackc))
dat$meetd <- as.Date("2018-01-01", format = "%Y-%m-%d") + dat$meetn

irisk <- epi.conf(as.matrix(cbind(dat$ninj, dat$nstart)), ctype = "prevalence", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 100
dat <- cbind(dat, irisk)

ggplot(data = dat, aes(x = meetd, y = est)) +
   geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1) +
   geom_point() +
   scale_x_date(name = "Meeting date") +
   scale_y_continuous(breaks = seq(from = 0, to = 20, by = 5), labels = seq(from = 0, to = 20, by = 5), limits = c(0, 20), name = "Percentage of starts resulting in injury") +
   facet_grid(venue ~ .)

\TeX \begin{figure}[h]
\centering
\includegraphics[width=8.01cm]{../images/error_bar_plot_facet_ggplot2.png}
\caption{Error bar plot showing the percentage of race starts resulting in injury as a function of calendar date, conditioned by track.}
\label{fig:error_bar_plot_facet_ggplot2}
\end{figure}

Figure 6: Error bar plot showing the percentage of race starts resulting in injury as a function of calendar date, conditioned by track.

Ranked box and whisker plots

var <- c("movn", "mrs", "movc")
est <- c(0.4, 0.7, 0.9)
low <- c(0.3, 0.6, 0.8)
hig <- c(0.5, 0.8, 1.0)
dat <- data.frame(var, est, low, hig)

Sort the data in order of est and define factors. The order of factors defines the order in which the individual box and whisker plots appear on the graph:

dat <- dat[sort.list(dat$est, decreasing = TRUE),]
dat$var <- factor(dat$var, levels = c("movc","mrs","movn"),
   labels = c("Number of contacts following movement", "Date of movement restriction start", "Number of movements per day"))

var <- rep(dat$var, times = 3)
vals <- c(dat[,2], dat[,3], dat[,4])
dat <- data.frame(var, vals)

ggplot(data = dat, aes(x = vals, y = var)) +
   geom_boxplot() +
   scale_x_continuous(breaks = seq(from = 0, to = 1, by = 0.25), labels = seq(from = 0, to = 1, by = 0.25), limits = c(0, 1), name = "Partial rank correlation coefficient") +
   scale_y_discrete(name = "Parameter") +
   theme(aspect.ratio = 0.7)

Time series plots

edate <- seq(from = as.Date("1/7/08", format = "%d/%m/%y"), to = as.Date("30/6/09", format = "%d/%m/%y"), by = "1 day")
bw2yo <- rnorm(n = 365, mean = 400, sd = 35)
bw3yo <- rnorm(n = 365, mean = 450, sd = 15)
bw46yo <- rnorm(n = 365, mean = 525, sd = 30)

dat <- data.frame(edate = rep(edate, times = 3),
   class = c(rep("2yo", times = 365), rep("3yo", times = 365), rep("4+yo", times = 365)),
   bw = c(bw2yo, bw3yo, bw46yo))

ggplot(data = dat, aes(x = edate, y = bw)) +
   geom_point(pch = 1) +
   geom_line() +
   geom_smooth() +
   facet_grid(class ~ .) +
   xlab("Date") +
   scale_y_continuous(breaks = seq(from = 300, to = 700, by = 100), labels = seq(from = 300, to = 700, by = 100), limits = c(300, 700), name = "Bodyweight (kg)")

\TeX \begin{figure}[h]
\centering
\includegraphics[width=8.01cm]{../images/time_series_plot_facet_ggplot2.png}
\caption{Time series plot showing bodyweight (kg) as a function of calendar date, conditioned by age class.}
\label{fig:time_series_plot_facet_ggplot2}
\end{figure}

Figure 7: Time series plot showing bodyweight (kg) as a function of calendar date, conditioned by age class.

Comparing risk ratios from several models

library(epiR); library(ggplot2); library(scales); library(tidyr); library(dplyr)

exp.a <- gl(n = 3, k = 3); exp.b <- gl(3,1,9); counts <- c(18,17,15,20,10,20,25,13,12)
dat <- data.frame(exp.a, exp.b, counts)
dat.glm01 <- glm(counts ~ exp.a + exp.b, family = poisson, data = dat)

exp.a <- gl(n = 3, k = 3); exp.b <- gl(3,1,9); counts <- c(9,8,7,10,5,10,12,7,6)
dat <- data.frame(exp.a, exp.b, counts)
dat.glm02 <- glm(counts ~ exp.a + exp.b, family = poisson, data = dat)

Create a data frame of risk ratios:

model <- c(rep("Model 1", times = nrow(summary(dat.glm01)$coefficients)), rep("Model 2", times = nrow(summary(dat.glm01)$coefficients)))
vname <- c(row.names(summary(dat.glm01)$coefficients), row.names(summary(dat.glm02)$coefficients))
m1.est <- c(exp(summary(dat.glm01)$coefficients[,1]), exp(summary(dat.glm02)$coefficients[,1]))
m1.low <- c(exp(confint(dat.glm01))[,1], exp(confint(dat.glm02))[,1])
m1.upp <- c(exp(confint(dat.glm01))[,2], exp(confint(dat.glm02))[,2])
dat <- data.frame(model = model, vname = vname, est = m1.est, low = m1.low, upp = m1.upp)

Add risk ratios for each of the reference categories for each explanatory variable to the data frame:

exp.a <- data.frame(model = c("Model 1", "Model 2"), vname = c("exp.a1", "exp.a1"), est = c(1,1), low = c(1,1), upp = c(1,1))
exp.b <- data.frame(model = c("Model 1", "Model 2"), vname = c("exp.b1", "exp.b1"), est = c(1,1), low = c(1,1), upp = c(1,1))
dat <- rbind(dat, exp.a, exp.b)
dat <- subset(dat, subset = vname != "(Intercept)")
dat$vname <- factor(dat$vname, levels = c("exp.a1", "exp.a2", "exp.a3", "exp.b1", "exp.b2", "exp.b3"), labels = c("Size: small", "Size: medium", "Size: large", "Type: mixed", "Type: dairy", "Type: beef"))

Make the plot:

ggplot(data = dat, aes(x = est, y = vname)) +
   geom_errorbarh(aes(xmin = low, xmax = upp), height = 0.1) +
   geom_point() +
   scale_x_continuous(trans = "log2", breaks = c(0.25,0.5,1,2,4), limits = c(0.25, 4), name = "Incidence rate ratio") +
   scale_y_discrete(breaks = c("exp.a1", "exp.a2", "exp.a3", "exp.b1", "exp.b2", "exp.b3"), labels = c("Size: small", "Size: medium", "Size: large", "Type: mixed", "Type: dairy", "Type: beef"), name = "Explanatory variable") +
   facet_grid(model ~ .) +
   geom_vline(xintercept = 1, linetype = 2, color = "black")

\TeX \begin{figure}[h]
\centering
\includegraphics[width=8.01cm]{../images/compare_models_facet_ggplot2.png}
\caption{Error bar plot showing the point estimate of adjusted incidence rate ratios (and their 95\% confidence intervals) for herd size and herd type for two Poisson regression models.}
\label{fig:compare_models_facet_ggplot2}
\end{figure}

Figure 1: Error bar plot showing the point estimate of adjusted incidence rate ratios (and their 95% confidence intervals) for herd size and herd type for two Poisson regression models.