Skip to content

How to analyse influenza A and B from wastewater samples

The graphs are produced using R version 4.2.2 or higher and the following packages are required to run the script:

These following libraries are required to run this script:

library(ggplot2)
library(ggthemes)
library(scales)
library(tidyverse)
library(readxl)
library(dplyr)
library(lubridate)
library(rlang)
library(writexl)
library(scales)
library(EpiCurve)
library(ISOweek)
library(writexl)
library(tidyr)
library(reshape2)

Preparing the data

Wastewater surveillance data is stored on RedCap. Export the data as a csv and set a path to your folders where you have stored the latest wastewater data download. Read in the data using the read.csv() function

setwd("C:/set/path/to/folder")

Influenza <- read.csv("C:/path/to/file/Wastewatergeneral_DATA_LABELS_2024-07-11_0948.csv")

Select the following variables from the wastewater data that you have read into R

Influenza <- Influenza[, c("Site.Province.",  "District.Name",
                       "Site.Name.", "Sample.Collection.Date", "Inf.A.Result", "Inf.B.Result.")]

Create an EpiWeek variables that uses the samples collection date to determine the epidemiological week

Influenza_2 <- mutate(Influenza, EpiWeek = ISOweek(Sample.Collection.Date))

Making sure all blank spaces are now replaced with NAs

Influenza_3 <- Influenza_2 %>%
    mutate_all(~ ifelse(. == "", NA, .))

Filter the data for Influenza A results only

Influenza_4 <- Influenza_3 %>%
  filter(!is.na(Inf.A.Result))

Group Influenza A results by epidemiological week (tabulates the number of positive and negative influenza A results for each week)

InfluenzaA <- Influenza_4 %>%
  group_by(EpiWeek) %>%
  summarize(
    Positive_WW = sum(Inf.A.Result == 'Positive'),
    Negative_WW = sum(Inf.A.Result == 'Negative'),
    Total = n())

Add column with the percentage positivity rates

InfluenzaA$positivityrate <- (InfluenzaA$Positive_WW / InfluenzaA$Total)*100

Remove rows where there is no epidemiological week recorded (NAs)

InfluenzaA_final <- InfluenzaA %>%
  filter(!is.na(EpiWeek))

Filter for only 2024

InfluenzaA_final_24 <- filter(InfluenzaA_final, EpiWeek >= "2024-W01")

Filter the data for Influenza B results only

Influenza_5 <- Influenza_3 %>%
  filter(!is.na(Inf.B.Result.))

Group Influenza B results by epidemiological week (tabulates the number of positive and negative influenza B results for each week)

InfluenzaB <- Influenza_5 %>%
  group_by(EpiWeek) %>%
  summarize(
    Positive_WW = sum(Inf.B.Result. == 'Positive'),
    Negative_WW = sum(Inf.B.Result. == 'Negative'),
    Total = n())

Add column with the percentage positivity rates

InfluenzaB$positivityrate <- (InfluenzaB$Positive_WW / InfluenzaB$Total)*100

Remove rows where there is no epidemiological week recorded (NAs)

InfluenzaB_final <- InfluenzaB %>%
  filter(!is.na(EpiWeek))

Filter for only 2024

InfluenzaB_final_24 <- filter(InfluenzaB_final, EpiWeek >= "2024-W01")

Producing plots with percentage positivity and absolute number of positive samples by epidemiological week

Produce the plot for Influenza A (limits of axis and the number used to divide the positivity rate may need adjusting depending on the maximum number of absolute positives in any given epidemiological week)

InfluenzaA_plot <- ggplot(InfluenzaA_final_24, aes(x = EpiWeek)) +
  geom_bar(aes(y = Positive_WW, fill = "Positive Wastewater Samples"), 
       stat = "identity", colour = "plum") +
  geom_point(aes(y = positivityrate / 4, color = "Positivity Rate"), size = 2) +
  geom_line(aes(y = positivityrate / 4, color = "Positivity Rate", group = 1)) +
  labs(x = "EpiWeek", 
   y = "Number of Positive Wastewater Samples", 
   title = "Influenza A",
   fill = "Legend",  # Custom label for the fill legend
   color = "Legend") + # Custom label for the color legend
  scale_y_continuous(name = "Number of Positive Wastewater Samples", 
                 limits = c(0, 25),
                 sec.axis = sec_axis(~ . /25, 
                                     name = "Percentage Positive", 
                                     labels = scales::percent)) +
  scale_color_manual(name = "Line (Secondary y-axis)", values = c("Positivity Rate" = "black")) +
  scale_fill_manual(name = "Bars (Primary y-axis)", values = c("Positive Wastewater Samples" = "plum")) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 12),  
    axis.text.y = element_text(size = 14),  # Adjusting y-axis text size
    axis.title = element_text(size = 14),   # Adjusting axis title size
    legend.text = element_text(size = 14),  
    legend.title = element_text(size = 14))

Set the desired width and height for the PNG

width = 15
height = 8

Save as a PNG

ggsave("InfluenzaA_200717.png", InfluenzaA_plot, width = width, height = height)

Produce the plot for Influenza B (limits of axis and the number used to divide the positivity rate may need adjusting depending on the maximum number of absolute positives in any given epidemiological week

InfluenzaB_plot <- ggplot(InfluenzaB_final_24, aes(x = EpiWeek)) +
  geom_bar(aes(y = Positive_WW, fill = "Positive Wastewater Samples"), 
       stat = "identity", colour = "lightblue") +
  geom_point(aes(y = positivityrate / 5.88, color = "Positivity Rate"), size = 2) +
  geom_line(aes(y = positivityrate / 5.88, color = "Positivity Rate", group = 1)) +
  labs(x = "EpiWeek", 
   y = "Number of Positive Wastewater Samples", 
   title = "InfluenzaB",
   fill = "Legend",  # Custom label for the fill legend
   color = "Legend") + # Custom label for the color legend
  scale_y_continuous(name = "Number of Positive Wastewater Samples", 
                 limits = c(0, 17),
                 breaks = seq(0, 17, by = 2),  # Set breaks to only whole numbers
                 sec.axis = sec_axis(~ . /17, 
                                     name = "Percentage Positive", 
                                     labels = scales::percent)) +
  scale_color_manual(name = "Line (Secondary y-axis)", values = c("Positivity Rate" = "black")) +
  scale_fill_manual(name = "Bars (Primary y-axis)", values = c("Positive Wastewater Samples" = "lightblue")) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 12),  
    axis.text.y = element_text(size = 14),  # Adjusting y-axis text size
    axis.title = element_text(size = 14),   # Adjusting axis title size
    legend.text = element_text(size = 14),  
    legend.title = element_text(size = 14))

Set the desired width and height for the PNG

width = 15
height = 8

Save as a PNG

ggsave("InfluenzaB_200717.png", InfluenzaB_plot, width = width, height = height)