Load libraries
# pkgs <- c(
# "tidyverse",
# "rmarkdown",
# "phyloseq",
# "reshape2",
# "tibble",
# "readr",
# "knitr",
# "regclass",
# )
pkgs <- c(
"phyloseq",
"tidyverse")
lapply(pkgs, require, character.only = TRUE)
# set theme
Load in pre-processed data
## Download Rdata file from github repo link https://github.com/ka-west/PBS_manuscript/raw/master/PBS_data.Rdata
# Load in data
load("data/BIO514-omicsData/PBS_data.Rdata")
First thing we want to do is see what samples have both microbimic data and NMR data.
To do this we need to check which sampleIDs match in the ps_M@sam_data object and the integrals data.frame.
We encounter our first problem - the Sample_ID
used in
microbiomic metadata has extra characters (e.g. _v1 or _v2). We need to
remove these characters.
# We encounter our first problem - the Sample_ID used in microbiomic metadata has extra characters (e.g. _v1 or _v2).
# we need to remove this
# rename sampleID in ps_M to make sure it matches the Integrals data frame
ps_M@sam_data$Sample_ID<-gsub("_v1","", ps_M@sam_data$Sample_ID)
ps_M@sam_data$Sample_ID<-gsub("_v2","", ps_M@sam_data$Sample_ID)
# subset just the samples which have NMR and Microbiomic data
idx<-which(integrals$Patient.TP %in% ps_M@sam_data$Sample_ID)
integrals_df <- integrals[idx,]
# now subset just the metadata form the data frame
meta_df <- integrals_df[, c(1:21, 38:41)]
To help let’s also rename the Sample_ID col to help with matching
# rename the Patient.TP col to Sample_ID to match microbiomic data
colnames(meta_df)[3] <- "Sample_ID"
# reorder meta data cols to more logical order
meta_df <-
meta_df[, c(3,
1,
2,
4,
5,
6,
7,
8,
24,
9,
10,
12,
13,
15,
11,
14,
16,
18,
17,
19,
20,
21,
22,
23,
25)]
# we can also save this as csv to open and view separately
write.csv(meta_df, "data/BIO514-omicsData/metadata.csv")
DT::datatable(
meta_df,
extensions = 'Buttons',
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
pageLength = 20
),
filter = list(position = 'top', clear = FALSE)
)
Subset just the NMR data from the integrals data.frame for samples that matched with microbiome data.
# subset just the NMR data
NMR_df <- integrals_df[, c(3, 22:36)]
colnames(NMR_df)[1] <- "Sample_ID"
# we can also save this as csv to open and view separately
write.csv(NMR_df, "data/BIO514-omicsData/integrals.csv")
DT::datatable(
NMR_df,
extensions = 'Buttons',
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
pageLength = 20),
filter = list(
position = 'top', clear = FALSE)
)
Inspect how many taxa have been unassigned.
taxdf <- as.data.frame(ps_M@tax_table)
sum(is.na(taxdf$Kingdom))
## [1] 2
sum(is.na(taxdf$Phylum))
## [1] 11
sum(is.na(taxdf$Class))
## [1] 48
sum(is.na(taxdf$Order))
## [1] 54
sum(is.na(taxdf$Family))
## [1] 325
sum(is.na(taxdf$Genus))
## [1] 1260
When dealing with integration of microbiome data with other data types we usually want a simplified way to look at the data.
For example we may want to just take taxa assignments at a certain level and have a combined set of count values.
We use the psmelt()
function in phyloseq to make a long
data frame. We can then use this object to group/transform into more
bite-size pieces.
melt <- psmelt(ps_M)
Group by family
fam_melt <- dplyr::select(melt,
Sample_ID, Abundance, Family)
fam <- fam_melt %>%
dplyr::group_by(Sample_ID, Family) %>%
dplyr::summarise(Abundance = sum(Abundance), .groups = "drop") %>%
dplyr::filter(Abundance > 1L)
fam$Abundance <- as.numeric(fam$Abundance)
fam <-
fam %>% pivot_wider(names_from = "Family", values_from = "Abundance")
# we make the NAs into 0
fam <- replace(fam, is.na(fam), 0)
# saving dataset
write.csv(fam, "data/BIO514-omicsData/family_count.csv")
Group by genus
gen_melt <- dplyr::select(melt,
Sample_ID, Abundance, Genus)
gen <- gen_melt %>%
dplyr::group_by(Sample_ID, Genus) %>%
dplyr::summarise(Abundance = sum(Abundance), .groups = "drop") %>%
dplyr::filter(Abundance > 1L)
gen$Abundance <- as.numeric(gen$Abundance)
gen <-
gen %>% pivot_wider(names_from = "Genus", values_from = "Abundance")
# we make the NAs into 0
gen <- replace(gen, is.na(gen), 0)
# saving dataset
write.csv(gen, "data/BIO514-omicsData/genera_count.csv")
Lets see what the genus view looks like
DT::datatable(
gen[,1:20],
extensions = 'Buttons',
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
pageLength = 20),
filter = list(
position = 'top', clear = FALSE)
)
Copyright, Siobhon Egan, 2022.