## ----setup, include=FALSE-----------------------------------------------------------------------
library(knitr)
knitr::opts_chunk$set(eval = TRUE)
knitr::opts_chunk$set(cache = TRUE)
options(width = 98) 
opts_chunk$set(message = FALSE, error = FALSE, warning = FALSE,
               cache = TRUE, fig.width = 8, fig.height = 7)

## ----NeededPackages, message=FALSE--------------------------------------------------------------
cranpkgs=c("ggplot2","devtools","adaptiveGPCA","treelapse", "ade4","structSSI", "PMA","vegan", "ggrepel","gridExtra")
BioCpkgs=c("dada2", "curatedMetagenomicData", "phyloseq", "DECIPHER","genefilter")
ghpkgs= c("treeDA", "treelapse","metavizr")
instp <-  cranpkgs %in% installed.packages()
if(any(!instp)) {
   install.packages(cranpkgs[!instp],repos="https://cloud.r-project.org")
}
instp <- BioCpkgs %in% installed.packages()
if(any(!instp)) {
   source("http://bioconductor.org/biocLite.R")
   biocLite(BioCpkgs [!instp], ask = F)
}

if(!("treeDA"  %in% installed.packages()))
   BiocInstaller::biocLite("jfukuyama/treeDA", dependencies = TRUE)
if(!("treelapse"  %in% installed.packages()))
   BiocInstaller::biocLite("krisrs1128/treelapse", dependencies = TRUE)

suppressPackageStartupMessages(sapply(c(cranpkgs, BioCpkgs, ghpkgs), require, character.only = TRUE))

## ----non-local----------------------------------------------------------------------------------
ps_connect <-url("https://raw.githubusercontent.com/spholmes/F1000_workflow/master/data/ps.rds")
ps = readRDS(ps_connect)
ps

## ----taxfilter0---------------------------------------------------------------------------------
# Show available ranks in the dataset
rank_names(ps)
# Create table, number of features for each phyla
table(tax_table(ps)[, "Phylum"], exclude = NULL)

## ----removeNAphyla------------------------------------------------------------------------------
ps <- subset_taxa(ps, !is.na(Phylum) & !Phylum %in% c("", "uncharacterized"))

## ----prevfilter0--------------------------------------------------------------------------------
# Compute prevalence of each feature, store as data.frame
prevdf = apply(X = otu_table(ps),
               MARGIN = ifelse(taxa_are_rows(ps), yes = 1, no = 2),
               FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts to this data.frame
prevdf = data.frame(Prevalence = prevdf,
                    TotalAbundance = taxa_sums(ps),
                    tax_table(ps))

## ----lowprev------------------------------------------------------------------------------------
plyr::ddply(prevdf, "Phylum", function(df1){cbind(mean(df1$Prevalence),sum(df1$Prevalence))})

## ----taxfilter----------------------------------------------------------------------------------
# Define phyla to filter
filterPhyla = c("Fusobacteria", "Deinococcus-Thermus")
# Filter entries with unidentified Phylum.
ps1 = subset_taxa(ps, !Phylum %in% filterPhyla)
ps1

## ----plotprevalence, fig.width=9, fig.height=5, fig.cap="Taxa prevalence versus total counts."----
# Subset to the remaining phyla
prevdf1 = subset(prevdf, Phylum %in% get_taxa_unique(ps1, "Phylum"))
ggplot(prevdf1, aes(TotalAbundance, Prevalence / nsamples(ps),color=Phylum)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05, alpha = 0.5, linetype = 2) +  geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap(~Phylum) + theme(legend.position="none")

## ----prevalencefilter---------------------------------------------------------------------------
# Define prevalence threshold as 5% of total samples
prevalenceThreshold = 0.05 * nsamples(ps)
prevalenceThreshold
# Execute prevalence filter, using `prune_taxa()` function
keepTaxa = rownames(prevdf1)[(prevdf1$Prevalence >= prevalenceThreshold)]
ps2 = prune_taxa(keepTaxa, ps)

## ----taxglom------------------------------------------------------------------------------------
# How many genera would be present after filtering?
length(get_taxa_unique(ps2, taxonomic.rank = "Genus"))
ps3 = tax_glom(ps2, "Genus", NArm = TRUE)

## ----tipglom------------------------------------------------------------------------------------
h1 = 0.4
ps4 = tip_glom(ps2, h = h1)

## ----plotglomprep-------------------------------------------------------------------------------
multiPlotTitleTextSize = 15
p2tree = plot_tree(ps2, method = "treeonly",
                   ladderize = "left",
                   title = "Before Agglomeration") +
  theme(plot.title = element_text(size = multiPlotTitleTextSize))
p3tree = plot_tree(ps3, method = "treeonly",
                   ladderize = "left", title = "By Genus") +
  theme(plot.title = element_text(size = multiPlotTitleTextSize))
p4tree = plot_tree(ps4, method = "treeonly",
                   ladderize = "left", title = "By Height") +
  theme(plot.title = element_text(size = multiPlotTitleTextSize))

## ----plotglomtree, fig.width=14, fig.cap="Different types of agglomeration"---------------------
# group plots together
grid.arrange(nrow = 1, p2tree, p3tree, p4tree)

## ----abundancetransformation--------------------------------------------------------------------
plot_abundance = function(physeq,title = "",
                          Facet = "Order", Color = "Phylum"){
  # Arbitrary subset, based on Phylum, for plotting
  p1f = subset_taxa(physeq, Phylum %in% c("Firmicutes"))
  mphyseq = psmelt(p1f)
  mphyseq <- subset(mphyseq, Abundance > 0)
  ggplot(data = mphyseq, mapping = aes_string(x = "sex",y = "Abundance",
                              color = Color, fill = Color)) +
    geom_violin(fill = NA) +
    geom_point(size = 1, alpha = 0.3,
               position = position_jitter(width = 0.3)) +
    facet_wrap(facets = Facet) + scale_y_log10()+
    theme(legend.position="none")
}

## ----abundancetransformation2-------------------------------------------------------------------
# Transform to relative abundance. Save as new object.
ps3ra = transform_sample_counts(ps3, function(x){x / sum(x)})

## ----abundancetransformation3, fig.height=12, fig.width=10.5,fig.cap="Comparison of original abundances with transformed data"----
plotBefore = plot_abundance(ps3,"")
plotAfter = plot_abundance(ps3ra,"")
# Combine each plot into one graphic.
grid.arrange(nrow = 2,  plotBefore, plotAfter)

## ----subsettaxa, fig.cap= "Violin plot of relative abundances of Lactobacillales"---------------
psOrd = subset_taxa(ps3ra, Order == "Lactobacillales")
plot_abundance(psOrd, Facet = "Genus", Color = NULL)

## ----init-analysis------------------------------------------------------------------------------
.cran_packages <- c( "shiny","miniUI", "caret", "pls", "e1071", "ggplot2", "randomForest", "dplyr", "ggrepel", "nlme", "devtools",
                  "reshape2", "PMA", "structSSI", "ade4",
                  "ggnetwork", "intergraph", "scales")
.github_packages <- c("jfukuyama/phyloseqGraphTest")
.bioc_packages <- c("genefilter", "impute")
# Install CRAN packages (if not already installed)
.inst <- .cran_packages %in% installed.packages()
if (any(!.inst)){
  install.packages(.cran_packages[!.inst],repos = "http://cran.rstudio.com/")
}
.inst <- .github_packages %in% installed.packages()
if (any(!.inst)){
  devtools::install_github(.github_packages[!.inst])
}

.inst <- .bioc_packages %in% installed.packages()
if(any(!.inst)){
  source("http://bioconductor.org/biocLite.R")
  biocLite(.bioc_packages[!.inst])
}

## ----preprocessing-setup, fig.cap="Histogram of age groupings",fig.show="hold"------------------
qplot(sample_data(ps)$age, geom = "histogram",binwidth=20) + xlab("age")

## ----preprocessing2, fig.cap="Histograms comparing raw and log transformed read depths",fig.show="hold"----
qplot(log10(rowSums(otu_table(ps))),binwidth=0.2) +
  xlab("Logged counts-per-sample")

## ----outlier-detect, fig.cap="Exploratory ordination analysis with log abundances.",fig.wide= TRUE----
sample_data(ps)$age_binned <- cut(sample_data(ps)$age,
  				          breaks = c(0, 100, 200, 400))
levels(sample_data(ps)$age_binned) <- list(Young100="(0,100]", Mid100to200="(100,200]", Old200="(200,400]")
sample_data(ps)$family_relationship=gsub(" ","",sample_data(ps)$family_relationship)
pslog <- transform_sample_counts(ps, function(x) log(1 + x))
out.wuf.log <- ordinate(pslog, method = "MDS", distance = "wunifrac")
evals <- out.wuf.log$values$Eigenvalues
plot_ordination(pslog, out.wuf.log, color = "age_binned") +
  labs(col = "Binned Age") +
  coord_fixed(sqrt(evals[2] / evals[1]))

## ----outlier-analyze, fig.width=9, fig.height=5, fig.cap="The outlier samples are dominated by a single ASV."----
rel_abund <- t(apply(otu_table(ps), 1, function(x) x / sum(x)))
qplot(rel_abund[, 12], geom = "histogram",binwidth=0.05) +
  xlab("Relative abundance")

## ----remove-outliers----------------------------------------------------------------------------
outliers <- c("F5D165", "F6D165", "M3D175", "M4D175", "M5D175", "M6D175")
ps <- prune_samples(!(sample_names(ps) %in% outliers), ps)

## ----removelowreads-----------------------------------------------------------------------------
which(!rowSums(otu_table(ps)) > 1000)
ps <- prune_samples(rowSums(otu_table(ps)) > 1000, ps)
pslog <- transform_sample_counts(ps, function(x) log(1 + x))

## ----ordinations-bray,fig.cap="A PCoA plot using Bray-Curtis between  samples."-----------------
out.pcoa.log <- ordinate(pslog,  method = "MDS", distance = "bray")
evals <- out.pcoa.log$values[,1]
plot_ordination(pslog, out.pcoa.log, color = "age_binned",
                  shape = "family_relationship") +
  labs(col = "Binned Age", shape = "Litter")+
  coord_fixed(sqrt(evals[2] / evals[1]))

## ----ordinations-dpcoa, fig.wide=TRUE,fig.cap="A DPCoA plot incorporates phylogenetic information, but is  dominated by the first axis."----
out.dpcoa.log <- ordinate(pslog, method = "DPCoA")
evals <- out.dpcoa.log$eig
plot_ordination(pslog, out.dpcoa.log, color = "age_binned", label= "SampleID",
                  shape = "family_relationship") +
  labs(col = "Binned Age", shape = "Litter")+
  coord_fixed(sqrt(evals[2] / evals[1]))

## ----dpcoabiplot,fig.wide=TRUE,fig.cap="Taxa responsible for Axis 1 and 2"----------------------
plot_ordination(pslog, out.dpcoa.log, type = "species", color = "Phylum") +
  coord_fixed(sqrt(evals[2] / evals[1]))

## ----ordinations-wuf,fig.wide =TRUE, fig.cap="The sample positions produced by a PCoA using weighted Unifrac."----
out.wuf.log <- ordinate(pslog, method = "PCoA", distance ="wunifrac")
evals <- out.wuf.log$values$Eigenvalues
plot_ordination(pslog, out.wuf.log, color = "age_binned",
                  shape = "family_relationship") +
  coord_fixed(sqrt(evals[2] / evals[1])) +
  labs(col = "Binned Age", shape = "Litter")

## ----rankab-------------------------------------------------------------------------------------
abund <- otu_table(pslog)
abund_ranks <- t(apply(abund, 1, rank))

## ----rankthreshold------------------------------------------------------------------------------
abund_ranks <- abund_ranks - 329
abund_ranks[abund_ranks < 1] <- 1

## ----pca-rank-visualize-procedure, fig.cap="Rank threshold transformation"----------------------
library(dplyr)
library(reshape2)
abund_df <- melt(abund, value.name = "abund") %>%
  left_join(melt(abund_ranks, value.name = "rank"))
colnames(abund_df) <- c("sample", "seq", "abund", "rank")

abund_df <- melt(abund, value.name = "abund") %>%
  left_join(melt(abund_ranks, value.name = "rank"))
colnames(abund_df) <- c("sample", "seq", "abund", "rank")

sample_ix <- sample(1:nrow(abund_df), 8)
ggplot(abund_df %>%
         filter(sample %in% abund_df$sample[sample_ix])) +
  geom_point(aes(x = abund, y = rank, col = sample),
             position = position_jitter(width = 0.2), size = 1.5) +
  labs(x = "Abundance", y = "Thresholded rank") +
  scale_color_brewer(palette = "Set2")

## ----pca-rank-pca-setup-------------------------------------------------------------------------
library(ade4)
ranks_pca <- dudi.pca(abund_ranks, scannf = F, nf = 3)
row_scores <- data.frame(li = ranks_pca$li,
                         SampleID = rownames(abund_ranks))
col_scores <- data.frame(co = ranks_pca$co,
                         seq = colnames(abund_ranks))
tax <- tax_table(ps) %>%
  data.frame(stringsAsFactors = FALSE)
tax$seq <- rownames(tax)
main_orders <- c("Clostridiales", "Bacteroidales", "Lactobacillales",
                 "Coriobacteriales")
tax$Order[!(tax$Order %in% main_orders)] <- "Other"
tax$Order <- factor(tax$Order, levels = c(main_orders, "Other"))
tax$otu_id <- seq_len(ncol(otu_table(ps)))
row_scores <- row_scores %>%
  left_join(sample_data(pslog))
col_scores <- col_scores %>%
  left_join(tax)

## ----pca-rank-pca-plot, fig.wide=TRUE,fig.height=8,fig.cap="The biplot resulting from the PCA after the truncated-ranking transformation."----
evals_prop <- 100 * (ranks_pca$eig / sum(ranks_pca$eig))
ggplot() +
  geom_point(data = row_scores, aes(x = li.Axis1, y = li.Axis2), shape = 2) +
  geom_point(data = col_scores, aes(x = 25 * co.Comp1, y = 25 * co.Comp2, col = Order),
             size = .3, alpha = 0.6) +
  scale_color_brewer(palette = "Set2") +
  facet_grid(~ age_binned) +
  guides(col = guide_legend(override.aes = list(size = 3))) +
  labs(x = sprintf("Axis1 [%s%% variance]", round(evals_prop[1], 2)),
       y = sprintf("Axis2 [%s%% variance]", round(evals_prop[2], 2))) +
  coord_fixed(sqrt(ranks_pca$eig[2] / ranks_pca$eig[1])) +
  theme(panel.border = element_rect(color = "#787878", fill = alpha("white", 0)))

## ----ccpna-correspondence-analysis--------------------------------------------------------------
ps_ccpna <- ordinate(pslog, "CCA", formula = pslog ~ age_binned + family_relationship)

## ----ccpna-join-data, fig.cap="The mouse and bacteria scores generated by CCpnA.", fig.wide=TRUE, fig.height=10----
library(ggrepel)
ps_scores <- vegan::scores(ps_ccpna)
sites <- data.frame(ps_scores$sites)
sites$SampleID <- rownames(sites)
sites <- sites %>%
  left_join(sample_data(ps))

species <- data.frame(ps_scores$species)
species$otu_id <- seq_along(colnames(otu_table(ps)))
species <- species %>%
  left_join(tax)
evals_prop <- 100 * ps_ccpna$CCA$eig[1:2] / sum(ps_ccpna$CA$eig)
ggplot() +
  geom_point(data = sites, aes(x = CCA1, y = CCA2), shape = 2, alpha = 0.5) +
  geom_point(data = species, aes(x = CCA1, y = CCA2, col = Order), size = 0.5) +
  geom_text_repel(data = species %>% filter(CCA2 < -2),
                    aes(x = CCA1, y = CCA2, label = otu_id),
		    size = 1.5, segment.size = 0.1) +
  facet_grid(. ~ family_relationship) +
  guides(col = guide_legend(override.aes = list(size = 3))) +
  labs(x = sprintf("Axis1 [%s%% variance]", round(evals_prop[1], 2)),
        y = sprintf("Axis2 [%s%% variance]", round(evals_prop[2], 2))) +
  scale_color_brewer(palette = "Set2") +
  coord_fixed(sqrt(ps_ccpna$CCA$eig[2] / ps_ccpna$CCA$eig[1])*0.45   ) +
  theme(panel.border = element_rect(color = "#787878", fill = alpha("white", 0)))

## ----caret-pls----------------------------------------------------------------------------------
library(caret)
sample_data(pslog)$age2 <- cut(sample_data(pslog)$age, c(0, 100, 400))
dataMatrix <- data.frame(age = sample_data(pslog)$age2, otu_table(pslog))
# take 8 mice at random to be the training set, and the remaining 4 the test set
trainingMice <- sample(unique(sample_data(pslog)$host_subject_id), size = 8)
inTrain <- which(sample_data(pslog)$host_subject_id %in% trainingMice)
training <- dataMatrix[inTrain,]
testing <- dataMatrix[-inTrain,]
plsFit <- train(age ~ ., data = training,
                method = "pls", preProc = "center")

## ----caret-pls-confusion------------------------------------------------------------------------
plsClasses <- predict(plsFit, newdata = testing)
table(plsClasses, testing$age)

## ----caret-rf, eval =T--------------------------------------------------------------------------
library(randomForest)
rfFit <- train(age ~ ., data = training, method = "rf",
               preProc = "center", proximity = TRUE)
rfClasses <- predict(rfFit, newdata = testing)
table(rfClasses, testing$age)

## ----caret-pls-scores-loadings------------------------------------------------------------------
pls_biplot <- list("loadings" = loadings(plsFit$finalModel),
                   "scores" = scores(plsFit$finalModel))
class(pls_biplot$scores) <- "matrix"

pls_biplot$scores <- data.frame(sample_data(pslog)[inTrain, ],
                                pls_biplot$scores)

tax <- tax_table(ps)@.Data %>%
  data.frame(stringsAsFactors = FALSE)
main_orders <- c("Clostridiales", "Bacteroidales", "Lactobacillales",
                 "Coriobacteriales")
tax$Order[!(tax$Order %in% main_orders)] <- "Other"
tax$Order <- factor(tax$Order, levels = c(main_orders, "Other"))
class(pls_biplot$loadings) <- "matrix"
pls_biplot$loadings <- data.frame(tax, pls_biplot$loadings)

## ----caret-pls-scores-plot, fig.height = 7, fig.width=8, fig.wide=TRUE, fig.cap="PLS produces a biplot representation designed to separate  samples by a response variable."----
ggplot() +
  geom_point(data = pls_biplot$scores,
             aes(x = Comp.1, y = Comp.2), shape = 2) +
  geom_point(data = pls_biplot$loadings,
             aes(x = 25 * Comp.1, y = 25 * Comp.2, col = Order),
             size = 0.3, alpha = 0.6) +
  scale_color_brewer(palette = "Set2") +
  labs(x = "Axis1", y = "Axis2", col = "Binned Age") +
  guides(col = guide_legend(override.aes = list(size = 3))) +
  facet_grid( ~ age2) +
  theme(panel.border = element_rect(color = "#787878", fill = alpha("white", 0)))

## ----caret-proximity, fig.wide=TRUE, fig.cap="The random forest model determines a distance between samples, which can be input into PCoA to produce a proximity plot."----
rf_prox <- cmdscale(1 - rfFit$finalModel$proximity) %>%
  data.frame(sample_data(pslog)[inTrain, ])

ggplot(rf_prox) +
  geom_point(aes(x = X1, y = X2, col = age_binned),
             size = 1, alpha = 0.7) +
  scale_color_manual(values = c("#A66EB8", "#238DB5", "#748B4F")) +
  guides(col = guide_legend(override.aes = list(size = 4))) +
  labs(col = "Binned Age", x = "Axis1", y = "Axis2")

## ----caret-rf-importance, fig.height=6, fig.width=9, fig.cap="Random forest aids to interpretation: importance scores."----
as.vector(tax_table(ps)[which.max(importance(rfFit$finalModel)), c("Family", "Genus")])
impOtu <- as.vector(otu_table(pslog)[,which.max(importance(rfFit$finalModel))])
maxImpDF <- data.frame(sample_data(pslog), abund = impOtu)
ggplot(maxImpDF) +   geom_histogram(aes(x = abund)) +
  facet_grid(age2 ~ .) +
  labs(x = "Abundance of discriminative bacteria", y = "Number of samples")

## ----multitable-setup---------------------------------------------------------------------------
metab <- read.csv("https://raw.githubusercontent.com/spholmes/F1000_workflow/master/data/metabolites.csv",row.names = 1)
microbe_connect <-url("https://raw.githubusercontent.com/spholmes/F1000_workflow/master/data/microbe.rda")
load(microbe_connect)
microbe

## ----multitable-filtering-----------------------------------------------------------------------
library("genefilter")
keep_ix <- rowSums(metab == 0) <= 3
metab <- metab[keep_ix, ]
microbe <- prune_taxa(taxa_sums(microbe) > 4, microbe)
microbe <- filter_taxa(microbe, filterfun(kOverA(3, 2)), TRUE)
metab <- log(1 + metab, base = 10)
X <- otu_table(microbe)
X[X > 50] <- 50
dim(X)
dim(metab)

## ----multitable-sparse-cca----------------------------------------------------------------------
library(PMA)
cca_res <- CCA(t(X),  t(metab), penaltyx = .15, penaltyz = .15)
cca_res

## ----loadade4,echo=FALSE------------------------------------------------------------------------
library(ade4)

## ----multitable-plug-in-pca---------------------------------------------------------------------
combined <- cbind(t(X[cca_res$u != 0, ]),
                  t(metab[cca_res$v != 0, ]))
pca_res <- dudi.pca(combined, scannf = F, nf = 3)

## ----annotation---------------------------------------------------------------------------------
genotype <- substr(rownames(pca_res$li), 1, 2)
sample_type <- substr(rownames(pca_res$l1), 3, 4)
feature_type <- grepl("\\.", colnames(combined))
feature_type <- ifelse(feature_type, "Metabolite", "OTU")
sample_info <- data.frame(pca_res$li, genotype, sample_type)
feature_info <- data.frame(pca_res$c1,
                           feature = substr(colnames(combined), 1, 6))

## ----multitable-interpret-pca, fig.wide=TRUE, fig.cap="A PCA triplot produced from the CCA selected features in from muliple data types;metabolites and OTUs."----
ggplot() +  geom_point(data = sample_info,
            aes(x = Axis1, y = Axis2, col = sample_type, shape = genotype), size = 3) + 
  geom_label_repel(data = feature_info,
                   aes(x = 5.5 * CS1, y = 5.5 * CS2, label = feature, fill = feature_type),
                   size = 2, segment.size = 0.3,
                   label.padding = unit(0.1, "lines"), label.size = 0) +
  geom_point(data = feature_info,
             aes(x = 5.5 * CS1, y = 5.5 * CS2, fill = feature_type),
             size = 1, shape = 23, col = "#383838") +
  scale_color_brewer(palette = "Set2") +
  scale_fill_manual(values = c("#a6d854", "#e78ac3")) +
  guides(fill = guide_legend(override.aes = list(shape = 32, size = 0))) +
  coord_fixed(sqrt(pca_res$eig[2] / pca_res$eig[2])) +
  labs(x = sprintf("Axis1 [%s%% Variance]",
                   100 * round(pca_res$eig[1] / sum(pca_res$eig), 2)),
       y = sprintf("Axis2 [%s%% Variance]",
                   100 * round(pca_res$eig[2] / sum(pca_res$eig), 2)),
       fill = "Feature Type", col = "Sample Type")

## ---- eval =FALSE-------------------------------------------------------------------------------
#  #not run
#  data(AntibioticPhyloseq)
#  theme_set(theme_bw())
#  pp = processPhyloseq(AntibioticPhyloseq)
#  out.ff = gpcaFullFamily(pp$X, pp$Q, k = 2)
#  out.agpca = visualizeFullFamily(out.ff,
#                      sample_data = sample_data(AntibioticPhyloseq),
#                      sample_mapping = aes(x = Axis1, y = Axis2, color = condition),
#                      var_data = tax_table(AntibioticPhyloseq),
#                      var_mapping = aes(x = Axis1, y = Axis2, color = Phylum))

## -----------------------------------------------------------------------------------------------
##devtools::install.github("jfukuyama/treeDA")
library(treeDA)
library(adaptiveGPCA)
data(AntibioticPhyloseq)
theme_set(theme_bw())

## -----------------------------------------------------------------------------------------------
out.treeda = treeda(response = sample_data(AntibioticPhyloseq)$type,
    predictors = otu_table(AntibioticPhyloseq),
    tree = phy_tree(AntibioticPhyloseq), p = 15)