## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------ BiocStyle::latex() ## ----knitr, echo=FALSE, results="hide"----------------------------------- library("knitr") opts_chunk$set(tidy=FALSE,dev="png",fig.show="hide", fig.width=4,fig.height=4.5, message=FALSE, highlight=FALSE) ## ----Quick start--------------------------------------------------------- library(STAN) data(example) # Fitting a HMM hmm_fitted = fitHMM(observations, hmm_ex) # Fitting a bdHMM bdhmm_fitted = fitHMM(observations, bdhmm_ex, dirFlags=flags) # Calculating the viterbi paths viterbi_bdhmm = getViterbi(bdhmm_fitted$hmm, observations) viterbi_hmm = getViterbi(hmm_fitted$hmm, observations) ## ----Loading library, results="hide"------------------------------------- library(STAN) ## ----yeast example data, echo=TRUE--------------------------------------- data(yeastTF_databychrom_ex) str(yeastTF_databychrom_ex) data(yeastTF_probeAnno_ex) str(yeastTF_probeAnno_ex) ## ----State labeling yeast, echo=TRUE------------------------------------- stateLabel = c("F1", "F2", "F3", "F4", "F5", "R1", "R2", "R3", "R4", "R5", "U1", "U2") ## ----Initial emissions, echo=TRUE---------------------------------------- nStates = 12 data(yeastTF_initMeans) data(yeastTF_initCovs) gaussEmission = HMMEmission(type="Gaussian", parameters=list(mean=yeastTF_initMeans, cov=yeastTF_initCovs), nStates=nStates) gaussEmission ## ----Initial transitions, echo=TRUE-------------------------------------- transMat = matrix(1/nStates, nrow=nStates, ncol=nStates) initProb = rep(1/nStates, nStates) ## ----Defining the yeast model, echo=TRUE, results="hide"----------------- dirobs = as.integer(c(rep(0,10), 1, 1)) ## ----Fitting the bdHMM to yeast data, echo=TRUE, results="hide"---------- bdhmm = bdHMM(initProb=initProb, transMat=transMat, emission=gaussEmission, nStates=nStates, status="initial", stateLabel=stateLabel, transitionsOptim="analytical", directedObs=dirobs) bdhmm_fitted = fitHMM(yeastTF_databychrom_ex, bdhmm, maxIters=100, verbose=FALSE) ## ----Plotting fitted in yeast, echo=TRUE, eval=FALSE, results="hide"----- ## means_fitted = do.call("rbind",bdhmm_fitted$hmm@emission@parameters$mean, )[c(1:5, 11:12),] ## library(gplots) ## heat = c("dark green", "dark green", "dark green", "gold", "orange", "red") ## colfct = colorRampPalette(heat) ## colpal = colfct(200) ## colnames(means_fitted) = colnames(yeastTF_databychrom_ex[[1]]) ## rownames(means_fitted) = c(paste("F", 1:5, sep=""), paste("U", 1:2, sep="")) ## heatmap.2(means_fitted, col=colpal, trace="none", cexCol=0.9, cexRow=0.9, ## cellnote=round(means_fitted,1), notecol="black", dendrogram="row", ## Rowv=TRUE, Colv=FALSE, notecex=0.9) ## ----Plotting the means, echo=FALSE, results="hide"---------------------- library(gplots) pdf("means.pdf") means_fitted = do.call("rbind",bdhmm_fitted$hmm@emission@parameters$mean, )[c(1:5, 11:12),] library(gplots) heat = c("dark green", "dark green", "dark green", "gold", "orange", "red") colfct = colorRampPalette(heat) colpal = colfct(200) colnames(means_fitted) = colnames(yeastTF_databychrom_ex[[1]]) rownames(means_fitted) = c(paste("F", 1:5, sep=""), paste("U", 1:2, sep="")) heatmap.2(means_fitted, col=colpal, trace="none", cexCol=0.9, cexRow=0.9, cellnote=round(means_fitted,1), notecol="black", dendrogram="row", Rowv=TRUE, Colv=FALSE, notecex=0.9) dev.off() ## ----Calculating viterbi path in yeast, echo=TRUE, results="hide"-------- yeastTF_viterbi = getViterbi(bdhmm_fitted$hmm, yeastTF_databychrom_ex) str(yeastTF_viterbi) ## ----Retrieving UCSC genome annotation, echo=TRUE, eval=FALSE, results="hide"---- ## library(Gviz) ## chr = "chrIV" ## gen = "sacCer3" ## gtrack <- GenomeAxisTrack() ## yeastTF_SGDGenes <- UcscTrack(genome = gen, chromosome = chr, ## track = "SGD Gene", from=1217060, to=1225000, trackType = "GeneRegionTrack", ## rstarts = "exonStarts", rends = "exonEnds", gene = "name", ## symbol = "name", transcript = "name", strand = "strand", ## fill = "blue", name = "SGD Genes", col="black") ## ----Creating yeast Gviz data tracks, echo=TRUE, eval=FALSE, results="hide"---- ## faccols = hcl(h = seq(15, 375 - 360/dim(yeastTF_databychrom_ex$chr04)[2], ## length = dim(yeastTF_databychrom_ex$chr04)[2])%%360, c = 100, l = 65) ## names(faccols) = colnames(yeastTF_databychrom_ex$chr04) ## ## dlist=list() ## for(n in colnames(yeastTF_databychrom_ex$chr04)) { ## dlist[[n]] = DataTrack(data = yeastTF_databychrom_ex$chr04[,n], ## start = yeastTF_probeAnno_ex$chr04, ## end = yeastTF_probeAnno_ex$chr04+8, ## chromosome = "chrIV", genome=gen, ## name = n, type="h", col=faccols[n]) ## } ## ----Creating yeast Gviz viterbi tracks, echo=TRUE, eval=FALSE, results="hide"---- ## library(GenomicRanges) ## library(IRanges) ## myViterbiDirs = list(F=c("F1", "F2", "F3", "F4", "F5"), U=c("U1", "U2"), ## R=c("R1", "R2", "R3", "R4", "R5")) ## myViterbiPanels = list() ## cols = rainbow(7) ## cols = cols[c(1:5,1:5,6:7)] ## names(cols) = stateLabel ## ## for(dir in c("F", "U", "R")) { ## myPos = yeastTF_probeAnno_ex$chr04 >= 1217060 & yeastTF_probeAnno_ex$chr04 <= 1225000 ## myRle = Rle(yeastTF_viterbi$chr04[myPos]) ## currItems = which(myRle@values %in% myViterbiDirs[[dir]]) ## ## start = yeastTF_probeAnno_ex$chr04[myPos][start(myRle)][currItems] ## width = myRle@lengths[currItems] ## ids = as.character(myRle@values[currItems]) ## values = as.character(myRle@values[currItems]) ## ## myViterbiPanels[[dir]] = AnnotationTrack(range=GRanges(seqnames=rep("chrIV", ## length(currItems)), ranges=IRanges(start=start, width=width*8, names=values)), ## genome=gen, chromosome=chr, name=paste("Viterbi\n", "(", dir, ")", sep=""), ## id=ids[order(start)], shape="box",fill=cols[values[order(start)]], col="black", ## stacking="dense") ## } ## ----Gviz plot yeast, echo=TRUE, eval=FALSE, results="hide"------------- ## sizes = rep(1,17) ## sizes[14] = 2 ## sizes[15:17] = 0.7 ## sizes[1] = 1.3 ## plotTracks(c(list(gtrack), dlist, list(yeastTF_SGDGenes), myViterbiPanels), ## from=1217060, to=1225000, sizes=sizes, showFeatureId=TRUE, featureAnnotation="id", ## fontcolor.feature="black", cex.feature=0.7, background.title="darkgrey", showId=TRUE) ## ----Plotting the result2, echo=FALSE, results="hide"-------------------- #library(colorspace) pdf("yeast_ex.pdf", width=7*1.2, height=7*1.5) library(Gviz) chr = "chrIV" gen = "sacCer3" gtrack <- GenomeAxisTrack() data(yeastTF_SGDGenes) faccols = hcl(h = seq(15, 375 - 360/dim(yeastTF_databychrom_ex$chr04)[2], length = dim(yeastTF_databychrom_ex$chr04)[2])%%360, c = 100, l = 65) names(faccols) = colnames(yeastTF_databychrom_ex$chr04) dlist=list() for(n in colnames(yeastTF_databychrom_ex$chr04)) { dlist[[n]] = DataTrack(data = yeastTF_databychrom_ex$chr04[,n], start = yeastTF_probeAnno_ex$chr04, end = yeastTF_probeAnno_ex$chr04+8, chromosome = "chrIV", genome=gen, name = n, type="h", col=faccols[n]) } library(GenomicRanges) library(IRanges) myViterbiDirs = list(F=c("F1", "F2", "F3", "F4", "F5"), U=c("U1", "U2"), R=c("R1", "R2", "R3", "R4", "R5")) myViterbiPanels = list() cols = rainbow(7) cols = cols[c(1:5,1:5,6:7)] names(cols) = stateLabel for(dir in c("F", "U", "R")) { myPos = yeastTF_probeAnno_ex$chr04 >= 1217060 & yeastTF_probeAnno_ex$chr04 <= 1225000 myRle = Rle(yeastTF_viterbi$chr04[myPos]) currItems = which(myRle@values %in% myViterbiDirs[[dir]]) start = yeastTF_probeAnno_ex$chr04[myPos][start(myRle)][currItems] width = myRle@lengths[currItems] ids = as.character(myRle@values[currItems]) values = as.character(myRle@values[currItems]) myViterbiPanels[[dir]] = AnnotationTrack(range=GRanges(seqnames=rep("chrIV", length(currItems)), ranges=IRanges(start=start, width=width*8, names=values)), genome=gen, chromosome=chr, name=paste("Viterbi\n", "(", dir, ")", sep=""), id=ids[order(start)], shape="box",fill=cols[values[order(start)]], col="black", stacking="dense") } sizes = rep(1,17) sizes[14] = 2 sizes[15:17] = 0.7 sizes[1] = 1.3 plotTracks(c(list(gtrack), dlist, list(yeastTF_SGDGenes), myViterbiPanels), from=1217060, to=1225000, sizes=sizes, showFeatureId=TRUE, featureAnnotation="id", fontcolor.feature="black", cex.feature=0.7, background.title="darkgrey", showId=TRUE) dev.off() ## ----Loading human data, echo=TRUE, results="hide"----------------------- data(humanCD4T_signal_ex) data(humanCD4T_probeAnno_ex) nStates = 8 stateLabel = c("F1", "F2", "F3", "F4", "R1", "R2", "R3", "R4") ## ----Showing the flag sequence, echo=TRUE-------------------------------- data(humanCD4T_flags_ex) humanCD4T_flags_ex$chr7[600:650] ## ----Fitting the bdHMM to the human CD4T cell data, echo=TRUE, results="hide"---- data(humanCD4T_initMeans) data(humanCD4T_initCovs) gaussEmission = HMMEmission(type="Gaussian", parameters=list(mean=humanCD4T_initMeans, cov=humanCD4T_initCovs), nStates=nStates) transMat = matrix(1/nStates, nrow=nStates, ncol=nStates) initProb = rep(1/nStates, nStates) dirobs = as.integer(rep(0,13)) bdhmm = bdHMM(initProb=initProb, transMat=transMat, emission=gaussEmission, nStates=nStates, status="initial", stateLabel=stateLabel, transitionsOptim="analytical", directedObs=dirobs) mybdHMM_CD4T = fitHMM(humanCD4T_signal_ex, bdhmm, maxIters=1000, dirFlags=humanCD4T_flags_ex) ## ----Defining state directionality, echo=TRUE, results="hide", eval=FALSE---- ## posterior_bdhmm = getPosterior(mybdHMM_CD4T$hmm, obs=humanCD4T_signal_ex) ## ## myTwins = 5:8 ## score = rep(0,4) ## for(i in 1:(nStates/2)) { ## numer = sum(abs(posterior_bdhmm$chr7[,i]-posterior_bdhmm$chr7[,myTwins[i]])) ## denom = sum((posterior_bdhmm$chr7[,i]+posterior_bdhmm$chr7[,myTwins[i]])) ## score[i] = numer/denom ## } ## ## names(score) = c("F1/R1", "F2/R2", "F3/R3", "F4/R4") ## barplot(score, col=rainbow(4), cex.names=0.8, ylim=c(0,1)) ## abline(h=0.5, lty=3) ## ----Calculating the viterbi path, echo=TRUE, results="hide"------------- humanCD4T_viterbi = getViterbi(mybdHMM_CD4T$hmm, humanCD4T_signal_ex) stateLabel2 = c("F1", "F2", "U1", "U2", "R1", "R2", "U1", "U2") humanCD4T_viterbi_collapsed = lapply(humanCD4T_viterbi, function(chrom) factor(stateLabel2[chrom])) ## ----Plotting data and viterbi state path 1, echo=TRUE, results="hide", eval=FALSE---- ## library(Gviz) ## data(humanCD4T_ucscGenes) ## chr = "chr7" ## gen = "hg18" ## gtrack <- GenomeAxisTrack() ## humanCD4T_ideogramChr7 <- IdeogramTrack(genome = gen, chromosome = chr) ## ## faccols = c("#FB9EB1", "#FB9EB1", "#FB9EB1", "#FB9EB1", "#FB9EB1", "#DAB36A", "#DAB36A", ## "#DAB36A", "#7FBFF5", "#7FBFF5", "#7FBFF5", "#7FBFF5", "#7FBFF5") ## names(faccols) = colnames(humanCD4T_signal_ex$chr7) ## dlist=list() ## for(n in colnames(humanCD4T_signal_ex$chr7)) { ## dlist[[n]] = DataTrack(data=humanCD4T_signal_ex$chr7[,n], start=humanCD4T_probeAnno_ex$chr7, ## end = humanCD4T_probeAnno_ex$chr7+200, chromosome="chr7", genome=gen, name=n, ## type="h", col=faccols[n]) ## } ## ## myViterbiDirs = list(F=c("F1", "F2"), U=c("U1", "U2"), R=c("R1", "R2")) ## myViterbiPanels = list() ## cols = rainbow(4) ## cols = cols[c(1:2,1:2,3:4)] ## ## for(dir in c("F", "U", "R")) { ## myRle = Rle(humanCD4T_viterbi_collapsed$chr7) ## currItems = which(myRle@values %in% myViterbiDirs[[dir]]) ## ## start = humanCD4T_probeAnno_ex$chr7[start(myRle)][currItems] ## width = myRle@lengths[currItems] ## ids = myRle@values[currItems] ## values = myRle@values[currItems] ## currGRange = GRanges(seqnames=rep("chr7", length(currItems)),ranges=IRanges(start=start, ## width=width*200, names=values[currItems])) ## ## myViterbiPanels[[dir]] = AnnotationTrack(range=currGRange, genome="hg18", chromosome="chr7", ## name=paste("Viterbi\n", "(", dir, ")", sep=""), id=ids[order(start)], ## shape="box",fill=cols[values[order(start)]], col="black", stacking="dense") ## } ## ## flagpos = humanCD4T_probeAnno_ex$chr7[range(which(humanCD4T_flags_ex$chr7 == "R"))] ## flagpanel = list(AnnotationTrack(start=flagpos[1], end=flagpos[2], feature="flags", ## id="flags-1", strand="-", chromosome="chr7", genome="hg18", ## stacking="squish", name="flags", fill="coral", col="black", shape="arrow")) ## ## ## sizes = rep(1,20) ## sizes[16] = 2 ## sizes[17:20] = 0.7 ## sizes[1] = 0.5 ## plotTracks(c(list(humanCD4T_ideogramChr7), list(gtrack), dlist, list(humanCD4T_ucscGenes), ## flagpanel, myViterbiPanels), from=134600000, to=135350000, sizes=sizes, ## showFeatureId=TRUE, featureAnnotation="id", fontcolor.feature="black", ## cex.feature=0.7, background.title="darkgrey") ## ----Real plotting, echo=FALSE, results="hide", eval=TRUE---------------- posterior_bdhmm = getPosterior(mybdHMM_CD4T$hmm, obs=humanCD4T_signal_ex) myTwins = 5:8 score = rep(0,4) for(i in 1:(nStates/2)) { numer = sum(abs(posterior_bdhmm$chr7[,i]-posterior_bdhmm$chr7[,myTwins[i]])) denom = sum((posterior_bdhmm$chr7[,i]+posterior_bdhmm$chr7[,myTwins[i]])) score[i] = numer/denom } names(score) = c("F1/R1", "F2/R2", "F3/R3", "F4/R4") pdf("dir_score_human.pdf") barplot(score, col=rainbow(4), cex.names=0.8, ylim=c(0,1)) abline(h=0.5, lty=3) dev.off() pdf("human_results.pdf", width=7*1.2, height=7*1.5) library(Gviz) data(humanCD4T_ucscGenes) chr = "chr7" gen = "hg18" gtrack <- GenomeAxisTrack() humanCD4T_ideogramChr7 <- IdeogramTrack(genome = gen, chromosome = chr) faccols = c("#FB9EB1", "#FB9EB1", "#FB9EB1", "#FB9EB1", "#FB9EB1", "#DAB36A", "#DAB36A", "#DAB36A", "#7FBFF5", "#7FBFF5", "#7FBFF5", "#7FBFF5", "#7FBFF5") names(faccols) = colnames(humanCD4T_signal_ex$chr7) dlist=list() for(n in colnames(humanCD4T_signal_ex$chr7)) { dlist[[n]] = DataTrack(data=humanCD4T_signal_ex$chr7[,n], start=humanCD4T_probeAnno_ex$chr7, end = humanCD4T_probeAnno_ex$chr7+200, chromosome="chr7", genome=gen, name=n, type="h", col=faccols[n]) } myViterbiDirs = list(F=c("F1", "F2"), U=c("U1", "U2"), R=c("R1", "R2")) myViterbiPanels = list() cols = rainbow(4) cols = cols[c(1:2,1:2,3:4)] for(dir in c("F", "U", "R")) { myRle = Rle(humanCD4T_viterbi_collapsed$chr7) currItems = which(myRle@values %in% myViterbiDirs[[dir]]) start = humanCD4T_probeAnno_ex$chr7[start(myRle)][currItems] width = myRle@lengths[currItems] ids = myRle@values[currItems] values = myRle@values[currItems] currGRange = GRanges(seqnames=rep("chr7", length(currItems)),ranges=IRanges(start=start, width=width*200, names=values[currItems])) myViterbiPanels[[dir]] = AnnotationTrack(range=currGRange, genome="hg18", chromosome="chr7", name=paste("Viterbi\n", "(", dir, ")", sep=""), id=ids[order(start)], shape="box",fill=cols[values[order(start)]], col="black", stacking="dense") } flagpos = humanCD4T_probeAnno_ex$chr7[range(which(humanCD4T_flags_ex$chr7 == "R"))] flagpanel = list(AnnotationTrack(start=flagpos[1], end=flagpos[2], feature="flags", id="flags-1", strand="-", chromosome="chr7", genome="hg18", stacking="squish", name="flags", fill="coral", col="black", shape="arrow")) sizes = rep(1,20) sizes[16] = 2 sizes[17:20] = 0.7 sizes[1] = 0.5 plotTracks(c(list(humanCD4T_ideogramChr7), list(gtrack), dlist, list(humanCD4T_ucscGenes), flagpanel, myViterbiPanels), from=134600000, to=135350000, sizes=sizes, showFeatureId=TRUE, featureAnnotation="id", fontcolor.feature="black", cex.feature=0.7, background.title="darkgrey") dev.off() ## ----kmeans, echo=TRUE, eval=TRUE---------------------------------------- nStates = 7 myMat = do.call("rbind", yeastTF_databychrom_ex) myMat = myMat[apply(myMat, 1, function(x) all(! is.na(x))),] myMat[, c("YPDexprW", "YPDexprC")] = t(apply(myMat[, c("YPDexprW", "YPDexprC")], 1, sort, decreasing=TRUE)) km = kmeans(myMat, centers=nStates, iter.max=1000, nstart=100)$centers km[order(km[,"YPDexprW"], decreasing=TRUE),] ## ----sessInfo, results="asis", echo=FALSE-------------------------------- toLatex(sessionInfo())