## ----echo=FALSE, results="hide", warning=FALSE, message=FALSE------------ suppressPackageStartupMessages({ library(SRAdb) library(reshape2) library(Rsubread) library(Rsamtools) library(ChIPpeakAnno) library(GeneNetworkBuilder) library(rtracklayer) library(GenomicAlignments) library(idr) library(ChIPQC) library(csaw) library(ggplot2) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(org.Hs.eg.db) library(reactome.db) library(BSgenome.Hsapiens.UCSC.hg19) library(motifStack) library(knitr) library(trackViewer) library(png) library(grid) }) opts_chunk$set(eval=FALSE, fig.width=6, fig.height=6) ## ----eval=TRUE----------------------------------------------------------- extdata <- system.file("extdata", package = "ChIPseqStepByStep") load(file.path(extdata, "minimal.rds")) ## ------------------------------------------------------------------------ # ## load library SRAdb to retrieve SRA files for GSE66081 dataset # library(SRAdb) # ## SRAdb need to download a light sqlite file # ## to extract sra file information # ## and then download by getSRAfile function. # sqlfile <- "SRAmetadb.sqlite" # if(!file.exists(sqlfile)) # sqlfile <- getSRAdbFile() # sra_con <- dbConnect(SQLite(), sqlfile) # conversion <- sraConvert(c("SRP055170"), sra_con=sra_con) # out <- getSRAfile(conversion$sample, # sra_con=sra_con, # fileType="sra") # # ## ectract file annotations # rs <- listSRAfile(conversion$sample, # sra_con=sra_con, # fileType="sra") # experiment <- dbGetQuery(sra_con, # paste0("select * from experiment where", # " experiment_accession in ('", # paste(conversion$experiment, # collapse="','"), "')")) # info <- merge(experiment[, c("experiment_accession", "title", "library_layout")], # rs[, c("experiment", "run")], by=1) ## ----eval=TRUE----------------------------------------------------------- info[, 2:3] ## ------------------------------------------------------------------------ # runs <- info[, "run"] # ## extract fastq files from sra by sratoolkit # sapply(paste0(runs, ".sra"), # function(.ele) system(paste("fastq-dump", .ele))) ## ------------------------------------------------------------------------ # grp <- do.call(rbind, strsplit(info$title, "(;|:) ")) # info <- cbind(info, grp) # runs <- split(runs, grp[, 2]) # group <- gsub("_ChIPSeq", "", names(runs)) # mapply(function(.ele, n) # system(paste("cat", paste0(.ele, ".fastq", collapse=" "), # ">>", paste0(n, ".fastq"), collapse=" ")), # runs, group) # rm(paste0(info$run, ".fastq")) ## ------------------------------------------------------------------------ # library(Rsubread) # fastq.files <- paste0(group, ".fastq") # bam.files <- paste0(group, ".bam") # # ##getPhredOffset, if the offset is not correct, it will report error. # x <- qualityScores(filename=fastq.files[1], # input_format="FASTQ", offset=33) ## ----eval=TRUE----------------------------------------------------------- x[1:5, 1:10] ## ------------------------------------------------------------------------ # ## build index and this can be re-used. # hg19.fasta <- # "Genomes/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" # dir.create("index") # buildindex(basename="index/hg19", # reference=hg19.fasta) # # ## alignment # align(index="./index/hg19", readfile1=fastq.files, type=1, # output_file=bam.files, maxMismatches=2, nthreads=2, # phredOffset=33, unique=TRUE) ## ------------------------------------------------------------------------ # ## remove reads which are mapped to identical locations, # ## using mapping location of the first base of each read. # ## and resort by postion # library(Rsamtools) # null <- sapply(group, function(gp){ # out <- asSam(paste0(gp, ".bam"), "tmp") # removeDupReads(out, threshold=2, outputFile="tmp.rmdup.sam") # asBam("tmp.rmdup.sam", paste0(gp, "rmdup.bam"), indexDestination=TRUE) # unlink(out) # unlink("tmp.rmdup.sam") # }) ## ------------------------------------------------------------------------ # ## pool IgG depend on experiment # IgG.bg <- list(rep1.2=c("IgG_rep1.rmdup.bam", "IgG_rep2.rmdup.bam", # "IgG_rep1.2.rmdup.bam"), # rep3.4=c("IgG_rep3.rmdup.bam", "IgG_rep4.rmdup.bam", # "IgG_rep3.4.rmdup.bam"), # rep3.5=c("IgG_rep3.rmdup.bam", "IgG_rep5.rmdup.bam", # "IgG_rep3.5.rmdup.bam")) # null <- sapply(IgG.bg, function(.ele){ # out <- mergeBam(files=.ele[-3], destination = .ele[3], # indexDestination=TRUE) # }) ## ------------------------------------------------------------------------ # library(ChIPpeakAnno) # library(TxDb.Hsapiens.UCSC.hg19.knownGene) # pair <- data.frame(treat=c("YAP", "TAZ", "TEAD4", "JUN"), # control=c("1.2", "1.2", "3.4", "3.5")) # # use chromosome 1 to save the time. # chr1 <- as(seqinfo(TxDb.Hsapiens.UCSC.hg19.knownGene)["chr1"], "GRanges") # cPout <- list() # for(i in seq.int(nrow(pair))){ # cPout[[i]] <- cumulativePercentage( # bamfiles = c(paste0(pair[i, "treat"], "_rep1.rmdup.bam"), # paste0(pair[i, "treat"], "_rep2.rmdup.bam"), # paste0("IgG_rep", pair[i, "control"], ".rmdup.bam")), # gr=chr1) # } ## ------------------------------------------------------------------------ # for(i in seq.int(nrow(pair))){ # system(paste("macs2 callpeak --to-large -t", # paste0(pair[i, "treat"], "_rep1.rmdup.bam"), # "-c ", paste0("IgG_rep", pair[i, "control"], ".rmdup.bam"), # " -f BAM -g hs -q 0.1 -n", # paste0(pair[i, "treat"], "_rep1.q1"))) # system(paste("macs2 callpeak --to-large -t", # paste0(pair[i, "treat"], "_rep2.rmdup.bam"), # "-c ", paste0("IgG_rep", pair[i, "control"], ".rmdup.bam"), # " -f BAM -g hs -q 0.1 -n", # paste0(pair[i, "treat"], "_rep2.q1"))) # ## we will use idr to filter the results later # } ## ------------------------------------------------------------------------ # library(ChIPpeakAnno) # macs2.files <- dir(".", pattern="*.q1_peaks.narrowPeak$") # peaks <- sapply(macs2.files, function(.ele) # toGRanges(.ele, format="narrowPeak")) # peaks.bk <- peaks # peaks <- lapply(peaks, function(.ele) # .ele[order(.ele$pValue, decreasing=TRUE)]) ## ------------------------------------------------------------------------ # peaks <- lapply(peaks, function(.ele) .ele[1:min(1e5, length(.ele))]) ## ----eval=TRUE----------------------------------------------------------- lengths(peaks) ## ------------------------------------------------------------------------ # group <- gsub("^(.*?)_.*$", "\\1", names(peaks)) # peaks.grp <- split(peaks, group) # ## find overlapping peaks # GSE66081 <- sapply(peaks.grp, findOverlapsOfPeaks, simplify = FALSE) # GSE66081.bk <- GSE66081 # GSE66081 <- sapply(GSE66081, function(.ele) # .ele$peaklist[[names(.ele$peaklist)[grepl("\\/\\/\\/", # names(.ele$peaklist))]]], # simplify=FALSE) ## ----eval=TRUE----------------------------------------------------------- lengths(GSE66081) ## ------------------------------------------------------------------------ # library(ChIPpeakAnno) # new.group.names <- # lapply(GSE66081.bk, # function(.ele) # sub(".q1_peaks.narrowPeak", ".bam", # names(.ele$peaklist)[!grepl("\\/\\/\\/", # names(.ele$peaklist))])) # GSE66081 <- mapply(function(.peaks, .bamfiles) # IDRfilter(peaksA=.peaks[[1]], peaksB=.peaks[[2]], # bamfileA=.bamfiles[1], bamfileB=.bamfiles[2], # IDRcutoff=0.01), # peaks.grp, new.group.names) # ## The original peaks are filtered to decrease the memory usage. # peaks.keep <- sapply(GSE66081, function(.ele) # as.character(unlist(.ele$peakNames))) # peaks.keep <- lapply(peaks.keep, function(.ele) # gsub("^.*__", "", .ele)) # lengths(peaks.keep) # peaks.keep <- unlist(peaks.keep) # peaks.s <- lapply(peaks, function(.ele) # .ele[names(.ele) %in% peaks.keep]) # lengths(peaks.s) # peaks.unlist <- unlist(GRangesList(peaks.s), use.names = FALSE) ## ------------------------------------------------------------------------ # library(rtracklayer) # null <- mapply(function(.dat, .name) # export(.dat, gsub("^(.*?).q1.*$", "\\1.fil.bed", .name), # format="BED"), # peaks.s, names(peaks.s)) # ## filter the peaks by qValue < 10^-10 to get similar number peaks # ## as filtered by idr to confirm the effect of IDR filter. # peaks.bk <- lapply(peaks.bk, function(.ele) .ele[.ele$qValue>10]) # null <- mapply(function(.dat, .name) # export(.dat, gsub("^(.*?).q1.*$", "\\1.fil.q01.bed", .name), # format="BED"), # peaks.bk, names(peaks.bk)) ## ------------------------------------------------------------------------ # ## QC # library(csaw) # scc <- lapply(gsub("^(.*?).q1.*$", "\\1.rmdup.bam", names(peaks)), # correlateReads, cross=TRUE, # param=readParam(minq=30, # restrict=paste0("chr", c(1:21, "X", "Y")), # dedup=TRUE)) # names(scc) <- gsub(".q1.*$", "", names(peaks)) # scc.param <- lapply(scc, function(.ele){ # readsLength <- 50 # cutline <- 2 * readsLength # read_length <- which.max(.ele[1:cutline]) # fragment_length <- which.max(.ele[cutline:length(.ele)]) + cutline - 1 # baseline <- which.min(.ele[cutline:length(.ele)]) + cutline - 1 # #normalized.strand.coefficient # NSC <- .ele[fragment_length] / .ele[baseline] # # relative strand correlation # RSC <- (.ele[fragment_length] - .ele[baseline])/(.ele[read_length] - .ele[baseline]) # c(read_length=read_length-1, # fragment_length=fragment_length-1, # baseline=baseline-1, # NSC=NSC, # RSC=RSC) # }) ## ----eval=TRUE, fig.width=6, fig.height=9-------------------------------- do.call(rbind, scc.param)[, c("NSC", "RSC")] op <- par(mfrow=c(4, 2)) for(i in 1:length(scc)){ plot(1:length(scc[[i]])-1, scc[[i]], xlab="Delay (bp)", ylab="cross-correlation", main=names(scc)[i], type="l", ylim=c(scc[[i]][scc.param[[i]]["baseline"]]*.8, max(scc[[i]])*1.1)) abline(v=scc.param[[i]]["fragment_length"], col="red", lty=3) abline(h=scc[[i]][scc.param[[i]][c("fragment_length", "read_length", "baseline")]+1], col=c("blue", "green", "gray30"), lty=2) } ## ------------------------------------------------------------------------ # library(GenomicAlignments) # library(rtracklayer) # bwPath <- "bw" # dir.create(bwPath) # cvgs <- mapply(function(.ele, .name){ # gal <- readGAlignments(paste0(.name, ".rmdup.bam")) # cvg <- coverage(gal, width = as.numeric(.ele["fragment_length"])) # seqinfo(cvg) <- seqinfo(TxDb.Hsapiens.UCSC.hg19.knownGene) # export.bw(cvg, # con=file.path(bwPath, # paste0(.name, ".rmdup.bigWig"))) # cvg # }, scc.param, names(scc.param)) ## ----eval=TRUE, fig.width=4.5, fig.height=9------------------------------ par(op) library(ChIPQC) samples <- data.frame(SampleID=gsub("^(.*?).q1.*$", "\\1", names(peaks)), Factor=gsub("^(.*?)_.*$", "\\1", names(peaks)), Replicate=gsub("^.*?_rep(.*?).q1.*$", "\\1", names(peaks)), bamReads=gsub("^(.*?).q1.*$", "\\1.rmdup.bam", names(peaks)), Peaks=names(peaks), PeakCaller="narrow") samples ## ------------------------------------------------------------------------ # ## before filter # exampleExp <- ChIPQC(experiment=samples, annotaiton="hg19") # ChIPQCreport(exampleExp, reportFolder="ChIPQC", facetBy="Factor") # samples.fil <- samples # samples.fil$Peaks <- gsub("^(.*?).q1.*$", "\\1.fil.bed", names(peaks)) # samples.fil$PeakCaller="bed" # ## after filter # exampleExp.fil <- ChIPQC(experiment=samples.fil, annotaiton="hg19") # ChIPQCreport(exampleExp.fil, # reportFolder="ChIPQC.fil", # facetBy="Factor") # samples.fil.q01 <- samples.fil # samples.fil.q01$Peaks <- # gsub("^(.*?).q1.*$", "\\1.fil.q01.bed", names(peaks)) # ## filtered by qValue from the MACS2 result # exampleExp.fil.q01 <- ChIPQC(experiment=samples.fil.q01, # annotaiton="hg19") # ChIPQCreport(exampleExp.fil.q01, # reportFolder="ChIPQC.fil.q01", # facetBy="Factor") ## ------------------------------------------------------------------------ # ol <- findOverlapsOfPeaks(GSE66081, connectedPeaks="keepAll") ## ----eval=TRUE, message=FALSE, warning=FALSE, fig.width=3.5, fig.height=3.5---- makeVennDiagram(ol) ## ------------------------------------------------------------------------ # peaks.summit <- lapply(peaks, function(.ele) { # .ele <- shift(.ele, .ele$peak) # width(.ele) <- 1 # .ele # }) # # TEAD.summit <- unlist(GRangesList(peaks.summit[group=="TEAD4"])) # TAZ.summit <- unlist(GRangesList(peaks.summit[group=="TAZ"])) # bof <- binOverFeature(TEAD.summit, annotationData=TAZ.summit, # radius=300, nbins=300, FUN=length) ## ----eval=TRUE, message=FALSE, warning=FALSE, fig.width=3.5, fig.height=3.5---- plot(as.numeric(rownames(bof)), bof, ylab="Peak density", xlab="Distance to the summit of TAZ peaks", main="Position of TEAD4 peak summit", type="l", xlim=c(-300, 300)) ## ------------------------------------------------------------------------ # overlaps <- ol$peaklist[["TAZ///TEAD4///YAP"]] # seqlevelsStyle(overlaps) ## == UCSC # aCR<-assignChromosomeRegion(overlaps, nucleotideLevel=FALSE, # precedence=c("Promoters", # "immediateDownstream", # "fiveUTRs", "threeUTRs", # "Exons", "Introns"), # TxDb=TxDb.Hsapiens.UCSC.hg19.knownGene) ## ----eval=TRUE, message=FALSE, warning=FALSE, fig.width=6, fig.height=4.5---- par(mar=c(10.1, 4.1, 4.1, 2.1)) barplot(aCR$percentage, las=3) ## ------------------------------------------------------------------------ # library(TxDb.Hsapiens.UCSC.hg19.knownGene) # annoData <- annoGR(TxDb.Hsapiens.UCSC.hg19.knownGene, feature="gene") ## ----eval=TRUE----------------------------------------------------------- annoData ## ------------------------------------------------------------------------ # ## annotate the peaks by nearest annotations. # YAP.TAZ.TEAD4.nearest <- # annotatePeakInBatch(overlaps, # AnnotationData=annoData) # ## annotate the peaks by both side in a given range. # YAP.TAZ.TEAD4.2side <- # annotatePeakInBatch(overlaps, # AnnotationData=annoData, # output="bindingRegion_bothSidesNSS", # bindingRegion=c(-100000, 100000)) ## ----eval=TRUE, message=FALSE, warning=FALSE, fig.width=4.5, fig.height=4.5---- pie1(table(YAP.TAZ.TEAD4.2side$insideFeature)) ## ------------------------------------------------------------------------ # ## save annotations # YAP.TAZ.TEAD4.2side.m <- as.data.frame(YAP.TAZ.TEAD4.2side) # library(WriteXLS) # WriteXLS("YAP.TAZ.TEAD4.2side.m", # "YAP.TAZ.TEAD4.overlapping.peaks.anno.xls") ## ------------------------------------------------------------------------ # ## enrichment analysis # library(org.Hs.eg.db) # over <- getEnrichedGO(YAP.TAZ.TEAD4.2side, # orgAnn="org.Hs.eg.db", # feature_id_type="entrez_id", # maxP=.01, condense=TRUE) ## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------- head(over[["bp"]][, -c(3, 10)]) dim(over[["bp"]]) ## ------------------------------------------------------------------------ # library(reactome.db) # path <- getEnrichedPATH(YAP.TAZ.TEAD4.2side, # orgAnn="org.Hs.eg.db", # pathAnn="reactome.db", # feature_id_type="entrez_id", # maxP=.05) ## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------- head(path) ## ------------------------------------------------------------------------ # bdp <- peaksNearBDP(overlaps, annoData, MaxDistance=100000) ## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------- bdp ## ------------------------------------------------------------------------ # library(trackViewer) # gene <- "SYDE2" # eID <- mget(gene, org.Hs.egSYMBOL2EG) # gr <- as(annoData[eID[[1]]], "GRanges") # gr.promoter <- promoters(gr, upstream=5000, downstream=2000) # seqlevels(gr.promoter) <- "chr1" # seqinfo(gr.promoter) <- seqinfo(gr.promoter)["chr1"] # bams.rep1 <- sub(".bam", ".rmdup.bam", # bam.files[grepl("rep1", bam.files)]) # syde <- lapply(bams.rep1, importBam, ranges=gr.promoter) # names(syde) <- gsub("_rep1.rmdup.bam", "", bams.rep1) # trs <- geneModelFromTxdb(TxDb.Hsapiens.UCSC.hg19.knownGene, # orgDb = org.Hs.eg.db, gr = gr.promoter) # trs.names <- sapply(trs, function(.ele) .ele@name) # trs <- trs[match(gene, trs.names)] # names(trs) <- gene # optSty <- optimizeStyle(trackList(syde, trs, heightDist=c(.9, .1)), # theme="col") # trackList <- optSty$tracks # viewerStyle <- optSty$style # for(i in 1:length(trackList)) # setTrackStyleParam(trackList[[i]], "ylabgp", list(cex=.8)) ## ----eval=TRUE----------------------------------------------------------- vp <- viewTracks(trackList, gr=gr.promoter, viewerStyle=viewerStyle) ## ----eval=TRUE----------------------------------------------------------- browseTracks(trackList, gr=gr.promoter) ## ------------------------------------------------------------------------ # dist2TSS <- # lapply(list(YAP=GSE66081[["YAP"]], # TAZ=GSE66081[["TAZ"]], # TEAD4=GSE66081[["TEAD4"]], # YAP.TAZ.TEAD4=ol.b$peaklist[["YAP.TAZ///TEAD"]]), # function(.ele) { # .ele <- annotatePeakInBatch(.ele, # AnnotationData=annoData, # output="nearestLocation") # .ele$shortestDistance}) # # dist2TSS.cut <- lapply(dist2TSS, cut, breaks=c(0, 1e3, 1e4, 1e5, 1e10)) # dist2TSS.table <- sapply(dist2TSS.cut, table) # dist2TSS.percentage <- apply(dist2TSS.table, 2, # function(.ele) .ele/sum(.ele)) # library(ggplot2) # library(reshape2) # dist2TSS.percentage <- melt(dist2TSS.percentage) # dist2TSS.percentage$value <- dist2TSS.percentage$value * 100 ## ----eval=TRUE, message=FALSE, warning=FALSE, fig.width=5.5, fig.height=3.5---- ggplot(dist2TSS.percentage, aes(x=Var2, y=value, fill=Var1)) + geom_bar(stat="identity") + xlab("") + ylab("%") + labs(title="Distance to TSS") + theme_bw() + scale_fill_manual(name="", values=c("#FF0000FF", "#FF0000BB", "#FF000077", "#FF000033"), labels=c("<1kb", "1-10kb", "10-100kb", ">100kb")) ## ------------------------------------------------------------------------ # library(GEOquery) # getGEOSuppFiles("GSE49651") # epipeaks.files <- dir("GSE49651", full.names=TRUE) # library(ChIPpeakAnno) # epipeaks <- lapply(epipeaks.files[3:5], # function(.ele) toGRanges(gzfile(.ele), format="BED")) # names(epipeaks) <- # gsub("GSE49651_MDAMB231.(.*?).hg19.*._peaks.txt.gz", "\\1", # basename(epipeaks.files)[3:5]) ## ------------------------------------------------------------------------ # promoter.UCSC <- promoters(TxDb.Hsapiens.UCSC.hg19.knownGene, 5e3, 5e3) # promoter.UCSC <- unique(promoter.UCSC) # attach(epipeaks) # ol.promoter <- findOverlapsOfPeaks(H3K4me1, H3K4me3, H3K27Ac, # promoter.UCSC, # ignore.strand = FALSE) # promoter <- c(ol.promoter$peaklist[["H3K4me3///H3K27Ac///promoter.UCSC"]], # ol.promoter$peaklist[["H3K4me1///H3K4me3///H3K27Ac///promoter.UCSC"]]) # enhancer.active <- ol.promoter$peaklist[["H3K4me1///H3K27Ac"]] # enhancer.inactive <- ol.promoter$peaklist[["H3K4me1"]] # # mcols(enhancer.active) <- DataFrame(type="enhancer.active") # mcols(enhancer.inactive) <- DataFrame(type="enhancer.inactive") # mcols(promoter) <- DataFrame(type="promoter") # types <- c(promoter, enhancer.active, enhancer.inactive) # # YAP.TAZ.TEAD <- ol$peaklist[["TAZ///TEAD4///YAP"]] # seqlevelsStyle(YAP.TAZ.TEAD) <- "UCSC" # YAP.TAZ.TEAD <- YAP.TAZ.TEAD[seqnames(YAP.TAZ.TEAD) %in% # seqlevels(types)] # YAP.TAZ.TEAD4.type <- findOverlaps(YAP.TAZ.TEAD, types) # tbl <- table(types[subjectHits(YAP.TAZ.TEAD4.type)]$type) # tbl["not.classified"] <- length(YAP.TAZ.TEAD) - # length(unique(queryHits(YAP.TAZ.TEAD4.type))) ## ----eval=TRUE, message=FALSE, warning=FALSE, fig.width=4.5, fig.height=4.5---- pie1(tbl) ## ------------------------------------------------------------------------ # ##heatmap # YAP.TAZ.TEAD.assigned <- YAP.TAZ.TEAD[queryHits(YAP.TAZ.TEAD4.type)] # YAP.TAZ.TEAD.assigned$type <- types[subjectHits(YAP.TAZ.TEAD4.type)]$type # YAP.TAZ.TEAD.assigned <- c(YAP.TAZ.TEAD.assigned[YAP.TAZ.TEAD.assigned$type=="enhancer.active"], YAP.TAZ.TEAD.assigned[YAP.TAZ.TEAD.assigned$type=="promoter"]) # YAP.TAZ.TEAD.assigned <- unique(YAP.TAZ.TEAD.assigned) # # library(rtracklayer) # YAP.TAZ.TEAD.assigned.center <- # reCenterPeaks(YAP.TAZ.TEAD.assigned, width=1) # YAP.TAZ.TEAD.assigned.reCenter <- # reCenterPeaks(YAP.TAZ.TEAD.assigned, width=2000) # untar("GSE49651/GSE49651_RAW.tar") # # bams <- c("YAP", "TAZ", "TEAD4", "JUN") # library(BSgenome.Hsapiens.UCSC.hg19) # len <- seqlengths(Hsapiens) # write.table(len, file="hg19.genome.txt", # quote=FALSE, col.names=FALSE, # sep="\t") # sapply(bams, function(.ele){ # system(paste0("genomeCoverageBed -bga -split -ibam ", # .ele, "_rep1.rmdup.bam -g hg19.genome.txt > ", # .ele, "_rep1.bedGraph")) # }) # # sapply(c("GSM1204470_MDAMB231.H3K4me1_1.hg19.tags.bedGraph.gz", # "GSM1204472_MDAMB231.H3K4me3_1.hg19.tags.bedGraph.gz", # "GSM1204474_MDAMB231.H3K27Ac_1.hg19.tags.bedGraph.gz"), gunzip) # files <- c(YAP="YAP_rep1.bedGraph", # TAZ="TAZ_rep1.bedGraph", # TEAD4="TEAD4_rep1.bedGraph", # JUN="JUN_rep1.bedGraph", # H3K4me1="GSM1204470_MDAMB231.H3K4me1_1.hg19.tags.bedGraph", # H3K4me3="GSM1204472_MDAMB231.H3K4me3_1.hg19.tags.bedGraph", # H3K27Ac="GSM1204474_MDAMB231.H3K27Ac_1.hg19.tags.bedGraph") # export(YAP.TAZ.TEAD.assigned.reCenter, "tmp.bed", format="BED") # sapply(files, function(.ele) # system(paste("intersectBed -a", .ele, "-b tmp.bed >", # gsub("bedGraph", "sub.bedGraph", .ele)))) # unlink("tmp.bed") # library(trackViewer) # cvglists <- sapply(gsub("bedGraph", "sub.bedGraph", files), # importData, format="bedGraph") # names(cvglists) <- names(files) # sig <- featureAlignedSignal(cvglists, YAP.TAZ.TEAD.assigned.center, # upstream=1000, downstream=1000) ## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------- names(sig) heatmap <- featureAlignedHeatmap(sig, YAP.TAZ.TEAD.assigned.center, upstream=1000, downstream=1000, annoMcols=c("type"), margin=c(.1, .01, .15, .25)) ## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------- featureAlignedDistribution(sig, YAP.TAZ.TEAD.assigned.center, upstream=1000, downstream=1000, type="l") ## ------------------------------------------------------------------------ # library(BSgenome.Hsapiens.UCSC.hg19) # YAP.TAZ.TEAD4.uniq <- unique(YAP.TAZ.TEAD4) # YAP.TAZ.TEAD4.uniq <- YAP.TAZ.TEAD4.uniq[!is.na(YAP.TAZ.TEAD4.uniq$feature)] # strand(YAP.TAZ.TEAD4.uniq) <- as.character(YAP.TAZ.TEAD4.uniq$feature_strand) # seq <- getAllPeakSequence(YAP.TAZ.TEAD4.uniq, # upstream=0, downstream=0, # genome=Hsapiens) # freqs <- oligoFrequency(Hsapiens$chr1, MarkovOrder=3) # os <- oligoSummary(seq, oligoLength=6, MarkovOrder=3, # quickMotif=TRUE, freqs=freqs, revcomp=TRUE) # zscore <- sort(os$zscore) ## ----eval=TRUE, message=FALSE, warning=FALSE, fig.width=3.5, fig.height=3.5---- h <- hist(zscore, breaks=100) text(zscore[length(zscore)], max(h$counts)/10, labels=names(zscore[length(zscore)]), adj=1) ## ------------------------------------------------------------------------ # library(motifStack) # pfms <- mapply(function(.ele, id) # new("pfm", mat=.ele, name=paste("SAMPLE motif", id)), # os$motifs, 1:length(os$motifs)) ## ----eval=TRUE, message=FALSE, warning=FALSE, fig.width=4.5, fig.height=3.5---- motifStack(pfms) ## ------------------------------------------------------------------------ # ap1 <- GSE66081[["JUN"]] # seq.ap1 <- getAllPeakSequence(ap1, upstream=0, downstream=0, genome=Hsapiens) # os.ap1 <- oligoSummary(seq.ap1, oligoLength=7, MarkovOrder=3, # quickMotif=FALSE, freqs=freqs, revcomp=TRUE) # zscore.ap1 <- sort(os.ap1$zscore) ## ----eval=TRUE, fig.width=3.5, fig.height=3.5---------------------------- h.ap1 <- hist(zscore.ap1, breaks=100) text(zscore.ap1[length(zscore.ap1)], max(h.ap1$counts)/10, labels=names(zscore.ap1[length(zscore.ap1)]), adj=1) ## ------------------------------------------------------------------------ # library(GEOquery) # library(limma) # gset <- getGEO("GSE59232", GSEMatrix =TRUE, AnnotGPL=TRUE)[[1]] # pD <- pData(gset)[, c("title", "geo_accession")] # gset <- gset[, grepl("MDA-MB-231 cells", pD$title, fixed = TRUE)] # pD <- pData(gset)[, c("title", "geo_accession")] # ex <- exprs(gset) ## log2 transformed expression profile # fl <- do.call(rbind, # strsplit(sub("MDA-MB-231 cells ", # "", pD$title), ", ")) # sml <- sub("#.$", "", fl[, 1]) # design.table <- data.frame(group=sml, # batch=fl[, 1], # replicate=fl[, 2]) # design <- model.matrix(~ -1 + group + batch + replicate, # data = design.table) # colnames(design) <- sub("group", "", # make.names(colnames(design))) # fit <- lmFit(gset, design) # cont.matrix <- makeContrasts(contrasts = "siYAP.TAZ-siControl", # levels=design) # fit2 <- contrasts.fit(fit, cont.matrix) # fit2 <- eBayes(fit2) # tT <- topTable(fit2, adjust="fdr", number = nrow(fit2)) # # # library(simpIntLists) # interactionmap <- findInteractionList("human", "Official") # interactionmap <- lapply(interactionmap, function(.ele) # cbind(from=as.character(.ele$name), # to=as.character(.ele$interactors))) # interactionmap <- do.call(rbind, interactionmap) # library(GeneNetworkBuilder) # rootgene <- "TAZ" # TFbindingTable <- # cbind(from=rootgene, # to=unique(YAP.TAZ.TEAD4.nearest.anno$symbol)) # sifNetwork <- # buildNetwork(TFbindingTable=TFbindingTable, # interactionmap=interactionmap, level=2) # tT$symbols <- tT$Gene.symbol # tT <- tT[, c("ID", "logFC", "P.Value", "symbols")] # expressionData <- tT[!duplicated(tT[, "symbols"]), ] # cifNetwork <- # filterNetwork(rootgene=rootgene, sifNetwork=sifNetwork, # exprsData=expressionData, mergeBy="symbols", # miRNAlist=character(0), # tolerance=1, cutoffPVal=0.001, # cutoffLFC=1.5) # ## polish network # gR<-polishNetwork(cifNetwork) ## ----eval=TRUE----------------------------------------------------------- ## plot by Rgraphviz library(Rgraphviz) plotNetwork<-function(gR, layouttype="dot", ...){ if(!is(gR,"graphNEL")) stop("gR must be a graphNEL object") if(!(GeneNetworkBuilder:::inList(layouttype, c("dot", "neato", "twopi", "circo", "fdp")))){ stop("layouttype must be dot, neato, twopi, circo or fdp") } g1<-Rgraphviz::layoutGraph(gR, layoutType=layouttype, ...) nodeRenderInfo(g1)$col <- nodeRenderInfo(gR)$col nodeRenderInfo(g1)$fill <- nodeRenderInfo(gR)$fill renderGraph(g1) } plotNetwork(gR) ## ---- eval=TRUE---------------------------------------------------------- browseNetwork(gR) ## ----eval=TRUE----------------------------------------------------------- sessionInfo()