## ---- include = FALSE---------------------------------------------------------
# Set the knitr chunk option `crop` to NULL for every chunk.
knitr::opts_chunk$set(crop=NULL)

## ----eval=TRUE----------------------------------------------------------------
# Attach netDx with library() rather than require(): require() only returns
# FALSE when the package is missing, and with warnings suppressed the script
# would carry on and fail later with an unrelated "could not find function"
# error. library() stops here with a clear message instead.
suppressWarnings(suppressMessages(library(netDx)))

## ----eval=TRUE----------------------------------------------------------------
suppressMessages(library(curatedTCGAData))

## ---- eval=TRUE---------------------------------------------------------------
# Fetch the two BRCA assay types listed below (a real fetch, not a dry run),
# silencing any messages emitted by the call.
brca <- suppressMessages(
  curatedTCGAData(
    "BRCA",
    c("mRNAArray", "miRNASeqGene"),
    dry.run = FALSE,
    version = "1.1.38"
  )
)

## ---- eval = TRUE-------------------------------------------------------------
summary(brca)

## ---- eval=TRUE---------------------------------------------------------------
# prepareData() is presumably defined by the sourced helper script; the path
# is relative, so it is resolved against the current working directory.
source("prepare_data.R")
brca <- prepareData(brca, setBinary = TRUE)

## ----eval=TRUE----------------------------------------------------------------
# Copy the patientID column of the sample metadata into a column named ID.
pID <- colData(brca)$patientID
colData(brca)$ID <- pID

## ---- eval=TRUE---------------------------------------------------------------
# Build the grouping rules: for every data layer in `brca`, create a single
# group that contains all of that layer's measures (row names) and carries the
# same name as the layer itself.
expr <- assays(brca)
groupList <- list()
# seq_along() yields an empty sequence when there are no layers, whereas
# 1:length(expr) would iterate over c(1, 0) and fail on a missing element.
for (k in seq_along(expr)) {  # loop over all layers
  cur <- expr[[k]]
  nm <- names(expr)[k]

  # all measures from this layer go into our single PSN, named after the layer
  layerGroup <- list(rownames(cur))
  names(layerGroup) <- nm
  groupList[[nm]] <- layerGroup
}

## ----eval=TRUE----------------------------------------------------------------
# Use "pearsonCorr" as the similarity measure for every layer in groupList.
# One entry is generated per layer, so this stays correct if the number of
# layers changes (the previous two-element literal assumed exactly two).
sims <- rep(list("pearsonCorr"), length(groupList))
names(sims) <- names(groupList)

## ---- eval=TRUE---------------------------------------------------------------
# Convert the input object into the list form returned by dataList2List().
brcaData <- dataList2List(brca, groupList)

## ---- eval=TRUE---------------------------------------------------------------
# Append the phenotype table to the assay list under the name "pheno".
# It is named at construction instead of via a hard-coded position, so the
# label is right regardless of how many assays precede it.
brcaList <- brcaData$assays
brcaList <- c(brcaList, list(pheno = brcaData$pheno))

## ---- eval=TRUE---------------------------------------------------------------
brca <- convertToMAE(brcaList)

## ----eval=TRUE----------------------------------------------------------------
# Use roughly 75% of the available cores, but never fewer than one.
# parallel::detectCores() is documented to return NA when the core count
# cannot be determined, so fall back to a single core in that case instead of
# propagating NA into the core count.
totalCores <- parallel::detectCores()
if (is.na(totalCores)) {
  totalCores <- 1L
}
nco <- max(1L, round(totalCores * 0.75))
message(sprintf("Using %i of %i cores", nco, totalCores))

# Output goes under the session temp directory; any directory left over at
# that path is removed first so the run starts clean.
outDir <- paste(tempdir(),"pred_output",sep=getFileSep()) # use absolute path
if (file.exists(outDir)) unlink(outDir,recursive=TRUE)
numSplits <- 2L  # number of train/test splits

## ----eval=TRUE----------------------------------------------------------------
# Train the predictor (messages silenced) and print the elapsed wall-clock
# time. The seed is fixed before the call so repeated runs are reproducible.
t0 <- Sys.time()
set.seed(42) # make results reproducible
model <- suppressMessages(
  buildPredictor(
    dataList=brca,          ## your data
    groupList=groupList,    ## grouping strategy
    sims = sims,
    outDir=outDir,          ## output directory
    trainProp=0.8,          ## pct of samples to use to train model in each split
    numSplits=numSplits,    ## number of train/test splits (defined with the run settings)
    featSelCutoff=1L,       ## threshold for calling something feature-selected
    featScoreMax=2L,        ## max score for feature selection
    numCores=nco,           ## set higher for parallelizing
    debugMode=FALSE,
    keepAllData=FALSE,      ## set to TRUE for debugging or low-level files used by the predictor
    logging="none"
  )
)
t1 <- Sys.time()
print(t1-t0)

## ----lab1-getresults,eval=TRUE------------------------------------------------
# Collect results from the trained model for the distinct STATUS labels found
# in the sample metadata.
statusLabels <- unique(colData(brca)$STATUS)
results <- getResults(
  model,
  statusLabels,
  featureSelCutoff = 2L,
  featureSelPct = 0.50
)

## ---- eval=TRUE---------------------------------------------------------------
# Overview of what the results object contains.
summary(results)

## ---- eval=TRUE---------------------------------------------------------------
# Show the performance component of the results.
results$performance

## ----  eval=TRUE--------------------------------------------------------------
# Show the feature-score component of the results.
results$featureScores

## ---- eval=TRUE---------------------------------------------------------------
# Confusion matrix computed from the trained model.
confMat <- confusionMatrix(model)

## ---- eval=TRUE---------------------------------------------------------------
# Show the selected-features component of the results.
results$selectedFeatures

## ---- fig.width=8,fig.height=8, eval=TRUE-------------------------------------
## NOTE(review): an earlier comment here said this call does not work in
## RStudio and had been commented out in favour of a saved PSN file. The call
## is live in this script -- confirm whether the RStudio limitation still holds.
# Build the patient similarity network from the selected features.
psn <- getPSN(brca,groupList,sims = sims,selectedFeatures=results$selectedFeatures)

# Attach Rtsne with library() so a missing installation stops here with a
# clear error; require() would only return FALSE and let the run continue.
library(Rtsne)

# Plot the unpruned network with tSNEPlotter(), passing the sample metadata.
tsne <- tSNEPlotter(
  psn$patientSimNetwork_unpruned,
  colData(brca)
)

## -----------------------------------------------------------------------------
# Print the R version, platform and attached/loaded package versions so the
# run can be reproduced.
sessionInfo()