This script file contains all the R code from every section of this chapter.

The Available Data

Gene (Feature) Selection

Predicting Cytogenetic Abnormalities

Defining the Prediction Task

The Evaluation Metric

Comparing the Models

## Workflow for the ALL classification task: ANOVA gene filtering, an optional
## second feature selection step, model fitting and prediction.
##
## Arguments:
##   form                 formula; its left-hand side names the target column
##   train, test          data sets holding the target column and the gene
##                        columns (columns are selected by name)
##   learner              name of the modelling function; outside the
##                        'varclus' path "knn" is dispatched to kNN() and any
##                        other value is called through do.call()
##   learner.pars         list of extra arguments for the learner
##   predictor            name of the prediction function (non-knn learners)
##   predictor.pars       list of extra arguments for the predictor
##   featSel.meth         name of the entry of available.fsMethods to use
##   available.fsMethods  named list of feature selection settings; the first
##                        element of each entry is the method name. 'rf' keeps
##                        the top-ranked genes (second element = how many),
##                        'varclus' delegates to varsEnsemble(), any other
##                        value applies no filtering beyond the ANOVA step
##   .model               if TRUE, also return the fitted model (NULL for
##                        "knn" and for the 'varclus' path, which do not
##                        produce a single model object here)
##   ...                  forwarded to kNN(), or as a list to varsEnsemble()
##
## Returns a list with components trues (responseValues(form, test)), preds
## and model (the fitted model or NULL).
##
## NOTE: Anova(), filterfun(), genefilter(), kNN(), rowMedians(), rowIQRs(),
## responseValues() and varsEnsemble() are not defined in this block and must
## be available when the function is called.
ALLb.wf <- function(form, train, test,
                    learner, learner.pars=NULL,
                    predictor="predict",predictor.pars=NULL,
                    featSel.meth = "s2", 
                    available.fsMethods=list(s1=list("all"),s2=list('rf',30),
                                             s3=list('varclus',30,50)),
                    .model=FALSE,
                    ...)
{
    ## The characteristics of the selected feature selection method
    fs.meth <- available.fsMethods[[featSel.meth]]
    if (is.null(fs.meth))
        stop("Unknown feature selection method: ", featSel.meth, call.=FALSE)

    ## Only the generic learner branch below creates a model; initialising it
    ## here keeps the final return() valid on the 'varclus' and "knn" paths
    model <- NULL

    ## The target variable
    tgt <- as.character(form[[2]])
    tgtCol <- which(colnames(train)==tgt)

    ## Anova filtering  
    f <- Anova(train[,tgt],p=0.01)
    ff <- filterfun(f)
    genes <- genefilter(t(train[,-tgtCol]),ff)
    genes <- names(genes)[genes]
    if (length(genes) == 0)
        stop("No genes passed the ANOVA filter", call.=FALSE)
    train <- train[,c(tgt,genes)]
    test <- test[,c(tgt,genes)]
    ## After the re-ordering above the target is always the first column
    tgtCol <- 1

    ## Specific filtering 
    if (fs.meth[[1]]=='varclus') {
      pred <- varsEnsemble(tgt,train,test,fs.meth,
                            learner,learner.pars,
                            predictor,predictor.pars,
                            list(...))

    } else {
      if (fs.meth[[1]]=='rf') {
        ## require() only returns FALSE when the package is missing, so fail
        ## here with a clear message instead of at the randomForest() call
        if (!require(randomForest,quietly=TRUE))
          stop("Package 'randomForest' is needed for the 'rf' feature ",
               "selection method", call.=FALSE)
        rf <- randomForest(form,train,importance=TRUE)
        imp <- importance(rf)
        ## Never ask for more genes than survived the ANOVA filter, otherwise
        ## the ranking is padded with NAs and the column selection fails
        nKeep <- min(fs.meth[[2]], nrow(imp))
        ranking <- order(imp[,"MeanDecreaseAccuracy"], decreasing = TRUE)
        rf.genes <- rownames(imp)[ranking[seq_len(nKeep)]]
        train <- train[,c(tgt,rf.genes)]
        test <- test[,c(tgt,rf.genes)]
      }

      if (learner == 'knn') 
        ## Per-gene medians and IQRs of the training set are handed to kNN()
        ## as its stand.stats argument
        pred <- kNN(form,train,test,
                    stand.stats=list(rowMedians(t(as.matrix(train[,-tgtCol]))),
                        rowIQRs(t(as.matrix(train[,-tgtCol])))),
                    ...)
      else {
        model <- do.call(learner,c(list(form,train),learner.pars))
        pred <- do.call(predictor,c(list(model,test),predictor.pars))
      }

    }

    return(list(trues=responseValues(form,test), preds=pred, 
                model=if (.model && learner!="knn") model else NULL))

}

Note: The myALL.Rdata file loaded below can be obtained from the Other Information section in the top menu of this web page.