In : PLOS ONE, (13), 3, pp. e0194317, 2018
To reproduce the results shown in the paper (both Figures and Tables) you need to have a current working installation of R (www.r-project.org).
On top of this installation of R, you also need to install a series of extra packages, which we will load below.
The following code snipet includes basic settings that you need to execute before proceding with replicating our results. Among these settings you have the loading of the necessary extra packages (that you must first install before executing this code), and also the loading of a script file that you can download here.
## Font size used on graphs
## txtSz <- 14 # this was used for the PDF figures of the paper
txtSz <- 10
## required packages
library(knitr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(performanceEstimation)
library(tidyr)
## Loading a script containing some auxiliary functions
source("auxFuncs.R")
You also need to download the Rdata files that contain the R objects with the results of the estimation procedures we compare in the paper. Download the two following files:
In summary, you should now have on your current R working directory the following files:
library()
function.The following code produces the results show in Table 4 of the paper. This code will show the table in this page, but for the paper we generated a Latex version of the table, whose code is commented in the code.
metr <- "Alpha"
## Loading the results of the selected metric
loadAndPrepare(metr)
medScores <- group_by(res,Strat2,Lang) %>%
summarise(medDif = median(dif2Gold)) %>%
ungroup()
tab4 <- spread(medScores,Strat2,medDif)
tab4[,-1] <- round(tab4[,-1],3)
## Last row with medians
tab4 <- rbind(tab4, c("Median",round(apply(tab4[,-1],2,median),3)))
## Table 4
kable(tab4)
Lang | xval(9:1, strat, block) | xval(9:1, no-strat, block) | xval(9:1, strat, rand) | seq(9:1, 20, equi) | seq(9:1, 10, equi) | seq(2:1, 10, semi-equi) |
---|---|---|---|---|---|---|
alb | 0.052 | 0.036 | 0.206 | 0.001 | 0.001 | 0.001 |
bul | 0.009 | 0.013 | 0.046 | -0.019 | -0.025 | -0.043 |
eng | -0.016 | -0.017 | -0.01 | -0.04 | -0.042 | -0.039 |
ger | 0.037 | 0.049 | 0.059 | 0.009 | 0.01 | 0.001 |
hun | 0.009 | 0.013 | 0.025 | -0.011 | -0.007 | -0.007 |
pol | 0.011 | 0.016 | 0.054 | -0.02 | -0.017 | -0.031 |
por | -0.048 | -0.048 | -0.015 | -0.04 | -0.045 | -0.085 |
rus | 0.008 | 0.008 | 0.029 | -0.027 | -0.029 | -0.045 |
scb | -0.046 | -0.051 | 0.026 | -0.047 | -0.043 | -0.069 |
slk | 0.018 | 0.015 | 0.055 | -0.025 | -0.023 | -0.039 |
slv | 0.003 | -0.004 | 0.04 | -0.029 | -0.026 | -0.031 |
spa | -0.008 | 0.031 | 0.07 | 0.012 | 0.011 | -0.011 |
swe | 0.055 | 0.057 | 0.106 | 0.011 | 0.006 | -0.028 |
Median | 0.009 | 0.013 | 0.046 | -0.02 | -0.023 | -0.031 |
## Latex version of the table (uncomment next line to get the latex code)
## Hmisc:::latex(tab4,file="",rowname=NULL)
Similar code, now for Table 5.
metr <- "F"
## Loading the results of the selected metric
loadAndPrepare(metr)
medScores <- group_by(res,Strat2,Lang) %>%
summarise(medDif = median(dif2Gold)) %>% ungroup()
tab5 <- spread(medScores,Strat2,medDif)
tab5[,-1] <- round(tab5[,-1],3)
## Last row with medians
tab5 <- rbind(tab5, c("Median",round(apply(tab5[,-1],2,median),3)))
kable(tab5)
Lang | xval(9:1, strat, block) | xval(9:1, no-strat, block) | xval(9:1, strat, rand) | seq(9:1, 20, equi) | seq(9:1, 10, equi) | seq(2:1, 10, semi-equi) |
---|---|---|---|---|---|---|
alb | 0.026 | 0.016 | 0.137 | -0.014 | -0.007 | -0.009 |
bul | 0.02 | 0.024 | 0.047 | 0.003 | -0.002 | -0.019 |
eng | -0.019 | -0.02 | -0.015 | -0.027 | -0.027 | -0.028 |
ger | 0.056 | 0.058 | 0.072 | 0.025 | 0.028 | 0.014 |
hun | 0.022 | 0.022 | 0.03 | -0.006 | -0.009 | -0.005 |
pol | 0.013 | 0.02 | 0.044 | -0.001 | 0 | -0.007 |
por | -0.05 | -0.045 | -0.04 | -0.049 | -0.056 | -0.092 |
rus | 0.008 | 0.01 | 0.025 | -0.019 | -0.018 | -0.021 |
scb | -0.034 | -0.037 | 0 | -0.03 | -0.032 | -0.05 |
slk | 0.005 | 0.008 | 0.025 | -0.013 | -0.015 | -0.013 |
slv | 0.003 | 0 | 0.029 | -0.022 | -0.026 | -0.032 |
spa | -0.001 | 0.024 | 0.06 | 0.007 | 0.01 | 0.012 |
swe | 0.03 | 0.037 | 0.071 | 0.008 | 0.006 | -0.011 |
Median | 0.008 | 0.016 | 0.03 | -0.013 | -0.009 | -0.013 |
## Latex version of the table (uncomment next line to get the latex code)
## Hmisc:::latex(tab5,file="",rowname=NULL)
The following code reproduces the graph shown in Figure 3.
metr <- "Alpha"
## Loading the results of the selected metric
loadAndPrepare(metr)
## dif2Gold is = Estimate - GoldStandard
g <- ggplot(res, aes(x=Lang,y=dif2Gold,col=Lang)) +
geom_boxplot() + geom_hline(yintercept=0, alpha=0.6) +
facet_wrap(~ Strat2) +
guides(col=FALSE) +
xlab("") +
ylab("Distribution of the Difference to Gold Standard") +
theme(text=element_text(size=txtSz), axis.text.x=element_text(angle=90,vjust=0.3))
## THe graph of figure 3
g
The following code reproduces the graph shown in Figure 4.
metr <- "Alpha"
## Loading the results of the selected metric
loadAndPrepare(metr)
## dif2Gold is = Estimate - GoldStandard
g <- ggplot(res, aes(x=Strat2,y=dif2Gold)) +
geom_boxplot() + geom_hline(yintercept=0, col= "red", alpha=0.5, lwd=1.1) +
guides(col=FALSE) + ylab("Distribution of the Difference to Gold Standard") +
xlab("") +
theme(text=element_text(size=txtSz))
# theme(text=element_text(size=txtSz), axis.text.x=element_text(angle=25,vjust=0.3))
## Graph on figure 4
g
The following code reproduces the graph shown in Figure 5.
metr <- "F"
## Loading the results of the selected metric
loadAndPrepare(metr)
## dif2Gold is = Estimate - GoldStandard
g <- ggplot(res, aes(x=Strat2,y=dif2Gold)) +
geom_boxplot() + geom_hline(yintercept=0, col= "red", alpha=0.5, lwd=1.1) +
guides(col=FALSE) + ylab("Distribution of the Difference to Gold Standard") +
xlab("") +
theme(text=element_text(size=txtSz))
# theme(text=element_text(size=txtSz), axis.text.x=element_text(angle=25,vjust=0.3))
## Graph on figure 5
g
The following code reproduces the graph shown in Figure 6.
metr <- "Alpha"
## Loading the results of the selected metric
loadAndPrepare(metr)
thrsh1 <- 5
thrsh2 <- 30
d1 <- select(res,Strat2,Lang,Iter,rat2Gold) %>%
mutate(percDiff=100*(rat2Gold-1),
ok = between(percDiff,-thrsh1, thrsh1)) %>%
filter(ok) %>%
group_by(Strat2,Lang) %>% tally()
d2 <- select(res,Strat2,Lang,Iter,rat2Gold) %>%
mutate(percDiff=100*(rat2Gold-1),
bad = !between(percDiff,-thrsh2, thrsh2)) %>%
filter(bad) %>%
group_by(Strat2,Lang) %>% tally()
d <- select(res,Strat2,Lang,Iter,rat2Gold) %>%
mutate(percDiff=100*(rat2Gold-1)) %>%
group_by(Strat2,Lang) %>% tally()
tots <- left_join(d,d1,by=c("Strat2","Lang")) %>%
rename("Total" = n.x, "TotalOK" = n.y) %>%
left_join(d2,by=c("Strat2","Lang")) %>%
rename("TotalBad" = n)
tots[is.na(tots)] <- 0
percs <- mutate(tots, OK=100*TotalOK/Total,
Bad=100*TotalBad/Total,
Rest=100-OK-Bad) %>%
select(Strat2,Lang,OK,Bad,Rest)
dt <- gather(percs,Type,Perc,OK:Rest)
dt$Type <- factor(dt$Type,levels=c("Bad","Rest","OK"),
labels=c(paste(">",thrsh2),paste("[",thrsh1,",",thrsh2,"]"),paste("<",thrsh1)))
g <- ggplot(dt,aes(x=Lang,y=Perc,fill=Type)) +
geom_col(position="fill") +
facet_wrap(~ Strat2) +
ylab(paste("Proportion of Iterations with Difference to Gold Standard between",thrsh1,"and",thrsh2))
xlab("") +
theme(text=element_text(size=txtSz), axis.text.x=element_text(angle=90,vjust=0.3))
## NULL
## Graph on screen
g
The following code reproduces the graph shown in Figure 7.
metr <- "Alpha"
## Loading the results of the selected metric
loadAndPrepare(metr)
thrsh1 <- 5
thrsh2 <- 30
d1 <- select(res,Strat2,Iter,rat2Gold) %>%
mutate(percDiff=100*(rat2Gold-1),
ok = between(percDiff,-thrsh1, thrsh1)) %>%
filter(ok) %>%
group_by(Strat2) %>% tally()
d2 <- select(res,Strat2,Iter,rat2Gold) %>%
mutate(percDiff=100*(rat2Gold-1),
bad = !between(percDiff,-thrsh2, thrsh2)) %>%
filter(bad) %>%
group_by(Strat2) %>% tally()
d <- select(res,Strat2,Iter,rat2Gold) %>%
mutate(percDiff=100*(rat2Gold-1)) %>%
group_by(Strat2) %>% tally()
tots <- left_join(d,d1,by=c("Strat2")) %>%
rename("Total" = n.x, "TotalOK" = n.y) %>%
left_join(d2,by=c("Strat2")) %>%
rename("TotalBad" = n)
tots[is.na(tots)] <- 0
percs <- mutate(tots, OK=100*TotalOK/Total,
Bad=100*TotalBad/Total,
Rest=100-OK-Bad) %>%
select(Strat2,OK,Bad,Rest)
dt <- gather(percs,Type,Perc,OK:Rest)
dt$Type <- factor(dt$Type,levels=c("Bad","Rest","OK"),
labels=c(paste(">",thrsh2),paste("[",thrsh1,",",thrsh2,"]"),paste("<",thrsh1)))
g <- ggplot(dt,aes(x=Strat2,y=Perc,fill=Type)) +
geom_col(position="fill") +
ylab(paste("Proportion of Differences to Gold Standard between",thrsh1,"and",thrsh2)) +
xlab("") +
theme(text=element_text(size=txtSz), axis.text.x=element_text(angle=25,vjust=0.3))
## Graph on screen
g
The following code reproduces the graph shown in Figure 8.
metr <- "F"
## Loading the results of the selected metric
loadAndPrepare(metr)
thrsh1 <- 5
thrsh2 <- 30
d1 <- select(res,Strat2,Iter,rat2Gold) %>%
mutate(percDiff=100*(rat2Gold-1),
ok = between(percDiff,-thrsh1, thrsh1)) %>%
filter(ok) %>%
group_by(Strat2) %>% tally()
d2 <- select(res,Strat2,Iter,rat2Gold) %>%
mutate(percDiff=100*(rat2Gold-1),
bad = !between(percDiff,-thrsh2, thrsh2)) %>%
filter(bad) %>%
group_by(Strat2) %>% tally()
d <- select(res,Strat2,Iter,rat2Gold) %>%
mutate(percDiff=100*(rat2Gold-1)) %>%
group_by(Strat2) %>% tally()
tots <- left_join(d,d1,by=c("Strat2")) %>%
rename("Total" = n.x, "TotalOK" = n.y) %>%
left_join(d2,by=c("Strat2")) %>%
rename("TotalBad" = n)
tots[is.na(tots)] <- 0
percs <- mutate(tots, OK=100*TotalOK/Total,
Bad=100*TotalBad/Total,
Rest=100-OK-Bad) %>%
select(Strat2,OK,Bad,Rest)
dt <- gather(percs,Type,Perc,OK:Rest)
dt$Type <- factor(dt$Type,levels=c("Bad","Rest","OK"),
labels=c(paste(">",thrsh2),paste("[",thrsh1,",",thrsh2,"]"),paste("<",thrsh1)))
g <- ggplot(dt,aes(x=Strat2,y=Perc,fill=Type)) +
geom_col(position="fill") +
ylab(paste("Proportion of Differences to Gold Standard between",thrsh1,"and",thrsh2)) +
xlab("") +
theme(text=element_text(size=txtSz), axis.text.x=element_text(angle=25,vjust=0.3))
## Graph on screen
g
The following code reproduces the graph shown in Figure 9.
metr <- "Alpha"
## Loading the results of the selected metric
loadAndPrepare(metr)
## The source raw data for the tests (the results on each iteration)
rawScores <- mutate(res, absDif=abs(dif2Gold)) %>% select(Strat2, Lang, absDif)
## Now building a performanceEstimation ComparisonResults object
ds <- unique(rawScores$Lang) # different languages correspond to diff data sets
nds <- length(ds)
tasks <- sapply(ds, function(d) PredTask(x ~.,data.frame(x=1:2,y=2:3),d))
al <- unique(as.character(rawScores$Strat2)) # different strategies correspond to diff algorithms
nal <- length(al)
workflows <- sapply(al, function(a) Workflow(wfID=a))
estTask <- EstimationTask(metrics=metr)
peRes <- vector("list",nds)
names(peRes) <- ds
for(i in 1:nds) {
peRes[[i]] <- vector("list",nal)
names(peRes[[i]]) <- al
}
for(d in ds) {
for(a in al) {
peRes[[d]][[a]] <- EstimationResults(tasks[[d]],workflows[[a]],estTask,
as.matrix(filter(rawScores,Lang==d,Strat2==a) %>% select(absDif)),list())
}
}
obj <- ComparisonResults(peRes)
## Now onto the tests
comp <- pairedComparisons(obj, baseline="xval(9:1, strat, rand)")
CDdiagram.Nemenyi(comp)
The following code reproduces the graph shown in Figure 10.
metr <- "F"
## Loading the results of the selected metric
loadAndPrepare(metr)
## The source raw data for the tests (the results on each iteration)
rawScores <- mutate(res, absDif=abs(dif2Gold)) %>% select(Strat2, Lang, absDif)
## Now building a performanceEstimation ComparisonResults object
ds <- unique(rawScores$Lang) # different languages correspond to diff data sets
nds <- length(ds)
tasks <- sapply(ds, function(d) PredTask(x ~.,data.frame(x=1:2,y=2:3),d))
al <- unique(as.character(rawScores$Strat2)) # different strategies correspond to diff algorithms
nal <- length(al)
workflows <- sapply(al, function(a) Workflow(wfID=a))
estTask <- EstimationTask(metrics=metr)
peRes <- vector("list",nds)
names(peRes) <- ds
for(i in 1:nds) {
peRes[[i]] <- vector("list",nal)
names(peRes[[i]]) <- al
}
for(d in ds) {
for(a in al) {
peRes[[d]][[a]] <- EstimationResults(tasks[[d]],workflows[[a]],estTask,
as.matrix(filter(rawScores,Lang==d,Strat2==a) %>% select(absDif)),list())
}
}
obj <- ComparisonResults(peRes)
## Now onto the tests
comp <- pairedComparisons(obj, baseline="xval(9:1, strat, rand)")
CDdiagram.Nemenyi(comp)
The following code carries out the Wilcoxon signed rank tests supporting the graphs shown in Figure 11.
First for the F metric
metr <- "F"
## Loading the results of the selected metric
loadAndPrepare(metr)
## The source raw data for the tests (the results on each iteration)
rawScores <- mutate(res, absDif=abs(dif2Gold)) %>% select(Strat2, Lang, absDif)
## Now building a performanceEstimation ComparisonResults object
task <- PredTask(x ~.,data.frame(x=1:2,y=2:3)) # a fake task
al <- unique(as.character(rawScores$Strat2)) # different strategies correspond to diff algorithms
nal <- length(al)
workflows <- sapply(al, function(a) Workflow(wfID=a))
estTask <- EstimationTask(metrics="alpha")
## One single data set
nds <- 1
peRes3 <- vector("list",nds)
names(peRes3) <- "Langs"
for(i in 1:nds) {
peRes3[[i]] <- vector("list",nal)
names(peRes3[[i]]) <- al
}
avgScores <- group_by(rawScores,Strat2,Lang) %>% summarize(avgAbsDif = mean(absDif)) %>% ungroup()
for(a in al) {
peRes3[[1]][[a]] <- EstimationResults(task,workflows[[a]],estTask, as.matrix(filter(avgScores,Strat2==a) %>% select(avgAbsDif)),list())
}
objW <- ComparisonResults(peRes3)
## Now onto the tests
## Using xval(9:1, strat, block) as baseline
compX <- pairedComparisons(objW, baseline="xval(9:1, strat, block)")
compX$alpha$WilcoxonSignedRank.test
## , , Langs
##
## MedScore DiffMedScores p.value
## xval(9:1, strat, block) 0.02740000 NA NA
## xval(9:1, strat, rand) 0.05042105 -0.02302105 0.01049805
## xval(9:1, no-strat, block) 0.02625000 0.00115000 0.02661133
## seq(2:1, 10, semi-equi) 0.03200000 -0.00460000 0.24389648
## seq(9:1, 20, equi) 0.03050000 -0.00310000 0.73535156
## seq(9:1, 10, equi) 0.03200000 -0.00460000 0.41430664
## Using xval(9:1, strat, block) as baseline
compS <- pairedComparisons(objW, baseline="seq(9:1, 20, equi)")
compS$alpha$WilcoxonSignedRank.test
## , , Langs
##
## MedScore DiffMedScores p.value
## seq(9:1, 20, equi) 0.03050000 NA NA
## xval(9:1, strat, block) 0.02740000 0.00310000 0.73535156
## xval(9:1, strat, rand) 0.05042105 -0.01992105 0.02148438
## xval(9:1, no-strat, block) 0.02625000 0.00425000 0.68481445
## seq(2:1, 10, semi-equi) 0.03200000 -0.00150000 0.05901713
## seq(9:1, 10, equi) 0.03200000 -0.00150000 0.02536986
Now for the Alpha metric
metr <- "Alpha"
## Loading the results of the selected metric
loadAndPrepare(metr)
## The source raw data for the tests (the results on each iteration)
rawScores <- mutate(res, absDif=abs(dif2Gold)) %>% select(Strat2, Lang, absDif)
## Now building a performanceEstimation ComparisonResults object
task <- PredTask(x ~.,data.frame(x=1:2,y=2:3)) # a fake task
al <- unique(as.character(rawScores$Strat2)) # different strategies correspond to diff algorithms
nal <- length(al)
workflows <- sapply(al, function(a) Workflow(wfID=a))
estTask <- EstimationTask(metrics="alpha")
## One single data set
nds <- 1
peRes3 <- vector("list",nds)
names(peRes3) <- "Langs"
for(i in 1:nds) {
peRes3[[i]] <- vector("list",nal)
names(peRes3[[i]]) <- al
}
avgScores <- group_by(rawScores,Strat2,Lang) %>% summarize(avgAbsDif = mean(absDif)) %>% ungroup()
for(a in al) {
peRes3[[1]][[a]] <- EstimationResults(task,workflows[[a]],estTask, as.matrix(filter(avgScores,Strat2==a) %>% select(avgAbsDif)),list())
}
objW <- ComparisonResults(peRes3)
## Now onto the tests
## Using xval(9:1, strat, block) as baseline
compX <- pairedComparisons(objW, baseline="xval(9:1, strat, block)")
compX$alpha$WilcoxonSignedRank.test
## , , Langs
##
## MedScore DiffMedScores p.value
## xval(9:1, strat, block) 0.03327273 NA NA
## xval(9:1, strat, rand) 0.06184211 -0.028569378 0.001708984
## xval(9:1, no-strat, block) 0.03454545 -0.001272727 0.094238281
## seq(2:1, 10, semi-equi) 0.03533333 -0.002060606 0.339599609
## seq(9:1, 20, equi) 0.03140000 0.001872727 0.892578125
## seq(9:1, 10, equi) 0.02980000 0.003472727 0.497314453
## Using xval(9:1, strat, block) as baseline
compS <- pairedComparisons(objW, baseline="seq(9:1, 20, equi)")
compS$alpha$WilcoxonSignedRank.test
## , , Langs
##
## MedScore DiffMedScores p.value
## seq(9:1, 20, equi) 0.03140000 NA NA
## xval(9:1, strat, block) 0.03327273 -0.001872727 0.892578125
## xval(9:1, strat, rand) 0.06184211 -0.030442105 0.013427734
## xval(9:1, no-strat, block) 0.03454545 -0.003145455 1.000000000
## seq(2:1, 10, semi-equi) 0.03533333 -0.003933333 0.006103516
## seq(9:1, 10, equi) 0.02980000 0.001600000 0.068115234