### Data source: https://data.mendeley.com/datasets/h3yhs5gy3w/1
### load the raw response data (one column per item)
dataset <- read.csv("dataset.csv", stringsAsFactors = FALSE)
### install required packages (if needed)
### remove # to make this code run
#install.packages(c("psych","mirt"))
### get results function
### includes also the effect size measures that were
### studied in the simulation and also more useful
### descriptive statistics
### Distractor analysis for a multiple-choice data set.
###
### data: data frame of raw responses, one column per item
### keys: vector with the correct-response key per item; the default
###       "sim" (any length-1 value) keys every item to 0 (simulated data)
###
### Returns a list with:
###   rf05          - per item, flags for distractors with relative frequency < .05
###   tab_c2_l/tab_c5_l, zero_columns2/zero_columns5 - distractor x ability-group
###                   contingency tables (2 and 5 groups) and dropped-column flags
###   cw_g2/cw_g5   - general Cohen's w effect sizes (2 and 5 ability groups)
###   can_cor       - canonical correlation of rest score with distractor choice
###   pb_dc         - point-biserial PB_DC coefficient per distractor
###   gkg, gkg_tab  - Goodman-Kruskal gamma per distractor and its table
###   ncol_mmat     - number of model-matrix columns used for can_cor
get_results <- function(data, keys = "sim") {
  ### keys for scoring; a single value means simulated data keyed to 0
  if (length(keys) == 1) {
    keys <- rep(0, ncol(data))
  }
  ### quantiles for two ability groups
  p2 <- .5
  ### quantiles for five ability groups
  p5 <- c(.2, .4, .6, .8)
  ### load psych library (library() errors if it is missing,
  ### unlike require(), which only returns FALSE)
  library(psych)
  ### score all items; score = FALSE returns the 0/1 correctness matrix
  scored <- score.multiple.choice(key = keys, data = data, score = FALSE)
  ### assign respondents to 2 ability groups by total-score quantiles
  abil2.c <- rep(0, nrow(scored))
  for (i in 1:length(p2)) {
    if (i < length(p2)) {
      abil2.c[rowSums(scored) > quantile(rowSums(scored), p = p2[i]) &
                rowSums(scored) <= quantile(rowSums(scored), p = p2[i + 1])] <- i
    } else {
      abil2.c[rowSums(scored) > quantile(rowSums(scored), p = p2[i])] <- i
    }
  }
  ### assign respondents to 5 ability groups by total-score quantiles
  abil5.c <- rep(0, nrow(scored))
  for (i in 1:length(p5)) {
    if (i < length(p5)) {
      abil5.c[rowSums(scored) > quantile(rowSums(scored), p = p5[i]) &
                rowSums(scored) <= quantile(rowSums(scored), p = p5[i + 1])] <- i
    } else {
      abil5.c[rowSums(scored) > quantile(rowSums(scored), p = p5[i])] <- i
    }
  }
  ### flag distractors with relative frequency < .05, per item
  rf05 <- list()
  for (j in 1:ncol(data)) {
    rf05[[j]] <- table(data[, j][data[, j] != keys[j]]) / length(data[, j]) < .05
  }
  ### general Cohen's w, 2 ability groups
  chi_g2 <- list()
  cw_g2 <- list()
  tab_c2_l <- list()
  zero_columns2 <- list()
  for (k in 1:ncol(data)) {
    ### distractor x ability-group table, keeping only frequent distractors
    tab_c2 <- matrix(table(data[, k][data[, k] != keys[k]],
                           abil2.c[data[, k] != keys[k]])[!rf05[[k]], ],
                     ncol = length(unique(abil2.c[data[, k] != keys[k]])))
    ### drop ability-group columns with zero frequency
    zero_columns2[[k]] <- colSums(tab_c2) == 0
    tab_c2 <- tab_c2[, colSums(tab_c2) > 0]
    tab_c2_l[[k]] <- tab_c2
    ### chi-square test needs at least two frequent distractors
    if (sum(!rf05[[k]]) >= 2) {
      chi_g2[[k]] <- chisq.test(tab_c2)
    } else {
      chi_g2[[k]] <- NA
    }
    ### Cohen's w - general
    if (sum(!rf05[[k]]) >= 2) {
      cw_g2[[k]] <- sqrt(sum(((chi_g2[[k]]$observed / sum(tab_c2)
                               - chi_g2[[k]]$expected / sum(tab_c2))^2)
                             / (chi_g2[[k]]$expected / sum(tab_c2))))
    } else {
      cw_g2[[k]] <- NA
    }
  }
  ### general Cohen's w, 5 ability groups
  chi_g5 <- list()
  cw_g5 <- list()
  tab_c5_l <- list()
  zero_columns5 <- list()
  for (k in 1:ncol(data)) {
    tab_c5 <- matrix(table(data[, k][data[, k] != keys[k]],
                           abil5.c[data[, k] != keys[k]])[!rf05[[k]], ],
                     ncol = length(unique(abil5.c[data[, k] != keys[k]])))
    zero_columns5[[k]] <- colSums(tab_c5) == 0
    tab_c5 <- tab_c5[, colSums(tab_c5) > 0]
    tab_c5_l[[k]] <- tab_c5
    if (sum(!rf05[[k]]) >= 2) {
      chi_g5[[k]] <- chisq.test(tab_c5)
    } else {
      chi_g5[[k]] <- NA
    }
    ### Cohen's w - general
    if (sum(!rf05[[k]]) >= 2) {
      cw_g5[[k]] <- sqrt(sum(((chi_g5[[k]]$observed / sum(tab_c5)
                               - chi_g5[[k]]$expected / sum(tab_c5))^2)
                             / (chi_g5[[k]]$expected / sum(tab_c5))))
    } else {
      cw_g5[[k]] <- NA
    }
  }
  ### canonical correlation between the rest score (item k removed) and
  ### indicator columns for the chosen distractor, among incorrect responders
  can_cor <- list()
  ncol_mmat <- list()
  for (k in 1:ncol(data)) {
    ncol_mmat[[k]] <- if (sum(!rf05[[k]]) >= 2) {
      ncol(model.matrix(rowSums(scored[scored[, k] == 0, -1]) ~ -1
                        + factor(data[, k][scored[, k] == 0]))[, !rf05[[k]]])
    } else {
      NA
    }
    can_cor[[k]] <- if (sum(!rf05[[k]]) >= 2) {
      cancor(rowSums(scored[scored[, k] == 0, -k]),
             model.matrix(rowSums(scored[scored[, k] == 0, -1]) ~ -1
                          + factor(data[, k][scored[, k] == 0]))[, !rf05[[k]]])$cor
    } else {
      NA
    }
  }
  ### point-biserial coefficient PB_DC and Goodman-Kruskal gamma,
  ### computed per item and per distractor
  pb_dc <- list()
  gkg <- list()
  gkg_tab <- list()
  for (v in 1:ncol(data)) {
    pb_dc_d <- list()
    gkg_d <- list()
    gkg_tab_d <- list()
    ### function to calculate Goodman-Kruskal gamma, taken from:
    ### https://stat.ethz.ch/pipermail/r-help/2003-March/030835.html
    goodman <- function(x, y) {
      Rx <- outer(x, x, function(u, v) sign(u - v))
      Ry <- outer(y, y, function(u, v) sign(u - v))
      S1 <- Rx * Ry
      return(sum(S1) / sum(abs(S1)))
    }
    ### loop over this item's distractors (all non-key responses)
    non_key <- unique(data[, v])[!unique(data[, v]) %in% keys[v]]
    for (w in non_key) {
      ### mean/sd of total score among those choosing key or distractor w
      MDC <- mean(rowSums(scored)[data[, v] %in% c(keys[v], w)])
      SDC <- sd(rowSums(scored)[data[, v] %in% c(keys[v], w)])
      ### mean total score among those choosing distractor w
      MD <- mean(rowSums(scored)[data[, v] %in% w])
      ### choice proportions of distractor w and of the key
      PD <- mean(data[, v] %in% w)
      PC <- mean(data[, v] %in% keys[v])
      ### r-PB_DC
      pb_dc_d[[w]] <- (MD - MDC) / SDC * sqrt(PD / PC)
      ### Goodman-Kruskal gamma against the rest score on the other items
      score_other_items <- factor(rowSums(scored[, -v]))
      tab_gkg_d <- table(data[data[, v] %in% c(keys[v], w), v],
                         score_other_items[data[, v] %in% c(keys[v], w)])
      ### exclude ability levels with zero frequency
      tab_gkg_d <- tab_gkg_d[, colSums(tab_gkg_d) > 0]
      gkg_d[[w]] <- goodman(as.numeric(colnames(tab_gkg_d)),
                            tab_gkg_d[as.numeric(rownames(tab_gkg_d)) %in% keys[v], ]
                            / colSums(tab_gkg_d))
      gkg_tab_d[[w]] <- tab_gkg_d
    }
    pb_dc[[v]] <- pb_dc_d
    gkg[[v]] <- gkg_d
    gkg_tab[[v]] <- gkg_tab_d
  }
  ### return results
  res <- list(rf05 = rf05,
              tab_c2_l = tab_c2_l, zero_columns2 = zero_columns2,
              tab_c5_l = tab_c5_l, zero_columns5 = zero_columns5,
              cw_g2 = cw_g2, cw_g5 = cw_g5,
              can_cor = can_cor,
              pb_dc = pb_dc,
              gkg = gkg,
              gkg_tab = gkg_tab,
              ncol_mmat = ncol_mmat)
  return(res)
}
### frequencies of distractor usage
### including correct response
### (returns one frequency table per item column)
apply(dataset,2,table)
### load psych library
library(psych)
### score all items against the published key
### (score=F returns the 0/1 correctness matrix, not summed scores)
scored <- score.multiple.choice(key=c(7,6,8,2,1,5,1,6,3,2,4,5),data=dataset,score=F)
### does choosing a certain other distractor
### imply better overall scores?
#
### run suggested distractor analysis
### (get_results is defined earlier in this file)
ms_res<-get_results(dataset,keys = c(7,6,8,2,1,5,1,6,3,2,4,5))
### show Results for Table 4
#
### show for which items the distractor choice frequencies were
### below 5%:
ms_res$rf05
### Items 1 to 5 have too many distractors with response frequencies
### below 5%.
#
### get 2PL parameters from mirt
### (unidimensional two-parameter logistic IRT model)
library(mirt)
est_test2pl <- mirt(scored, 1, itemtype="2PL")
### show results
coef(est_test2pl)