# These scripts are based on the supplemental material of the paper:
# Peña-Araya, V., Pietriga, E., & Bezerianos, A. (2019). A Comparison of Visualizations for Identifying Correlation over Space and Time. IEEE transactions on visualization and computer graphics.

library(plyr)
library(ggplot2)

source("CI-Functions-Bonferroni.R")

# Load the pilot log and discard the training trials.
data_pilot <- read.csv("pilot.csv")
data <- subset(data_pilot, isTraining == 0)

# Recode the raw feature identifiers into the labels used in the charts.
feature_labels <- c(
  "Center" = "Center",
  "Source" = "Repelling Node",
  "Sink" = "Attracting Node",
  "SourceVortex" = "Repelling Focus",
  "SinkVortex" = "Attracting Focus",
  "Saddle" = "Saddle"
)
data$feature <- revalue(data$feature, feature_labels)

# Display labels, in the same order as the recoding above.
features <- unname(feature_labels)

# Rows with correctCount == 0 are removed first, so the division below never
# divides by zero; meanTime is the trial time per correct answer.
data_time <- subset(data, correctCount != 0)
data_time$meanTime <- data_time$time / data_time$correctCount

# Within-participant summary of meanTime for every technique x feature cell.
summary_time <- summarySEwithin(
  data_time,
  measurevar = "meanTime",
  withinvars = c("technique", "feature"),
  idvar = "participant"
)

# Grouped bar chart of the summary with +/- `ci` error bars.
# NOTE(review): cbPalette is not defined in this file; presumably it comes from
# CI-Functions-Bonferroni.R -- confirm.
ggplot(summary_time, aes(x = feature, y = meanTime, fill = technique)) +
  geom_bar(position = position_dodge(), stat = "identity", colour = "gray50") +
  theme_bw(base_size = 30) +
  geom_errorbar(
    aes(ymin = meanTime - ci, ymax = meanTime + ci),
    width = .2,
    position = position_dodge(.9)
  ) +
  ylim(-0.5, 16) +
  scale_fill_manual(values = cbPalette)

# Mean time per participant x technique x feature.
# Kept as a global: plotFeature() reads `aggregated_time`.
aggregated_time <- ddply(
  data_time,
  c("participant", "technique", "feature"),
  summarise,
  mean_time = mean(meanTime)
)

# Collapse over features: one mean per participant x technique.
elements <- aggregated_time
elements <- elements[order(elements$participant, elements$technique), ]
statstable_time <- ddply(
  elements,
  c("participant", "technique"),
  summarise,
  time = mean(mean_time)
)

# Long -> wide: one row per participant, one column per technique.
elements <- reshape(
  statstable_time,
  timevar = "technique",
  idvar = c("participant"),
  direction = "wide"
)
# reshape() names the value columns "time.<technique>"; strip only that literal
# prefix (anchored, with the dot escaped) so no other part of a name is touched.
colnames(elements) <- sub("^time\\.", "", colnames(elements))

# Drop participants (rows) that have a missing value for any technique.
elements <- na.omit(elements)

# NOTE: from here on `data` is the wide per-participant table, no longer the
# raw trial log.
data <- elements

# NOTE(review): the indexing below treats the result of bootstrapMeanCI() as
# c(point estimate, lower bound, upper bound) -- confirm against
# CI-Functions-Bonferroni.R.
techniqueA <- bootstrapMeanCI(data$FROLIC)
techniqueB <- bootstrapMeanCI(data$IBFV)
techniqueC <- bootstrapMeanCI(data$PARTICLES)

analysisData <- list(
  name = c("PS", "IBFV", "OLIC"),
  pointEstimate = c(techniqueC[1], techniqueB[1], techniqueA[1]),
  ci.max = c(techniqueC[3], techniqueB[3], techniqueA[3]),
  ci.min = c(techniqueC[2], techniqueB[2], techniqueA[2])
)

# The column is called mean_time only because that is the name the plotting
# code parses. The former trailing space in "upperBound_CI " has been removed
# so the column matches the name used by the difference tables.
# NOTE(review): this table uses "Technique" while the difference tables use
# "technique" -- confirm which one barChart() expects.
datatoprint <- data.frame(
  factor(analysisData$name),
  analysisData$pointEstimate,
  analysisData$ci.min,
  analysisData$ci.max
)
colnames(datatoprint) <- c("Technique", "mean_time", "lowerBound_CI", "upperBound_CI")

path <- "plots/"
filename <- "time_means_task_all"

# Make sure the output directory exists; write.table() cannot create it.
dir.create(path, showWarnings = FALSE, recursive = TRUE)
write.table(
  datatoprint,
  paste0(path, "printed_", filename, ".txt"),
  sep = ",",
  row.names = FALSE
)

barChart(datatoprint, analysisData$name, nbTechs = 3, ymin = 0, ymax = 8, mycolor = "steelblue3", "", "")

# Pairwise differences between techniques over all features, with the number
# of comparisons (3) passed to bootstrapMeanCI_corr().
diffBA <- bootstrapMeanCI_corr(data$IBFV - data$FROLIC, 3)
diffCB <- bootstrapMeanCI_corr(data$IBFV - data$PARTICLES, 3)
diffCA <- bootstrapMeanCI_corr(data$FROLIC - data$PARTICLES, 3)

# Labels follow the order of the subtractions above (FROLIC is shown as OLIC,
# PARTICLES as PS).
# NOTE(review): ci.min/ci.max and ci_corr.min/ci_corr.max are both filled from
# elements 5 and 6, so the plain CI columns carry the same values as the
# corrected ones; element 4 is stored as the level. Confirm against
# bootstrapMeanCI_corr() that this is intended.
analysisData <- list(
  name = c("IBFV-OLIC", "IBFV-PS", "OLIC-PS"),
  pointEstimate = c(diffBA[1], diffCB[1], diffCA[1]),
  ci.max = c(diffBA[6], diffCB[6], diffCA[6]),
  ci.min = c(diffBA[5], diffCB[5], diffCA[5]),
  level = c(diffBA[4], diffCB[4], diffCA[4]),
  ci_corr.max = c(diffBA[6], diffCB[6], diffCA[6]),
  ci_corr.min = c(diffBA[5], diffCB[5], diffCA[5])
)

# Columns are listed min-then-max so that each value lands under the matching
# lowerBound_* / upperBound_* name (they used to be swapped).
# mean_time holds the mean difference; the name is kept only because the
# plotting code parses it.
datatoprint <- data.frame(
  factor(analysisData$name),
  analysisData$pointEstimate,
  analysisData$ci.min,
  analysisData$ci.max,
  analysisData$level,
  analysisData$ci_corr.min,
  analysisData$ci_corr.max
)
colnames(datatoprint) <- c(
  "technique", "mean_time", "lowerBound_CI", "upperBound_CI",
  "corrected_CI", "lowerBound_CI_corr", "upperBound_CI_corr"
)

# `featureName` only exists inside plotFeature(); referencing it here stopped
# the script with "object 'featureName' not found". Use a fixed name for the
# all-features analysis instead.
filenamediff <- "timediff_means_task_all"

barChart(datatoprint, analysisData$name, nbTechs = 3, ymin = -2, ymax = 2, mycolor = "steelblue3", "", "")

#######


# Plot and save, for a single feature, the mean time per technique and the
# pairwise differences between techniques.
#
# featureName:   value of the `feature` column of the global `aggregated_time`
#                to analyse; also used as the prefix of the PDF file names.
# nbComparisons: number of comparisons handed to bootstrapMeanCI_corr().
#                Defaults to 18, the value that used to be hard-coded.
#                NOTE(review): presumably 3 pairs x 6 features -- confirm.
#
# Side effects: prints a summary of the per-participant table, draws two charts
# through barChart(), and writes "<featureName>_time.pdf" and
# "<featureName>_timediff.pdf" to the working directory with ggsave().
# Returns whatever the final ggsave() call returns.
plotFeature <- function(featureName, nbComparisons = 18) {
  # Restrict the participant x technique x feature means to this feature.
  elements <- subset(aggregated_time, feature == featureName)
  elements <- elements[order(elements$participant, elements$technique), ]
  statstable_time <- ddply(
    elements,
    c("participant", "technique"),
    summarise,
    time = mean(mean_time)
  )

  print(summary(statstable_time))

  # Long -> wide: one row per participant, one column per technique.
  elements <- reshape(
    statstable_time,
    timevar = "technique",
    idvar = c("participant"),
    direction = "wide"
  )
  # Strip only the literal "time." prefix that reshape() adds.
  colnames(elements) <- sub("^time\\.", "", colnames(elements))

  # Drop participants (rows) that have a missing value for any technique.
  data <- na.omit(elements)

  # NOTE(review): the indexing below treats the result of bootstrapMeanCI() as
  # c(point estimate, lower bound, upper bound) -- confirm against
  # CI-Functions-Bonferroni.R.
  techniqueA <- bootstrapMeanCI(data$FROLIC)
  techniqueB <- bootstrapMeanCI(data$IBFV)
  techniqueC <- bootstrapMeanCI(data$PARTICLES)

  analysisData <- list(
    name = c("PS", "IBFV", "OLIC"),
    pointEstimate = c(techniqueC[1], techniqueB[1], techniqueA[1]),
    ci.max = c(techniqueC[3], techniqueB[3], techniqueA[3]),
    ci.min = c(techniqueC[2], techniqueB[2], techniqueA[2])
  )

  # The former trailing space in "upperBound_CI " has been removed so the
  # column matches the name used by the difference table below.
  datatoprint <- data.frame(
    factor(analysisData$name),
    analysisData$pointEstimate,
    analysisData$ci.min,
    analysisData$ci.max
  )
  colnames(datatoprint) <- c("Technique", "mean_time", "lowerBound_CI", "upperBound_CI")

  filename <- paste(featureName, "time", sep = "_")

  barChart(datatoprint, analysisData$name, nbTechs = 3, ymin = 0, ymax = 15, mycolor = "steelblue3", "", "")
  # No plot is passed, so ggsave() falls back to its default plot;
  # NOTE(review): this assumes barChart() leaves its chart as the last ggplot.
  ggsave(paste(filename, "pdf", sep = "."), device = cairo_pdf, width = 3.5, height = 2)

  # Pairwise differences; the number of comparisons is forwarded to
  # bootstrapMeanCI_corr().
  diffBA <- bootstrapMeanCI_corr(data$IBFV - data$FROLIC, nbComparisons)
  diffCB <- bootstrapMeanCI_corr(data$IBFV - data$PARTICLES, nbComparisons)
  diffCA <- bootstrapMeanCI_corr(data$FROLIC - data$PARTICLES, nbComparisons)

  # NOTE(review): ci.min/ci.max and ci_corr.min/ci_corr.max are both filled
  # from elements 5 and 6, so the plain CI columns carry the same values as the
  # corrected ones -- confirm this is intended.
  analysisData <- list(
    name = c("IBFV-OLIC", "IBFV-PS", "OLIC-PS"),
    pointEstimate = c(diffBA[1], diffCB[1], diffCA[1]),
    ci.max = c(diffBA[6], diffCB[6], diffCA[6]),
    ci.min = c(diffBA[5], diffCB[5], diffCA[5]),
    level = c(diffBA[4], diffCB[4], diffCA[4]),
    ci_corr.max = c(diffBA[6], diffCB[6], diffCA[6]),
    ci_corr.min = c(diffBA[5], diffCB[5], diffCA[5])
  )

  # Columns are listed min-then-max so that each value lands under the matching
  # lowerBound_* / upperBound_* name (they used to be swapped).
  datatoprint <- data.frame(
    factor(analysisData$name),
    analysisData$pointEstimate,
    analysisData$ci.min,
    analysisData$ci.max,
    analysisData$level,
    analysisData$ci_corr.min,
    analysisData$ci_corr.max
  )
  colnames(datatoprint) <- c(
    "technique", "mean_time", "lowerBound_CI", "upperBound_CI",
    "corrected_CI", "lowerBound_CI_corr", "upperBound_CI_corr"
  )
  filenamediff <- paste(featureName, "timediff", sep = "_")

  barChart(datatoprint, analysisData$name, nbTechs = 3, ymin = -13, ymax = 13, mycolor = "steelblue3", "", "")
  ggsave(paste(filenamediff, "pdf", sep = "."), width = 3.5, height = 2, device = cairo_pdf)
}

# Produce the time and time-difference charts for every feature, one at a time.
for (feature_name in c(
  "Saddle",
  "Center",
  "Repelling Node",
  "Repelling Focus",
  "Attracting Focus",
  "Attracting Node"
)) {
  plotFeature(feature_name)
}