# This script was made by Joshua Balsters at Gorilla to organise data from the
# SART Gorilla Academy course. 16/02/2021
#
# It reads the Fixed and Random SART task exports, counts omission errors (OE)
# and commission errors (CE) for every participant in each task, merges the two
# summaries by participant ID, writes the result to "data_rstats.csv" and draws
# between-group plots of the commission errors.

# Load the packages with functions you'll need for data processing and analysis
library(dplyr)
library(stringr)
library(ggstatsplot)

# Settings ----

# Folder that holds the Gorilla task exports. The summary csv is written to the
# same folder. Paths are built with file.path() instead of calling setwd(), so
# running the script does not change the working directory of your R session.
data_dir <- file.path(
  "~", "Dropbox (Cauldron)", "Cauldron Team Folder", "Sales and Marketing",
  "Research Methods Course", "Materials", "SART", "analysis", "data_SART_exp"
)

# Number of trials that require a response (numbers 1-2 and 4-9). Omission
# errors are counted as this total minus the number of correct responses.
n_go_trials <- 200

# Helper functions ----

# Score a single participant.
#
# participant_rows: all rows of one task export that belong to one participant.
# n_go_trials:      number of trials that require a response.
#
# Returns a one-row data frame with the columns ID, Group, OE and CE.
score_participant <- function(participant_rows, n_go_trials) {
  # 1) keep only the columns of interest, 2) keep rows whose Zone Type contains
  # "response", 3) keep rows of trial type "task", 4) keep only the first
  # response recorded for each trial
  first_responses <- participant_rows %>%
    select(
      Participant.Starting.Group, Participant.Private.ID, Screen.Name,
      Zone.Type, Reaction.Time, Response, display, type, number, Trial.Number
    ) %>%
    filter(str_detect(Zone.Type, "response")) %>%
    filter(type == "task") %>%
    distinct(Trial.Number, .keep_all = TRUE)

  # Counting with sum() gives 0 (rather than NA) when a participant made no
  # correct or no incorrect responses, so no special cases are needed below.
  n_correct <- sum(first_responses$Response == "correct", na.rm = TRUE)
  n_incorrect <- sum(first_responses$Response == "incorrect", na.rm = TRUE)

  data.frame(
    ID = participant_rows$Participant.Private.ID[1],
    # the group is read from the participant's first row, before any filtering
    Group = participant_rows$Participant.Starting.Group[1],
    # omission errors = required responses - correct responses
    OE = n_go_trials - n_correct,
    # commission errors = number of incorrect responses
    CE = n_incorrect,
    stringsAsFactors = FALSE
  )
}

# Score every participant found in one task export.
#
# data:        data frame read from a Gorilla task csv.
# condition:   label used as prefix for the error columns ("Fixed"/"Random").
# n_go_trials: number of trials that require a response.
#
# Returns a data frame with one row per participant and the columns
# ID, Group, <condition>OE and <condition>CE.
summarise_sart <- function(data, condition, n_go_trials) {
  # find unique IDs; sometimes the last value is NA, so drop missing IDs
  ids <- unique(data$Participant.Private.ID)
  ids <- ids[!is.na(ids)]
  if (length(ids) == 0) {
    stop("No participants found in the ", condition, " SART data.",
         call. = FALSE)
  }

  per_participant <- lapply(ids, function(current_id) {
    print(current_id) # show progress in the Console
    rows <- data[which(data$Participant.Private.ID == current_id), ,
                 drop = FALSE]
    score_participant(rows, n_go_trials)
  })

  # Bind the one-row results once instead of growing a data frame in a loop
  scores <- bind_rows(per_participant)
  names(scores)[3:4] <- paste0(condition, c("OE", "CE"))
  scores
}

# Load data files ----

Fixed <- read.csv(
  file.path(data_dir, "data_exp_42207-v5_task-rara.csv"),
  header = TRUE, sep = ","
)
Random <- read.csv(
  file.path(data_dir, "data_exp_42207-v5_task-95m9.csv"),
  header = TRUE, sep = ","
)

# Score both tasks ----

task_dataF <- summarise_sart(Fixed, "Fixed", n_go_trials)
task_dataR <- summarise_sart(Random, "Random", n_go_trials)

# merge combines 2 datasets using a common key, in this case ID. Both inputs
# have a Group column, so the result holds Group.x (from the Fixed task) and
# Group.y (from the Random task).
task_data <- merge(task_dataF, task_dataR, by = "ID")

# When you do stats in R, variables need to be correctly specified as numbers,
# characters, factors etc. Use str() to see what each variable type is.

# change these variables to be factors
task_data$ID <- as.factor(task_data$ID)
task_data$Group.x <- as.factor(task_data$Group.x)
task_data$Group.y <- as.factor(task_data$Group.y)

# change these variables to be numbers
task_data$FixedOE <- as.integer(task_data$FixedOE)
task_data$FixedCE <- as.integer(task_data$FixedCE)
task_data$RandomOE <- as.integer(task_data$RandomOE)
task_data$RandomCE <- as.integer(task_data$RandomCE)

# save the data as a csv file for analysis in another package
write.csv(task_data, file = file.path(data_dir, "data_rstats.csv"))

# Plots ----

# Use the ggstatsplot between subject stats to test for differences between
# groups - see the documentation for more details. The plots are wrapped in
# print() so they are also drawn when the script is run with source().

# Fixed CE plot
print(
  ggstatsplot::ggbetweenstats(
    data = task_data,
    x = Group.x,
    y = FixedCE,
    title = "Fixed SART commision errors by Group",
    # Based on previous analysis I know the data fails Levene's test so I've
    # set the analysis to use non-parametric stats; you can also change this
    # to use parametric, robust, or Bayesian stats
    type = "np"
  )
)

# Random CE plot
print(
  ggstatsplot::ggbetweenstats(
    data = task_data,
    x = Group.x,
    y = RandomCE,
    title = "Random SART commision errors by Group",
    type = "np"
  )
)