4 Prepare behavioral data

As a first step, we create a design table for each participant that collects information about the assignment of events to virtual days and when the events occur with respect to virtual time and real time (seconds).

# define the function
#
# Create and save the design table for each participant.
#
# For every subject a tibble with one row per event (n_days * n_events_day
# rows) is built. It holds the subject ID, the virtual day, the event number
# within the day, the virtual time and real time (seconds) of each event from
# the shared design files, and the picture ID shown as each event, taken from
# the subject's input file. The table is written to
# data/behavior/design_tbl/<sub_id>_design_tbl.txt.
#
# Args:
#   subjects: character vector of subject IDs as used in the file names.
#
# Relies on n_days and n_events_day being defined in the calling environment.
# Stops with an error if the picture IDs of a day are not an exact 7-fold
# repetition of the first n_events_day IDs found for that day.
virtem_behavior_prepare_design_tbl <- function(subjects = c("031", "032")) {

  # total number of events and the number of times each day's picture
  # sequence is expected to repeat in a subject's input file
  n_events <- n_days * n_events_day
  n_reps <- 7

  # load design file with virtual time
  fname <- here("data", "behavior", "design", "daysTime.txt")
  virtual_time <- read.csv(fname, header = FALSE)
  virtual_time <- c(t(as.matrix(virtual_time))) # flatten row by row

  # load design file with times of events in actual time in seconds
  # (after day start) --> real time
  fname <- here("data", "behavior", "design", "daysTimeS.txt")
  real_time <- read.csv(fname, header = FALSE)
  real_time <- c(t(as.matrix(real_time))) # flatten row by row

  # make sure the output folder exists (including missing parent folders)
  out_dir <- here("data", "behavior", "design_tbl")
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }

  # looping over the IDs directly is a no-op for an empty subject vector
  for (sub_id in subjects) {

    # initialize the tibble; day and event labels are derived from n_days and
    # n_events_day (day is kept as double, event as integer)
    design_tbl <- tibble(
      sub_id = rep(sub_id, n_events),
      day = as.numeric(rep(seq_len(n_days), each = n_events_day)),
      event = rep(seq_len(n_events_day), times = n_days),
      virtual_time,
      real_time,
      pic = numeric(n_events)
    )

    # load design file to get stimulus assignments
    fname <- here("data", "behavior", "logs", "input_files",
                  sprintf("P%s_input.txt", sub_id))
    input_data <- read.table(fname)

    # extract the trials where pics were presented
    pic_trial_idx <- input_data[, 5] == 1 # picture trial if column 5 is a 1
    pic_trials <- input_data[pic_trial_idx, ]

    # assign the picture IDs of each day to the respective days
    for (i_day in seq_len(n_days)) {

      # column 3 is matched against the day, column 6 holds the picture ID
      all_reps <- pic_trials[pic_trials[, 3] == i_day, 6] # all repetitions
      first_rep <- all_reps[seq_len(n_events_day)] # IDs of the first rep
      design_tbl$pic[design_tbl$day == i_day] <- first_rep

      # check assumption that every repetition shows the same IDs in order
      if (!identical(all_reps, rep(first_rep, n_reps))) {
        stop("error when finding picture IDs for subject ", sub_id)
      }
    }

    # sanity check: compare to Lorena's day order
    #fname <- here("data", "behavior", "design", sprintf("P%s_daysOrder.txt", sub_id))
    #days_order <- read.csv(fname, header = FALSE)
    #days_order <- c(t(as.matrix(days_order)))
    #stopifnot(all(days_order == design_tbl$pic))

    # write design tibble to file for this subject
    write_csv(design_tbl,
              file.path(out_dir, sprintf("%s_design_tbl.txt", sub_id)))
  }
}

# build the design tibble of every subject and save each one to file
virtem_behavior_prepare_design_tbl(subjects)

4.1 Sorting task

Here is a description of the task from the methods:

The day sorting task (Figure 1D) was performed in front of a computer screen. The 20 event images from the day learning task were presented on the screen in a miniature version. They were arranged in a circle around a central area displaying 4 rectangles. Participants were instructed to drag and drop all events of the same sequence into the same rectangle with a computer mouse. Participants freely chose which rectangle corresponded to which sequence as the sequences were not identifiable by any label and were presented in differing orders across mini-blocks during learning.

Thus, in analysis, we take the grouping as provided by the rectangles and assign the four groups of events to the four days in a way that maximizes the overlap between actual days and sorted days. This may be trivial if performance is perfect or very high, but if several scenes were mixed up between days, it can become difficult to ascertain which day participants were trying to re-assemble. The best fitting permutation is written to file.

For analysis of the sorting task, we took the grouping of event images as provided by the participants and assigned them to the four sequences to ensure maximal overlap between actual and sorted sequence memberships. While the assignment of groupings to sequences is unambiguous when performance is, as in our sample, high, this procedure is potentially liberal at lower performance levels. We then calculated the percentage of correctly sorted event images for each participant, see the raincloud plot (100) in Figure 2A.

# define function to prepare the data from the day sorting task
#
# For every subject, the position of each picture in the sorting task is
# turned into a quadrant label (1-4). All assignments of quadrants to days are
# then tested, and the one with the most pictures sorted to their true day is
# used to recode the quadrant labels into day labels (column sorted_day). The
# extended design table is written to
# data/behavior/day_sorting/<sub_id>_behavior_tbl_day_sorting.txt.
#
# Args:
#   subjects: character vector of subject IDs as used in the file names.
#
# Relies on n_days and n_events_day being defined in the calling environment.
# Stops with an error if a picture lies exactly on an axis (x or y equal to 0),
# because no quadrant can be assigned then.
virtem_behavior_prepare_data_day_sorting <- function(subjects = c("036", "037")) {

  # initialize
  n_pics <- n_days * n_events_day
  n_correct <- numeric(length(subjects))
  out_dir <- here("data", "behavior", "day_sorting")
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE) # also create missing parent folders
  }

  # all possible assignments of quadrants to days, one permutation per row;
  # identical for every subject, so it is built once
  all_perms <- do.call(rbind, permn(seq_len(n_days)))

  for (i_sub in seq_along(subjects)) {
    sub_id <- subjects[i_sub]

    # load design file with true assignment of stimuli to days
    fname <- here("data", "behavior", "design_tbl",
                  sprintf("%s_design_tbl.txt", sub_id))
    col_classes <- c("sub_id" = "c", "day" = "n", "event" = "n", "pic" = "n")
    design_tbl <- as_tibble(read_csv(fname, col_types = col_classes))

    # load the data from the day sorting task and store what we need
    fname <- here("data", "behavior", "logs", "day_sorting",
                  sprintf("%s_rectangles_results.mat", sub_id))
    sorting_log <- readMat(fname)
    obj_pos <- sorting_log$objectPositions

    # extract quadrant for each image
    pic_q <- numeric(n_pics)
    for (i_pic in seq_len(nrow(obj_pos))) {

      # coordinates of current image
      curr_x <- obj_pos[i_pic, 1]
      curr_y <- obj_pos[i_pic, 2]

      if (curr_x < 0 && curr_y > 0) {
        pic_q[i_pic] <- 1 # x smaller than 0, y bigger than 0 --> quadrant 1
      } else if (curr_x > 0 && curr_y > 0) {
        pic_q[i_pic] <- 2 # x bigger than 0, y bigger than 0 --> quadrant 2
      } else if (curr_x < 0 && curr_y < 0) {
        pic_q[i_pic] <- 3 # x smaller than 0, y smaller than 0 --> quadrant 3
      } else if (curr_x > 0 && curr_y < 0) {
        pic_q[i_pic] <- 4 # x bigger than 0, y smaller than 0 --> quadrant 4
      } else {
        stop("Error, can't sort!")
      }
    }

    # match quadrant number to days for all possible combinations
    n_matches <- numeric(nrow(all_perms))
    for (i_perm in seq_len(nrow(all_perms))) {

      # get vector with pic-day assignment for this permutation: the pictures
      # of day all_perms[i_perm, i_day] are expected in quadrant i_day
      comb_to_test <- numeric(n_pics)
      for (i_day in seq_len(n_days)) {
        day_pics <- design_tbl$pic[design_tbl$day == all_perms[i_perm, i_day]]
        comb_to_test[day_pics] <- i_day
      }

      # count the number of matches
      n_matches[i_perm] <- sum(comb_to_test == pic_q)
    }

    # find the best permutation (first one in case of ties) and its hits
    winner_perm <- which.max(n_matches)
    n_correct[i_sub] <- max(n_matches)

    # sort the behavioral responses in the same way as the design table;
    # this works because the data in the sorting task logfile follows the
    # picture number
    to_sort <- seq_len(n_pics)
    sort_idx <- match(design_tbl$pic, to_sort)
    stopifnot(design_tbl$pic == to_sort[sort_idx]) # vectors must be identical
    sorted_pic_q <- pic_q[sort_idx]

    # store which pic was sorted to which day based on winning permutation
    design_tbl$sorted_day <- numeric(n_pics)
    for (i_day in seq_len(n_days)) {

      # recode the day labels to match the day labels in the design tibble
      idx <- sorted_pic_q == i_day
      design_tbl$sorted_day[idx] <- all_perms[winner_perm, i_day]
    }

    # the recoded labels must reproduce the hit count of the winning permutation
    stopifnot(sum(design_tbl$day == design_tbl$sorted_day) == n_correct[i_sub])

    # write design tibble to file for this subject
    write_csv(design_tbl,
              file.path(out_dir,
                        sprintf("%s_behavior_tbl_day_sorting.txt", sub_id)))
  }
}

# prepare the day sorting data of every subject
virtem_behavior_prepare_data_day_sorting(subjects)

4.2 Timeline task

In this task, participants saw a timeline ranging from 6 a.m. to midnight together with miniature versions of the five event images belonging to one sequence (Figure 1E). Participants were instructed to drag and drop the event images next to the timeline so that scene positions reflected the event times they had inferred in the day learning task. To facilitate precise alignment to the timeline, event images were shown with an outward pointing triangle on their left side, on which participants were instructed to base their responses.

Participants' responses are read out from the logfiles of this task and converted to virtual hours. The data are saved in the text file including all behavioral data (virtem_behavioral_data.txt).

# define the function to prepare data from the timeline task
#
# For every subject, the table written by the day sorting step is extended by
# the remembered time of each event (memory_time, in virtual hours) and by the
# remembered order of the events within each day (memory_order). The result is
# written to data/behavior/timeline/<sub_id>_behavior_tbl_timeline.txt.
#
# Args:
#   subjects: character vector of subject IDs as used in the file names.
#
# Relies on n_days and n_events_day being defined in the calling environment.
virtem_behavior_prepare_data_timeline <- function(subjects = c("036", "037", "039")) {

  # initialize
  out_dir <- here("data", "behavior", "timeline")
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE) # also create missing parent folders
  }

  # looping over the IDs directly is a no-op for an empty subject vector
  for (sub_id in subjects) {

    # load design file with true assignment of stimuli to days
    # (& data from day sorting)
    fname <- here("data", "behavior", "day_sorting",
                  sprintf("%s_behavior_tbl_day_sorting.txt", sub_id))
    col_classes <- c("sub_id" = "c", "day" = "n", "event" = "n",
                     "virtual_time" = "d", "real_time" = "d", "pic" = "n",
                     "sorted_day" = "d")
    design_tbl <- as_tibble(read_csv(fname, col_types = col_classes))

    # add column for remembered time based on timeline task
    design_tbl$memory_time <- numeric(n_days * n_events_day)

    for (i_day in seq_len(n_days)) {

      # load the data from the timeline task from this day
      fname <- here("data", "behavior", "logs", "timeline",
                    sprintf("%s_timeline_results_day%d.mat", sub_id, i_day))
      timeline_log <- readMat(fname)

      for (i_pic in seq_len(n_events_day)) {

        # get the number of this picture; picture names sit at every second
        # entry (1, 3, 5, ...) of the second element of ud
        pic_name <- timeline_log$ud[[2]][1 + (i_pic - 1) * 2]
        pic <- as.numeric(str_extract(pic_name, "\\d{1,2}"))

        # which row in our table are we looking at
        tbl_idx <- which(design_tbl$pic == pic)

        # extract the response from the logfile and transform it to be in
        # virtual hours (presumably positions span -0.5 to 0.5, which maps
        # onto the 18 hours from 6 to 24 -- TODO confirm against the task code)
        resp_pos <- timeline_log$objectPositions[i_pic, 2]
        design_tbl$memory_time[tbl_idx] <- (resp_pos + 0.5) * 18 + 6
      }
    }

    # store the remembered order for each virtual day based on remembered
    # times; ungroup afterwards so no grouping leaks into later steps
    design_tbl <- design_tbl %>%
      group_by(day) %>%
      mutate(memory_order = rank(memory_time, ties.method = "first")) %>%
      ungroup()

    # write design tibble to file for this subject
    write_csv(design_tbl,
              file.path(out_dir,
                        sprintf("%s_behavior_tbl_timeline.txt", sub_id)))
  }
}

# prepare the timeline task data of every subject
virtem_behavior_prepare_data_timeline(subjects)

Now we are ready to combine the data from the two memory tests into the final dataframe that we write to file for the actual analyses.

# define function
#
# Combine the per-subject behavioral tables into one data frame.
#
# Reads data/behavior/timeline/<sub_id>_behavior_tbl_timeline.txt for every
# subject, stacks the tables, puts the columns into a fixed order and writes
# the result to behavioral_data.txt in dirs$data4analysis.
#
# Args:
#   subjects: character vector of subject IDs as used in the file names.
#
# Relies on dirs being defined in the calling environment.
virtem_behavior_prepare_data_combine_across_subjects <- function(subjects = c("036", "037")) {

  # columns to read and their types (the same for every subject)
  # NOTE: sub_id is parsed as integer, so leading zeros of IDs such as "036"
  # are not retained in the combined table
  col_types_list <- cols_only(
    sub_id = col_integer(),
    day = col_integer(),
    event = col_integer(),
    pic = col_integer(),
    virtual_time = col_double(),
    real_time = col_double(),
    memory_time = col_double(),
    memory_order = col_double(),
    sorted_day = col_integer()
  )

  # load data from CSV for each subject
  sub_tbls <- lapply(subjects, function(sub_id) {
    fname <- here("data", "behavior", "timeline",
                  sprintf("%s_behavior_tbl_timeline.txt", sub_id))
    as_tibble(read_csv(fname, col_types = col_types_list))
  })

  # stack the tables of all subjects in one go
  beh_data <- bind_rows(sub_tbls)

  # reorder to have a more intuitive order; columns are selected by name so
  # the result does not depend on the column order in the input files
  col_order <- c("sub_id", "day", "event", "pic", "virtual_time", "real_time",
                 "memory_time", "memory_order", "sorted_day")
  beh_data <- beh_data[, col_order]

  # write data to file
  write_csv(beh_data, file.path(dirs$data4analysis, "behavioral_data.txt"))
}

# merge the behavioral tables of all subjects into one file
virtem_behavior_prepare_data_combine_across_subjects(subjects)

We have a similar dataframe from Nicole Montijn’s study conducted at Utrecht University that we will later use to replicate the generalization bias. Let’s move it to the folder with the analysis data.

# place a copy of the replication data in the analysis data folder
# (the folder that will be shared)
fname <- here("data", "behavior", "replication_data_montijn", "beh_dataNDM.txt")
file.copy(from = fname, to = dirs$data4analysis)

## [1] FALSE

4.3 Picture viewing tasks

In the picture viewing tasks (Figure 1B), participants viewed a stream of the event images. Their task was to look at the images attentively and to respond via button press whenever a target picture, which showed the father feeding the family’s dog, was presented.

Below, we check how well participants detected the targets.

# Target detection in the picture viewing tasks: for every run and subject,
# compute the percentage of detected targets and the average reaction time for
# hits. Results are collected in a preallocated list and stacked once at the
# end, with rows ordered by run first and subject second.
pvt_results <- vector("list", n_runs * length(subjects))
i_res <- 0L

for (i_run in seq_len(n_runs)) {
  for (i_sub in seq_along(subjects)) {

    # load the logfile for this run (pre or post)
    log_fn <- file.path(dirs$pvt_log_dir,
                        sprintf('P%s_%svirtem.txt', subjects[i_sub], runs[i_run]))
    log_tbl <- read.table(log_fn) # not named `log` to avoid masking base::log
    colnames(log_tbl) <- c("pic", "fix_start", "pic_start", "volume",
                           "response", "RT", "trial_end")

    # add column for block and subject ID to log
    log_tbl <- log_tbl %>%
      add_column(run = runs[i_run]) %>%
      add_column(sub_id = subjects[i_sub])

    # calculate proportion of hits and average RT; only trials with pic == 21
    # (the target picture) enter the summary
    curr_dat <- log_tbl %>%
      filter(pic == 21) %>%
      summarise(sub_id = unique(sub_id),
                run = unique(run),
                perc_hits = sum(response)/nrow(.)*100,
                avg_rt = mean(RT[response==1])) # calculate average RT for hits

    # store the result for this run and subject
    i_res <- i_res + 1L
    pvt_results[[i_res]] <- curr_dat
  }
}

# combine into the overall tibble
pvt_target_detect <- bind_rows(pvt_results)

head(pvt_target_detect)

sub_id	run	perc_hits	avg_rt
031	pre	100	921
032	pre	90	993
033	pre	70	904
034	pre	100	948
035	pre	100	710
036	pre	100	747

# per run (pre/post): mean and standard deviation across participants of the
# hit percentage and of the average reaction time
pvt_target_detect_summary <- summarise(
  group_by(pvt_target_detect, run),
  mean_perc_hits = mean(perc_hits),
  sd_perc_hits = sd(perc_hits),
  mean_rt = mean(avg_rt),
  sd_rt = sd(avg_rt),
  .groups = "drop"
)
head(pvt_target_detect_summary)

run	mean_perc_hits	sd_perc_hits	mean_rt	sd_rt
post	95.7	6.9	841	162
pre	95.7	7.9	881	131

Target detection in picture viewing task:
Pre-learning: 95.71% ± 7.90% mean±standard deviation of percentage of hits; 881.34ms ± 131.43ms mean±standard deviation of average reaction times

Post-learning: 95.71% ± 6.90% mean±standard deviation of percentage of hits; 841.40ms ± 162.16ms mean±standard deviation of average reaction times

sub_id	run	perc_hits	avg_rt
031	pre	100	921
032	pre	90	993
033	pre	70	904
034	pre	100	948
035	pre	100	710
036	pre	100	747

sub_id	run	perc_hits	avg_rt
031	pre	100	921
032	pre	90	993
033	pre	70	904
034	pre	100	948
035	pre	100	710
036	pre	100	747

sub_id	run	perc_hits	avg_rt
031	pre	100	921
032	pre	90	993
033	pre	70	904
034	pre	100	948
035	pre	100	710
036	pre	100	747