7 Run RSA Searchlight

7.1 Prepare functional data

We want to run RSA on the multi-voxel patterns corresponding to the object presentations during the picture viewing tasks. For this, we use the data as preprocessed by Lorena Deuker. We will do some further cleaning and extract the relevant volumes from the time series to eventually run RSA searchlights. What is done in the following sections was previously done for the ROI data only; now we do it for the whole field of view in preparation for the RSA searchlights.

Data cleaning: Extract residuals from motion parameter regression

In this first section of the script, we create a data frame that collects the file names needed to calculate the clean time series. These include the motion parameters from FSL FEAT, which was run on the functional data from the picture viewing task. As described above, these data were split into 10 blocks. We will use the preprocessed FEAT output images, which were already co-registered to the analysis space (‘samespace’). Further, we will use a mask to include only voxels with data in all blocks. This mask has already been created and is available in the analysis space.

#PREPARE DATA FRAME FOR CLEANING

# create a tibble with filenames etc for data cleaning, start with subject IDs
func_dat_df <- tibble(subject = rep(as.numeric(subjects), each = n_runs * n_blocks))
# add run info (run 1 and 2 equal the pre and post PVT)
func_dat_df <- add_column(func_dat_df, run = rep(rep(1:n_runs, each = n_blocks), n_subs))
# add block information (blocks 1 to 10 are the PVT blocks preprocessed separately)
func_dat_df <- add_column(func_dat_df, block = rep(1:n_blocks, n_subs*n_runs))
# add the motion parameter file
func_dat_df <- add_column(func_dat_df, mc_par_fn = 
                            file.path(dirs$feat_dir,
                                      paste0("VIRTEM_P0", func_dat_df$subject),
                                      sprintf("RSA_%02d_Block%02d.feat",
                                              func_dat_df$run, func_dat_df$block),
                                      "mc", "prefiltered_func_data_mcf.par"))
# add the functional data file
func_dat_df <- add_column(func_dat_df, filtered_func = 
                            file.path(dirs$samespace_dir,
                                      paste0("VIRTEM_P0", func_dat_df$subject),
                                      sprintf("VIRTEM_P%03d_RSA_%02d_Block%02d_func.nii.gz",
                                              func_dat_df$subject, func_dat_df$run, func_dat_df$block)))
# add the brain mask data file
func_dat_df <- add_column(func_dat_df, brain_mask = 
                            file.path(dirs$samespace_dir,
                                      paste0("VIRTEM_P0", func_dat_df$subject),
                                      sprintf("VIRTEM_P%03d_common_mask_perBlock.nii.gz",
                                              func_dat_df$subject)))

# add output directory
func_dat_df <- add_column(func_dat_df, out_dir = file.path(dirs$searchlight, "clean_timeseries"))
if(!dir.exists(file.path(dirs$searchlight, "clean_timeseries"))){
  dir.create(file.path(dirs$searchlight, "clean_timeseries"))
}

Define function for data cleaning

In this next section, we will first define a function and then run it on all datasets. In this function, the preprocessed data will be loaded. For every voxel within the brain mask, the motion correction parameters are used as predictors in a GLM with the voxel's time series as the dependent variable. Finally, the residuals from this GLM (i.e. what could not be explained by motion) are written to a new NIfTI file.

# DEFINE THE FUNCTION TO GET THE RESIDUAL TIME SERIES 
run_motion_glm <- function(df = func_dat_df[1,]){
  
  # load the motion params and convert to tibble
  mc_pars <- read.table(df$mc_par_fn, header = FALSE)
  mp_df <- as_tibble(mc_pars)
  colnames(mp_df) <- paste0("mp", 1:6)
  
  # load the brain mask, check it's only zeros and ones, then convert to logical
  brain_mask_nii <- readNIfTI2(df$brain_mask)
  assertthat::assert_that(all.equal(unique(c(img_data(brain_mask_nii))), c(0,1)))
  brain_mask <- array(NA, dim(brain_mask_nii))
  brain_mask[] <- as.logical(img_data(brain_mask_nii))
  
  # load the functional data
  func_nii <- readNIfTI2(df$filtered_func)
  
  # initialize output
  clean_dat <- array(0, dim(func_nii))
  
  counter <- 0
  for (i_dim1 in 1:dim(func_nii)[1]){
    for (i_dim2 in 1:dim(func_nii)[2]){
      for (i_dim3 in 1:dim(func_nii)[3]){
        if (brain_mask[i_dim1,i_dim2,i_dim3]){
          
          # extract this voxel's data and merge with motion params
          mp_df$vox_dat <- func_nii[i_dim1,i_dim2,i_dim3,]
          
          # run the glm and store the residuals
          clean_dat[i_dim1,i_dim2,i_dim3,] <- resid(glm('vox_dat~mp1+mp2+mp3+mp4+mp5+mp6', data = mp_df))
        }
      
      # print a message to show progress
      counter <-counter+1
      if (counter%%(prod(dim(func_nii)[1:3])/100) == 0) {
        print(paste0((counter/prod(dim(func_nii)[1:3])*100), "% done"))}     
      }
    }
  }
    
  # create nifti with same header info as original data from clean time series
  clean_dat_nii = copyNIfTIHeader(img = func_nii, arr = clean_dat)
  
  # create output folder
  out_dir_sub <- file.path(df$out_dir, sprintf("%03d",df$subject))
  if(!dir.exists(out_dir_sub)){dir.create(out_dir_sub, recursive = TRUE)}
  
  # write cleaned nifti to file
  fn <- sprintf("%03d_run%02d_block%02d_clean_func_data.nii.gz",
              df$subject, df$run, df$block)
  writenii(nim = clean_dat_nii, filename = file.path(out_dir_sub,fn))
}

Run data cleaning

Now actually run the function either in parallel or serially.

# next step depends on whether we are in parallel or serial mode
if (!run_parallel){ # run serially

  print("Attempting to run motion parameter cleaning serially. This will take a very long time if runnning for all subjects")
  # run the function for each row of the data frame,
  # i.e. for each block in each run for each subject
  for(i in 1:nrow(func_dat_df)){
    
    tic(sprintf("Subject %s, run %d, block %d",
                func_dat_df$subject[i], func_dat_df$run[i], func_dat_df$block[i]))
    run_motion_glm(df = func_dat_df[i,])
    toc()
  }
  
} else if (run_parallel){ # run in parallel, assumes CBS HTCondor is available
  
  # write the data frame to file
  fn <- file.path(here("data","mri", "rsa", "searchlight", "clean_timeseries"),
                  "htc_config_clean_time_series.txt")
  fn_def <- cat(sprintf('"fn <- "%s"',fn))
  write.table(func_dat_df, fn,
              append = FALSE, sep = ",", dec = ".", row.names = FALSE, col.names = TRUE)
  
  # store the function definition as text
  func_def <- capture.output(run_motion_glm)
  func_def[1] <- paste0("run_motion_glm <- ",func_def[1])
  #func_def <- func_def[-length(func_def)]
  
  #write the Rscript that we want to run
  rscript_fn <- here("data","mri", "rsa", "searchlight", "clean_timeseries", "run_clean_timeseries.R")
  con <- file(rscript_fn)
  open(con, "w")
  writeLines(c(
    "\n# handle input",
    "args = commandArgs()",
    "i <- as.numeric(args[length(args)])",
    "\n#load required packages",
    noquote(sprintf('lib_dir <- "%s"',"/data/pt_02261/virtem/virtem_code/R3.6.1/library/Linux")),
    '.libPaths(c(lib_dir,.libPaths()))',
    'lapply(c("oro.nifti", "assertthat", "dplyr", "neurobase"), library, character.only = TRUE)',
    "\n# read the data and transform ROI column back to list",
    noquote(sprintf('fn <- "%s"',fn)),
    'func_dat_df <- read.table(fn, sep = ",", header = TRUE, stringsAsFactors = FALSE)',
    "\n#define the function to run motion GLM",
    func_def,
    "\n# run the function on one line of the data frame",
    "run_motion_glm(df = func_dat_df[i,])"),con)
  close(con)
  
  # folder for condor output
  htc_dir <- here("htc_logs", "clean_timeseries")
  if(!dir.exists(htc_dir)){dir.create(htc_dir, recursive = TRUE)}
  
  # write the submit script
  fn <- here("data","mri", "rsa", "searchlight", "clean_timeseries", "run_clean_timeseries.submit")
  con <- file(fn)
  open(con, "w")
  writeLines(c(
    "universe = vanilla",
    "executable = /afs/cbs.mpg.de/software/scripts/envwrap",
    "request_memory = 9000",
    "notification = error"
    ),con)

    for (i in 1:nrow(func_dat_df)){
        writeLines(c(
        sprintf("\narguments = R+ --version 3.6.1 Rscript %s %d", rscript_fn, i),
        sprintf("log = %s/%d.log", htc_dir, i),
        sprintf("output = %s/%d.out", htc_dir, i),
        sprintf("error = %s/%d.err", htc_dir, i),
        sprintf("Queue\n")),con)
        }
  close(con)
  
  # submit to condor
  #system("reset-memory-max")
  batch_id <- system(paste("condor_submit", fn), intern = TRUE)
  batch_id <- regmatches(batch_id[2], gregexpr("[[:digit:]]+", batch_id[2]))[[1]][2]
  cat(sprintf("submitted jobs (ID = %s) to clean time series for searchlights. Time to wait...", batch_id))
  pause_until_batch_done(batch_id = batch_id, wait_interval = 600)
  

  #system("reset-memory-max")
  #system("condor_q")
}

Extract relevant volumes

As the presentation of images in the PVT pre and post blocks was locked to the onset of a new volume (see above), the second volume after image onset was selected for every trial (effectively covering the time between 2270-4540 ms after stimulus onset).

In this step, we split the presentation logfile from the picture viewing task into the 10 blocks. We reference the volume count to the first volume of each block and extract the relevant volumes from the cleaned 4D timeseries data, accounting for the temporal offset. This is done for each block separately in both the pre and post runs. The data are written to 3D-niftis.

offset_tr = 2
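# as described above, offset_tr = 2 selects the second volume after picture
# onset, i.e. data from ~2270-4540 ms post-onset (TR = 2.27 s)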

for (i_sub in subjects){
  for (i_run in 1:n_runs){
    
    # load the logfile for this run (pre or post)
    log_fn <- file.path(dirs$pvt_log_dir, sprintf('P%s_%svirtem.txt', i_sub, runs[i_run]))
    log <- read.table(log_fn)
    colnames(log) <- c("pic", "fix_start", "pic_start", "volume", "response", "RT", "trial_end")
    
    # split the log file into the 10 blocks
    log_split <- split(log, rep(1:10, each = 21))
    
    # reference volume numbers to the first volume of that block
    vol_block <- lapply(log_split, function(x){x$volume - x$volume[1]})
    log_split <- mapply(cbind, log_split, "vol_block"=vol_block, SIMPLIFY=FALSE)
    
    for (i_block in 1:n_blocks){
      
      # define the full 4D file
      clean_func_fn <- file.path(dirs$searchlight, "clean_timeseries", i_sub,
                                 sprintf("%s_run%02d_block%02d_clean_func_data.nii.gz",
                                         i_sub, i_run, i_block))
  
      # index of relevant volumes when accounting for offset
      rel_vols <- log_split[[i_block]]$vol_block + offset_tr
      
      # extract the relevant volumes from the cleaned timeseries data
      out_dir <- file.path(dirs$searchlight, "rel_vols_3D", i_sub)
      if(!dir.exists(out_dir)){dir.create(out_dir, recursive = TRUE)}

      for (i_vol in 1:length(rel_vols)){
        # subtract 1 from volume number because of 0-based FSL indexing
        fslroi(file = clean_func_fn, tmin = rel_vols[i_vol]-1, tsize = 1, 
               outfile = file.path(out_dir, 
                                   sprintf("%s_run%02d_block%02d_pic%02d.nii.gz",
                                           i_sub, i_run, i_block, log_split[[i_block]]$pic[i_vol])),
               verbose = FALSE)
      }
    }
  }
}

Concatenate relevant volumes

Next, we create 4D-files for both the pre and the post run. Each file will consist of 200 volumes (20 relevant pictures x 10 blocks). We take care to order the volumes according to picture identity, as this is most convenient for later RSA.

for (i_sub in subjects){
  for (i_run in 1:n_runs){
    
    # files to concatenate
    in_dir <- file.path(dirs$searchlight, "rel_vols_3D", i_sub)
    files <- c(file.path(in_dir, sprintf("%s_run%02d_block%02d_pic%02d.nii.gz",
                                         i_sub, i_run, c(rep(1:10,n_pics)), c(rep(1:n_pics, each = n_blocks)))))
    
    # output file
    out_dir <- file.path(dirs$searchlight, "rel_vols_4D")
    if(!dir.exists(out_dir)){dir.create(out_dir, recursive = TRUE)}
    fn <- file.path(out_dir, sprintf("%s_run%02d_rel_vols.nii.gz", 
                                     i_sub, i_run)) 
    
    # merge the files
    fslmerge(infiles = files, direction = "t", outfile = fn, 
             retimg = FALSE, verbose = FALSE)
    
    # change datatype to short to save space and memory for the cluster jobs
    fslmaths(file = fn, outfile = fn, retimg = FALSE,
             opts = "-odt short", opts_after_outfile = TRUE, verbose = FALSE)
  }
}

7.2 First-level RSA searchlight

We further probed how temporal distances between events shaped representational change using searchlight analyses. Using the procedures described above, we calculated pattern similarity change values for search spheres with a radius of 3 voxels around the center voxel. Search spheres were centered on all brain voxels within our field of view. Within a given search sphere, only gray matter voxels were analyzed. Search spheres not containing more than 25 gray matter voxels were discarded.

Building on the prepared data, we want to run the searchlight analysis. To speed things up, we split the brain into chunks that we analyze in parallel jobs.

Set analysis parameters

First, let’s set some analysis parameters. These include

  • the radius of the search spheres. This is the radius around the center voxel, so that the diameter of the spheres is given by 2*radius + 1
  • the minimum number of valid voxels that a sphere has to contain to be analyzed
  • the number of chunks into which we split the analysis to speed up the computations using the HPC cluster

Further, we define a function that returns the relative coordinates of the voxels within a sphere of a given radius around the center voxel. This canonical sphere definition will then be combined with the actual sphere center coordinates. It is based on the way the sphere definition is implemented in the fmri package for R.

# how large should the searchlights be 
radius <- 3 # radius around center voxel

# how many voxels (counting the center) must be in a searchlight for it to be run; smaller ones are skipped.
min_voxels <- 25

# how many chunks to split the analysis up into (for speed reasons)
n_chunks <- 40

# function to get searchsphere of given radius (based on: https://rdrr.io/cran/fmri/src/R/searchlight.r)
searchlight <- function(radius){
    rad <- as.integer(radius)
    nr <- 2*rad+1
    indices <- rbind(rep(-rad:rad,nr*nr),
                     rep(-rad:rad,rep(nr,nr)),
                     rep(-rad:rad,rep(nr*nr,nr)))
    indices[,apply(indices^2,2,sum)<=radius^2]
}
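
As a quick sanity check (illustration only, not part of the original pipeline), we can inspect the canonical sphere definition directly. For the radius of 3 voxels used here it should return a matrix with 3 rows (the relative x/y/z offsets) and 123 columns, i.e. 123 voxels per sphere including the center, with offsets ranging from -3 to 3 (a diameter of 2*radius + 1 = 7 voxels).

# quick check of the canonical sphere definition (illustration only)
sphere_offsets <- searchlight(radius)
dim(sphere_offsets)    # 3 x 123: relative x/y/z offsets of the in-sphere voxels
range(sphere_offsets)  # -3 to 3, i.e. a diameter of 2*radius + 1 = 7 voxels
all(colSums(sphere_offsets^2) <= radius^2)  # all offsets lie within the radius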

Prepare RSA-predictions

To relate pattern similarity change in each search sphere to the temporal structure of the sequences we need the relationships of the events in virtual time. We calculate these here analogously to the main ROI analyses, but save them for each subject separately.

for (i_sub in subjects){
  
  # set up the dataframe to use for RSA
  pics <- which(upper.tri(matrix(TRUE, n_pics, n_pics), diag = FALSE), arr.ind=TRUE)
  
  # load the behavioral data
  fn <- file.path(dirs$timeline_dat_dir, sprintf("%s_behavior_tbl_timeline.txt", i_sub))
  col_types_list <- cols_only(sub_id = col_character(), day = col_factor(), 
                              event = col_integer(), pic = col_integer(),
                              virtual_time = col_double(), real_time = col_double(),
                              memory_time = col_double(), memory_order = col_double(), 
                              sorted_day = col_integer())
  beh_dat <- read_csv(fn, col_types = col_types_list)
  
  # sort behavioral data according to picture identity
  beh_dat_ordered <- beh_dat[order(beh_dat$pic),]
  
  # find the order of comparisons
  pics <- which(upper.tri(matrix(nrow = 20, ncol = 20)), arr.ind=TRUE)
  
  # extract the data for the first and second picture in each pair from the behavioral data
  pic1_dat <- beh_dat_ordered[pics[,1],]
  pic2_dat <- beh_dat_ordered[pics[,2],]
  colnames(pic1_dat) <- paste0(colnames(pic1_dat), "1")
  colnames(pic2_dat) <- paste0(colnames(pic2_dat), "2")
  
  # combine the two tibbles
  pair_dat <- cbind(pic1_dat, pic2_dat)
  
  # reorder the tibble columns to make a bit more sense
  pair_dat <- pair_dat %>%
    select(sub_id1,
           day1, day2,
           pic1, pic2,
           event1, event2, 
           virtual_time1, virtual_time2, 
           real_time1, real_time2, 
           memory_order1, memory_order2, 
           memory_time1, memory_time2, 
           sorted_day1, sorted_day2) %>%
    rename(sub_id = sub_id1)

  # prepare RSA distance measures
  pair_dat <- pair_dat %>%
    mutate(
      
      # pair of events from same or different day (dv = deviation code)
      same_day = day1 == day2,
      same_day_dv = plyr::mapvalues(same_day, from = c(FALSE, TRUE), to = c(-1, 1)),
      
      # absolute difference in time metrics
      vir_time_diff = abs(virtual_time1 - virtual_time2),
      order_diff = abs(event1 - event2),
      real_time_diff = abs(real_time1 - real_time2)) %>%
    
    # z-score the time metric predictors
    mutate_at(
      c("vir_time_diff", "order_diff", "real_time_diff"), scale)

  # write to file
  out_dir <- file.path(dirs$searchlight, "rsa_predictions")
  if(!dir.exists(out_dir)){dir.create(out_dir, recursive = TRUE)}
  fn <- file.path(out_dir, sprintf("%s_rsa_info.txt", i_sub))
  write.table(pair_dat, file = fn)
}

Create lookup table for searchlight

As a first step, we prepare a table that - for each individual sphere - holds the indices of the voxels making up that search sphere. To find these voxels we rely on both a brain mask and a gray matter mask obtained during preprocessing. The brain mask is combined across the 10 preprocessing blocks, and all voxels within this brain mask can be sphere centers. We get the coordinates of the voxels around these sphere centers given the desired radius. Then, we exclude the non-gray-matter voxels from the spheres. We check which spheres still contain the minimum number of voxels (we do this here to speed up the subsequent computations) and discard the spheres that don't hold enough voxels, e.g. because they are in white matter or at the edge of the brain/field of view.

We split this table into chunks to speed up the calculations and save the chunked lookup-tables for later use. To be able to relate the rows of the table (each row is a sphere) back to the brain, we create a nifti image that numbers all voxels which are sphere centers. These numbers correspond to the row names of the lookup table and will later be used to map the RSA results of each sphere back to the brain.

# Set up dataframe
in_df <- tibble(subject = subjects)

# how many chunks to run the analysis in for each subject
in_df$n_chunks <- n_chunks

# input files with relevant functional volumes
in_df$rel_vol_pre_fn <- file.path(dirs$searchlight, "rel_vols_4D", 
                              sprintf("%s_run%02d_rel_vols.nii.gz", subjects, 1)) 
in_df$rel_vol_post_fn <- file.path(dirs$searchlight, "rel_vols_4D", 
                              sprintf("%s_run%02d_rel_vols.nii.gz", subjects, 2)) 

# which voxels should be center voxels of spheres --> typically brain mask
in_df$ctr_mask_fn <- file.path(dirs$samespace_dir, paste0("VIRTEM_P", subjects),
                               sprintf("VIRTEM_P%s_common_mask_perBlock.nii.gz",
                                       subjects))
# which voxels should be used for correlations --> typically graymatter mask
in_df$feat_mask_fn <- file.path(dirs$samespace_dir, paste0("VIRTEM_P", subjects),
                                sprintf("VIRTEM_P%s_common_graymatter_mask_brainmasked.nii.gz",
                                       subjects))

# radius of spheres
in_df$radius <- radius

# where to store outputs
in_df$out_path <- file.path(dirs$searchlight, sprintf("first_level_%dvox_radius", radius), subjects)
in_df$chunk_path <- file.path(in_df$out_path, "chunks")

prepare_searchlight_lut <- function(in_df = in_df[1,]){

  if(!dir.exists(in_df$out_path)){dir.create(in_df$out_path, recursive = TRUE)}
  if(!dir.exists(in_df$chunk_path)){dir.create(in_df$chunk_path, recursive = TRUE)}
  
  # load the mask with voxels that will be sphere centers (typically brain mask)
  ctr_mask_nii <- readNIfTI(in_df$ctr_mask_fn, reorient=FALSE)
  if (!all.equal(unique(c(img_data(ctr_mask_nii))), c(0,1))){
      stop("center mask not binary!")}
  
  # load the mask with voxels that will be the features used for correlations
  feat_mask_nii <- readNIfTI(in_df$feat_mask_fn, reorient=FALSE)
  feat_mask_array <- array(0, dim = dim(feat_mask_nii))
  feat_mask_array[feat_mask_nii > 0.7 ] <- 1 
  if (!all.equal(unique(c(feat_mask_array)), c(0,1))){
      stop("make sure feature mask is binary!")}
  
  # make sure mask dimensions match functional images and each other
  if (!all.equal(dim(ctr_mask_nii), dim(feat_mask_array))){
       stop("mask dimensions don't match each other!")}
  
  # find all the voxel coordinates that will be sphere centers
  ctr_inds <- which(ctr_mask_nii != 0, arr.ind=TRUE) # inds has subscripts for 3D coords of center voxels
  if (ncol(ctr_inds) != 3) { stop('wrong dims on inds')}
  
  # 3D array with voxel numbers to save for future reference
  vox_num_array <- array(0, dim(ctr_mask_nii))  # make a blank 3d matrix 'brain', all zeros
  vox_num_array[ctr_inds] <- 1:nrow(ctr_inds)  # and put integers into the voxel places
  
  # create nifti with same header info as input data
  vox_num_nii <- copyNIfTIHeader(img = ctr_mask_nii, arr = vox_num_array)
  write_nifti(vox_num_nii, file.path(in_df$out_path, "vox_num"))    # write out as a NIfTI image
  
  # get the search sphere 3D subscripts
  sphere_coords <- searchlight(radius)
  
  # actually fill up the lookup table. This goes through every voxel, so can take some time.
  # the table will hold the sphere indices for each center voxel
  lookup_tbl <- array(NA, c(nrow(ctr_inds), ncol(sphere_coords)))  
  rownames(lookup_tbl) <- vox_num_nii[ctr_inds]
  for (i in 1:nrow(ctr_inds)) {    # i <- 1
      
      # add the sphere coordinates to the current voxel coordinates
      curr_sphere <- t(ctr_inds[i,] + sphere_coords)
      
      # remove voxels that are out of bounds (i.e. out of image dimensions)    
      dims <- dim(vox_num_array)
      curr_sphere[which(curr_sphere[,1]>dims[1]),] <- NA
      curr_sphere[which(curr_sphere[,2]>dims[2]),] <- NA
      curr_sphere[which(curr_sphere[,3]>dims[3]),] <- NA
      curr_sphere[which(curr_sphere[,1]<1),] <- NA
      curr_sphere[which(curr_sphere[,2]<1),] <- NA
      curr_sphere[which(curr_sphere[,3]<1),] <- NA
      
      # remove voxels that are not in the feature mask
      curr_sphere[!feat_mask_array[curr_sphere],] <- NA
      
      # store the voxel numbers for this sphere
      vox.id <- vox_num_array[ctr_inds[i,1], ctr_inds[i,2], ctr_inds[i,3]]
      lookup_tbl[vox.id,] <- vox_num_array[curr_sphere]
  }
  
  # replace the zeroes with NA so can sort better at runtime
  to_remove <- which(lookup_tbl == 0, arr.ind=TRUE)
  lookup_tbl[to_remove] <- NA
  
  # remove rows that don't have the minimum amount of voxels (do this here because this speeds up all later computations)
  lookup_tbl <- lookup_tbl[rowSums(!is.na(lookup_tbl)) > min_voxels,]

  # write the table, gzipped to save file size
  write.table(lookup_tbl, gzfile(file.path(in_df$out_path, sprintf("lookup_radius%d.txt.gz", in_df$radius))), row.names = TRUE)
  
  # vector to split lookup table into chunks (last chunk will be slightly smaller)
  r  <- rep(1:in_df$n_chunks,each=ceiling(nrow(lookup_tbl)/in_df$n_chunks))[1: nrow(lookup_tbl)]

  for (i_chunk in 1:in_df$n_chunks){
    
    # extract current chunk and write to file
    curr_chunk <- lookup_tbl[r == i_chunk,]
    write.table(curr_chunk, 
                gzfile(file.path(in_df$chunk_path,
                                 sprintf("lookup_radius%d_chunk_%02d.txt.gz", in_df$radius, i_chunk))), 
                row.names = TRUE)   
  }
}

# run for all datasets
for (i in 1:nrow(in_df)){
  prepare_searchlight_lut(in_df = in_df[i,])
}

We want to create some diagnostic images of the spheres to make sure the resulting search spheres look as expected.
For this, a random subject is picked and a number of random spheres are plotted on the gray matter mask.

# pick random subject
rand_sub <- sample(1:n_subs)[1]

# read their lookup-table
lookup_tbl<-read.table(gzfile(
  file.path(in_df$out_path[rand_sub], 
            sprintf("lookup_radius%d", in_df$radius[rand_sub], ".txt.gz"))))   

# load their graymatter mask and the nifti linking indices
gm_mask_nii <- in_df[rand_sub,]$feat_mask_fn %>% readNIfTI2()
vox_num_nii <- file.path(in_df$out_path[rand_sub], "vox_num.nii.gz") %>% readNIfTI2()

# pick 10 searchlights to plot at random
do.searchlights <- sample(1:nrow(lookup_tbl))[1:10]

# make a blank 3d matrix 'brain' to put the searchlights into
sphere_test_array <- array(0, dim(gm_mask_nii))

for (i in 1:length(do.searchlights)) {    # i <- 1

    # voxels of this example sphere
    voxs <- unlist(lookup_tbl[do.searchlights[i],], use.names=FALSE)
    voxs <- voxs[which(!is.na(voxs))]  # get rid of NAs
    print(sprintf('sphere %02d: %d voxels', i, length(voxs)))

    # put integers into the sphere test array for each searchlight
    for (j in 1:length(voxs)) {    # j <- 1
        coords <- which(vox_num_nii == voxs[j], arr.ind=TRUE)   # location of this voxel
        if (ncol(coords) != 3 | nrow(coords) != 1) { stop("wrong sized coords")}
        sphere_test_array[coords[1], coords[2], coords[3]] <- i   # assign this voxel the searchlight number
    }
    
  # visualize the sphere at the most frequent coordinate
  ortho2(gm_mask_nii, y=sphere_test_array==i, col.y = "red",  col.crosshairs = "lightgrey",
         xyz = c(statip::mfv1(which(sphere_test_array==i, arr.ind = TRUE)[,1]),
                 statip::mfv1(which(sphere_test_array==i, arr.ind = TRUE)[,2]),
                 statip::mfv1(which(sphere_test_array==i, arr.ind = TRUE)[,3])))
}

# create nifti with same header info as input data and save
sphere_test_nii = copyNIfTIHeader(img = gm_mask_nii, arr = sphere_test_array)
write_nifti(sphere_test_nii, 
            file.path(dirs$searchlight, 
                      sprintf("first_level_%dvox_radius", radius), 
                      sprintf("sphere_test_sub%s", rand_sub)))    

Dataframe with input for analysis

We start by preparing a data frame with one line for each chunk of each subject. The entries define the files to be used for this chunk and some basic analysis parameters. The function to be defined subsequently takes one row of this data frame as input.

in_df <- tibble(subject = rep(subjects, each=n_chunks))
in_df$chunk <- rep(1:n_chunks, length(subjects))
in_df$lut_file <- file.path(dirs$searchlight, sprintf("first_level_%dvox_radius", radius), rep(subjects, each=n_chunks), 
                            "chunks", sprintf("lookup_radius%d_chunk_%02d.txt.gz", 
                                              radius, rep(1:n_chunks, length(subjects))))
in_df$pred_file <- file.path(dirs$searchlight, "rsa_predictions", 
                             sprintf("%s_rsa_info.txt", rep(subjects, each=n_chunks)))

# input files with relevant functional volumes
in_df$rel_vol_pre_fn <- rep(file.path(dirs$searchlight, "rel_vols_4D",
                                      sprintf("%s_run%02d_rel_vols.nii.gz", 
                                              subjects, 1)), each = n_chunks) 
in_df$rel_vol_post_fn <- rep(file.path(dirs$searchlight, "rel_vols_4D",
                                       sprintf("%s_run%02d_rel_vols.nii.gz", 
                                               subjects, 2)), each = n_chunks) 

# file to use to bring data back into brain shape
in_df$vox_num_fn <- rep(file.path(dirs$searchlight, sprintf("first_level_%dvox_radius", radius), subjects, "vox_num.nii.gz"), each = n_chunks)

# output directory
in_df$out_path <- rep(file.path(dirs$searchlight, sprintf("first_level_%dvox_radius", radius), subjects, "chunks"), each = n_chunks)

# minimum number of features
in_df$min_voxels <- min_voxels

head(in_df)

Searchlight implementation

Here, we define the function that implements RSA within each searchlight. The logic of running the searchlight analysis via the look-up table is inspired by and partly based on this blogpost by Joset A. Etzel and the code accompanying it.

For each search sphere, we implemented linear models to quantify the relationship between representational change and the learned temporal structure. Specifically, we assessed the relationship of pattern similarity change and absolute virtual temporal distances, separately for event pairs from the same sequence and for pairs from different sequences. In a third model, we included all event pairs and tested for an interaction effect of a sequence (same or different) predictor and virtual temporal distances. The t-values of the respective regressors of interest were stored at the center voxel of a given search sphere.

The function goes through the same steps as the ROI-based analysis. First, the data of the voxels in each sphere are extracted. The resulting multi-voxel patterns are correlated between picture presentations to yield a trial-by-trial (200-by-200) correlation matrix. For each pair of images, the trial-wise correlations (10-by-10) are averaged such that comparisons of trials from the same block are excluded (i.e. excluding the diagonal of the 10-by-10 squares). This results in the condition-by-condition similarity matrix (20-by-20), which is then subjected to further analysis based on the temporal structure of events. Specifically, linear models are implemented for the different analyses. The resulting t-values are stored at the location of the center voxel of the respective sphere. Thus, for each model we test, we end up with a nifti image that has t-values at the voxel locations of the current chunk.
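
To illustrate the key averaging step with a toy example (random numbers, not pipeline data), the following standalone snippet shows how one picture pair's 10-by-10 block of trial-wise correlations is reduced to a single value while excluding the diagonal, i.e. the comparisons of trials from the same block.

# toy illustration of diagonal-excluded averaging (random data, not pipeline output)
n_blocks <- 10
no_diag <- matrix(TRUE, nrow = n_blocks, ncol = n_blocks)
diag(no_diag) <- FALSE
toy_pair_mat <- matrix(rnorm(n_blocks^2), n_blocks, n_blocks)  # stand-in for one pair's trial-wise correlations
mean(toy_pair_mat[no_diag])  # average over the 90 off-diagonal entries only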

# FUNCTION DEFINITION
run_searchlight <- function(in_df){ #in_df <- in_df[1,]

  ########## SET UP A FEW PARAMS
  sprintf("working on: %s", in_df$lut_file)
  n_pics <- 20
  n_blocks <- 10
  
  # for all 10x10 comparisons we will be averaging all comparisons apart from the diagonal
  # to exclude same_block comparisons
  no_diag <- matrix(data = TRUE, nrow=n_blocks, ncol=n_blocks)
  diag(no_diag)<- FALSE
  
  # indices to extract upper triangle excluding diagonal from pic-by-pic matrices
  triu_idx <- which(upper.tri(matrix(TRUE, n_pics, n_pics), diag = FALSE), arr.ind = TRUE)
  
  
  ########## LOAD THE DATA TO BE USED
  # load the data about the learned temporal relationships
  rsa_info <- read.table(in_df$pred_file)  

  # initialize dataframe to fill (later ps-change data will be added)
  pics <- which(upper.tri(matrix(TRUE, n_pics, n_pics), diag = FALSE), arr.ind=TRUE)
  rsa_df <- tibble(pic1 = pics[,1], pic2 = pics[,2])
  
  # make sure the data frames have the same order    
  if(!(all.equal(rsa_df$pic1, rsa_info$pic1) & all.equal(rsa_df$pic2, rsa_info$pic2))){
    stop('order of data frames does not match [RSA preparations]')}
    
  # store the columns we need for RSA in the data frame
  rsa_df <- cbind(rsa_df, rsa_info[names(rsa_info) %in% c("same_day", "same_day_dv", 
                                                          "vir_time_diff", "order_diff", 
                                                          "real_time_diff")])
  
  # read the look up table
  lookup_tbl <- read.table(gzfile(in_df$lut_file), row.names = 1)
  
  # figure out which voxels to run in this chunk
  n_voxels <- nrow(lookup_tbl)
  
  # read the functional images (takes ~ 2 mins)
  print("starting to load functional images")
  func_nii_pre <- readNIfTI(in_df$rel_vol_pre_fn, reorient=FALSE)
  func_nii_post <- readNIfTI(in_df$rel_vol_post_fn, reorient=FALSE)
  if(!all.equal(dim(func_nii_pre),dim(func_nii_post))){
    stop("functional images of different size!")}# make sure inputs have same dimensions
  
  # load the image with voxel numbers (linear subscripts) that will be used to sort back into brain shape
  vox_num_nii <- readNIfTI(in_df$vox_num_fn, reorient=FALSE)
  if (!all.equal(dim(func_nii_pre)[1:3], dim(vox_num_nii))){
      stop("voxel number image dimensions don't match functional data!")}
  print("finished loading images")
  
  ########## BEGIN THE ACTUAL ANALYSIS
  # initialize the output as all 0 (because FSL does not like NAs)
  rsa_out_array_same_day_vir_time <- array(0, dim(vox_num_nii))
  rsa_out_array_diff_day_vir_time <- array(0, dim(vox_num_nii))
  rsa_out_array_all_pairs_vir_time <- array(0, dim(vox_num_nii))
  rsa_out_array_interaction <- array(0, dim(vox_num_nii))
  #rsa_out_array_n_in_sphere <- array(0, dim(vox_num_nii))

  # loop over all voxels (in this chunk)
  for (v in 1:n_voxels) {  # v <- do.centers[500]

    # print a message to show progress
    if (v%%100 == 0) { print(paste("at", v, "of", n_voxels))}
    
    # find which voxels belong in this center voxel's searchlight
    voxs <- unlist(lookup_tbl[v,], use.names=FALSE)
    voxs <- voxs[which(!is.na(voxs))];  # get rid of NAs. There will be NA entries if some surrounding voxels not in gray matter or brain
    
    # how many surrounding voxels must be in the searchlight? Smaller ones (edge of brain) will be skipped.
    if (length(voxs) > in_df$min_voxels) {
      
      # initialize arrays to store data of current sphere for the pre and post runs
      # images in the rows (voxels in this searchlight only), voxels in the columns
      curr_dat_pre <- array(NA, c(n_pics*n_blocks, length(voxs)))  
      curr_dat_post <- array(NA, c(n_pics*n_blocks, length(voxs)))

      # put the data into a matrix to run analysis
      for (i in 1:length(voxs)) {   # i <- 1
          
        # for the current voxel, take the data from all conditions and store it (separately for pre and post)
        coords <- which(vox_num_nii == voxs[i], arr.ind=TRUE)
        if (ncol(coords) != 3 | nrow(coords) != 1) { stop("wrong sized coords")}
        curr_vox <- func_nii_pre[coords[1], coords[2], coords[3],] # pre
        if (sd(curr_vox) > 0) {curr_dat_pre[,i] <- curr_vox} else { stop("zero variance voxel")} 
        curr_vox <- func_nii_post[coords[1], coords[2], coords[3],] # post
        if (sd(curr_vox) > 0) {curr_dat_post[,i] <- curr_vox} else { stop("zero variance voxel")} 
      }
      
      # data is in repetition (row) by voxel (col) format, so we transpose 
      # to get a voxel x repetition format
      curr_dat_pre <- t(curr_dat_pre)
      curr_dat_post <- t(curr_dat_post)
      
      # calculate correlation matrix (trial by trial) for pre and post run
      cor_mat_pre_trial <- cor(curr_dat_pre, curr_dat_pre)
      cor_mat_post_trial <- cor(curr_dat_post, curr_dat_post)
      
      # initialize condition by condition correlation matrix for pre and post run
      corr_mat_pre <- matrix(nrow = 20, ncol = 20)
      corr_mat_post <- matrix(nrow = 20, ncol = 20)

      # loop over all picture comparisons
      for(i_pic1 in 1:20){
        for(i_pic2 in 1:20){
          
          # extract the current 10x10 correlation matrix
          i1 <- (1+(i_pic1-1)*10):(i_pic1*10)
          i2 <- (1+(i_pic2-1)*10):(i_pic2*10)
          curr_mat_pre <- cor_mat_pre_trial[i1, i2]
          curr_mat_post <- cor_mat_post_trial[i1, i2]
          
          # average the correlations while excluding diagonal (same block comparisons)
          corr_mat_pre[i_pic1, i_pic2] <- mean(curr_mat_pre[no_diag])
          corr_mat_post[i_pic1, i_pic2] <- mean(curr_mat_post[no_diag])
        }
      }
        
      # calculate pattern similarity change based on FisherZ-transformed upper 
      # triangles of the correlation matrices
      rsa_df$ps_change <- FisherZ(corr_mat_post[triu_idx]) - FisherZ(corr_mat_pre[triu_idx])
      
      # find 3D-coordinates of this searchlight center to save output
      coords <- which(vox_num_nii == as.numeric(rownames(lookup_tbl[v,])), arr.ind=TRUE)
      if (ncol(coords) != 3 | nrow(coords) != 1) { stop("wrong sized coords"); }
      
      ########## RUN RSA FOR THIS SPHERE
      # same day virtual time
      fit <- lm(ps_change ~ vir_time_diff, rsa_df[rsa_df$same_day,])
      rsa_out_array_same_day_vir_time[coords[1], coords[2], coords[3]] <- coef(summary(fit))[, "t value"][2]
      
      # different day virtual time
      fit <- lm(ps_change ~ vir_time_diff, rsa_df[!rsa_df$same_day,])
      rsa_out_array_diff_day_vir_time[coords[1], coords[2], coords[3]] <- coef(summary(fit))[, "t value"][2]
      
      # all pairs virtual time 
      fit <- lm(ps_change ~ vir_time_diff, rsa_df)
      rsa_out_array_all_pairs_vir_time[coords[1], coords[2], coords[3]] <- coef(summary(fit))[, "t value"][2]
      
      # day*time interaction
      fit <- lm(ps_change ~ vir_time_diff*same_day_dv, rsa_df)
      rsa_out_array_interaction[coords[1], coords[2], coords[3]] <- coef(summary(fit))[, "t value"][4]
      
      # number of features
      #rsa_out_array_n_in_sphere[coords[1], coords[2], coords[3]] <- length(voxs)
      
    } else{
      stop("number of features too small!")
    }
  }
  ########## SAVE RESULTS
  # create nifti with same header info as input data and write to file
  # same day virtual time
  rsa_out_nii = copyNIfTIHeader(img = vox_num_nii, arr = rsa_out_array_same_day_vir_time)
  write_nifti(rsa_out_nii, file.path(in_df$out_path, 
                                     sprintf("%s_searchlight_same_day_vir_time_chunk%02d", 
                                             in_df$subject, in_df$chunk)))
  
  # different day virtual time
  rsa_out_nii = copyNIfTIHeader(img = vox_num_nii, arr = rsa_out_array_diff_day_vir_time)
  write_nifti(rsa_out_nii, file.path(in_df$out_path, 
                                     sprintf("%s_searchlight_diff_day_vir_time_chunk%02d", 
                                             in_df$subject, in_df$chunk)))
  
  # all pairs virtual time
  rsa_out_nii = copyNIfTIHeader(img = vox_num_nii, arr = rsa_out_array_all_pairs_vir_time)
  write_nifti(rsa_out_nii, file.path(in_df$out_path, 
                                     sprintf("%s_searchlight_all_pairs_vir_time_chunk%02d", 
                                             in_df$subject, in_df$chunk))) 
  
  # day*time interaction
  rsa_out_nii = copyNIfTIHeader(img = vox_num_nii, arr = rsa_out_array_interaction)
  write_nifti(rsa_out_nii, file.path(in_df$out_path, 
                                     sprintf("%s_searchlight_day_time_interaction_chunk%02d", 
                                             in_df$subject, in_df$chunk)))
  
  # no. voxels in sphere
  #rsa_out_nii = copyNIfTIHeader(img = vox_num_nii, arr = rsa_out_array_n_in_sphere)
  #write_nifti(rsa_out_nii, file.path(in_df$out_path, sprintf("%s_searchlight_n_vox_sphere_chunk%02d", in_df$subject, in_df$chunk)))
}

Now we actually run the function for each chunk, either in parallel or serially. Because the analysis is slow (several hours per chunk with the number of chunks used here), it is not feasible to run it serially.

# next step depends on whether we are in parallel or serial mode
if (!run_parallel){ # run serially

  print("Running the searchlight analysis serially. Do this only for testing purposes because the code will run forever")
  
  # run the function for each row of the data frame,
  # i.e. for each block in each run for each subject
  for(i in 1:nrow(in_df)){
    
    tic(sprintf("Subject %s, chunk %d",
                in_df$subject[i], in_df$chunk[i]))
    run_searchlight(in_df = in_df[i,])
    toc()
  }
  
} else if (run_parallel){ # run in parallel, assumes CBS HTCondor is available
  
  # write the data frame to file
  fn <- file.path(here("data","mri", "rsa", "searchlight", sprintf("first_level_%dvox_radius", radius)),
                  "htc_config_run_searchlight.txt")
  fn_def <- cat(sprintf('"fn <- "%s"',fn))
  write.table(in_df, fn,
              append = FALSE, sep = ",", dec = ".", row.names = FALSE, col.names = TRUE)
  
  # store the function definition as text
  func_def <- capture.output(run_searchlight)
  func_def[1] <- paste0("run_searchlight <- ",func_def[1])
  #func_def <- func_def[-length(func_def)]
  
  #write the Rscript that we want to run
  rscript_fn <- here("data","mri", "rsa", "searchlight", sprintf("first_level_%dvox_radius", radius), "run_searchlight.R")
  con <- file(rscript_fn)
  open(con, "w")
  writeLines(c(
    "\n# handle input",
    "args = commandArgs()",
    "i <- as.numeric(args[length(args)])",
    "\n#load required packages",
    noquote(sprintf('lib_dir <- "%s"',"/data/pt_02261/virtem/virtem_code/R3.6.1/library/Linux")),
    '.libPaths(c(lib_dir,.libPaths()))',
    'lapply(c("oro.nifti", "assertthat", "dplyr", "neurobase", "DescTools"), library, character.only = TRUE)',
    "\n# read the data and transform ROI column back to list",
    noquote(sprintf('fn <- "%s"',fn)),
    'func_dat_df <- read.table(fn, sep = ",", header = TRUE, stringsAsFactors = FALSE)',
    "\n#define the function to run motion GLM",
    func_def,
    "\n# run the function on one line of the data frame",
    "run_searchlight(in_df = func_dat_df[i,])"),con)
  close(con)
  
  # folder for condor output
  htc_dir <- here("htc_logs", "first_level_searchlight")
  if(!dir.exists(htc_dir)){dir.create(htc_dir, recursive = TRUE)}
  
  # write the submit script
  fn <- here("data","mri", "rsa", "searchlight", sprintf("first_level_%dvox_radius", radius), "run_searchlight.submit")
  con <- file(fn)
  open(con, "w")
  writeLines(c(
    "universe = vanilla",
    "executable = /afs/cbs.mpg.de/software/scripts/envwrap",
    "request_memory = 13500", # "request_memory = 13500", # works for 3 voxel radius & 40 chunks
    "notification = error"
    ),con)
    
    c <- 0
    for (i in 1:nrow(in_df)){
      if (!file.exists(file.path(in_df[i,]$out_path, 
                                 sprintf("%d_searchlight_all_pairs_vir_time_chunk%02d.nii.gz",
                                         as.numeric(in_df[i,]$subject), in_df[i,]$chunk)))){
        c <- c + 1
        
          writeLines(c(
          sprintf("\narguments = R+ --version 3.6.1 Rscript %s %d", rscript_fn, i),
          sprintf("log = %s/%d.log", htc_dir, i),
          sprintf("output = %s/%d.out", htc_dir, i),
          sprintf("error = %s/%d.err", htc_dir, i),
          sprintf("Queue\n")),con)
      }
    }
  close(con)
  
  # submit to condor and play the waiting game
  batch_id <- system(paste("condor_submit", fn), intern = TRUE)
  batch_id <- regmatches(batch_id[2], gregexpr("[[:digit:]]+", batch_id[2]))[[1]][2]
  cat(sprintf("submitted jobs (ID = %s) for 1st-level searchlights. Time to wait...", batch_id))
  pause_until_batch_done(batch_id = batch_id, wait_interval = 1800) # check every half hour if done 
  #system("condor_q")
}

Combine the chunks

Lastly, we need to combine the results across chunks to get the whole picture.

# analyses that we ran the searchlights for
searchlights <- c("same_day_vir_time", 
                  "diff_day_vir_time", 
                  "day_time_interaction", 
                  "all_pairs_vir_time"#, 
                  #"n_vox_sphere"
                  )

for (i_sub in subjects){
  
  # where are the chunks
  in_dir <- file.path(dirs$searchlight, sprintf("first_level_%dvox_radius", radius), i_sub, "chunks")
  
  for (i_srchlght in searchlights){
    
    # file names of the individual chunks
    in_files <- file.path(in_dir, sprintf("%d_searchlight_%s_chunk%02d.nii.gz", 
                                          as.numeric(i_sub), i_srchlght, 1:n_chunks))
    
    # load the first chunk image, this will serve as the basis for combining all chunks
    comb_nii <- readNIfTI(in_files[1], reorient = FALSE)
    
    for (i_chunk in 2:n_chunks){
      
      # load this chunk's nifti  
      new_nii <- readNIfTI(in_files[i_chunk], reorient = FALSE)
      
      # find where the data is (i.e. where the image is non-zero)
      #new_nii[is.na(new_nii)] <- 0
      coords <- which(new_nii !=0, arr.ind = TRUE)
      
      # the combined image should be 0 at all these coordinates
      if(!(sum(comb_nii[coords]==0) == nrow(coords))){
          stop("chunks overlap!")}
      
      # add this chunk's data to the combined nifti
      comb_nii[coords] <- new_nii[coords]
    }
    
    # make a simple plot to check that the output is shaped like a brain  
    fn <- file.path(dirs$searchlight, sprintf("first_level_%dvox_radius", radius), 
                    i_sub, sprintf("%s_%s", i_sub, i_srchlght))
    jpeg(file = paste0(fn,".jpg"), width = 1000, height = 1000, units = "px")
    ortho2(comb_nii, NA.x=TRUE)
    dev.off()
    
    # write nifti to file
    write_nifti(comb_nii, fn)
  }
}

7.3 Group-level searchlight analysis

After running the first-level analysis, we want to run group-level statistics. Group-level stats will be run using permutation-based one-sample t-tests, as implemented by the sign-flipping procedure in FSL Randomise. This test will be run for all voxels in our field of view as well as in a mask comprising the aHPC and alEC to correct for multiple comparisons within our a priori regions of interest.

First, let’s define some parameters and folders for these analyses.

# searchlight radius (used in file names)
radius <- 3

# analyses that we ran the searchlights for
searchlights <- c("same_day_vir_time", 
                  "diff_day_vir_time", 
                  "day_time_interaction", 
                  "all_pairs_vir_time"
                  )

# what smoothing to apply
FWHM = 3 # in mm
sigma = FWHM/2.3548 # fslmaths needs sigma not FWHM of kernel
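# (conversion: FWHM = 2*sqrt(2*log(2)) * sigma; 2*sqrt(2*log(2)) is approximately 2.3548)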

# Output folder
invisible(lapply(file.path(dirs$data4analysis, "searchlight_results",
                           searchlights), 
                 function(x) if(!dir.exists(x)) dir.create(x, recursive=TRUE)))

# folder for condor output
htc_dir <- here("htc_logs", "srchlght")
if(!dir.exists(htc_dir)){dir.create(htc_dir, recursive = TRUE)}

Move first-level images to MNI space

The resulting t-maps were registered to MNI space for group level statistics and spatially smoothed (FWHM 3mm).

The first level searchlight output is in each participant’s common functional space. For group-level stats we need to register the searchlight outputs to MNI (1mm) space.

# name of transformation matrix file to move from the shared functional space to MNI 1mm
func2standard <- here("data", "mri", "processed", "wholebrain", 
                      paste0("VIRTEM_P", rep(subjects, each = length(searchlights)), ".feat"),
                      "reg", "example_func2standard.mat")

# searchlight result in whole-brain functional space
in_nii_fn <- file.path(dirs$searchlight, sprintf("first_level_%dvox_radius", radius), 
                       rep(subjects, each = length(searchlights)),
                       sprintf("%s_%s.nii.gz", rep(subjects, each = length(searchlights)), searchlights))

# output folder & file name in MNI space
invisible(lapply(file.path(dirs$searchlight, sprintf("mni_%dvox_radius", radius), 
                           searchlights, "3d"), 
                 function(x) if(!dir.exists(x)) dir.create(x, recursive=TRUE)))
out_nii_fn <- file.path(dirs$searchlight, sprintf("mni_%dvox_radius", radius),
                        rep(searchlights,n_subs), "3d",
                        sprintf("%s_%s.nii.gz", 
                                rep(subjects, each = length(searchlights)), searchlights))

# apply FSL flirt to move from whole-brain functional space to 1mm MNI space
invisible(mapply(flirt_apply,
                 infile = in_nii_fn, 
                 reffile = mni_fname("1"),
                 initmat = func2standard,
                 outfile = out_nii_fn,
                 verbose = FALSE, retimg = FALSE))

Merge and smooth first-level images

Now that all first-level images are in MNI space, we are ready to concatenate the images across participants. Then, we subject them to Gaussian smoothing.

# field of view mask
fov_mask <- file.path(dirs$mask_dir, "fov", "fov_mask_mni.nii.gz")

# open the task list
fn <- file.path(htc_dir, "smooth_searchlights_tasklist.txt")
con <- file(fn)
open(con, "w")
  
for (i_srchlght in 1:length(searchlights)){
    
  # file names of files to merge
  in_files <- file.path(dirs$searchlight, sprintf("mni_%dvox_radius", radius),
                        searchlights[i_srchlght], "3d",
                        sprintf("%s_%s.nii.gz", subjects,searchlights[i_srchlght]))
  
  # file name of 4D output file
  fn_4d <- file.path(dirs$searchlight, sprintf("mni_%dvox_radius", radius),
                     searchlights[i_srchlght], sprintf("%s_4d.nii.gz", 
                                                       searchlights[i_srchlght]))
    
  # concatenate the images
  fslmerge(infiles = in_files, direction = "t", outfile = fn_4d, retimg = FALSE, verbose = FALSE)
    
    
  # name of smoothed output file
  fn_4d_smooth <- file.path(dirs$data4analysis, "searchlight_results",
                            searchlights[i_srchlght],
                            sprintf("%s_4d_smooth_fwhm%d.nii.gz", 
                                    searchlights[i_srchlght], FWHM))
  
  # write smoothing command to file
  writeLines(sprintf("fslmaths %s -s %f -mas %s %s", 
                     fn_4d, sigma, fov_mask, fn_4d_smooth), con)
}
close(con)

# submit to cluster
cmd <- sprintf("fsl_sub -T 30 -t %s -l %s -M bellmund@cbs.mpg.de -N smooth_srchlghts", fn, htc_dir)
batch_id <- system(cmd, intern = TRUE)

pause_until_batch_done(batch_id = batch_id, wait_interval = 300)

Run FSL Randomise

Now we are ready to run FSL Randomise.

Group level statistics were carried out using random sign flipping implemented with FSL Randomise and threshold-free cluster enhancement. We corrected for multiple comparisons using a small volume correction mask including our a priori regions of interest, the anterior hippocampus and the anterior-lateral entorhinal cortex.

# masks to use
gm_mask_fn <- file.path(dirs$mask_dir, "gray_matter", "gray_matter_mask.nii.gz")
svc_mask_fn <- file.path(dirs$mask_dir, "svc", "svc_mask.nii.gz")

# open the tasklist
fn <- file.path(htc_dir, "randomise_searchlights_tasklist.txt")
con <- file(fn)
open(con, "w")

for (i_srchlght in 1:length(searchlights)){
  
  # do we want to look at the positive or negative contrast?
  if (any(searchlights[i_srchlght] == c("same_day_vir_time", "day_time_interaction"))){
    test_side = ""
  } else {
    
    # we want to test for a negative effect for these searchlights
    test_side = "_neg"
    
    # multiply by -1 to get the negative contrast 
    orig_file <- file.path(dirs$data4analysis, "searchlight_results",
                           searchlights[i_srchlght], 
                           sprintf("%s_4d_smooth_fwhm%d.nii.gz", 
                                   searchlights[i_srchlght], FWHM))
    mul_neg1_fn <- file.path(dirs$data4analysis, "searchlight_results",
                             searchlights[i_srchlght],
                             sprintf("%s%s_4d_smooth_fwhm%d.nii.gz", 
                                     searchlights[i_srchlght], test_side, FWHM))
    fslmaths(file = orig_file, outfile = mul_neg1_fn, opts = "-mul -1")
  }
  
  # 4D input image to run randomise on
  in_fn <- file.path(dirs$data4analysis, "searchlight_results",
                     searchlights[i_srchlght],
                     sprintf("%s%s_4d_smooth_fwhm%d.nii.gz", 
                             searchlights[i_srchlght], test_side, FWHM))
    
  # output file name for FOV
  out_fn <- file.path(dirs$data4analysis, "searchlight_results",
                      searchlights[i_srchlght], 
                      sprintf("%s%s_randomise_fov_fwhm%d",
                              searchlights[i_srchlght], test_side, FWHM))
  
  # define randomise command for full FOV and write to file
  writeLines(sprintf("randomise -i %s -o %s -1 -T --uncorrp -m %s -n 5000",
                     in_fn, out_fn, gm_mask_fn),con)
  
  # output file name for SVC
  out_fn <- file.path(dirs$data4analysis, "searchlight_results",
                      searchlights[i_srchlght], 
                      sprintf("%s%s_randomise_svc_fwhm%d", 
                              searchlights[i_srchlght], test_side, FWHM))
  
  # define randomise command for small-volume correction and write to file
  writeLines(sprintf("randomise -i %s -o %s -1 -T --uncorrp -m %s -n 5000",
                     in_fn, out_fn, svc_mask_fn),con)
}
close(con)

# submit to cluster
cmd <- sprintf("fsl_sub -T 300 -t %s -l %s -M bellmund@cbs.mpg.de -N randomise_srchlghts", fn, htc_dir)
batch_id <- system(cmd, intern = TRUE)

pause_until_batch_done(batch_id = batch_id, wait_interval = 600)

Find searchlight clusters and atlas labels

Next, we do an automated search in the Randomise results. This is done via FSL cluster and the result is stored in a text file. Then, we add atlas labels based on the Harvard-Oxford Cortical and Subcortical Structural Atlases using FSL atlasquery.

Here is a function that runs the atlasquery command and removes some excess characters around the return string.

find_FSL_atlas_label <- function(atlas = "Harvard-Oxford Cortical Structural Atlas", x=-20, y=-3, z=-26){
  
  # build FSL atlasquery command and send to command line
  cmd<- sprintf('atlasquery -a "%s" -c %s', atlas,sprintf("%f,%f,%f",x,y,z))
  cmd<-sprintf("%s | sed 's/<b>//g' | sed 's/<\\/b><br>/\\,/g'", cmd) # based on FSL autoaq (l.106)
  string <- system(cmd,intern=TRUE)
  
  # remove atlas name
  label <- stringr::str_remove(string, sprintf("%s,", atlas))
  
  return(label)
}
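
As an example (illustration only), the function can be called with either of the two atlases used below; it returns a single comma-separated string of probabilistic labels for the queried MNI coordinate (in mm), which is what we store in the cluster tables.

# example query (illustration only): probabilistic labels at an MNI coordinate in mm
find_FSL_atlas_label(atlas = "Harvard-Oxford Subcortical Structural Atlas", x = -20, y = -3, z = -26)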

We extract clusters that are significant at p<0.05 after correcting for multiple comparisons using small volume correction. Because Randomise outputs 1-p images, this corresponds to thresholding the corrected p-images at 0.95 (and, below, the uncorrected p-images at 0.999). To explore the searchlight results beyond the hippocampal-entorhinal region, we look for clusters at a threshold of p<0.001 uncorrected with a minimum extent of 30 voxels.

We corrected for multiple comparisons using a small volume correction mask including our a priori regions of interest, the anterior hippocampus and the anterior-lateral entorhinal cortex. Further, we used a liberal threshold of p<0.001 (uncorrected) to explore the data for additional effects within our field of view. Exploratory searchlight results are shown in Supplemental Figure 9 and clusters with a minimum extent of 30 voxels are listed in Supplemental Tables 9, 11 and 12.

for (i_srchlght in 1:length(searchlights)){
  
  # searchlight with positive or negative contrast?
  if (any(searchlights[i_srchlght] == c("same_day_vir_time", "day_time_interaction"))){
    test_side <- ""
  } else {
    test_side <- "_neg"
  }
  
  # file with t-values
  t_fn <- file.path(dirs$data4analysis, "searchlight_results",
                    searchlights[i_srchlght], 
                    sprintf("%s%s_randomise_fov_fwhm%d_tstat1.nii.gz",
                            searchlights[i_srchlght], test_side, FWHM))

  # SVC-CORRECTED CLUSTERS
  # file with corrected p-values based on SVC
  corrp_fn <- file.path(dirs$data4analysis, "searchlight_results",
                          searchlights[i_srchlght], 
                          sprintf("%s%s_randomise_svc_fwhm%d_tfce_corrp_tstat1.nii.gz",
                                  searchlights[i_srchlght], test_side, FWHM))
  
  # output text file for clusters significant after small volume correction
  cluster_fn <- file.path(dirs$data4analysis, "searchlight_results",
                                searchlights[i_srchlght], 
                                sprintf("cluster_%s%s_svc_corrp.txt",
                                        searchlights[i_srchlght], test_side, FWHM))
  cmd <- sprintf('cluster -i %s -t 0.95 -c %s --mm > %s', 
                 corrp_fn, t_fn, cluster_fn)
  system(cmd)
  
  # read the cluster file
  cluster_df <- readr::read_tsv(cluster_fn, 
                              col_types = c("dddddddddd____"))
  colnames(cluster_df) <- c("cluster_index", "n_voxel", "max_1minusp",
                            "x", "y", "z", "cog_x", "cog_y", "cog_z", "t_extr")
  
  # add columns for atlas label info
  cluster_df <- cluster_df %>% 
    tibble::add_column(Harvard_Oxford_Cortical = NA, .before = "cluster_index") %>%
    tibble::add_column(Harvard_Oxford_Subcortical = NA, .before = "cluster_index")
    
  # get atlas label info
  if (nrow(cluster_df)>0){
    for (i in 1:nrow(cluster_df)){
      cluster_df$Harvard_Oxford_Cortical[i] <- find_FSL_atlas_label(
        atlas = "Harvard-Oxford Cortical Structural Atlas", 
        x=cluster_df$x[i], y=cluster_df$y[i], z=cluster_df$z[i])
      
      cluster_df$Harvard_Oxford_Subcortical[i] <- find_FSL_atlas_label(
        atlas = "Harvard-Oxford Subcortical Structural Atlas", 
        x=cluster_df$x[i], y=cluster_df$y[i], z=cluster_df$z[i])
    }
  }
  
  # write to new file
  fn <- file.path(dirs$data4analysis, "searchlight_results",
                  searchlights[i_srchlght], 
                  sprintf("cluster_%s%s_svc_corrp_atlas.txt",
                          searchlights[i_srchlght], test_side, FWHM))
  write_tsv(cluster_df, path=fn)
  
  # FOV CLUSTERS AT p<0.001 UNCORRECTED
  # file with uncorrected p-values in entire FOV
  uncorrp_fn <- file.path(dirs$data4analysis, "searchlight_results",
                          searchlights[i_srchlght], 
                          sprintf("%s%s_randomise_fov_fwhm%d_tfce_p_tstat1.nii.gz",
                                  searchlights[i_srchlght], test_side, FWHM))
  # output text file for clusters at p<0.001 uncorrected within the field of view
  cluster_fn <- file.path(dirs$data4analysis, "searchlight_results",
                                searchlights[i_srchlght], 
                                sprintf("cluster_%s%s_fov_uncorrp.txt",
                                        searchlights[i_srchlght], test_side))
  cmd <- sprintf('cluster -i %s -t 0.999 -c %s --mm --minextent=30 > %s', 
                 uncorrp_fn, t_fn, cluster_fn)
  system(cmd)
  
  # read the cluster file
  cluster_df <- readr::read_tsv(cluster_fn, 
                              col_types = c("dddddddddd____"))
  colnames(cluster_df) <- c("cluster_index", "n_voxel", "max_1minusp",
                            "x", "y", "z", "cog_x", "cog_y", "cog_z", "t_extr")
  
  # add columns for atlas label info
  cluster_df <- cluster_df %>% 
    tibble::add_column(Harvard_Oxford_Cortical = NA, .before = "cluster_index") %>%
    tibble::add_column(Harvard_Oxford_Subcortical = NA, .before = "cluster_index")
    
  # get atlas label info (guard against an empty cluster table)
  if (nrow(cluster_df)>0){
    for (i in 1:nrow(cluster_df)){
      cluster_df$Harvard_Oxford_Cortical[i] <- find_FSL_atlas_label(
        atlas = "Harvard-Oxford Cortical Structural Atlas", 
        x=cluster_df$cog_x[i], y=cluster_df$cog_y[i], z=cluster_df$cog_z[i])
      
      cluster_df$Harvard_Oxford_Subcortical[i] <- find_FSL_atlas_label(
        atlas = "Harvard-Oxford Subcortical Structural Atlas", 
        x=cluster_df$cog_x[i], y=cluster_df$cog_y[i], z=cluster_df$cog_z[i])
    }
  }
  
  # write to new file
  fn <- file.path(dirs$data4analysis, "searchlight_results",
                  searchlights[i_srchlght], 
                  sprintf("cluster_%s%s_fov_uncorrp_atlas.txt",
                          searchlights[i_srchlght], test_side, FWHM))
  write_tsv(cluster_df, path=fn)
}

Create outlines of significant clusters

To later show which voxels survive correction for multiple comparisons based on our small-volume correction, we create an outline of the clusters surviving at corrected p<0.05. We do this by binarizing the thresholded mask, dilating it with a spherical kernel with a radius of 2mm, and subtracting the original mask so that only the outline remains.

Voxels within the black outline are significant after correction for multiple comparisons using small volume correction.
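
Since the same threshold-binarize-dilate-subtract steps are applied to both contrasts below, they could be wrapped in a small helper along these lines (a sketch only; the function name and arguments are ours, and the per-contrast code below spells the steps out explicitly):

# hypothetical wrapper around the outline steps used below (assumes fslr is loaded)
make_cluster_outline <- function(corrp_fn, outl_fn, p_thresh = 0.05, radius_mm = 2){
  
  # threshold the 1-p image at 1 - p_thresh and binarize it
  fslmaths(file = corrp_fn, outfile = outl_fn,
           opts = sprintf("-thr %f -bin", 1 - p_thresh),
           retimg = FALSE, verbose = FALSE)
  
  # dilate with a spherical kernel and subtract the binary mask to keep only the outline
  fslmaths(file = outl_fn, outfile = outl_fn,
           opts = sprintf("-kernel sphere %d -dilM -sub %s", radius_mm, outl_fn),
           retimg = FALSE, verbose = FALSE)
}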

Same Day Event Pairs

# to create an outline of the significant cluster, we threshold the small-volume corrected p-image
# at 0.95 (i.e. 1-0.05) and binarize it. Then we dilate it with a spherical kernel and subtract the original mask.
corrpsvc_fn <- file.path(dirs$data4analysis, "searchlight_results", "same_day_vir_time",
                     "same_day_vir_time_randomise_svc_fwhm3_tfce_corrp_tstat1.nii.gz")
outl_fn <- file.path(dirs$data4analysis, "searchlight_results", "same_day_vir_time",
                     "same_day_vir_time_randomise_svc_fwhm3_tfce_corrp_outline.nii.gz")
fslmaths(file=corrpsvc_fn,outfile = outl_fn, opts = "-thr 0.95 -bin", 
         retimg = FALSE, verbose = FALSE)
outl_nii <- fslmaths(file=outl_fn, outfile = outl_fn, 
                     opts = sprintf("-kernel sphere 2 -dilM -sub %s", outl_fn),
                     verbose = FALSE)

Sequence-Time Interaction

# to create an outline of the significant cluster, we threshold the small-volume corrected p-image
# at 0.95 (i.e. 1-0.05) and binarize it. Then we dilate it with a spherical kernel and subtract the original mask.
corrpsvc_fn <- file.path(dirs$data4analysis, "searchlight_results","day_time_interaction",
                         "day_time_interaction_randomise_svc_fwhm3_tfce_corrp_tstat1.nii.gz")
outl_fn <- file.path(dirs$data4analysis, "searchlight_results","day_time_interaction",
                     "day_time_interaction_randomise_svc_fwhm3_tfce_corrp_outline.nii.gz")
fslmaths(file=corrpsvc_fn,outfile = outl_fn, opts = "-thr 0.95 -bin", 
         retimg = FALSE, verbose = FALSE)
outl_nii <- fslmaths(file=outl_fn, outfile = outl_fn, 
                     opts = sprintf("-kernel sphere 2 -dilM -sub %s", outl_fn), 
                     verbose = FALSE)

7.4 Prepare RSA in searchlight peak

To show that overlapping clusters of voxels drive both the within- and across-sequence effects, we will run the across-sequence analysis in the cluster of voxels that shows the within-day effect. Because these are independent comparisons, we can use the within-day searchlight to define a region of interest for the across-day analysis.

To test whether within- and across-sequence representations overlap, we defined an ROI based on the within-sequence searchlight analysis. Specifically, voxels belonging to the cluster around the peak voxel, thresholded at p<0.01 uncorrected within our small volume correction mask, were included. The analysis of representational change was then carried out as described for the other ROIs above.

Create the ROI mask

The first steps are to prepare the ROI mask we want to use. Thus, we need to threshold the ROI from the main analysis in MNI space and move the resulting mask to each participant’s functional space for the analysis.

# threshold the searchlight results
in_fn <- file.path(dirs$data4analysis, "searchlight_results", "same_day_vir_time",
              "same_day_vir_time_randomise_svc_fwhm3_tfce_p_tstat1.nii.gz")
svc_fn <- file.path(dirs$mask_dir, "svc", "svc_mask.nii.gz")
bin_fn <- here("data", "mri", "rois", "mni_masks", "searchlight_same-day_svc.nii.gz")
fsl_thresh(file = in_fn, outfile = bin_fn, thresh = 0.99, opts = sprintf("-bin -mas %s", svc_fn),
           verbose = FALSE, retimg = FALSE)

# peak cluster is in left hemisphere, so don't include any voxels in right hemisphere
lh_fn <- here("data", "mri", "rois", "mni_masks", "left_hemi.nii.gz")
fsl_maths(file=bin_fn, outfile = lh_fn, opts = "-mul 0 -add 1 -roi 91 182 0 218 0 182 0 1",
          retimg = FALSE, verbose = FALSE)
fsl_mul(file=bin_fn, file2=lh_fn, outfile=bin_fn,
        retimg = FALSE, verbose = FALSE)

# let's have a look at the mask we created
mni_nii <- readNIfTI2(mni_fname("1"))
roi_nii <- readNIfTI2(bin_fn)
roi_idx <- which(roi_nii == 1, arr.ind = TRUE)
coords <- c(statip::mfv1(roi_idx[,1]), 
            statip::mfv1(roi_idx[,2]),
            statip::mfv1(roi_idx[,3]))
ortho2(mni_nii, y = roi_nii, xyz = coords, add.orient = TRUE)

# check the number of voxels in this ROI
sum(c(roi_nii))

The resulting ROI mask is now coregistered from MNI 1mm space to the analysis space of the wholebrain functional sequence. Finally, it is thresholded at a probability of 0.5 and binarized.

samespace_dir <- here("data", "mri", "rois", "same-day_searchlight", "samespace")
if (!dir.exists(samespace_dir)){dir.create(samespace_dir, recursive = TRUE)}

# name of transformation matrix file to move from highres to functional space
standard2func <- here("data", "mri", "processed", "wholebrain", 
                      paste0("VIRTEM_P", subjects, ".feat"),
                      "reg", "standard2example_func.mat")

# use the mean EPI of wholebrain image as a reference
mean_epi <- here("data", "mri", "processed", "samespace", paste0("VIRTEM_P", subjects),
                 paste0("VIRTEM_P", subjects, "_wholebrain.nii.gz"))

# define output files in samespace (ss) = analysis space based on the wholebrain EPI
roi_ss <- file.path(samespace_dir, sprintf("P%s_%s_ss.nii.gz", 
                                           subjects, "same-day_searchlight"))

# apply FSL flirt to move ROI from standard to wholebrain functional space
invisible(mapply(flirt_apply, infile = bin_fn, reffile = mean_epi, 
                 initmat = standard2func, outfile = roi_ss,
                 verbose = FALSE, retimg = FALSE))

# use fslmaths to binarize the masked ROIs using a threshold of 0.5
out <- mapply(fsl_thresh, file = roi_ss, outfile = roi_ss, thresh = 0.5, opts = "-bin",
              verbose = FALSE, retimg = FALSE)

Calculate the correlation matrices

The following steps are analogous to the preparation of the functional data in the ROI and searchlight analyses. For the main searchlight analyses, we already cleaned the voxel-wise time series and extracted the volumes relevant for RSA. Thus, to calculate the correlation matrices and the pattern similarity changes, we fall back on the scripts from the main ROI analyses and the searchlight.
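
As in the main ROI analyses, the 10×10 block of trial-wise correlations for each picture pair is averaged after removing the diagonal, because the diagonal entries compare patterns from the same block. A toy illustration of this averaging scheme (made-up values only):

# toy example of the averaging used below: drop the 10 same-block (diagonal) comparisons
set.seed(123)
toy_block <- matrix(runif(100, -1, 1), nrow = 10, ncol = 10)  # made-up trial correlations
no_diag_toy <- !diag(10)                                      # TRUE everywhere off the diagonal
mean(toy_block[no_diag_toy])                                  # mean of the 90 across-block values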

# for all 10x10 comparisons we will be averaging all comparisons apart from the diagonal
# to exclude same_block comparisons
no_diag <- matrix(data = TRUE, nrow=10, ncol = 10)
diag(no_diag)<- FALSE

out_dir <- here("data", "mri","rsa","correlation_matrices", "same-day_searchlight")
if (!dir.exists(out_dir)){dir.create(out_dir, recursive = TRUE)}

for (i_sub in subjects){
  
  # load the ROI based on the searchlight
  roi_fn <-file.path(samespace_dir, sprintf("P%s_%s_ss.nii.gz", 
                                            i_sub, "same-day_searchlight"))
  roi_nii <- readNIfTI(roi_fn, reorient=FALSE)
  
  # array indices of ROI voxels
  roi_vox <- which(roi_nii == 1, arr.ind=TRUE)
  sprintf("%s: %d voxels\n", i_sub, nrow(roi_vox))
  
  for (i_run in 1:n_runs){
    
    # load the relevant functional volumes
    rel_vol_fn <- file.path(dirs$searchlight, "rel_vols_4D", 
                                  sprintf("%s_run%02d_rel_vols.nii.gz", i_sub, i_run))
    func_nii <- readNIfTI(rel_vol_fn, reorient=FALSE)
    
    # get the ROI voxels: volumes (n_pics x n_blocks) in rows, ROI voxels in columns
    rel_dat <- array(NA, c(n_pics*n_blocks, nrow(roi_vox)))
    for (i in 1:nrow(roi_vox)) {
          
        curr_vox <- func_nii[roi_vox[i,1], roi_vox[i,2], roi_vox[i,3],]
        if (sd(curr_vox) > 0) {rel_dat[,i] <- curr_vox} else { stop("zero variance voxel")}
     }

    # data is in repetition (row) by voxel (col) format, so we transpose 
    # to get a voxel x repetition format
    rel_dat <- t(rel_dat)
    
    # calculate the trial-by-trial correlation matrix for this run
    cor_mat_trial <- cor(rel_dat)

    # initialize the picture-by-picture correlation matrix for this run
    corr_mat <- matrix(nrow = 20, ncol = 20)
    
    # loop over all picture comparisons
    for(i_pic1 in 1:20){
      for(i_pic2 in 1:20){
        
        # extract the current 10x10 correlation matrix
        i1 <- (1+(i_pic1-1)*10):(i_pic1*10)
        i2 <- (1+(i_pic2-1)*10):(i_pic2*10)
        curr_mat <- cor_mat_trial[i1, i2]

        # average the correlations while excluding diagonal (same block comparisons)
        corr_mat[i_pic1, i_pic2] <- mean(curr_mat[no_diag])
      }
    }
    
    # save the correlation matrix
    fn <- file.path(out_dir, sprintf("%s_%s_%s_corr_mat.txt", 
                                     i_sub, "same-day_searchlight", runs[i_run]))
    write.table(corr_mat, fn, append = FALSE, sep = ",",
                dec = ".", row.names = FALSE, col.names = FALSE)
  }
}

Calculate Pattern Similarity Change

In the next step, we calculate how the correlations between patterns change from the first to the second picture viewing task, i.e. through the day learning task. To do this, we load both correlation matrices and reduce them to the upper triangle, excluding the diagonal. The correlations are then Fisher Z-transformed, and the similarity values from the pre-learning picture viewing task are subtracted from those of the post-learning picture viewing task to isolate pattern similarity change. The correlations and their changes are saved together with information about which pictures were compared.

This part is based on the corresponding script from the main ROI analyses.
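
Fisher's Z-transform is the inverse hyperbolic tangent, so for a single picture pair the change score boils down to a one-liner (illustrative values only):

# pattern similarity change for one (made-up) picture pair:
# Fisher Z = atanh(r) = 0.5 * log((1 + r) / (1 - r))
pre_r  <- 0.10
post_r <- 0.25
atanh(post_r) - atanh(pre_r)   # positive values indicate increased similarity after learning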

in_dir <- here("data", "mri","rsa","correlation_matrices", "same-day_searchlight")
out_dir <- here("data", "mri","rsa","pattern_similarity_change", "same-day_searchlight")
if (!dir.exists(out_dir)){dir.create(out_dir, recursive = TRUE)}

for(i_sub in subjects){

  # load pre correlation matrix
  fn <- file.path(in_dir, sprintf("%s_%s_pre_corr_mat.txt", 
                                  i_sub, "same-day_searchlight"))
  corr_mat_pre <- read.table(fn, sep = ",", dec = ".")    
  
  # load post correlation matrix
  fn <- file.path(in_dir, sprintf("%s_%s_post_corr_mat.txt", 
                                  i_sub, "same-day_searchlight"))
  corr_mat_post <- read.table(fn, sep = ",", dec = ".")
  
  # reduce to upper triangle of correlations (without diagonal)
  pre_corrs <- corr_mat_pre[upper.tri(corr_mat_pre, diag = FALSE)]
  post_corrs <- corr_mat_post[upper.tri(corr_mat_post, diag = FALSE)]
  
  # create a tibble with the correlations 
  pics <- which(upper.tri(corr_mat_post), arr.ind=TRUE)
  corr_df <- tibble(pic1 = pics[,1], pic2 = pics[,2], pre_corrs, post_corrs)
  
  # Fisher Z transform correlations and calculate pattern similarity change
  # by subtracting the pre-correlations from the post-correlations
  corr_df$ps_change <- FisherZ(corr_df$post_corrs) - FisherZ(corr_df$pre_corrs)
  
  # write to file
  fn <- file.path(out_dir, sprintf("%s_%s_pattern_similarity_change.txt", 
                                   i_sub, "same-day_searchlight"))
  write.table(corr_df, fn, append = FALSE, sep = ",",
              dec = ".", row.names = FALSE, col.names = TRUE)
}

Combine behavioral and fMRI data for RSA

To create the input for the RSA, the next step is to combine the similarity change data with the behavioral data so that we can run meaningful analyses. First, the behavioral data are loaded and brought into the same pair-wise format as the similarity change data. Both datasets are then combined for each subject and written to disk.

This part is based on the corresponding script from the main ROI analyses.
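
Both the pattern similarity change files and the behavioral pairing below rely on which() enumerating the upper triangle of a 20×20 matrix column by column, so the 190 picture pairs line up across the two tables. A quick standalone check:

# standalone check of the pair enumeration used below
pairs <- which(upper.tri(matrix(nrow = 20, ncol = 20)), arr.ind = TRUE)
nrow(pairs)       # 190 picture pairs
head(pairs, 3)    # first pairs: row/column indices (1,2), (1,3), (2,3)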

in_dir <- here("data", "mri","rsa","pattern_similarity_change", "same-day_searchlight")
out_dir <- here("data", "mri","rsa","data_for_rsa")
if (!dir.exists(out_dir)){dir.create(out_dir, recursive = TRUE)}

for (i_sub in subjects){
  
  # load the behavioral data
  fn <- file.path(dirs$timeline_dat_dir, sprintf("%s_behavior_tbl_timeline.txt", i_sub))
  col_types_list <- cols_only(sub_id = col_character(), day = col_factor(), 
                              event = col_integer(), pic = col_integer(),
                              virtual_time = col_double(), real_time = col_double(),
                              memory_time = col_double(), memory_order = col_double(), 
                              sorted_day = col_integer())
  beh_dat <- read_csv(fn, col_types = col_types_list)
  
  # sort behavioral data according to picture identity
  beh_dat_ordered <- beh_dat[order(beh_dat$pic),]
  
  # find the order of comparisons
  pairs <- which(upper.tri(matrix(nrow = 20, ncol = 20)), arr.ind=TRUE)
  
  # extract the data for the first and second picture in each pair from the behavioral data
  pic1_dat <- beh_dat_ordered[pairs[,1],]
  pic2_dat <- beh_dat_ordered[pairs[,2],]
  colnames(pic1_dat) <- paste0(colnames(pic1_dat), "1")
  colnames(pic2_dat) <- paste0(colnames(pic2_dat), "2")
  
  # combine the two tibbles
  pair_dat <- cbind(pic1_dat, pic2_dat)
  
  # reorder the tibble columns to make a bit more sense
  pair_dat <- pair_dat %>%
    select(sub_id1,
           day1, day2,
           pic1, pic2,
           event1, event2, 
           virtual_time1, virtual_time2, 
           real_time1, real_time2, 
           memory_order1, memory_order2, 
           memory_time1, memory_time2, 
           sorted_day1, sorted_day2) %>%
    rename(sub_id = sub_id1)
  
  rsa_dat <- tibble()

  # load the pattern similarity change data for this ROI
  fn <- file.path(in_dir, sprintf("%s_%s_pattern_similarity_change.txt", 
                                  i_sub, "same-day_searchlight"))
  ps_change_dat <- read.csv(fn)
  
  # make sure both files list the picture pairs in the same order
  assertthat::assert_that(assertthat::are_equal(
    c(pair_dat$pic1, pair_dat$pic2), c(ps_change_dat$pic1, ps_change_dat$pic2)))
  
  # add column with ROI name
  ps_change_dat <- add_column(ps_change_dat, roi = "same-day_searchlight")

  # collect the data from this ROI and merge into long data frame
  roi_dat <- cbind(pair_dat, ps_change_dat[,3:6])
  rsa_dat <- rbind(rsa_dat, roi_dat)
  
  # write to file
  fn <- file.path(dirs$rsa_dat_dir, sprintf("%s_data_for_rsa_same-day_searchlight.txt",i_sub))
  write.table(rsa_dat, fn, append = FALSE, sep = ",",
              dec = ".", row.names = FALSE, col.names = TRUE)
}

Finally, the datasets are combined across subjects.

# set up a dataframe to collect the data from all subjects
rsa_dat <- tibble()

for (i_sub in subjects){

  # load data from CSV
  fn <- file.path(out_dir, sprintf("%s_data_for_rsa_same-day_searchlight.txt",i_sub))
  col_types_list <- cols_only(
        sub_id = col_character(),
        day1 = col_integer(), day2 = col_integer(),
        event1 = col_integer(), event2 = col_integer(),
        pic1 = col_integer(), pic2 = col_integer(),
        virtual_time1 = col_double(), virtual_time2 = col_double(),
        real_time1 = col_double(), real_time2 = col_double(),
        memory_time1 = col_double(), memory_time2 = col_double(), 
        memory_order1 = col_double(), memory_order2 = col_double(), 
        sorted_day1 = col_integer(), sorted_day2 = col_integer(),
        pre_corrs  = col_double(), post_corrs = col_double(),
        ps_change = col_double(), roi = col_factor()
  )
  sub_dat <- read_csv(fn, col_types = col_types_list)
  
  # append to table with data from all subjects
  rsa_dat <- bind_rows(sub_dat, rsa_dat)
}

# sort the data
rsa_dat <- rsa_dat[with(rsa_dat, order(sub_id, day1, day2, event1, event2)),]

# write to file
fn <- file.path(dirs$data4analysis, "rsa_data_in_same-seq_searchlight_peak.txt")
write.table(rsa_dat, fn, append = FALSE, sep = ",",
            dec = ".", row.names = FALSE, col.names = TRUE)