4 Prepare behavioral data
As a first step, we create a design table for each participant that collects information about the assignment of events to virtual days and when the events occur with respect to virtual time and real time (seconds).
# Build the design table for each participant and write it to file.
#
# For every subject the table has one row per event (n_days * n_events_day
# rows) with the columns sub_id, day, event, virtual_time, real_time and pic
# (ID of the picture shown for that event). The table is written to
# data/behavior/design_tbl/<sub_id>_design_tbl.txt.
#
# Relies on the globals n_days and n_events_day and on here(), tibble() and
# write_csv() being available (defined/loaded elsewhere in the project).
#
# subjects: character vector of subject IDs, e.g. c("031", "032").
# Returns NULL invisibly; the function is called for its side effect.
virtem_behavior_prepare_design_tbl <- function(subjects = c("031", "032")) {

  # read a headerless design file and flatten it row by row into one vector
  # (presumably one row per virtual day -- TODO confirm against the files)
  read_design_times <- function(fname) {
    c(t(as.matrix(read.csv(fname, header = FALSE))))
  }

  # load design file with virtual time
  virtual_time <- read_design_times(
    here("data", "behavior", "design", "daysTime.txt")
  )

  # load design file with times of events in actual time in seconds
  # (after day start) --> real time
  real_time <- read_design_times(
    here("data", "behavior", "design", "daysTimeS.txt")
  )

  n_events_total <- n_days * n_events_day

  # the check below expects each day's pictures to occur 7 times in the input
  # file, always in the same order
  n_reps <- 7

  # output folder is the same for all subjects, so create it only once
  out_dir <- here("data", "behavior", "design_tbl")
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }

  for (sub_id in subjects) {

    # initialize the tibble; pic is filled in below
    design_tbl <- tibble(
      sub_id = rep(sub_id, n_events_total),
      day = rep(seq_len(n_days), each = n_events_day),
      event = rep(seq_len(n_events_day), times = n_days),
      virtual_time = virtual_time,
      real_time = real_time,
      pic = numeric(n_events_total)
    )

    # load design file to get stimulus assignments
    fname <- here("data", "behavior", "logs", "input_files",
                  sprintf("P%s_input.txt", sub_id))
    input_data <- read.table(fname)

    # extract the trials where pics were presented
    pic_trial_idx <- input_data[, 5] == 1 # picture trial if column 5 is a 1
    pic_trials <- input_data[pic_trial_idx, ]

    # assign the picture IDs of each day to the respective days
    # (column 3 is compared against the day number, column 6 holds the ID)
    for (i_day in seq_len(n_days)) {

      # all repetitions for this day
      all_reps <- pic_trials[pic_trials[, 3] == i_day, 6]

      # IDs of the first repetition
      first_rep <- all_reps[seq_len(n_events_day)]
      design_tbl$pic[design_tbl$day == i_day] <- first_rep

      # check assumption that IDs are correct is met
      if (!identical(all_reps, rep(first_rep, n_reps))) {
        stop("error when finding picture IDs for subject ", sub_id)
      }
    }

    # sanity check: compare to Lorena's day order
    #fname <- here("data", "behavior", "design", sprintf("P%s_daysOrder.txt", sub_id))
    #days_order <- read.csv(fname, header = FALSE)
    #days_order <- c(t(as.matrix(days_order)))
    #stopifnot(all(days_order == design_tbl$pic))

    # write design tibble to file for this subject
    write_csv(design_tbl,
              file.path(out_dir, sprintf("%s_design_tbl.txt", sub_id)))
  }

  invisible(NULL)
}
# call the function to get the design tibble for each subject and write it to file
virtem_behavior_prepare_design_tbl(subjects = subjects)
4.1 Sorting task
Here is a description of the task from the methods:
The day sorting task (Figure 1D) was performed in front of a computer screen. The 20 event images from the day learning task were presented on the screen in a miniature version. They were arranged in a circle around a central area displaying 4 rectangles. Participants were instructed to drag and drop all events of the same sequence into the same rectangle with a computer mouse. Participants freely chose which rectangle corresponded to which sequence as the sequences were not identifiable by any label and were presented in differing orders across mini-blocks during learning.
Thus, in analysis, we take the grouping as provided by the rectangles and assign the four groups of events to the four days in a way that maximizes the overlap between actual days and sorted days. This may be trivial if performance is perfect or very high, but if several scenes were mixed up between days, it can become difficult to ascertain which day participants were trying to re-assemble. The best fitting permutation is written to file.
For analysis of the sorting task, we took the grouping of event images as provided by the participants and assigned them to the four sequences to ensure maximal overlap between actual and sorted sequence memberships. While the assignment of groupings to sequences is unambiguous when performance is, as in our sample, high, this procedure is potentially liberal at lower performance levels. We then calculated the percentage of correctly sorted event images for each participant, see the raincloud plot (100) in Figure 2A.
# Prepare the data from the day sorting task.
#
# For every subject, the quadrant into which each picture was sorted is read
# from the task logfile. Every permutation of quadrant-to-day assignments is
# then scored by how many pictures it places on their true day; the best
# permutation is used to recode quadrants into day labels. The design table
# with the added column sorted_day is written to
# data/behavior/day_sorting/<sub_id>_behavior_tbl_day_sorting.txt.
#
# Relies on the globals n_days and n_events_day and on here(), read_csv(),
# as_tibble(), readMat(), permn() and write_csv() being available.
#
# subjects: character vector of subject IDs, e.g. c("036", "037").
# Returns (invisibly) a numeric vector with the number of correctly sorted
# pictures for each subject, in the order of subjects.
virtem_behavior_prepare_data_day_sorting <- function(subjects = c("036", "037")) {

  # initialize
  n_pics <- n_days * n_events_day
  n_correct <- numeric(length(subjects))
  out_dir <- here("data", "behavior", "day_sorting")
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }

  # all possible assignments of quadrants to days, one permutation per row
  all_perms <- do.call(rbind, permn(seq_len(n_days)))

  for (i_sub in seq_along(subjects)) {

    sub_id <- subjects[i_sub]

    # load design file with true assignment of stimuli to days
    fname <- here("data", "behavior", "design_tbl",
                  sprintf("%s_design_tbl.txt", sub_id))
    col_classes <- c("sub_id" = "c", "day" = "n", "event" = "n", "pic" = "n")
    design_tbl <- as_tibble(read_csv(fname, col_types = col_classes))

    # load the data from the day sorting task and store what we need
    fname <- here("data", "behavior", "logs", "day_sorting",
                  sprintf("%s_rectangles_results.mat", sub_id))
    sorting_log <- readMat(fname)
    obj_pos <- sorting_log$objectPositions

    # extract quadrant for each image from its coordinates
    # (column 1 is used as x, column 2 as y)
    x_pos <- obj_pos[, 1]
    y_pos <- obj_pos[, 2]

    # a picture lying exactly on an axis (or without a position) cannot be
    # assigned to a quadrant
    if (anyNA(x_pos) || anyNA(y_pos) || any(x_pos == 0 | y_pos == 0)) {
      stop("Error, can't sort!")
    }

    # x < 0, y > 0 --> quadrant 1;  x > 0, y > 0 --> quadrant 2
    # x < 0, y < 0 --> quadrant 3;  x > 0, y < 0 --> quadrant 4
    pic_q <- numeric(n_pics)
    pic_q[seq_len(nrow(obj_pos))] <- ifelse(
      y_pos > 0,
      ifelse(x_pos < 0, 1, 2),
      ifelse(x_pos < 0, 3, 4)
    )

    # match quadrant number to days for all possible combinations
    n_matches <- numeric(nrow(all_perms))
    for (i_perm in seq_len(nrow(all_perms))) {

      # get vector with pic-day assignment for this permutation:
      # pictures whose true day is all_perms[i_perm, i_day] are expected in
      # quadrant i_day
      comb_to_test <- numeric(n_pics)
      for (i_day in seq_len(n_days)) {
        pics_of_day <- design_tbl$pic[design_tbl$day == all_perms[i_perm, i_day]]
        comb_to_test[pics_of_day] <- i_day
      }

      # count the number of matches
      n_matches[i_perm] <- sum(comb_to_test == pic_q)
    }

    # find the best permutation and the number of hits
    winner_perm <- which.max(n_matches)
    n_correct[i_sub] <- max(n_matches)

    # sort the behavioral responses in the same way as the design table;
    # this works because the data in the sorting task logfile follows the
    # picture number
    to_sort <- seq_len(n_pics)
    sort_idx <- match(design_tbl$pic, to_sort)
    # make sure we have identical vectors now
    stopifnot(design_tbl$pic == to_sort[sort_idx])
    sorted_pic_q <- pic_q[sort_idx]

    # store which pic was sorted to which day based on winning permutation
    design_tbl$sorted_day <- numeric(n_pics)
    for (i_day in seq_len(n_days)) {
      # recode the day labels to match the day labels in the design tibble
      idx <- sorted_pic_q == i_day
      design_tbl$sorted_day[idx] <- all_perms[winner_perm, i_day]
    }
    stopifnot(sum(design_tbl$day == design_tbl$sorted_day) == n_correct[i_sub])

    # write design tibble to file for this subject
    write_csv(design_tbl,
              file.path(out_dir,
                        sprintf("%s_behavior_tbl_day_sorting.txt", sub_id)))
  }

  invisible(n_correct)
}
# call the function for all subjects
virtem_behavior_prepare_data_day_sorting(subjects = subjects)
4.2 Timeline task
In this task, participants saw a timeline ranging from 6 a.m. to midnight together with miniature versions of the five event images belonging to one sequence (Figure 1E). Participants were instructed to drag and drop the event images next to the timeline so that scene positions reflected the event times they had inferred in the day learning task. To facilitate precise alignment to the timeline, event images were shown with an outward pointing triangle on their left side, on which participants were instructed to base their responses.
Participants' responses are read out from the logfiles of this task and converted to virtual hours. The data are saved in the text file including all behavioral data (virtem_behavioral_data.txt).
# Prepare the data from the timeline task.
#
# For every subject, the table written by the day sorting step is loaded and
# extended by two columns: memory_time (remembered event time in virtual
# hours, read from the timeline logfiles) and memory_order (rank of the
# remembered times within each day). The result is written to
# data/behavior/timeline/<sub_id>_behavior_tbl_timeline.txt.
#
# Relies on the globals n_days and n_events_day and on here(), read_csv(),
# as_tibble(), readMat(), str_extract(), the dplyr verbs and write_csv()
# being available.
#
# subjects: character vector of subject IDs, e.g. c("036", "037", "039").
# Returns NULL invisibly; the function is called for its side effect.
virtem_behavior_prepare_data_timeline <- function(subjects = c("036", "037", "039")) {

  # initialize
  out_dir <- here("data", "behavior", "timeline")
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }

  for (sub_id in subjects) {

    # load design file with true assignment of stimuli to days
    # (& data from day sorting)
    fname <- here("data", "behavior", "day_sorting",
                  sprintf("%s_behavior_tbl_day_sorting.txt", sub_id))
    col_classes <- c("sub_id" = "c", "day" = "n", "event" = "n",
                     "virtual_time" = "d", "real_time" = "d",
                     "pic" = "n", "sorted_day" = "d")
    design_tbl <- as_tibble(read_csv(fname, col_types = col_classes))

    # add column for remembered time based on timeline task
    design_tbl$memory_time <- numeric(n_days * n_events_day)

    for (i_day in seq_len(n_days)) {

      # load the data from the timeline task from this day
      fname <- here("data", "behavior", "logs", "timeline",
                    sprintf("%s_timeline_results_day%d.mat", sub_id, i_day))
      timeline_log <- readMat(fname)

      for (i_pic in seq_len(n_events_day)) {

        # get the number of this picture; the names are read from every
        # second entry of ud[[2]], starting with the first
        pic_name <- timeline_log$ud[[2]][1 + (i_pic - 1) * 2]
        pic <- as.numeric(str_extract(pic_name, "\\d{1,2}"))

        # which row in our table are we looking at
        tbl_idx <- which(design_tbl$pic == pic)
        if (length(tbl_idx) != 1) {
          # without this check memory_time would silently stay at 0
          stop("could not match timeline picture to design table for subject ",
               sub_id, ", day ", i_day, ", picture ", i_pic)
        }

        # extract the response from the logfile and transform it to be in
        # virtual hours: a position of -0.5 maps to 6 and +0.5 to 24
        # (presumably the two ends of the timeline -- TODO confirm)
        design_tbl$memory_time[tbl_idx] <-
          (timeline_log$objectPositions[i_pic, 2] + 0.5) * 18 + 6
      }
    }

    # store the remembered order for each virtual day based on remembered times
    design_tbl <- design_tbl %>%
      group_by(day) %>%
      mutate(memory_order = rank(memory_time, ties.method = "first")) %>%
      ungroup()

    # write design tibble to file for this subject
    write_csv(design_tbl,
              file.path(out_dir,
                        sprintf("%s_behavior_tbl_timeline.txt", sub_id)))
  }

  invisible(NULL)
}
# run for all subjects
virtem_behavior_prepare_data_timeline(subjects = subjects)
Now we are ready to combine the data from the two memory tests into the final dataframe that we write to file for the actual analyses.
# Combine the behavioral tables of all subjects into one data frame.
#
# Reads data/behavior/timeline/<sub_id>_behavior_tbl_timeline.txt for every
# subject, stacks the tables, brings the columns into a more intuitive order
# and writes the result to "behavioral_data.txt" in dirs$data4analysis.
#
# Relies on the global dirs and on here(), read_csv(), cols_only(),
# as_tibble(), bind_rows() and write_csv() being available.
#
# subjects: character vector of subject IDs, e.g. c("036", "037").
# Returns the value of write_csv(), i.e. the combined table, invisibly.
virtem_behavior_prepare_data_combine_across_subjects <- function(subjects = c("036", "037")) {

  if (length(subjects) == 0) {
    stop("no subjects supplied")
  }

  # columns to read; note that sub_id is parsed as an integer here
  col_types_list <- cols_only(
    sub_id = col_integer(),
    day = col_integer(),
    event = col_integer(),
    pic = col_integer(),
    virtual_time = col_double(),
    real_time = col_double(),
    memory_time = col_double(),
    memory_order = col_double(),
    sorted_day = col_integer()
  )

  # load the data of each subject from CSV
  tbl_list <- lapply(subjects, function(sub_id) {
    fname <- here("data", "behavior", "timeline",
                  sprintf("%s_behavior_tbl_timeline.txt", sub_id))
    as_tibble(read_csv(fname, col_types = col_types_list))
  })

  # one table with data from all subjects
  beh_data <- bind_rows(tbl_list)

  # reorder to have a more intuitive order; selecting by name does not
  # depend on the order of the columns in the files
  col_order <- c("sub_id", "day", "event", "pic", "virtual_time", "real_time",
                 "memory_time", "memory_order", "sorted_day")
  beh_data <- beh_data[, col_order]

  # write data to file
  write_csv(beh_data, file.path(dirs$data4analysis, "behavioral_data.txt"))
}
# combine behavioral data frames across subjects
virtem_behavior_prepare_data_combine_across_subjects(subjects = subjects)
We have a similar dataframe from Nicole Montijn’s study conducted at Utrecht University that we will later use to replicate the generalization bias. Let’s move it to the folder with the analysis data.
# copy to analysis data folder (that will be shared)
# file.copy() returns FALSE when nothing was copied, e.g. because the file
# already exists at the destination (overwrite defaults to FALSE) or the
# source file is missing
fname <- here("data", "behavior", "replication_data_montijn", "beh_dataNDM.txt")
file.copy(fname, dirs$data4analysis)
## [1] FALSE
4.3 Picture viewing tasks
In the picture viewing tasks (Figure 1B), participants viewed a stream of the event images. Their task was to look at the images attentively and to respond via button press whenever a target picture, which showed the father feeding the family’s dog, was presented.
Below, we check how well participants detected the targets.
# Target detection in the picture viewing tasks: percentage of hits and
# average reaction time on hit trials, for each subject and run.
# Relies on the globals subjects, runs, n_runs and dirs.

# collect one summary row per run and subject, then bind them once at the end
pvt_results <- vector("list", n_runs * length(subjects))
i_result <- 0

for (i_run in seq_len(n_runs)) {
  for (i_sub in seq_along(subjects)) {

    # load the logfile for this run (pre or post)
    log_fn <- file.path(dirs$pvt_log_dir,
                        sprintf("P%s_%svirtem.txt", subjects[i_sub], runs[i_run]))
    pvt_log <- read.table(log_fn)
    colnames(pvt_log) <- c("pic", "fix_start", "pic_start", "volume",
                           "response", "RT", "trial_end")

    # add column for block and subject ID to log
    pvt_log <- pvt_log %>%
      add_column(run = runs[i_run]) %>%
      add_column(sub_id = subjects[i_sub])

    # calculate proportion of hits and average RT;
    # only trials with pic == 21 are used (taken to be the target picture)
    curr_dat <- pvt_log %>%
      filter(pic == 21) %>%
      summarise(sub_id = unique(sub_id),
                run = unique(run),
                perc_hits = sum(response) / n() * 100,
                avg_rt = mean(RT[response == 1])) # average RT for hits

    # store for the overall table
    i_result <- i_result + 1
    pvt_results[[i_result]] <- curr_dat
  }
}

# overall table, rows ordered by run and then by subject
pvt_target_detect <- bind_rows(pvt_results)
head(pvt_target_detect)
sub_id | run | perc_hits | avg_rt |
---|---|---|---|
031 | pre | 100 | 921 |
032 | pre | 90 | 993 |
033 | pre | 70 | 904 |
034 | pre | 100 | 948 |
035 | pre | 100 | 710 |
036 | pre | 100 | 747 |
# calculate mean and standard deviation across participants, separately for
# each run, of the percentage of hits and of the average reaction times
pvt_target_detect_summary <- pvt_target_detect %>%
  group_by(run) %>%
  summarise(mean_perc_hits = mean(perc_hits),
            sd_perc_hits = sd(perc_hits),
            mean_rt = mean(avg_rt),
            sd_rt = sd(avg_rt),
            .groups = "drop")
head(pvt_target_detect_summary)
run | mean_perc_hits | sd_perc_hits | mean_rt | sd_rt |
---|---|---|---|---|
post | 95.7 | 6.9 | 841 | 162 |
pre | 95.7 | 7.9 | 881 | 131 |
Target detection in picture viewing task:
Pre-learning: 95.71% ± 7.90% mean±standard deviation of percentage of hits; 881.34ms ± 131.43ms mean±standard deviation of average reaction times
Post-learning: 95.71% ± 6.90% mean±standard deviation of percentage of hits; 841.40ms ± 162.16ms mean±standard deviation of average reaction times