# script 00: read initial boilerplate data (for students)
# load required packages --------------------------------------------------
if (!requireNamespace("boilerplate", quietly = TRUE)) {
if (!requireNamespace("devtools", quietly = TRUE)) {
install.packages("devtools") # install devtools if missing
}::install_github("go-bayes/boilerplate") # install boilerplate
devtools
}
library(boilerplate) # manage boilerplate data
library(cli) # friendly messages
library(here) # project paths
library(fs) # file system utilities
# ensure correct boilerplate version --------------------------------------
<- "1.0.43"
min_version if (utils::packageVersion("boilerplate") < min_version) {
stop(
"please install boilerplate >= ", min_version, ":\n",
" devtools::install_github('go-bayes/boilerplate')"
)
}
::cli_h1("boilerplate loaded ✔")
cli
# create local data folders ------------------------------------------------
<- here::here("example_boilerplate_data")
path_data <- here::here("quarto")
path_quarto
::dir_create(path_data) # create data folder if needed
fs::cli_h2("data folder ready ✔")
cli
# import student boilerplate data ------------------------------------------
# close connections
closeAllConnections()
# function
<- function() {
load_student_boilerplate <- "https://raw.githubusercontent.com/go-bayes/templates/main/student_boilerplate_data/"
base_url <- c("measures", "methods", "results", "discussion", "appendix", "template")
categories
::cli_text("loading student boilerplate data from GitHub…")
cli
# initialise empty list and assign category names
<- vector("list", length(categories))
student_db names(student_db) <- categories
for (cat in categories) {
::cli_text(" – loading {.strong {cat}} database…")
cli<- paste0(base_url, cat, "_db.rds")
rds_url <- tempfile(fileext = ".rds")
tmp_file
<- tryCatch({
student_db[[cat]] # download to a temporary file, then read it
::download.file(rds_url, tmp_file,
utilsmode = "wb",
quiet = TRUE,
method = "libcurl")
readRDS(tmp_file)
error = function(e) {
}, ::cli_alert_warning("failed to load {.strong {cat}}: {e$message}")
clilist() # fallback empty list
finally = {
}, # always remove the temp file
unlink(tmp_file)
})
}
::cli_text("successfully loaded {length(student_db)} categories")
cli
student_db
}
# after clearing connections, run:
<- load_student_boilerplate()
student_unified_db
# save imported data -------------------------------------------------------
boilerplate_save(
student_unified_db,data_path = path_data,
create_backup = FALSE
)::cli_h1("data saved ✔")
cli
# set up bibliography and APA-7 template -----------------------------------
::dir_create("quarto") # for title.tex
fs
# download title.tex (probably safe to overwrite)
download.file(
url = "https://raw.githubusercontent.com/go-bayes/templates/refs/heads/main/quarto/title.tex",
destfile = "quarto/title.tex",
mode = "wb"
)
::dir_create("bibliography")
fs::dir_create("csl")
fs
# check if references.bib exists before downloading
<- "quarto/references.bib"
bib_path if (fs::file_exists(bib_path)) {
::cli_alert_info("references.bib already exists - skipping download")
cli::cli_text(" existing file: {.file {bib_path}}")
clielse {
} download.file(
url = "https://raw.githubusercontent.com/go-bayes/templates/refs/heads/main/bib/references.bib",
destfile = bib_path,
mode = "wb"
)::cli_alert_success("references.bib downloaded")
cli
}
# download apa7.csl (style file - probably safe to update)
download.file(
url = "https://raw.githubusercontent.com/go-bayes/templates/refs/heads/main/csl/apa-7.csl",
destfile = "quarto/apa7.csl",
mode = "wb"
)
::cli_h1("bibliography and CSL setup complete ✔")
cli::cli_h1("bibliography and CSL setup complete ✔")
cli
# end of script: do not rerun this file ------------------------------------
cat(student_unified_db$methods$analytic_approach$general_approach_cate_long)
Part 1: Quarto manuscripts
Required Download Quarto here: - Use the prelease
version: https://quarto.org/docs/download/ Optional - (Bulbulia 2024) link - (Hoffman et al. 2023) link
- Entering bibliographic details
- Writing up your manuscript
Code review: YOUR analysis
- Create a new Rstudio project
- Modify these scripts
Quarto with boilerplate
Script 0: Set up your library/
Note – rerun this script if you are missing updated templates, as they have been updated 23 May 2025
Type y
when R asks whether to overwrite the boilerplate files; this replaces them with the latest versions. Press n
(or just hit Enter) to keep what you already have.
Your own edits—such as custom rows in the measures table – are stored elsewhere, so they will not be overwritten. One tap, no tears.
Take-away: y
refreshes the templates; n
preserves the status quo.
This is how the the printout in your R console should look (depending on how recently you’ve run your script):
> # save imported data -------------------------------------------------------
> boilerplate_save(
+ student_unified_db,
+ data_path = path_data,
+ create_backup = FALSE
+ )
6 databases
ℹ preparing to save database (1/6)
ℹ processing measures in measures database
ℹ no changes detected /Users/your_machine_path-/example_boilerplate_data/measures_db.rds
ℹ saving measures database to
✔ saved measures databasedatabase (2/6)
ℹ processing methods 1 new entries will be added:
ℹ + student_target_population
ℹ ! 2 entries will be removed:
! - target_population
! - sample
/n]: y
Save methods database with these changes? [y/Users/joseph/GIT/psych-434-2025/example_boilerplate_data/methods_db.rds
ℹ saving methods database to
✔ saved methods databasedatabase (3/6)
ℹ processing results in results database
ℹ no changes detected /Users/joseph/GIT/psych-434-2025/example_boilerplate_data/results_db.rds
ℹ saving results database to
✔ saved results databasedatabase (4/6)
ℹ processing discussion 2 existing entries will be modified:
ℹ ~ student_authors_statement
ℹ ~ student_ethics
ℹ /n]: y
Save discussion database with these changes? [y/Users/joseph/GIT/psych-434-2025/example_boilerplate_data/discussion_db.rds
ℹ saving discussion database to
✔ saved discussion databasedatabase (5/6)
ℹ processing appendix in appendix database
ℹ no changes detected /Users/joseph/GIT/psych-434-2025/example_boilerplate_data/appendix_db.rds
ℹ saving appendix database to
✔ saved appendix databasedatabase (6/6)
ℹ processing template in template database
ℹ no changes detected /Users/joseph/GIT/psych-434-2025/example_boilerplate_data/template_db.rds
ℹ saving template database to
✔ saved template database6 databases ✔ successfully saved all
Script 1: Adding/Revising Measures
Note – rerun this script if you are missing updated templates, as they have been updated 23 May 2025
# script 01: add or revise measures (for students)
# load required packages --------------------------------------------------
if (!requireNamespace("boilerplate", quietly = TRUE)) {
if (!requireNamespace("devtools", quietly = TRUE)) {
install.packages("devtools") # install devtools if missing
}::install_github("go-bayes/boilerplate") # install boilerplate
devtools
}
library(boilerplate) # tools for measure management
library(cli) # user-friendly messages
library(here) # project-friendly file paths
# ensure correct boilerplate version --------------------------------------
if (utils::packageVersion("boilerplate") < "1.0.41") {
stop("please install boilerplate >= 1.0.41: \
devtools::install_github('go-bayes/boilerplate')")
}
::cli_h1("boilerplate loaded ✔")
cli
# define data paths --------------------------------------------------------
<- here::here("example_boilerplate_data")
path_src <- here::here("final_boilerplate_data")
path_final
# create final data directory if needed -----------------------------------
if (!dir.exists(path_final)) {
dir.create(path_final)
}::cli_h1("data folder ready ✔")
cli
# import unified database --------------------------------------------------
<- boilerplate_import(data_path = path_src)
unified_db
# save a copy to avoid overwriting original --------------------------------
boilerplate_save(
unified_db,data_path = path_final,
output_file = "unified_db"
)
::cli_h2("database imported and saved ✔")
cli
# inspect structure and existing measures ----------------------------------
str(unified_db, max.level = 2) # glance at top-level structure
print(names(unified_db$measures)) # list defined measures
# example: check for a specific measure ------------------------------------
<- "emp_job_satisfaction"
measure_name if (is.null(unified_db$measures[[measure_name]])) {
::cli_alert_info("{measure_name} not defined yet")
clielse {
} print(unified_db$measures[[measure_name]])
}
# add a new measure --------------------------------------------------------
$measures[[measure_name]] <- list(
unified_dbname = "Job Satisfaction",
description = "job satisfaction was measured with a single item.",
reference = "eisenbarth2022aspects",
waves = "1-present",
keywords = c("employment", "mental health"),
items = list("how satisfied are you with your current job?")
)
$measures[["cyberbulling"]] <- list( # use the measure name in `colnames(df_nz_long)`
unified_db# note that cyberbulling is not actually in our simulated dataset
# this is just an llustration
name = "Cyberbulling",
description = "Cyberbulling was measured with a single item.",
reference = "wang2019cyberbullying",
waves = "6-11",
keywords = c("mental health"),
items = list("Has someone ever used the internet, a mobile phone, or digital camera to hurt, intimidate or embarrass you?")
)
# saving a new measure
$measures[["pwi"]] <- list(
unified_dbname = "Personal Well-Being Index",
description = "The Personal Well-Being Index consists of three items, asking 'How satisfied are you with...' ",
reference = "cummins2003development",
waves = "1-present",
keywords = c("employment", "mental health"),
items = list("'Your standard of living.'", "'Your health.'", "'Your future security.'", "'Your personal relationships.'")
)
# save with backup ---------------------------------------------------------
boilerplate_save(
unified_db,data_path = path_final,
create_backup = TRUE
)::cli_h2("new measure added and saved ✔")
cli
# revise an existing measure ------------------------------------------------
<- "family_time_binary"
revise_name ::cli_h1("revising {revise_name}")
cli
# use modifyList to update only changed fields
$measures[[revise_name]] <- modifyList(
unified_db$measures[[revise_name]],
unified_dblist(
name = "Family Time (binary)",
description = "code string (binary): 0 = none, 1 = any time",
reference = "@sibley2020",
waves = "10-13",
keywords = c("cooperation")
)
)
# view revised measure ------------------------------------------------------
print(unified_db$measures[[revise_name]])
# save all changes -----------------------------------------------------------
boilerplate_save(
unified_db,data_path = path_final,
confirm = TRUE
)::cli_h2("measure revised and saved ✔")
cli
# to reload updated database, uncomment the following line --------------
# unified_db <- boilerplate_import(data_path = path_final)
- remember to type ‘y’ to save changes
When re-running your scripts, the printout should allow you to save to your working database folder. When saving tick ‘y’
> # save a copy to avoid overwriting original --------------------------------
> boilerplate_save(
+ unified_db,
+ data_path = path_final,
+ output_file = "unified_db"
+ )
6 databases
ℹ preparing to save database (1/6)
ℹ processing measures in measures database
ℹ no changes detected /Users/joseph/GIT/psych-434-2025/final_boilerplate_data/measures_db.rds
ℹ saving measures database to
✔ saved measures databasedatabase (2/6)
ℹ processing methods 1 new entries will be added:
ℹ + student_target_population
ℹ ! 2 entries will be removed:
! - target_population
! - sample
/n]: y
Save methods database with these changes? [y: /Users/your_path/final_boilerplate_data/methods_db.rds.20250523_131930.bak
ℹ created backup at/Users/your_path/final_boilerplate_data/methods_db.rds
ℹ saving methods database to
✔ saved methods databasedatabase (3/6)
ℹ processing results in results database
ℹ no changes detected /Users/your_path/final_boilerplate_data/results_db.rds
ℹ saving results database to
✔ saved results databasedatabase (4/6)
ℹ processing discussion 2 existing entries will be modified:
ℹ ~ student_authors_statement
ℹ ~ student_ethics
ℹ /n]: y
Save discussion database with these changes? [y: /Users/joseph/GIT/psych-434-2025/final_boilerplate_data/discussion_db.rds.20250523_131933.bak
ℹ created backup at/Users/joseph/GIT/psych-434-2025/final_boilerplate_data/discussion_db.rds
ℹ saving discussion database to
✔ saved discussion databasedatabase (5/6)
ℹ processing appendix in appendix database
ℹ no changes detected /Users/joseph/GIT/psych-434-2025/final_boilerplate_data/appendix_db.rds
ℹ saving appendix database to
✔ saved appendix databasedatabase (6/6)
ℹ processing template in template database
ℹ no changes detected /Users/joseph/GIT/psych-434-2025/final_boilerplate_data/template_db.rds
ℹ saving template database to
✔ saved template database6 databases ✔ successfully saved all
Script 3: Setting Up Your Manuscript Document
- save this document as a
.qmd
file in a directory called ‘quarto’ (in your rstudio project) - please run the code line-by-line (ctr/cmd + ENTER) before rendering
- to render a document click ‘Render’
Before you run the analysis, check whether you inverted any outcomes.
Set:
<- TRUE # ← make TRUE if you flipped outcomes; FALSE otherwise use_flipped
---
: "Your Title"
title: |
abstract**Background**: (Brief few sentences)
**Objectives**:
1. Estimate the causal effect of YOUR EXPOSURE on YOUR OUTCOMES measured one year later.
2. Evaluate whether these effects vary across the population.
3. Provide policy guidance on which individuals might benefit most.
**Method**: We conducted a three-wave retrospective cohort study (waves XX-XXX, October XXXX--October XXXX) using data *SIMULATED* from the New Zealand Attitudes and Values Study, a nationally representative panel. Participants were eligible if they participated in the NZAVS in the baseline wave (XXXX,...). We defined the exposure as (XXXX > NUMBER on a 1-7 Likert Scale (1 = yes, 0 = no)). To address attrition, we applied inverse probability of censoring weights; to improve external validity, we applied weights to the population distribution of Age, Ethnicity, and Gender. We computed expected mean outcomes for the population in each exposure condition (high XXXX/low XXXXX). Under standard causal assumptions of unconfoundedness, the contrast provides an unbiased average treatment effect. We then used causal forests to detect heterogeneity in these effects and employed policy tree algorithms to identify individuals ("strong responders") likely to experience the greatest benefits.
**Results**: Increasing XXXXX leads to XXXXX. Heterogeneous responses to (e.g. *Forgiveness*, *Personal Well-Being*, and *Life-Satisfaction*...) reveal structural variability in subpopulations...
**Implications**: (Brief few sentences)
**Keywords**: *Causal Inference*; *Cross-validation*; *Distress*; *Employment*; *Longitudinal*; *Machine Learning*; *Religion*; *Semi-parametric*; *Targeted Learning*.
:
author- name: YOUR NAME
: Victoria University of Wellington, New Zealand
affiliation: XXXXX
email: yes
corresponding: [Causal Inference, Cross-validation,...]
keywords:
editor_options: console
chunk_output_type: "last-modified"
date: libertinus
fontfamily: references.bib
bibliography: apa7.csl
csl:
format: # comment this out if you want pdf
docx: false # comment this out if you want pdf
default:
pdf-engine: lualatex
pdf: true
sanitise-tex: true
keep-citations: true
link: true
colorlinks: article
documentclass: ["single column"]
classoption: false
lof: false
lot:
geometry- top=30mm
- left=25mm
- heightrounded
- headsep=22pt
- headheight=11pt
- footskip=33pt
- ignorehead
- ignorefoot
-includes:
header- \let\oldtabular\tabular
- \renewcommand{\tabular}{\small\oldtabular}
- \setlength{\tabcolsep}{4pt} # adjust this value as needed
:
execute: false
echo: false
warning: true
include: true
eval---
```{r}
#| label: setup
#| echo: false
#| include: false
#| eval: true
# ── initialisation and setup ──────────────────────────────────────────────────
# save this file in your project root (e.g. 'quarto/1_setup.R')
# load here to manage paths
dep <- requireNamespace("here", quietly = TRUE)
if (!dep) install.packages("here")
library(here)
# create required folders (these will likely already exist)
dirs <- c(
here("quarto"),
here("bibliography"),
here("save_directory"),
here("csl")
)
for (d in dirs) {
if (!dir.exists(d)) dir.create(d, recursive = TRUE)
}
# ensure tinytex for PDF rendering
if (!requireNamespace("tinytex", quietly = TRUE)) {
install.packages("tinytex")
tinytex::install_tinytex()
}
# ensure pacman for package management
if (!requireNamespace("pacman", quietly = TRUE)) {
install.packages("pacman")
}
# min version of margot
if (packageVersion("margot") < "1.0.54") {
stop(
"please install margot >= 1.0.54 for this workflow\n
run:
devtools::install_github('go-bayes/margot')
"
)
}
# call library
library("margot")
# check package version
packageVersion(pkg = "margot")
# load (and install if needed) all required packages
pacman::p_load(
boilerplate, shiny, tidyverse,
kableExtra, glue, patchwork, stringr,
ggplot2, ggeffects, parameters,
table1, knitr, extrafont, here, cli
)
# load fonts (requires prior extrafont::font_import())
if (requireNamespace("extrafont", quietly = TRUE)) {
extrafont::loadfonts(device = "all")
} else {
message("'extrafont' not installed; skipping font loading")
}
# reproducibility
set.seed(123)
# copy CSL and BibTeX into quarto folder
src_files <- list(
c(here("csl", "apa7.csl"), here("quarto", "apa7.csl")),
c(here("bibliography", "references.bib"), here("quarto", "references.bib"))
)
for (f in src_files) {
if (!file.exists(f[2]) && file.exists(f[1])) {
file.copy(f[1], f[2])
}
}
# ── define paths and import data ──────────────────────────────────────────────
push_mods <- here::here("save_directory")
final_boilerplate_data <- here::here("final_boilerplate_data")
unified_db <- boilerplate_import(data_path = final_boilerplate_data)
# ── read study variables ──────────────────────────────────────────────────────
baseline_vars <- margot::here_read("baseline_vars")
exposure_var <- margot::here_read("exposure_var")
outcome_vars <- margot::here_read("outcome_vars")
# ── define study waves ────────────────────────────────────────────────────────
baseline_wave <- margot::here_read("baseline_wave")
exposure_waves <- margot::here_read("exposure_waves")
outcome_wave <- margot::here_read("outcome_wave")
baseline_wave_glued <- glue::glue(baseline_wave)
# ── define study parameters ───────────────────────────────────────────────────
study_years <- "2018-2021"
name_exposure <- here_read("name_exposure")
name_outcome_variables <- "MY OUTCOME VARIABLES IN THIS STUDY"
name_exposure_lower <- tolower(name_exposure)
# ── read tables for manuscript ────────────────────────────────────────────────
markdown_table_baseline <- margot::here_read("baseline_table")
markdown_table_exposures <- margot::here_read("exposure_table")
markdown_table_outcomes <- margot::here_read("outcomes_table")
# margot_bind_tables_markdown <- margot::here_read("margot_bind_tables_markdown")
# ── sample size information ───────────────────────────────────────────────────
n_total <- margot::here_read("n_total")
n_participants <- here_read("n_participants")
# ── variable labels and mappings ──────────────────────────────────────────────
var_labels_measures <- here_read("var_labels_measures")
label_mapping_all <- here_read("label_mapping_all")
# ── import data for visualisation ─────────────────────────────────────────────
original_df <- margot::here_read('df_wide', push_mods)
df_grf <- margot::here_read('df_grf', push_mods)
# co-variates
E <- margot::here_read('E', push_mods)
# select covariates and drop numeric attributes
X <- margot::remove_numeric_attributes(df_grf[E])
# ── define nice names and regimes ─────────────────────────────────────────────
nice_name_exposure <- stringr::str_to_sentence(name_exposure)
name_outcomes_lower <- "multi-dimensional wellbeing"
nice_name_outcome <- stringr::str_to_sentence(name_outcomes_lower)
# title
ate_title <- glue("ATE Effects of {nice_name_exposure} on {nice_name_outcome}")
# ── define exposure thresholds and regimes ────────────────────────────────────
lower_cut <- here_read("lower_cut")
upper_cut <- here_read("upper_cut")
threshold <- here_read("threshold")
inverse_threshold <- here_read("inverse_threshold")
scale_range <- margot::here_read("scale_range")
# create and check variables
value_exposure <- glue::glue(threshold,"", upper_cut,", ", scale_range)
value_control <- glue::glue(inverse_threshold, upper_cut,", ", scale_range)
# regimes
name_control_regime_lower <- glue::glue("low {name_exposure_lower}")
value_exposure_regime <- glue::glue("Set {name_exposure} {threshold} {upper_cut} {scale_range}")
value_control_regime <- glue::glue("Set {name_exposure} {inverse_threshold} {upper_cut} {scale_range}")
contrast_template <- "We used causal forests to estimate an average treatment effect as a contrast between *{name_control_regime_lower}* and *{name_exposure_lower}* on {name_outcomes_lower}."
contrast_text <- glue(contrast_template)
# ── import histogram of binary exposure ───────────────────────────────────────
graph_cut <- margot::here_read("graph_cut")
# ── verify assumptions (positivity) ───────────────────────────────────────────
transition_tables <- margot::here_read("transition_tables")
transition_tables_binary <- here_read("transition_tables_binary")
# ── generate measures text for methods section ────────────────────────────────
baseline_measures_text <- boilerplate_generate_measures(
variable_heading = "Baseline Covariates",
variables = baseline_vars,
db = unified_db,
heading_level = 3,
subheading_level = 4,
print_waves = FALSE,
label_mappings = var_labels_measures
)
exposure_measures_text <- boilerplate_generate_measures(
variable_heading = "Exposure Variable",
variables = name_exposure,
db = unified_db,
heading_level = 3,
subheading_level = 4,
print_waves = FALSE,
label_mappings = var_labels_measures
)
outcome_measures_text <- boilerplate_generate_measures(
variable_heading = "Outcome Variables",
variables = outcome_vars,
db = unified_db,
heading_level = 3,
subheading_level = 4,
print_waves = FALSE,
label_mappings = var_labels_measures
)
# ── exposure description from database ────────────────────────────────────────
measures_exposure <- glue::glue(unified_db$measures[[name_exposure]]$description)
# ── set plot defaults for ate plots ───────────────────────────────────────────
base_defaults_binary <- list(
type = "RD",
title = ate_title,
e_val_bound_threshold = 1.2,
colors = c(
"positive" = "#E69F00",
"not reliable" = "grey50",
"negative" = "#56B4E9"
),
x_offset = -0.25,
x_lim_lo = -0.25,
x_lim_hi = 0.25,
text_size = 5,
linewidth = 0.75,
estimate_scale = 1,
base_size = 20,
point_size = 4,
title_size = 20,
subtitle_size = 16,
legend_text_size = 10,
legend_title_size = 10,
include_coefficients = FALSE
)
# ── create plot options for outcomes ──────────────────────────────────────────
outcomes_options_all <- margot_plot_create_options(
title = ate_title,
base_defaults = base_defaults_binary,
subtitle = "",
filename_prefix = "grf"
)
# ── load model results ────────────────────────────────────────────────────────
models_binary <- margot::here_read_qs("models_binary", push_mods)
# ──────────────────────────────────────────────────────────────────────────────
# CRITICAL DECISION POINT: FLIPPED OUTCOMES OR NOT
# ──────────────────────────────────────────────────────────────────────────────
# ********** READ THIS CAREFULLY **********
# if you DID NOT flip any outcomes:
# - set: use_flipped <- FALSE
# - this will use models_binary throughout
# - this will use label_mapping_all throughout
#
# if you DID flip outcomes:
# - set: use_flipped <- TRUE
# - ensure models_binary_flipped_all exists
# - ensure flip_outcomes vector exists
# - this will create label_mapping_all_flipped
use_flipped <- TRUE # <- MAKE TRUE IF YOU FLIPPED OUTCOMES, `FALSE` otherwise
# set up variables based on whether outcomes were flipped
if (use_flipped) {
# check that required objects exist
if (!exists("models_binary_flipped_all")) {
models_binary_flipped_all <- here_read_qs("models_binary_flipped_all", push_mods)
}
# try to read flip_outcomes and flipped_names
# use tryCatch to handle missing files gracefully
flip_outcomes <- tryCatch(
here_read("flip_outcomes"),
error = function(e) {
stop("flip_outcomes file not found. Please ensure it exists if use_flipped = TRUE")
}
)
flipped_names <- tryCatch(
here_read("flipped_names"),
error = function(e) {
stop("flipped_names file not found. Please ensure it exists if use_flipped = TRUE")
}
)
# create flipped label mapping
label_mapping_all_flipped <- margot_reversed_labels(label_mapping_all, flip_outcomes)
# use flipped models and labels
models_for_analysis <- models_binary_flipped_all
labels_for_analysis <- label_mapping_all_flipped
flipped_list <- paste(flipped_names, collapse = ", ")
} else {
# use standard models and labels (no flipping)
models_for_analysis <- models_binary
labels_for_analysis <- label_mapping_all
# ensure flipped_names is a character vector, not a function
# remove any existing flipped_names object that might be a function
if (exists("flipped_names") && is.function(flipped_names)) {
rm(flipped_names)
}
flipped_names <- character(0) # empty character vector
flipped_list <- ""
}
devtools::load_all("/Users/joseph/GIT/margot/")
# ── average treatment effects (ate) analysis ──────────────────────────────────
ate_results <- margot_plot(
models_binary$combined_table,
options = outcomes_options_all,
label_mapping = label_mapping_all, # always use standard labels for ate
include_coefficients = FALSE,
save_output = FALSE,
order = "evaluebound_asc",
original_df = original_df,
e_val_bound_threshold = 1.2,
rename_ate = TRUE,
adjust = "bonferroni",
alpha = 0.05
)
# check original table
models_binary$combined_table
# check results table:
ate_results$transformed_table
# check interpretation:
cat(ate_results$interpretation)
# check plot data is correct
# plot_data <- ate_results$plot$data
# print(plot_data[plot_data$outcome == "Social Belonging", c("2.5 %", "97.5 %")])
# ── make nice markdown table -----------------──────────────────────────────────
margot_bind_tables_markdown <- margot_bind_tables(
ate_results$transformed_table,
sort_E_val_bound = "desc",
e_val_bound_threshold = 1.2,
# ← choose threshold
highlight_color = NULL,
bold = TRUE,
rename_cols = TRUE,
col_renames = list("E-Value" = "E_Value", "E-Value bound" = "E_Val_bound"),
rename_ate = TRUE,
threshold_col = "E_Val_bound",
output_format = "markdown",
kbl_args = list(
booktabs = TRUE,
caption = NULL,
align = NULL
)
)
# ──────────────────────────────────────────────────────────────────────────────
# HETEROGENEITY ANALYSIS
# this section adapts based on whether outcomes were flipped
# ──────────────────────────────────────────────────────────────────────────────
# check package version for heterogeneity analysis
stopifnot(utils::packageVersion("margot") >= "1.0.54")
# helper function for printing tables
print_rate <- function(tbl) {
tbl |>
mutate(across(where(is.numeric), \(x) round(x, 2))) |>
kbl(format = "markdown")
}
# ── 1. screen for heterogeneity (rate autoc + rate qini) ─────────────────────
rate_results <- margot_rate(
models = models_for_analysis,
policy = "treat_best",
alpha = 0.20,
adjust = "fdr",
label_mapping = labels_for_analysis
)
# interpret rate results
if (use_flipped) {
rate_interp <- margot_interpret_rate(
rate_results,
flipped_outcomes = flipped_names,
adjust_positives_only = TRUE
)
} else {
rate_interp <- margot_interpret_rate(
rate_results,
flipped_outcomes = NULL, # no flipped outcomes
adjust_positives_only = TRUE
)
}
cat(rate_interp$comparison, "\n")
cli_h2("Analysis ready for Appendix ✔")
# organise model names by evidence strength
model_groups <- list(
autoc = rate_interp$autoc_model_names,
qini = rate_interp$qini_model_names,
either = rate_interp$either_model_names,
exploratory = rate_interp$not_excluded_either
)
# ── 2. plot rate autoc curves (if any exist) ─────────────────────────────────
if (length(model_groups$autoc) > 0) {
autoc_plots <- margot_plot_rate_batch(
models = models_for_analysis,
save_plots = FALSE,
label_mapping = labels_for_analysis,
model_names = model_groups$autoc
)
# store first autoc name if it exists
if (nrow(rate_results$rate_autoc) > 0) {
autoc_name_1 <- rate_results$rate_autoc$outcome[[1]]
}
} else {
autoc_plots <- list()
message("no significant rate autoc results found")
}
# ── 3. qini curves + gain interpretation ──────────────────────────────────────
# define plot settings (move here so they exist even if not used)
policy_tree_defaults <- list(
point_alpha = 0.5,
title_size = 12,
subtitle_size = 12,
axis_title_size = 12,
legend_title_size = 12,
split_line_color = "red",
split_line_alpha = 0.8,
split_label_color = "red",
split_label_nudge_factor = 0.007
)
decision_tree_defaults <- list(
span_ratio = 0.1,
text_size = 4,
y_padding = 0.5,
edge_label_offset = 0.02,
border_size = 0.01
)
# run initial qini analysis
qini_results <- margot_policy(
models_for_analysis,
save_plots = FALSE,
output_dir = here::here(push_mods),
decision_tree_args = decision_tree_defaults,
policy_tree_args = policy_tree_defaults,
model_names = names(models_for_analysis$results),
original_df = original_df,
label_mapping = labels_for_analysis,
max_depth = 2L,
output_objects = c("qini_plot", "diff_gain_summaries")
)
# interpret qini results
qini_gain <- margot_interpret_qini(
qini_results,
label_mapping = labels_for_analysis
)
print_rate(qini_gain$summary_table)
cat(qini_gain$qini_explanation, "\n")
reliable_ids <- qini_gain$reliable_model_ids
# ── 4. policy trees (only if reliable models exist) ──────────────────────────
if (length(reliable_ids) > 0) {
# recompute for reliable models only
qini_results_valid <- margot_policy(
models_for_analysis,
save_plots = FALSE,
output_dir = here::here(push_mods),
decision_tree_args = decision_tree_defaults,
policy_tree_args = policy_tree_defaults,
model_names = reliable_ids,
original_df = original_df,
label_mapping = labels_for_analysis,
max_depth = 2L,
output_objects = c("qini_plot", "diff_gain_summaries")
)
qini_plots <- map(qini_results_valid, ~ .x$qini_plot)
qini_names <- margot_get_labels(reliable_ids, labels_for_analysis)
# compute policy trees
policy_results_2L <- margot_policy(
models_for_analysis,
save_plots = FALSE,
output_dir = here::here(push_mods),
decision_tree_args = decision_tree_defaults,
policy_tree_args = policy_tree_defaults,
model_names = reliable_ids,
max_depth = 2L,
original_df = original_df,
label_mapping = labels_for_analysis,
output_objects = c("combined_plot")
)
policy_plots <- map(policy_results_2L, ~ .x$combined_plot)
# generate plain language interpretation
policy_text <- margot_interpret_policy_batch(
models = models_for_analysis,
original_df = original_df,
model_names = reliable_ids,
label_mapping = labels_for_analysis,
max_depth = 2L
)
cat(policy_text, "\n")
} else {
qini_plots <- list()
policy_plots <- list()
qini_names <- character(0)
policy_text <- "No reliable heterogeneous treatment effects found."
message("no reliable qini models found - skipping policy tree analysis")
}
cli::cli_h1("Finished: heterogeneity analysis complete ✔")
# ──────────────────────────────────────────────────────────────────────────────
# OPTIONAL: PLANNED SUBGROUP COMPARISON
# uncomment this section if you want to do subgroup analysis
# ──────────────────────────────────────────────────────────────────────────────
# # ── subgroup comparison settings ──────────────────────────────────────────────
# x_offset_comp <- 1.0
# x_lim_lo_comp <- -1.0
# x_lim_hi_comp <- 1.0
#
# base_defaults_comparisons <- list(
# type = "RD",
# title = ate_title,
# e_val_bound_threshold = 1.2,
# label_mapping = "label_mapping_all",
# adjust = "bonferroni",
# alpha = 0.05,
# colors = c(
# "positive" = "#E69F00",
# "not reliable" = "grey50",
# "negative" = "#56B4E9"
# ),
# x_offset = x_offset_comp,
# x_lim_lo = x_lim_lo_comp,
# x_lim_hi = x_lim_hi_comp,
# text_size = 8,
# linewidth = 0.75,
# estimate_scale = 1,
# base_size = 18,
# point_size = 4,
# title_size = 19,
# subtitle_size = 16,
# legend_text_size = 10,
# legend_title_size = 10,
# include_coefficients = FALSE
# )
#
# # define age-based subgroups
# complex_condition_age <- between(X[,"t0_age_z"], -1, 1)
#
# # check age bounds on the raw scale
# mean(original_df$t0_age) + c(-1, 1) * sd(original_df$t0_age)
#
# # age subsets
# subsets_standard_age <- list(
# Younger = list(
# var = "t0_age_z",
# value = -1,
# operator = "<",
# label = "Age < 35"
# ),
# Middle = list(
# var = "t0_age_z",
# subset_condition = complex_condition_age,
# label = "Age 35-62"
# ),
# Older = list(
# var = "t0_age_z",
# value = 1,
# operator = ">",
# label = "Age > 62"
# )
# )
#
# # run batch subgroup analysis
# planned_subset_results <- margot_planned_subgroups_batch(
# domain_models = list(models_binary), # always use models_binary for subgroups
# X = X,
# base_defaults = base_defaults_comparisons,
# subset_types = list(cohort = subsets_standard_age),
# original_df = original_df,
# label_mapping = label_mapping_all, # always use standard labels
# domain_names = "wellbeing",
# subtitles = "",
# adjust = "bonferroni",
# alpha = 0.05
# )
#
# # create comparison plot
# plots_subgroup_age_young_old <- wrap_plots(
# list(
# planned_subset_results$wellbeing$cohort$results$`Age < 35`$plot,
# planned_subset_results$wellbeing$cohort$results$`Age > 62`$plot
# ), ncol = 1) +
# plot_annotation(
# title = "Younger vs Older",
# theme = theme(plot.title = element_text(size = 18, face = "bold"))
# )
#
# # compare young vs old groups
# group_comparison_age_young_old <- margot_compare_groups(
# group1_name = "People Under 35 Years Old",
# group2_name = "People Over 62 Years Old",
# planned_subset_results$wellbeing$cohort$results$`Age < 35`$transformed_table,
# planned_subset_results$wellbeing$cohort$results$`Age > 62`$transformed_table,
# type = "RD",
# decimal_places = 3
# )
# ──────────────────────────────────────────────────────────────────────────────
# DEFINE GLOBAL VARIABLES FOR TEXT GENERATION
# ──────────────────────────────────────────────────────────────────────────────
global_vars <- list(
name_exposure_variable = nice_name_exposure,
n_total = n_total,
ate_adjustment = "bonferroni",
ate_alpha = "0.05",
cate_adjustment = "Benjamini–Hochberg false-discovery-rate adjustment",
cate_alpha = "0.1",
sample_ratio_policy = "70/30",
n_participants = n_participants,
exposure_variable = name_exposure,
name_exposure_lower = name_exposure_lower,
name_control_regime_lower = name_control_regime_lower,
name_outcome_variables = "Self Esteem", # <- adjust to your study
name_outcomes_lower = name_outcomes_lower,
name_exposure_capfirst = nice_name_exposure,
measures_exposure = measures_exposure,
value_exposure_regime = value_exposure_regime,
value_control_regime = value_control_regime,
flipped_list = flipped_list,
appendix_explain_grf = "E",
appendix_assumptions_grf = "F",
name_exposure_threshold = "1",
name_control_threshold = "0",
appendix_measures = "A",
value_control = value_control,
value_exposure = value_exposure,
appendix_positivity = "C",
appendix_rate = "D",
appendix_qini_curve = "D",
train_proportion_decision_tree = ".7",
training_proportion = ".7",
sample_split = "70/30",
sample_ratio_policy = "70/30",
baseline_wave = baseline_wave,
exposure_waves = exposure_waves,
outcome_wave = outcome_wave,
protocol_url = "https://osf.io/ce4t9/", # if used
appendix_timeline = "A" # if used
)
```
< pagebreak >}}
{{
## Introduction
**Your place to shine here**
## Method
```{r, results='asis'}
#| eval: false # <- set to false, copy, delete, modify, and extend text as needed
# run this code, copy and paste contents into your text
cat(
boilerplate::boilerplate_generate_text(
category = "methods",
sections = c(
"student_sample.nzavs",
"student_target_population",
"eligibility.standard",
"causal_intervention.grf_simple_text",
"analytic_approach.general_approach_cate_long",
"exposure_indicator",
"causal_identification_criteria",
"confounding_control.vanderweele",
"statistical_models.grf_short_explanation",
"missing_data.missing_grf_simple",
"sensitivity_analysis.short_evalue"
),
global_vars = global_vars,
db = unified_db
)
)
```
< pagebreak >}}
{{
## Results
### Average Treatment Effects
```{r}
#| label: fig-ate
#| fig-cap: "Average Treatment Effects on Multi-dimensional Wellbeing"
#| eval: true
#| fig-height: 12
#| fig-width: 8
ate_results$plot
```
< pagebreak >}}
{{
```{r}
#| label: tbl-outcomes
#| tbl-cap: "Average Treatment Effects on Multi-dimensional Wellbeing"
#| eval: true
margot_bind_tables_markdown
```
```{r, results = 'asis'}
#| eval: true # - set to false/ copy and change
# run this line, copy and paste text into your document
cat(ate_results$interpretation)
```
< pagebreak >}}
{{
<!-- OPTIONAL SECTION: PLANNED SUBGROUP COMPARISONS -->
<!-- uncomment the section below if you performed subgroup analysis -->
<!-- ### Planned Subgroup Comparisons -->
<!-- Based on theoretical findings we expected that the effects of {name_exposure} would vary by age...\@fig-planned-comparison and @tbl-planned-comparison -->
<!-- ```{r} -->
<!-- #| label: fig-planned-comparison -->
<!-- #| fig-cap: "Planned Comparison Plot" -->
<!-- #| eval: true -->
<!-- #| echo: false -->
<!-- #| fig-height: 10 -->
<!-- #| fig-width: 12 -->
<!-- plots_subgroup_age_young_old -->
<!-- ``` -->
<!-- ```{r} -->
<!-- #| label: tbl-planned-comparison -->
<!-- #| tbl-cap: "Planned Comparison Table" -->
<!-- #| eval: true -->
<!-- #| echo: false -->
<!-- group_comparison_age_young_old$results |> -->
<!-- mutate(across(where(is.numeric), ~ round(., 2))) %>% -->
<!-- kbl(format = "markdown") -->
<!-- ``` -->
<!-- ```{r, results = 'asis'} -->
<!-- #| eval: false # copy and paste your own text -->
<!-- cat(group_comparison_age_young_old$interpretation) -->
<!-- ``` -->
< pagebreak >}}
{{
### Heterogeneous Treatment Effects {#results-qini-curve}
effects (τᵢ) across our sample. @fig-tau-distribution presents the estimated treatment effects for each individual, revealing substantial variability in how people respond to {name_exposure_lower}.
We begin by examining the distribution of individual treatment
```{r}
#| label: fig-tau-distribution
#| fig-cap: "Distribution of Individual Treatment Effects (τᵢ) Across Outcomes"
#| eval: true
#| echo: false
#| fig-height: 12
#| fig-width: 10
#|
# create tau plots showing individual treatment effect distributions
tau_plots <- margot_plot_tau(
models_for_analysis,
label_mapping = labels_for_analysis
)
# display the plot
tau_plots
```
in treatment effects across individuals. To determine whether this variability is systematic (i.e., predictable based on individual characteristics) rather than random noise, we employ two complementary approaches: Qini curves to assess the reliability of heterogeneous effects, and policy trees to identify subgroups with differential treatment responses.
The histograms above show considerable heterogeneity
```{r, results='asis'}
#| eval: false # <- copy and modify text as needed
# copy and paste into your text
cat(
boilerplate::boilerplate_generate_text(
category = "results",
sections = c("grf.interpretation_qini"),
global_vars = global_vars,
db = unified_db
)
)
```
```{r, results = 'asis'}
#| eval: false # <- set to true and use if you have reliable results
# only use if you have reliable qini results
if (length(reliable_ids) > 0) {
cat(qini_gain$qini_explanation)
} else {
cat("No significant heterogeneous treatment effects were detected using Qini curve analysis.")
}
```
<!-- only include this table if you have multiple qini results -->
```{r}
#| tbl-cap: "Qini Curve Results"
#| eval: true # <- set to true if you have qini results
# only use if you have multiple qini results
if (length(reliable_ids) > 1) {
knitr::kable(
qini_gain$summary_table |>
mutate(across(where(is.numeric), ~ round(., 2))),
format = "markdown",
caption = "Qini Curve Results"
)
} else {
cat("*Note: Qini curve table only displayed when multiple significant results are found.*")
}
```
<!-- only include figures if you have qini results -->
```{r}
#| label: fig-qini-combined
#| fig-cap: "Qini Curves for Heterogeneous Treatment Effects"
#| eval: true # <- set to true if you have qini plots
#| echo: false
#| fig-height: 18
#| fig-width: 12
# only run if you have qini plots
if (length(qini_plots) > 0) {
# create blank plot for spacing
blank_plot <- plot_spacer()
# determine grid layout (2 columns preferred)
n_plots <- length(qini_plots)
n_cols <- 2
n_rows <- ceiling(n_plots / n_cols)
# create list of plots including blank spacers for even grid
plot_list <- qini_plots
n_blanks_needed <- (n_rows * n_cols) - n_plots
# add blank plots to fill the grid
if (n_blanks_needed > 0) {
for (i in 1:n_blanks_needed) {
plot_list <- append(plot_list, list(blank_plot))
}
}
# combine plots in a grid
combined_qini <- wrap_plots(
plot_list,
ncol = n_cols,
nrow = n_rows
) +
plot_layout(guides = "collect") +
plot_annotation(
title = "Qini Curves for Reliable Heterogeneous Effects",
subtitle = paste("Models with significant treatment effect heterogeneity (n =",
n_plots, ")")
) &
theme(
legend.position = "bottom",
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)
)
print(combined_qini)
} else {
message("no qini plots to display")
}
```
< pagebreak >}}
{{
### Decision Rules (Who is Most Sensitive to Treatment?)
```{r, results='asis'}
#| eval: false # <- set to true and modify text as needed
cat(
boilerplate::boilerplate_generate_text(
category = "results",
sections = c("grf.interpretation_policy_tree"),
global_vars = global_vars,
db = unified_db
)
)
```
for each outcome with reliable heterogeneous effects. Each tree shows: (1) the decision rules for treatment assignment, (2) the distribution of treatment effects across subgroups, and (3) visual representation of how covariates split the population into groups with differential treatment responses.
The following pages present policy trees
<!-- the following policy tree figures are optional -->
<!-- only include as many as you have valid policy trees -->
```{r}
#| label: fig-policy-trees
#| fig-cap: "Policy Trees for Treatment Assignment"
#| eval: true # <- set to true if you have policy trees
#| echo: false
#| fig-height: 10
#| fig-width: 12
# display policy trees if they exist - one per page
if (length(policy_plots) > 0) {
# iterate through each policy tree
for (i in seq_along(policy_plots)) {
# add page break before each plot except the first
if (i > 1) {
cat("\n\n{{< pagebreak >}}\n\n")
}
# create individual caption for each tree
cat(paste0("\n\n#### Policy Tree ", i, ": ", qini_names[[i]], "\n\n"))
# print the policy tree
print(policy_plots[[i]])
# add some space after
cat("\n\n")
}
} else {
message("no policy trees to display")
}
```
```{r, results = 'asis'}
#| eval: false # <- copy and paste text a insert jsust below graph
# use this text below your decision tree graphs
if (length(reliable_ids) > 0) {
cat(policy_text, "\n")
}
```
< pagebreak >}}
{{
## Discussion
```{r, results='asis'}
#| eval: false # <- set to true and modify text as needed
#| echo: false
cat(boilerplate_generate_text(
category = "discussion",
sections = c(
"student_ethics",
"student_data",
"student_authors_statement"
),
global_vars = list(
exposure_variable = name_exposure
),
db = unified_db
))
```
< pagebreak >}}
{{
## Appendix A: Measures {#appendix-measures}
### Measures
#### Baseline Covariate Measures
```{r, results='asis'}
cat(baseline_measures_text)
```
#### Exposure Measures
```{r, results='asis'}
cat(exposure_measures_text)
```
#### Outcome Measures
```{r, results='asis'}
cat(outcome_measures_text)
```
< pagebreak >}}
{{
## Appendix B: Sample Characteristics {#appendix-sample}
#### Sample Statistics: Baseline Covariates
@tbl-appendix-baseline presents sample demographic statistics.
::: {#tbl-appendix-baseline}
```{r, results = 'asis'}
#| eval: true
#| include: true
#| echo: false
markdown_table_baseline
```
for New Zealand Attitudes and Values Cohort: {baseline_wave_glued}.
Demographic statistics :::
### Sample Statistics: Exposure Variable {#appendix-exposure}
::: {#tbl-appendix-exposures}
```{r, results = 'asis'}
#| eval: true
#| include: true
#| echo: false
markdown_table_exposures
```
for New Zealand Attitudes and Values Cohort waves 2018.
Demographic statistics :::
< pagebreak >}}
{{
### Sample Statistics: Outcome Variables {#appendix-outcomes}
::: {#tbl-appendix-outcomes}
```{r, results = 'asis'}
#| eval: true
#| include: true
#| echo: false
markdown_table_outcomes
```
Outcome variables measured at {baseline_wave_glued} and {outcome_wave}:::
< pagebreak >}}
{{
## Appendix C: Transition Matrix to Check The Positivity Assumption {#appendix-transition}
```{r, results = 'asis'}
#| label: tbl-transition
#| tbl-cap: "Transition Matrix Showing Change"
#| eval: true
#| include: true
#| echo: false
transition_tables_binary$tables[[1]]
```
```{r, results = 'asis'}
cat(transition_tables_binary$explanation)
```
< pagebreak >}}
{{
## Appendix D: RATE AUTOC and RATE Qini {#appendix-rate}
```{r, results='asis'}
#| eval: false # <- set to true and modify text as needed
#| echo: false
# select appropriate text based on whether outcomes were flipped
if (use_flipped) {
cat(
boilerplate::boilerplate_generate_text(
category = "results",
sections = c("grf.interpretation_rate"),
global_vars = global_vars,
db = unified_db
)
)
} else {
# use no-flip version if available
if (!is.null(unified_db$results$grf$interpretation_rate_no_flip)) {
cat(unified_db$results$grf$interpretation_rate_no_flip)
} else {
cat(
boilerplate::boilerplate_generate_text(
category = "results",
sections = c("grf.interpretation_rate"),
global_vars = global_vars,
db = unified_db
)
)
}
}
```
```{r, results='asis'}
#| eval: false # <- set to true as needed
#| echo: false
cat(rate_interp$comparison)
```
#appendix-cate-validation) for details.
Refer to [Appendix D](
##### RATE AUTOC RESULTS
```{r, results = 'asis'}
# only show if there are autoc results
if (length(model_groups$autoc) > 0) {
cat(rate_interp$autoc_results)
} else {
cat("No significant RATE AUTOC results were found.")
}
```
<!-- only include autoc plots if they exist -->
```{r}
#| label: fig-rate-autoc
#| fig-cap: "RATE AUTOC Curves"
#| eval: false # <- set to true if you have autoc plots
#| echo: false
#| fig-height: 16
#| fig-width: 12
# display autoc plots if they exist
if (length(autoc_plots) > 0) {
# create blank plot for spacing
blank_plot <- plot_spacer()
# determine grid layout (2 columns preferred)
n_plots <- length(autoc_plots)
n_cols <- 2
n_rows <- ceiling(n_plots / n_cols)
# create list of plots including blank spacers for even grid
plot_list <- autoc_plots
n_blanks_needed <- (n_rows * n_cols) - n_plots
# add blank plots to fill the grid
if (n_blanks_needed > 0) {
for (i in 1:n_blanks_needed) {
plot_list <- append(plot_list, list(blank_plot))
}
}
# combine plots in a grid
combined_autoc <- wrap_plots(
plot_list,
ncol = n_cols,
nrow = n_rows
) +
plot_layout(guides = "collect") +
plot_annotation(
title = "RATE AUTOC Curves for Heterogeneous Effects",
subtitle = paste("Outcomes with significant autocorrelation (n =",
n_plots, ")")
) &
theme(
legend.position = "bottom",
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)
)
print(combined_autoc)
} else {
message("no autoc plots to display")
}
```
< pagebreak >}}
{{
## Appendix E: Estimating and Interpreting Heterogeneous Treatment Effects with GRF {#appendix-explain-grf}
```{r, results='asis'}
#| eval: false # <- set to true as needed
#| echo: false
cat(
boilerplate::boilerplate_generate_text(
category = "appendix",
sections = c("explain.grf_short"),
global_vars = global_vars,
db = unified_db
)
)
```
< pagebreak >}}
{{
## Appendix F: Strengths and Limitations of Causal Forests {#appendix-strengths}
```{r, results='asis'}
#| eval: false # <- set to true and modify text as needed
cat(
boilerplate::boilerplate_generate_text(
category = "discussion",
sections = c("strengths.strengths_grf_short"),
global_vars = global_vars,
db = unified_db
)
)
```
< pagebreak >}}
{{
## References {.appendix-refs}
This code chunk is a template. Run it once to generate boilerplate text, then:
- Copy the printed text into your manuscript.
- Delete any sections you don’t need.
- Modify the remaining text so it fits your variables, sample, and research question.
cat(
::boilerplate_generate_text(
boilerplatecategory = "methods",
sections = c(
"student_sample.nzavs",
"student_target_population",
"eligibility.standard",
"causal_intervention.grf_simple_text",
"analytic_approach.general_approach_cate_long",
"exposure_indicator",
"causal_identification_criteria",
"confounding_control.vanderweele",
"statistical_models.grf_short_explanation",
"missing_data.missing_grf_simple",
"sensitivity_analysis.short_evalue"
),global_vars = global_vars,
db = unified_db
) )
Pro Tip
You can adjust figure by changing:
#| fig-height: 16
and #| fig-width: 9
#| label: fig-policy-6
#| fig-cap: "Decision Tree: {glued_policy_names_6}"
#| eval: true
#| fig-height: 16 # <- change here
#| fig-width: 9 # <- change here
6]] policy_plots[[
What You Have Learned
How to create a publication quality manuscript
How to create a workflow for references
How to import results into your manuscript
How to make graphs of your results (using)
margot
How to report your results
How to interpret your results
Frequently Asked Questions
Do I need to use ‘quarto’ for making my report?
No. The suggested workflow should make your life easier. If, after a fair trial – and after asking us for help – quarto
still feels like extra friction, switch to whatever you like (rmarkdown
, plain R
scripts, or even Word). The science matters more than the wrapper.
👉 Quick-start: https://quarto.org/docs/get-started/
I cannot save or retrieve files from folders
When hunting path errors, ask yourself:
- Packages – have I updated everything, especially
margot
andhere
?
- Clean slate – did I restart R (⌘⇧F10) and rerun the data-prep scripts from the top?
- Project root – am I inside the correct RStudio Project? Check with
here::here()
.
- Search – have I used Edit → Find in Files (⌘⇧F / Ctrl⇧F) to locate the object or path?
- Data really there? – do the objects exist in my data frame?
# waves where `variable_name` was measured
with(df_nz_long, table(variable_name, wave))
- Permissions / cloud sync – is the save folder writable and local? Dropbox/OneDrive may off-load files; right-click and choose Make available offline.
I cannot run ‘margot_causal_forest_parallel()’
- Fallback – does
margot_causal_forest()
work? It is slower but prints progress.
- Formula richness – causal forests need many covariates (Tibshirani et al. 2024). Include the full demographic block.
- Resources – parallel forests spawn one worker per core. Keep some RAM free.
- Reduce load – try fewer trees or a smaller variable pool:
<- list(
grf_defaults seed = 123,
stabilize.splits = TRUE,
num.trees = 1000, # fewer trees
top_n_vars = 10 # smaller policy-tree search space
)
I cannot run ‘margot_flip_forests_parallel()’
First test the serial version margot_flip_forests()
. All parallel caveats above apply.
My R code is not working
- Update packages (
pak::pak()
is handy).
- Re-download the example code and run from the top.
- Read the full error and traceback (
rlang::last_trace()
).
- Compare with the example: what is different in your data or model?
I do not know how ‘margot’ package functions work
See the indexed reference: https://go-bayes.github.io/margot/reference/index.html
What is the recommended workflow for investigating heterogeneity?
- State your causal question. Statistics cannot rescue an ill-posed question.
- Estimate the ATE for the target population.
- Compute CATEs and evaluate gain at 20 % and 50 % budget constraints with the Qini metric.
- For outcomes with convincing heterogeneity, fit shallow policy trees (depth = 2) to obtain transparent rules.
- Optionally, compare pre-specified subgroups directly.
- Report RATE–AUTOC and RATE–Qini tables in an appendix.
Take-away: - start from a clean, updated project, - use recent scripts - read errors carefully, and - ensure each step answers your research question.
I cannot create a ‘quarto’ document
- save the file with a
.qmd
suffix inside thequarto
folder.
- Ensure the boilerplate directory structure exists (run Script 0 and Script 1).
- Run the initial code block line-by-line before hitting Render.
- Define all
global_vars
required by the boiler-plate.
- The
new_initial_quarto_document.qmd
template should work if you remember to set this value correctly:use_flipped <- TRUE
Again, before you run the analysis, check whether you inverted any outcomes.
Set:
<- TRUE # ← make TRUE if you flipped outcomes; FALSE otherwise use_flipped
- If you are sticking with the old template and you are are not flipping outcomes, stub the flip variables:
<- ""
flipped_names <- ""
flip_outcomes <- "" flipped_list
I cannot render the ‘quarto’ document as PDF
Install TinyTeX once:
install.packages("tinytex")
::install_tinytex() tinytex
Then, in your YAML front-matter, comment out unwanted formats. For PDF-only output:
format:
# docx:
# default: false
pdf:
pdf-engine: lualatex
Is it plagiarism to use the boiler-plate outputs verbatim?
No—provided you clearly cite the source. The templates come from the EPIC (Bulbulia) Lab and the data are simulated from New Zealand Attitudes and Values Study. Tailor your text to match your study and give credit. To edit, set eval: false
, render, and copy-paste the plain text blocks.
I do not have a boiler-plate template used in the example script
Run Script 0 and Script 1 from the start; they refresh every template.
::cite_packages() report
- Bulbulia J (2024). _margot: MARGinal Observational Treatment-effects_. doi:10.5281/zenodo.10907724 <https://doi.org/10.5281/zenodo.10907724>, R package version 1.0.58 Functions to obtain MARGinal Observational Treatment-effects from observational data., <https://go-bayes.github.io/margot/>.
- Chang W (2023). _extrafont: Tools for Using Fonts_. doi:10.32614/CRAN.package.extrafont <https://doi.org/10.32614/CRAN.package.extrafont>, R package version 0.19, <https://CRAN.R-project.org/package=extrafont>.
- Grolemund G, Wickham H (2011). "Dates and Times Made Easy with lubridate." _Journal of Statistical Software_, *40*(3), 1-25. <https://www.jstatsoft.org/v40/i03/>.
- Müller K, Wickham H (2023). _tibble: Simple Data Frames_. doi:10.32614/CRAN.package.tibble <https://doi.org/10.32614/CRAN.package.tibble>, R package version 3.2.1, <https://CRAN.R-project.org/package=tibble>.
- R Core Team (2025). _R: A Language and Environment for Statistical Computing_. R Foundation for Statistical Computing, Vienna, Austria. <https://www.R-project.org/>.
- Wickham H (2016). _ggplot2: Elegant Graphics for Data Analysis_. Springer-Verlag New York. ISBN 978-3-319-24277-4, <https://ggplot2.tidyverse.org>.
- Wickham H (2023). _forcats: Tools for Working with Categorical Variables (Factors)_. doi:10.32614/CRAN.package.forcats <https://doi.org/10.32614/CRAN.package.forcats>, R package version 1.0.0, <https://CRAN.R-project.org/package=forcats>.
- Wickham H (2023). _stringr: Simple, Consistent Wrappers for Common String Operations_. doi:10.32614/CRAN.package.stringr <https://doi.org/10.32614/CRAN.package.stringr>, R package version 1.5.1, <https://CRAN.R-project.org/package=stringr>.
- Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R, Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H (2019). "Welcome to the tidyverse." _Journal of Open Source Software_, *4*(43), 1686. doi:10.21105/joss.01686 <https://doi.org/10.21105/joss.01686>.
- Wickham H, François R, Henry L, Müller K, Vaughan D (2023). _dplyr: A Grammar of Data Manipulation_. doi:10.32614/CRAN.package.dplyr <https://doi.org/10.32614/CRAN.package.dplyr>, R package version 1.1.4, <https://CRAN.R-project.org/package=dplyr>.
- Wickham H, Henry L (2025). _purrr: Functional Programming Tools_. doi:10.32614/CRAN.package.purrr <https://doi.org/10.32614/CRAN.package.purrr>, R package version 1.0.4, <https://CRAN.R-project.org/package=purrr>.
- Wickham H, Hester J, Bryan J (2024). _readr: Read Rectangular Text Data_. doi:10.32614/CRAN.package.readr <https://doi.org/10.32614/CRAN.package.readr>, R package version 2.1.5, <https://CRAN.R-project.org/package=readr>.
- Wickham H, Vaughan D, Girlich M (2024). _tidyr: Tidy Messy Data_. doi:10.32614/CRAN.package.tidyr <https://doi.org/10.32614/CRAN.package.tidyr>, R package version 1.3.1, <https://CRAN.R-project.org/package=tidyr>.
- Xie Y (2025). _tinytex: Helper Functions to Install and Maintain TeX Live, and Compile LaTeX Documents_. R package version 0.57, <https://github.com/rstudio/tinytex>. Xie Y (2019). "TinyTeX: A lightweight, cross-platform, and easy-to-maintain LaTeX distribution based on TeX Live." _TUGboat_, *40*(1), 30-32. <https://tug.org/TUGboat/Contents/contents40-1.html>.
My code was working with the old template, where can I find it?
Here:
---
: "Your Title"
title: |
abstract**Background**: (Brief few sentences)
**Objectives**:
1. Estimate the causal effect of YOUR EXPOSURE on YOUR OUTCOMES measured one year later.
2. Evaluate whether these effects vary across the population.
3. Provide policy guidance on which individuals might benefit most.
**Method**: We conducted a three-wave retrospective cohort study (waves XX-XXX, October XXXX--October XXXX) using data *SIMULATED* from the New Zealand Attitudes and Values Study, a nationally representative panel. Participants were eligible if they participated in the NZAVS in the baseline wave (XXXX,...). We defined the exposure as (XXXX > NUMBER on a 1-7 Likert Scale (1 = yes, 0 = no)). To address attrition, we applied inverse probability of censoring weights; to improve external validity, we applied weights to the population distribution of Age, Ethnicity, and Gender. We computed expected mean outcomes for the population in each exposure condition (high XXXX/low XXXXX). Under standard causal assumptions of unconfoundedness, the contrast provides an unbiased average treatment effect. We then used causal forests to detect heterogeneity in these effects and employed policy tree algorithms to identify individuals ("strong responders") likely to experience the greatest benefits.
**Results**: Increasing XXXXX leads to XXXXX. Heterogeneous responses to (e.g. *Forgiveness*, *Personal Well-Being*, and *Life-Satisfaction*...) reveal structural variability in subpopulations...
**Implications**: (Brief few sentences)
**Keywords**: *Causal Inference*; *Cross-validation*; *Distress*; *Employment*; *Longitudinal*; *Machine sLearning*; *Religion*; *Semi-parametric*; *Targeted Learning*.
:
author- name: YOUR NAME
: Victoria University of Wellington, New Zealand
affiliation: XXXXX
email: yes
corresponding: [Causal Inference, Cross-validation,...]
keywords:
editor_options: console
chunk_output_type: "last-modified"
date: libertinus
fontfamily: references.bib
bibliography: apa7.csl
csl:
format: # comment this out if you want pdf
docx: false # comment this out if you want pdf
default:
pdf-engine: lualatex
pdf: true
sanitise-tex: true
keep-citations: true
link: true
colorlinks: article
documentclass: ["single column"]
classoption: false
lof: false
lot:
geometry- top=30mm
- left=25mm
- heightrounded
- headsep=22pt
- headheight=11pt
- footskip=33pt
- ignorehead
- ignorefoot
-includes:
header- \let\oldtabular\tabular
- \renewcommand{\tabular}{\small\oldtabular}
- \setlength{\tabcolsep}{4pt} # adjust this value as needed
:
execute: false
echo: false
warning: true
include: true
eval---
```{r}
#| label: setup
#| echo: false
#| include: false
#| eval: true
# save this file in your project root (e.g. 'quarto/1_setup.R')
# load here to manage paths
dep <- requireNamespace("here", quietly = TRUE)
if (!dep) install.packages("here")
library(here)
# create required folders (these will likely already exist)
dirs <- c(
here("quarto"),
here("bibliography"),
here("save_directory"),
here("csl")
)
for (d in dirs) {
if (!dir.exists(d)) dir.create(d, recursive = TRUE)
}
# ensure tinytex for PDF rendering
if (!requireNamespace("tinytex", quietly = TRUE)) {
install.packages("tinytex")
tinytex::install_tinytex()
}
# ensure pacman for package management
if (!requireNamespace("pacman", quietly = TRUE)) {
install.packages("pacman")
}
# min version of margot
if (packageVersion("margot") < "1.0.54") {
stop(
"please install margot >= 1.0.54 for this workflow\n
run:
devtools::install_github('go-bayes/margot')
"
)
}
# call library
library("margot")
# check package version
packageVersion(pkg = "margot")
# load (and install if needed) all required packages
pacman::p_load(
boilerplate, shiny, tidyverse,
kableExtra, glue, patchwork, stringr,
ggplot2, ggeffects, parameters,
table1, knitr, extrafont, here, cli
)
# load fonts (requires prior extrafont::font_import())
if (requireNamespace("extrafont", quietly = TRUE)) {
extrafont::loadfonts(device = "all")
} else {
message("'extrafont' not installed; skipping font loading")
}
# reproducibility
set.seed(123)
# copy CSL and BibTeX into quarto folder
src_files <- list(
c(here("csl", "apa7.csl"), here("quarto", "apa7.csl")),
c(here("bibliography", "references.bib"), here("quarto", "references.bib"))
)
for (f in src_files) {
if (!file.exists(f[2]) && file.exists(f[1])) {
file.copy(f[1], f[2])
}
}
# now you're ready to import data and proceed with analysis
# e.g.
# unified_db <- boilerplate_import(data_path = here("final_boilerplate_data"))
# df_grf <- margot::here_read('df_grf', here("save_directory"))
# ---- define paths and import data ----------------------------------------
push_mods <- here::here("save_directory")
final_boilerplate_data <- here::here("final_boilerplate_data")
unified_db <- boilerplate_import(data_path = final_boilerplate_data)
# check paths
# ---- inspect available boilerplate entries -------------------------------
cat(unified_db$methods$confounding_control$vanderweele)
cat(unified_db$methods$sample$nzavs)
cat(unified_db$methods$causal_intervention$grf_simple_text)
cat(unified_db$appendix$explain$grf_short)
# ---- read variable definitions -------------------------------------------
baseline_vars <- margot::here_read("baseline_vars")
exposure_var <- margot::here_read("exposure_var")
outcome_vars <- margot::here_read("outcome_vars")
# ---- define study waves --------------------------------------------------
baseline_wave <- margot::here_read("baseline_wave")
exposure_waves <- margot::here_read("exposure_waves")
outcome_wave <- margot::here_read("outcome_wave")
#
baseline_wave_glued <- glue::glue(baseline_wave)
# ---- define study parameters ---------------------------------------------
study_years <- "2018-2021"
name_exposure <- here_read("name_exposure")
name_outcome_variables <- "MY OUTCOME VARIABLES IN THIS STUDY"
name_exposure_lower <- tolower(name_exposure)
name_exposure_lower
# ---- templates and thresholds --------------------------------------------
eligibility_template <- "Participants were eligible if they participated in the {baseline wave}"
percent_missing_baseline <- margot::here_read("percent_missing_baseline")
# ---- read tables for manuscript ------------------------------------------
markdown_table_baseline <- margot::here_read("baseline_table")
markdown_table_exposures <- margot::here_read("exposure_table")
markdown_table_outcomes <- margot::here_read("outcomes_table")
margot_bind_tables_markdown <- margot::here_read("margot_bind_tables_markdown")
# ---- sample size information --------------------------------------------
n_total <- margot::here_read("n_total")
n_participants <- here_read("n_participants")
# ---- variable labels and mappings ----------------------------------------
var_labels_measures <- here_read("var_labels_measures")
label_mapping_all <- here_read("label_mapping_all")
# ---- plot titles and analysis settings -----------------------------------
# ate_title <- here_read("ate_title")
flipped_names <- here_read("flipped_names")
flip_outcomes <- here_read("flip_outcomes")
flipped_list <- paste(flipped_names, collapse = ", ")
# ---- import data for visualisation --------------------------------------
original_df <- margot::here_read('df_wide', push_mods)
df_grf <- margot::here_read('df_grf', push_mods)
# co-variates
E <- margot::here_read('E', push_mods)
# select covariates and drop numeric attributes
X <- margot::remove_numeric_attributes(df_grf[E])
# ---- define nice names and regimes ---------------------------------------
nice_name_exposure <- stringr::str_to_sentence(name_exposure)
name_outcomes_lower <- "multi-dimensional wellbeing"
nice_name_outcome <- stringr::str_to_sentence(name_outcomes_lower)
# title
ate_title = glue("ATE Effects of {nice_name_exposure} on {nice_name_outcome}")
# ---- define exposure thresholds and regimes ------------------------------
lower_cut <- here_read("lower_cut")
upper_cut <- here_read("upper_cut")
threshold <- here_read("threshold")
inverse_threshold <- here_read("inverse_threshold")
scale_range <- margot::here_read("scale_range")
# create and check variables
value_exposure = glue::glue( threshold,"", upper_cut,", ", scale_range)
value_control = glue::glue(inverse_threshold, upper_cut,", ", scale_range)
# regimes
name_control_regime_lower <- glue::glue("low {name_exposure_lower}")
value_exposure_regime <- glue::glue("Set {name_exposure} {threshold} {upper_cut} {scale_range}")
value_control_regime <- glue::glue("Set {name_exposure} {inverse_threshold} {upper_cut} {scale_range}")
contrast_template <- "We used causal forests to estimate an average treatment effect as a contrast between *{name_control_regime_lower}* and *{name_exposure_lower}* on {name_outcomes_lower}."
contrast_text <- glue(contrast_template)
#check
contrast_text
# ---- import histogram of binary exposure ---------------------------------
graph_cut <- margot::here_read("graph_cut")
# ---- verify assumptions (positivity) -------------------------------------
transition_tables <- margot::here_read("transition_tables")
transition_tables_binary <- here_read("transition_tables_binary")
# ---- generate measures text for methods section -------------------------
baseline_measures_text <- boilerplate_generate_measures(
variable_heading = "Baseline Covariates",
variables = baseline_vars,
db = unified_db,
heading_level = 3,
subheading_level = 4,
print_waves = FALSE,
label_mappings = var_labels_measures
)
cat(baseline_measures_text)
exposure_measures_text <- boilerplate_generate_measures(
variable_heading = "Exposure Variable",
variables = name_exposure,
db = unified_db,
heading_level = 3,
subheading_level = 4,
print_waves = FALSE,
label_mappings = var_labels_measures
)
outcome_measures_text <- boilerplate_generate_measures(
variable_heading = "Outcome Variables",
variables = outcome_vars,
db = unified_db,
heading_level = 3,
subheading_level = 4,
print_waves = FALSE,
label_mappings = var_labels_measures
)
# ---- exposure description from database --------------------------------
measures_exposure <- glue::glue(unified_db$measures[[name_exposure]]$description)
# ---- set plot defaults for ate plots -------------------------------------
base_defaults_binary <- list(
type = "RD",
title = ate_title,
e_val_bound_threshold = 1.2,
colors = c(
"positive" = "#E69F00",
"not reliable"= "grey50",
"negative" = "#56B4E9"
),
x_offset = -0.25,
x_lim_lo = -0.25,
x_lim_hi = 0.25,
text_size = 5,
linewidth = 0.75,
estimate_scale = 1,
base_size = 20, #<- change to make outcome labels bigger or smaller
point_size = 4,
title_size = 20,
subtitle_size = 16,
legend_text_size = 10,
legend_title_size = 10,
include_coefficients = FALSE
)
# ---- create plot options for outcomes -----------------------------------
outcomes_options_all <- margot_plot_create_options(
title = ate_title,
base_defaults = base_defaults_binary,
subtitle = "",
filename_prefix = "grf"
)
# ---- load and check model results ---------------------------------------
# takes more time but allows you to flexibly modify plots in the quarto document
# if you use the option comment out this code above
# takes less time if you used the pre-processed results but a little harder to adjust
# ate_results <- margot::here_read_qs("ate_results", push_mods)
# margot::margot_size(ate_results)
models_binary <- margot::here_read_qs("models_binary", push_mods)
# make ate plots ----------------------------------------------------------
# ************* NEW - CORRECTION FOR FAMILY-WISE ERROR **********
# then pass to the results
ate_results <- margot_plot(
models_binary$combined_table, # <- now pass the corrected results.
options = outcomes_options_all,
label_mapping = label_mapping_all,
include_coefficients = FALSE,
save_output = FALSE,
order = "evaluebound_asc",
original_df = original_df,
e_val_bound_threshold = 1.2,
rename_ate = TRUE,
adjust = "bonferroni", #<- new
alpha = 0.05 # <- new
)
# check
ate_results$plot
# view
cat(ate_results$interpretation)
# ---- heterogeneity analysis ---------------------------------------------
models_binary_flipped_all <- here_read_qs("models_binary_flipped_all", push_mods)
# this is a new function requires margot 1.0.48 or higher
# only useful if you flip labels outcomes -- if so replace "label_mapping_all"
# with "label_mapping_all_flipped"
label_mapping_all_flipped <- margot_reversed_labels(label_mapping_all,
flip_outcomes)
# optional
# could be used in an appendix
# result_ominbus_hetero <- margot_omnibus_hetero_test(
# models_binary_flipped_all,
# label_mapping = label_mapping_all_flipped,
# alpha = 0.05,
# detail_level = "standard",
# format = "markdown"
# )
# result_ominbus_hetero$summary_table |> kbl("markdown")
# cat(result_ominbus_hetero$brief_interpretation)
# ──────────────────────────────────────────────────────────────────────────────
# SCRIPT: HETEROGENEITY WORKFLOW
# PURPOSE: screen outcomes for heterogeneity, plot RATE & Qini curves,
# fit shallow policy trees, and produce plain-language summaries.
# REQUIREMENTS:
# • margot ≥ 1.0.52
# • models_binary_flipped_all – list returned by margot_causal_forest()
# • original_df – raw data frame used in the forest
# • label_mapping_all_flipped – named vector of pretty labels
# • flipped_names – vector of outcomes that were flipped
# • decision_tree_defaults – list of control parameters
# • policy_tree_defaults – list of control parameters
# • push_mods – sub-folder for caches/outputs
# • use 'models_binary', `label_mapping_all`, and set `flipped_names = ""` if no outcome flipped
# ──────────────────────────────────────────────────────────────────────────────
# check package version early
stopifnot(utils::packageVersion("margot") >= "1.0.52")
# helper: quick kable printer --------------------------------------------------
print_rate <- function(tbl) {
tbl |>
mutate(across(where(is.numeric), \(x) round(x, 2))) |>
kbl(format = "markdown")
}
# 1 SCREEN FOR HETEROGENEITY (RATE AUTOC + RATE Qini) ----------------------
rate_results <- margot_rate(
models = models_binary_flipped_all,
policy = "treat_best",
alpha = 0.20, # keep raw p < .20
adjust = "fdr", # false-discovery-rate correction
label_mapping = label_mapping_all_flipped
)
print_rate(rate_results$rate_autoc)
print_rate(rate_results$rate_qini)
# convert RATE numbers into plain-language text
rate_interp <- margot_interpret_rate(
rate_results,
flipped_outcomes = flipped_names,
adjust_positives_only = TRUE
)
cat(rate_interp$comparison, "\n")
cli_h2("Analysis ready for Appendix ✔")
# organise model names by evidence strength
model_groups <- list(
autoc = rate_interp$autoc_model_names,
qini = rate_interp$qini_model_names,
either = rate_interp$either_model_names,
exploratory = rate_interp$not_excluded_either
)
# 2 PLOT RATE AUTOC CURVES ---------------------------------------------------
autoc_plots <- margot_plot_rate_batch(
models = models_binary_flipped_all,
save_plots = FALSE, # set TRUE to store .png files
label_mapping = label_mapping_all_flipped,
model_names = model_groups$autoc
)
# inspect the first curve - note there may be more/none.
# if none, comment out
autoc_plots[[1]]
autoc_name_1 <- rate_results$rate_autoc$outcome[[1]]
# 3 QINI CURVES + GAIN INTERPRETATION ---------------------------------------
qini_results <- margot_policy(
models_binary_flipped_all,
save_plots = FALSE,
output_dir = here::here(push_mods),
decision_tree_args = decision_tree_args,
policy_tree_args = policy_tree_args,
model_names = names(models_binary_flipped_all$results),
original_df = original_df,
label_mapping = label_mapping_all_flipped,
max_depth = 2L,
output_objects = c("qini_plot", "diff_gain_summaries")
)
qini_gain <- margot_interpret_qini(
qini_results,
label_mapping = label_mapping_all_flipped
)
print_rate(qini_gain$summary_table)
cat(qini_gain$qini_explanation, "\n")
reliable_ids <- qini_gain$reliable_model_ids
# (re-)compute plots only for models that passed Qini reliability
qini_results_valid <- margot_policy(
models_binary_flipped_all,
save_plots = FALSE,
output_dir = here::here(push_mods),
decision_tree_args = decision_tree_args,
policy_tree_args = policy_tree_args,
model_names = reliable_ids,
original_df = original_df,
label_mapping = label_mapping_all_flipped,
max_depth = 2L,
output_objects = c("qini_plot", "diff_gain_summaries")
)
qini_plots <- map(qini_results_valid, ~ .x$qini_plot)
# grab pretty outcome names
qini_names <- margot_get_labels(reliable_ids, label_mapping_all_flipped)
cli_h1("Qini curves generated ✔")
# 4 POLICY TREES (max depth = 2) -------------------------------------------
# ---- policy tree graph settings
policy_tree_defaults <- list(
point_alpha = 0.5,
title_size = 12,
subtitle_size = 12,
axis_title_size = 12,
legend_title_size = 12,
split_line_color = "red",
split_line_alpha = 0.8,
split_label_color = "red",
split_label_nudge_factor = 0.007
)
decision_tree_defaults <- list(
span_ratio = 0.1,
text_size = 4,
y_padding = 0.5,
edge_label_offset = 0.02,
border_size = 0.01
)
policy_results_2L <- margot_policy(
models_binary_flipped_all,
save_plots = FALSE,
output_dir = here::here(push_mods),
decision_tree_args = decision_tree_defaults,
policy_tree_args = policy_tree_defaults,
model_names = reliable_ids, # only those passing Qini
max_depth = 2L,
original_df = original_df,
label_mapping = label_mapping_all_flipped,
output_objects = c("combined_plot")
)
policy_plots <- map(policy_results_2L, ~ .x$combined_plot)
# policy_plots[[1]]
# ️5 PLAIN-LANGUAGE INTERPRETATION OF TREES ----------------------------------
policy_text <- margot_interpret_policy_batch(
models = models_binary_flipped_all,
original_df = original_df,
model_names = reliable_ids,
label_mapping = label_mapping_all_flipped,
max_depth = 2L
)
cat(policy_text, "\n")
cli::cli_h1("Finished: depth-2 policy trees analysed ✔")
# ───────────────────────────── EOF ────────────────────────────────────────────
# names of valid models
# you might have fewer models
glued_policy_names_1 <- glue( qini_names[[1]])
glued_policy_names_2 <-glue( qini_names[[2]])
glued_policy_names_3 <- glue(qini_names[[3]])
glued_policy_names_4 <- glue(qini_names[[4]])
glued_policy_names_5 <-glue( qini_names[[5]])
glued_policy_names_6 <- glue(qini_names[[6]])
glued_policy_names_7 <-glue( qini_names[[7]])
# cli::cli_h1("Names of Reliable HTE Models Set")
# GROUP COMPARISON EXAMPLE ------------------------------------------------
# play around with these values
x_offset_comp <- 1.0
x_lim_lo_comp <- -1.0
x_lim_hi_comp <- 1.0
base_defaults_comparisons <- list(
type = "RD",
title = ate_title,
e_val_bound_threshold = 1.2,
label_mapping = "label_mapping_all",
adjust = "bonferroni", #<- new
alpha = 0.05, # <- new
colors = c(
"positive" = "#E69F00",
"not reliable" = "grey50",
"negative" = "#56B4E9"
),
x_offset = x_offset_comp,
# will be set based on type
x_lim_lo = x_lim_lo_comp,
# will be set based on type
x_lim_hi = x_lim_hi_comp,
text_size = 8,
linewidth = 0.75,
estimate_scale = 1,
base_size = 18,
point_size = 4,
title_size = 19,
subtitle_size = 16,
legend_text_size = 10,
legend_title_size = 10,
include_coefficients = FALSE
)
# see
complex_condition_age <- between(X[,"t0_age_z"], -1, 1)
# sanity‑check age bounds on the raw scale
mean(original_df$t0_age) + c(-1, 1) * sd(original_df$t0_age)
# age subsets
subsets_standard_age <- list(
Younger = list(
var = "t0_age_z",
value = -1,
operator = "<",
label = "Age < 35"
),
Middle = list(
var = "t0_age_z",
# operator = "<",
subset_condition = complex_condition_age,
label = "Age 35-62"
),
Older = list(
var = "t0_age_z",
value = 1,
operator = ">",
label = "Age > 62"
)
)
# 3. batch subgroup analysis -----------------------------------------
planned_subset_results <- margot_planned_subgroups_batch(
domain_models = list(models_binary),
X = X,
base_defaults = base_defaults_comparisons,
subset_types = list(cohort = subsets_standard_age),
original_df = original_df,
label_mapping = label_mapping_all, # ← supply it here
domain_names = "wellbeing",
subtitles = "",
adjust = "bonferroni", # ← here
alpha = 0.05 # ← and here
)
# make comparison plot
plots_subgroup_age_young_old<- wrap_plots(
list(
planned_subset_results$wellbeing$cohort$results$`Age < 35`$plot,
planned_subset_results$wellbeing$cohort$results$`Age > 62`$plot
), ncol = 1) +
plot_annotation(
title = "Younger vs Older",
theme = theme(plot.title = element_text(size = 18, face = "bold"))
)
# are groups different from each other?
# example: young (<35) vs older (>62)
group_comparison_age_young_old <- margot_compare_groups(
group1_name = "People Under 35 Years Old",
group2_name = "People Over 62 Years Old",
planned_subset_results$wellbeing$cohort$results$`Age < 35`$transformed_table, # reference
planned_subset_results$wellbeing$cohort$results$`Age > 62`$transformed_table, # comparison
type = "RD", # risk‑difference scale
decimal_places = 3
)
print(group_comparison_age_young_old$results |> kbl("markdown", digits = 3))
cat(group_comparison_age_young_old$interpretation)
# ---- define global variables for text generation ------------------------
global_vars <- list(
name_exposure_variable = nice_name_exposure,
n_total = n_total,
ate_adjustment = "bonferroni",
ate_alpha = "0.05",
cate_adjustment = "Benjamini–Hochberg false-discovery-rate adjustment",
cate_alpha = "0.1",
sample_ratio_policy = "70/30",
n_participants = n_participants,
exposure_variable = name_exposure,
name_exposure_lower = name_exposure_lower,
name_control_regime_lower = name_control_regime_lower,
name_outcome_variables = "Self Esteem", # <- adjust to your study, your decision
name_outcomes_lower = name_outcomes_lower,
name_exposure_capfirst = nice_name_exposure,
measures_exposure = measures_exposure,
value_exposure_regime = value_exposure_regime,
value_control_regime = value_control_regime,
flipped_list = flipped_list, # set flipped_list = "" if nothing flipped
appendix_explain_grf = "E",
appendix_assumptions_grf = "F",
name_exposure_threshold = "1",
name_control_threshold = "0",
appendix_measures = "A",
value_control = value_control, # ← named
value_exposure = value_exposure, # ← named
appendix_positivity = "C",
appendix_rate = "D",
appendix_qini_curve = "D",
train_proportion_decision_tree = ".7",
traning_proportion = ".7",
sample_split = "70/30",
sample_ratio_policy = "70/30",
baseline_wave = baseline_wave,
exposure_waves = exposure_waves,
outcome_wave = outcome_wave,
protocol_url = "https://osf.io/ce4t9/", # if used
appendix_timeline = "A" # if used
)
```
< pagebreak >}}
{{
## Introduction
**Your place to shine here**
## Method
```{r, results='asis'}
#| eval: false # <- set to false/ copy and paste text so you can modify it/change the figure
# run this code, copy and paste contents into your text
cat(
boilerplate::boilerplate_generate_text(
category = "methods", # ← choose the right top-level list
sections = c(
"student_sample.nzavs",
"student_target_population",
"eligibility.standard",
"causal_intervention.grf_simple_text",
"analytic_approach.general_approach_cate_long", # <- new
"exposure_indicator",
"causal_identification_criteria",
"confounding_control.vanderweele",
"statistical_models.grf_short_explanation",
"missing_data.missing_grf_simple",
"sensitivity_analysis.short_evalue"
),
global_vars = global_vars,
db = unified_db
)
)
```
< pagebreak >}}
{{
## Results
### Average Treatement Effects
```{r}
#| label: fig-ate
#| fig-cap: "Average Treatment Effects on Multi-dimensional Wellbeing"
#| eval: true # |eval: false # <- set to false as needed/desired for your own material
#| fig-height: 12
#| fig-width: 8
ate_results$plot
```
< pagebreak >}}
{{
```{r}
#| label: tbl-outcomes
#| tbl-cap: "Average Treatment Effects on Multi-dimensional Wellbeing"
#| eval: true # |eval: false # <- set to false as needed/desired for your own material
# ate_results$transformed_table|> kbl("markdown")
margot_bind_tables_markdown
```
```{r, results = 'asis'}
#| eval: false
# run this line, copy and paste text into your document
cat(ate_results$interpretation)
```
< pagebreak >}}
{{
<!-- Uncomment text below and run if you have performed a subgroup comparison. -->
<!-- ### Planned Subgroup Comparisons (Optional) -->
<!-- Based on theoretical findings we expected that the effects of {name_exposure} would vary by age...\@fig-planned-comparison and @tbl-planned-comparison -->
<!-- ```{r} -->
<!-- #| label: fig-planned-comparison -->
<!-- #| fig-cap: "Planned Comparison Plot" -->
<!-- #| eval: true -->
<!-- #| echo: false -->
<!-- #| fig-height: 10 -->
<!-- #| fig-width: 12 -->
<!-- plots_subgroup_age_young_old -->
<!-- ``` -->
<!-- ```{r} -->
<!-- #| label: tbl-planned-comparison -->
<!-- #| tbl-cap: "Planned Comparison Table" -->
<!-- #| eval: true # <- set to false: copy and paste your own text using this material -->
<!-- #| echo: false -->
<!-- # table (only use if more than one qini gain interpretation) -->
<!-- group_comparison_age_young_old$results |> -->
<!-- mutate(across(where(is.numeric), ~ round(., 2))) %>% -->
<!-- kbl(format = "markdown") -->
<!-- ``` -->
<!-- ```{r, results = 'asis'} -->
<!-- #| eval: false # copy and paste your own text -->
<!-- cat(group_comparison_age_young_old$interpretation) -->
<!-- ``` -->
< pagebreak >}}
{{
### Heterogeneous Treatment Effects {#results-qini-curve}
```{r, results='asis'}
#| eval: false # <- set to false: copy and paste your own text using this material
# copy and paste into your txt
cat(
boilerplate::boilerplate_generate_text(
category = "results", # ← choose the right top-level list
sections = c(
"grf.interpretation_qini"
),
global_vars = global_vars,
db = unified_db
)
)
```
```{r, results = 'asis'}
#| eval: false # <- set to false: copy and paste your own text using this material
# only reliable results
cat(qini_gain$qini_explanation)
```
@tbl-qini presents results for our Qini curve analysis at different spend rates.
```{r}
#| label: tbl-qini
#| tbl-cap: "Qini Curve Results"
#| eval: false # <- set to false: copy and paste your own text using this material
# table (only use if more than one qini gain interpretation)
qini_gain$summary_table |>
mutate(across(where(is.numeric), ~ round(., 2))) %>%
kbl(format = "markdown") #<-- only if you have this, otherwise delete this code
```
@fig-qini-1 presents results for reliable Qini results
```{r}
#| label: fig-qini-1
#| fig-cap: "Qini Graphs"
#| eval: false # <- set to true once you have set up your graph correctly
#| echo: false
#| fig-height: 18
#| fig-width: 12
library(patchwork)
# combine first column of plots (4,6,7,8) and second column (9,11,12)
# these showed reliable qini results
# THIS WILL NEED TO BE MODIFED TO SUIT YOUR STUDY
# Once you get the right arrangement
combined_qini <- (
qini_plots[[1]] /
qini_plots[[2]] /
qini_plots[[3]] /
qini_plots[[4]]
) | (
# remove this block if you don't have plots 9,11,12
qini_plots[[5]] /
qini_plots[[6]] /
qini_plots[[7]]
) +
# collect all legends into one shared guide
plot_layout(guides = "collect") +
# add title (and optionally subtitle)
plot_annotation(
title = "Combined Qini Plots",
subtitle = "Panels arranged with shared legend"
) &
# apply theme modifications to all subplots
theme(
legend.position = "bottom", # place legend below
plot.title = element_text(hjust = 0.5), # centre title
plot.subtitle = element_text(hjust = 0.5) # centre subtitle
)
# draw it
print(combined_qini)
```
< pagebreak >}}
{{
### Decision Rules (Who is Most Sensitive to Treatment?)
```{r, results='asis'}
#| eval: false # <- set to false: copy and paste your own text using this material
cat(
boilerplate::boilerplate_generate_text(
category = "results", # ← choose the right top-level list
sections = c(
"grf.interpretation_policy_tree"
),
global_vars = global_vars,
db = unified_db
)
)
```
< pagebreak >}}
{{
```{r}
#| label: fig-policy-1
#| fig-cap: "Decision Tree: {glued_policy_names_1}"
#| eval: false # <- set to true once you have set up your graph correctly / if you have no qini graph delete skip this
#| echo: false
#| fig-height: 10
#| fig-width: 12
# plot 1
policy_plots[[1]]
```
< pagebreak >}}
{{
```{r}
#| label: fig-policy-2
#| fig-cap: "Decision Tree: {glued_policy_names_2}"
#| eval: false # <- set to true once you have set up your graph correctly / if you have no policy graph delete skip this
#| fig-height: 10
#| fig-width: 12
policy_plots[[2]]
```
< pagebreak >}}
{{
```{r}
#| label: fig-policy-3
#| fig-cap: "Decision Tree: {glued_policy_names_3}"
#| eval: false # <- set to true once you have set up your graph correctly / if you have no policy graph delete skip this
#| echo: false
#| fig-height: 10
#| fig-width: 12
policy_plots[[3]]
```
< pagebreak >}}
{{
```{r}
#| label: fig-policy-4
#| fig-cap: "Decision Tree: {glued_policy_names_4}"
#| eval: false # <- set to true once you have set up your graph correctly / if you have no policy graph delete skip this
#| fig-height: 10
#| fig-width: 12
policy_plots[[4]]
```
< pagebreak >}}
{{
```{r}
#| label: fig-policy-5
#| fig-cap: "Decision Tree: {glued_policy_names_5}"
#| eval: false # <- set to true once you have set up your graph correctly / if you have no policy graph delete skip this
#| fig-height: 10
#| fig-width: 10
policy_plots[[5]]
```
< pagebreak >}}
{{
```{r}
#| label: fig-policy-6
#| fig-cap: "Decision Tree: {glued_policy_names_6}"
#| eval: false # <- set to true once you have set up your graph correctly / if you have no policy graph delete skip this
#| fig-height: 10
#| fig-width: 12
policy_plots[[6]]
```
< pagebreak >}}
{{
```{r}
#| label: fig-policy-7
#| fig-cap: "Decision Tree: {glued_policy_names_7}"
#| eval: false # <- set to true once you have set up your graph correctly / if you have no policy graph delete skip this
#| echo: false
#| fig-height: 10
#| fig-width: 12
policy_plots[[7]]
```
< pagebreak >}}
{{
```{r, results = 'asis'}
#| eval: false # <- set to false if you want to copy and paste your own text
# use this text to copy and paste below your decision tree graphs
cat(policy_text, "\n")
```
< pagebreak >}}
{{
## Discussion
```{r, results='asis'}
#| eval: false # <- set to false: copy and paste your own text using this material
#| echo: false
cat(boilerplate_generate_text(
category = "discussion",
sections = c(
"student_ethics",
"student_data",
"student_authors_statement" ),
global_vars = list(
exposure_variable = name_exposure
),
db = unified_db
))
unified_db$discussion$student_authors_statement
```
< pagebreak >}}
{{
## Appendix A: Measures {#appendix-measures}
### Measures
#### Baseline Covariate Measures
```{r, results='asis'}
cat(baseline_measures_text)
```
#### Exposure Measures
```{r, results='asis'}
cat(exposure_measures_text)
```
#### Outcome Measures
```{r, results='asis'}
cat(outcome_measures_text)
```
< pagebreak >}}
{{
## Appendix B: Sample Characteristics {#appendix-sample}
#### Sample Statistics: Baseline Covariates
@tbl-appendix-baseline presents sample demographic statistics.
::: {#tbl-appendix-baseline}
```{r, results = 'asis'}
#| eval: true
#| include: true
#| echo: false
markdown_table_baseline
```
for New Zealand Attitudes and Values Cohort: {baseline_wave_glued}.
Demographic statistics :::
### Sample Statistics: Exposure Variable {#appendix-exposure}
<!-- @tbl-sample-exposures presents sample statistics for the exposure variable, religious service attendance, during the baseline and exposure waves. This variable was not measured in part of NZAVS time 12 (years 2020-2021) and part of NZAVS time 13 (years 2021-2022). To address missingness, if a value was observed after NZAVS time 14, we carried the previous observation forward and created and NA indicator. If there was no future observation, the participant was treated as censored, and inverse probability of censoring weights were applied, following our standard method for handling missing observations (see mansucript **Method**/**Handling of Missing Data**). Here, our carry-forward imputation approach may result in conservative causal effect estimation because it introduces measurement error. However, this approach would not generally bias causal effect estimation away from the null because the measurement error is unsystematic and random and unrelated to the outcomes. -->
::: {#tbl-appendix-exposures}
```{r, results = 'asis'}
#| eval: true
#| include: true
#| echo: false
markdown_table_exposures
```
for New Zealand Attitudes and Values Cohort waves 2018.
Demographic statistics :::
< pagebreak >}}
{{
### Sample Statistics: Outcome Variables {#appendix-outcomes}
::: {#tbl-appendix-outcomes}
```{r, results = 'asis'}
#| eval: true
#| include: true
#| echo: false
markdown_table_outcomes
```
Outcome variables measured at {baseline_wave_glued} and {outcome_wave}:::
< pagebreak >}}
{{
## Appendix C: Transition Matrix to Check The Positivity Assumption {#appendix-transition}
```{r, results = 'asis'}
#| label: tbl-transition
#| tbl-cap: "Transition Matrix Showing Change"
#| eval: true
#| include: true
#| echo: false
transition_tables_binary$tables[[1]]
```
```{r, results = 'asis'}
cat(transition_tables_binary$explanation)
```
< pagebreak >}}
{{
## Appendix D: RATE AUTOC and RATE Qini {#appendix-rate}
```{r, results='asis'}
#| eval: false # <- set to true as needed/desired
#| echo: false
cat(
boilerplate::boilerplate_generate_text(
category = "results",
# ← choose the right top-level list
sections = c("grf.interpretation_rate"),
global_vars = global_vars,
db = unified_db
)
)
# if you did not flip, use this cat(unified_db$results$grf$interpretation_rate_no_flip)
```
```{r, results='asis'}
#| eval: false # <- set to true as needed/desired
#| echo: false
cat(rate_interp$comparison)
```
#appendix-cate-validation) for details.
Refer to [Appendix D](
##### RATE AUTOC RESULTS
```{r, results = 'asis'}
# only reliable results
cat(rate_interp$autoc_results)
```
```{r}
#| label: fig-rate-1
#| fig-cap: "RATE AUTOC Graphs"
#| eval: false # <- set to true as needed/desired
#| echo: false
#| fig-height: 10
#| fig-width: 12
autoc_plots[[1]]
```
@fig-rate-1 presents the RATE AUTOC curve for `r autoc_name_1`
< pagebreak >}}
{{
## Appendix E. Estimating and Interpreting Heterogeneous Treatment Effects with GRF {#appendix-explain-grf}
```{r, results='asis'}
#| eval: false
#| echo: false
library(boilerplate)
cat(
boilerplate::boilerplate_generate_text(
category = "appendix",
# ← choose the right top-level list
sections = c("explain.grf_short"),
global_vars = global_vars,
db = unified_db
)
)
```
< pagebreak >}}
{{
## Appendix F: Strengths and Limitations of Causal Forests {#appendix-rate}
```{r, results='asis'}
#| eval: false # <- set to true as needed/desired or copy and paste text
cat(
boilerplate::boilerplate_generate_text(
category = "discussion",
# ← choose the right top-level list
sections = c("strengths.strengths_grf_short"),
global_vars = global_vars,
db = unified_db
)
)
```
< pagebreak >}}
{{
## References {.appendix-refs}
However, be careful to use ‘models_binary’ and ‘labels_mapped_all’ in place of models_binary_flipped
and labels_mapped_all_flipped
throughout if you did not flip your models. I would rather advise that you use your the new code.