Working with JSON Databases in boilerplate • boilerplate

library(boilerplate)
#> boilerplate 1.1.0

Introduction

The boilerplate package now supports JSON format for all database operations. JSON provides several advantages over the traditional RDS format:

Human-readable: JSON files can be opened and edited in any text editor
Version control friendly: Changes are easily tracked in Git
Language agnostic: JSON files can be read by any programming language
Web-friendly: JSON is the standard format for web applications
Template variables: Support for {{variable}} placeholders in your text

Basic JSON Operations

Importing and Saving JSON Databases

# Keep all example files in a subdirectory of this R session's temporary
# directory.
temp_dir <- tempdir()
json_path <- file.path(temp_dir, "json_example")
# showWarnings = FALSE: stay silent if the directory already exists,
# e.g. when the example is run a second time in the same session.
dir.create(json_path, showWarnings = FALSE)

# Start from an empty database (a plain list). Entries are added one at a
# time and the updated database is reassigned after each call.
sample_db <- list()

# Add methods entries. `path` is a dot-separated location, so
# "methods.sampling" becomes the `sampling` entry under `methods`.
# {{population}} is a template placeholder; it is filled in later, when
# text is generated from the database.
sample_db <- boilerplate_add_entry(
  sample_db,
  path = "methods.sampling",
  value = "Participants were randomly selected from {{population}}."
)

# A deeper path also creates the intermediate level (`analysis`).
sample_db <- boilerplate_add_entry(
  sample_db,
  path = "methods.analysis.regression",
  value = "We conducted linear regression using {{software}}."
)

# Add measures. A measure is stored as a list of fields rather than as a
# single string.
sample_db <- boilerplate_add_entry(
  sample_db,
  path = "measures.age",
  value = list(
    name = "Age",
    description = "Participant age in years",
    type = "continuous",
    range = c(18, 65)
  )
)

# Save the whole database as JSON in `json_path`.
# confirm = FALSE and quiet = TRUE presumably skip the interactive prompt
# and the progress messages - confirm against the function's help page.
boilerplate_save(
  sample_db,
  data_path = json_path,
  format = "json",
  confirm = FALSE,
  quiet = TRUE
)

# Import JSON database (auto-detects format, so no `format` argument is
# passed here)
imported_db <- boilerplate_import(
  data_path = json_path,
  quiet = TRUE
)

# Check structure. Two things to notice in the output below:
# - `range` was saved as the numeric vector c(18, 65) but is read back as a
#   list of 2;
# - names are listed alphabetically (measures before methods, analysis
#   before sampling), not in the order the entries were added.
str(imported_db, max.level = 3)
#> List of 2
#>  $ measures:List of 1
#>   ..$ age:List of 4
#>   .. ..$ name       : chr "Age"
#>   .. ..$ description: chr "Participant age in years"
#>   .. ..$ type       : chr "continuous"
#>   .. ..$ range      :List of 2
#>  $ methods :List of 2
#>   ..$ analysis:List of 1
#>   .. ..$ regression: chr "We conducted linear regression using {{software}}."
#>   ..$ sampling: chr "Participants were randomly selected from {{population}}."

Working with Category-Specific JSON Files

# Save each category to its own file. The category's content is wrapped
# under a single top-level key that matches the file name without its
# extension: `methods_db` in methods_db.json, `measures_db` in
# measures_db.json.
methods_db <- list(methods_db = sample_db$methods)
measures_db <- list(measures_db = sample_db$measures)

# pretty = TRUE writes indented, human-readable JSON; auto_unbox = TRUE
# writes length-one vectors as JSON scalars instead of one-element arrays.
jsonlite::write_json(
  methods_db,
  file.path(json_path, "methods_db.json"),
  pretty = TRUE,
  auto_unbox = TRUE
)

jsonlite::write_json(
  measures_db,
  file.path(json_path, "measures_db.json"),
  pretty = TRUE,
  auto_unbox = TRUE
)

# Import specific category: only the methods entries are returned
methods_only <- boilerplate_import(
  data_path = json_path,
  category = "methods",
  quiet = TRUE
)

# The result holds the entries of the category directly, without the
# `methods_db` wrapper key used in the file.
names(methods_only)
#> [1] "analysis" "sampling"

Migrating from RDS to JSON

If you have existing RDS databases, you can easily migrate them to JSON format:

# Create RDS databases for migration example
rds_path <- file.path(temp_dir, "rds_example")
dir.create(rds_path, showWarnings = FALSE)

# Save as RDS first: one file per category, named <category>_db.rds
saveRDS(sample_db$methods, file.path(rds_path, "methods_db.rds"))
saveRDS(sample_db$measures, file.path(rds_path, "measures_db.rds"))

# Migrate to JSON. quiet = FALSE so the progress report is printed.
# NOTE(review): the summary below reports "0/1 passed validation". The
# validation example later in this document reports "Schema not found for
# type: unified", which may be the cause - confirm before relying on this
# check.
migration_output <- file.path(temp_dir, "migrated_json")
results <- boilerplate_migrate_to_json(
  source_path = rds_path,
  output_path = migration_output,
  format = "unified",  # Creates a single unified JSON file
  backup = TRUE,       # Creates backup of RDS files
  quiet = FALSE
)
#> 
#> ── Migrating 2 RDS files to JSON ──
#> 
#> ℹ Creating backup in /var/folders/q9/lkcn14l97mb6mkhbxsxrpr4w0000gn/T//Rtmp2Xlvpq/migrated_json/backup_20250608_011341
#> ℹ Processing measures_db.rds
#> ℹ Processing methods_db.rds
#> ℹ Saving unified database to boilerplate_unified.json
#> 
#> ── Migration Summary
#> ✔ Migrated 2 databases
#> ℹ 0/1 passed validation

# Check migration results: `results$migrated` lists the source RDS files
# that were migrated
print(results$migrated)
#> [1] "/var/folders/q9/lkcn14l97mb6mkhbxsxrpr4w0000gn/T//Rtmp2Xlvpq/rds_example/measures_db.rds"
#> [2] "/var/folders/q9/lkcn14l97mb6mkhbxsxrpr4w0000gn/T//Rtmp2Xlvpq/rds_example/methods_db.rds"

# Verify the migrated data by importing the unified JSON file; both
# categories should be present
migrated_db <- boilerplate_import(
  data_path = migration_output,
  quiet = TRUE
)
names(migrated_db)
#> [1] "measures" "methods"

Batch Editing JSON Databases

The package provides tools for batch editing databases, whether they are held in memory or stored as JSON files:

# Build the measures database used in the editing examples, one entry at a
# time. Every entry has the same four fields: a display name, a short
# description, a citation key and the list of item wordings.
measures_db <- list()

measures_db[["anxiety_scale"]] <- list(
  name = "Generalized Anxiety Disorder 7-item",
  description = "GAD-7 anxiety measure",
  reference = "Spitzer2006",
  items = as.list(c(
    "Feeling nervous or on edge",
    "Not being able to stop worrying"
  ))
)

measures_db[["depression_scale"]] <- list(
  name = "Patient Health Questionnaire",
  description = "PHQ-9 depression measure",
  reference = "Kroenke2001",
  items = as.list(c(
    "Little interest or pleasure",
    "Feeling down or hopeless"
  ))
)

# Batch update: set the `reference` field of every entry to one new value
# (here a placeholder citation key that starts with "@").
# The edited database is returned as `updated_db`; `measures_db` itself is
# not modified by this call.
updated_db <- boilerplate_batch_edit(
  db = measures_db,  # Can also pass file path directly
  field = "reference",
  new_value = "@reference_2024",  # This will update all references
  target_entries = "*",           # Apply to all entries
  preview = FALSE,                # Don't preview, just update
  confirm = FALSE,                # Don't ask for confirmation
  quiet = TRUE                    # Suppress messages
)

# For edits that depend on each entry's current value, modify the list
# directly. Here an "@" prefix is added to every reference that lacks one.
# (The package also offers boilerplate_batch_clean() for more complex
# edits; the base R loop below shows the idea explicitly.)
for (measure in names(measures_db)) {
  # `[[` matches the field name exactly, whereas `$` would also partially
  # match a longer field name when no exact `reference` field exists.
  ref <- measures_db[[measure]][["reference"]]

  # Skip entries that have no reference or whose reference is not text.
  if (!is.character(ref) || length(ref) == 0) {
    next
  }

  # Vectorised, so an entry holding several citation keys is handled too.
  # Missing (NA) keys are left untouched.
  needs_prefix <- !is.na(ref) & !startsWith(ref, "@")
  ref[needs_prefix] <- paste0("@", ref[needs_prefix])
  measures_db[[measure]][["reference"]] <- ref
}

# Check the updates
measures_db$anxiety_scale$reference
#> [1] "@Spitzer2006"
measures_db$depression_scale$reference
#> [1] "@Kroenke2001"

Standardising Measures in JSON Format

# Standardise measures database
# NOTE(review): json_compatible = TRUE presumably keeps the result safe to
# write out as JSON - confirm against the function's help page.
standardised <- boilerplate_standardise_measures(
  db = measures_db,
  json_compatible = TRUE,
  quiet = TRUE
)

# Inspect one standardised entry. Compared with the input, the output below
# shows two added fields (`standardised`, `standardised_date`), `name`
# replaced by the entry's key, and `reference` lower-cased with its "@"
# prefix removed.
str(standardised$anxiety_scale)
#> List of 6
#>  $ name             : chr "anxiety_scale"
#>  $ description      : chr "GAD-7 anxiety measure"
#>  $ reference        : chr "spitzer2006"
#>  $ items            :List of 2
#>   ..$ : chr "Feeling nervous or on edge"
#>   ..$ : chr "Not being able to stop worrying"
#>  $ standardised     : logi TRUE
#>  $ standardised_date: Date[1:1], format: "2025-06-08"

Validating JSON Structure and Health

The package provides multiple ways to validate your JSON databases:

Schema Validation

# Location of the measures file that the save step below writes
json_file <- file.path(temp_dir, "measures_db.json")

# Write the measures database to disk as JSON
boilerplate_save(
  measures_db,
  quiet = TRUE,
  confirm = FALSE,
  format = "json",
  category = "measures",
  data_path = temp_dir
)

# Validate the file's structure as a measures database
# (requires schema files)
validation_errors <- validate_json_database(json_file, type = "measures")

# Report the outcome: an empty result means no problems were found
if (length(validation_errors) > 0) {
  message("Validation errors found:")
  print(validation_errors)
} else {
  message("JSON structure is valid!")
}

Database Validation

# Validate the unified JSON file saved earlier in `json_path`.
# The check is skipped if that file does not exist.
json_file <- file.path(json_path, "boilerplate_unified.json")
if (file.exists(json_file)) {
  validation_errors <- validate_json_database(json_file, type = "unified")
  
  if (length(validation_errors) == 0) {
    message("JSON database structure is valid!")
  } else {
    warning("Database validation found issues:")
    print(validation_errors)
  }
}
#> Warning: Database validation found issues:
#> [1] "Schema not found for type: unified"
# NOTE(review): the issue reported above is a missing schema for type
# "unified", not a problem found in the file itself - the structure has
# presumably not been checked at all in this run.

# Count the entries stored in each category.
# NOTE(review): three methods paths are reported although only two texts
# were added; presumably the intermediate `analysis` level is listed as a
# path too - confirm.
methods_paths <- boilerplate_list_paths(boilerplate_methods(sample_db))
cat("Methods entries:", length(methods_paths), "\n")
#> Methods entries: 3

measures_names <- names(boilerplate_measures(sample_db))
cat("Measures entries:", length(measures_names), "\n")
#> Measures entries: 1

Integration with Existing Workflow

JSON databases work seamlessly with all existing boilerplate functions:

# Generate text using JSON database.
# `global_vars` supplies the values that replace the {{...}} placeholders
# in the stored text; here {{population}} becomes "university students".
text <- boilerplate_generate_text(
  category = "methods",
  sections = "sampling",
  db = imported_db,
  global_vars = list(
    population = "university students"
  )
)
#> ℹ generating methods text with 1 sections
#> ℹ using methods from unified database
#> ℹ processing section: sampling
#> ℹ applying template variables to sampling
#> ✔ successfully generated methods text with 1 section(s)

cat(text)
#> Participants were randomly selected from university students.

# Generate text from nested path: the section is addressed with the same
# dot-separated notation used when the entry was added
analysis_text <- boilerplate_generate_text(
  category = "methods",
  sections = "analysis.regression",
  db = imported_db,
  global_vars = list(
    software = "R version 4.3.0"
  )
)
#> ℹ generating methods text with 1 sections
#> ℹ using methods from unified database
#> ℹ processing section: analysis.regression
#> ℹ applying template variables to analysis.regression
#> ✔ successfully generated methods text with 1 section(s)

cat(analysis_text)
#> We conducted linear regression using R version 4.3.0.

# Generate measures text: `variables` names the measure entries to include
# and `variable_heading` is the heading they are placed under
measures_text <- boilerplate_generate_measures(
  variable_heading = "Demographics",
  variables = "age",
  db = imported_db
)
#> ℹ using measures from unified database
#> ℹ generating formatted text for 1 Demographics
#> ℹ using heading level 3 and subheading level 4
#> ℹ processing variable: age
#> ℹ adding description for age
#> ✔ successfully generated formatted text for Demographics

# The result is Markdown: a level-3 heading, a level-4 subheading with the
# measure's name, then its description
cat(measures_text)
#> ### Demographics
#> 
#> #### Age
#> 
#> Participant age in years.

Best Practices

Use meaningful file names: Name your JSON files descriptively (e.g., study1_methods.json)
Version control: JSON files work great with Git - commit them to track changes

Use template variables: Include {{variable}} placeholders in your text for dynamic content:

# `db` stands for a database you already have in memory; it is not defined
# in this snippet. The two placeholders are filled in when text is
# generated.
db <- boilerplate_add_entry(
  db,
  path = "methods.power",
  value = "Power analysis indicated {{n_required}} participants needed for {{power}}% power."
)

Regular validation: Use validate_json_database() to ensure database structure is correct
Backup before migration: Use the backup = TRUE option when migrating
Choose appropriate format:
- Use unified format for complete databases
- Use separate files for modular management

Track database contents: Regularly list the paths and entry names stored in your database:

# List all paths in the methods category and the names of all measures.
# `db` stands for a database you already have in memory; it is not defined
# in this snippet.
methods_paths <- boilerplate_list_paths(boilerplate_methods(db))
measures_names <- names(boilerplate_measures(db))

Conclusion

JSON support in boilerplate provides a modern, flexible way to manage your research text databases. Whether you’re starting fresh or migrating existing RDS databases, the JSON functionality integrates seamlessly with your workflow while providing better visibility and version control.