Skip to contents

This function allows batch cleaning of text fields by removing or replacing specific characters or patterns across multiple entries in a boilerplate database.

Usage

boilerplate_batch_clean(
  db,
  field,
  remove_chars = NULL,
  replace_pairs = NULL,
  trim_whitespace = TRUE,
  collapse_spaces = FALSE,
  target_entries = NULL,
  exclude_entries = NULL,
  category = NULL,
  recursive = TRUE,
  preview = FALSE,
  confirm = TRUE,
  quiet = FALSE
)

Arguments

db

List. The database to clean (can be a single category or a unified database).

field

Character. The field to clean (e.g., "reference", "description").

remove_chars

Character vector. Characters to remove (e.g., c("@", "[", "]")).

replace_pairs

List. Named list for replacements (e.g., list(" " = "_")).

trim_whitespace

Logical. Whether to trim leading/trailing whitespace.

collapse_spaces

Logical. Whether to collapse multiple spaces to single space.

target_entries

Character vector. Entries to clean. Can be:

  • Specific entry names (e.g., c("ban_hate_speech", "born_nz"))

  • Patterns with wildcards (e.g., "anxiety*")

  • "all" to clean all entries

  • NULL to clean all entries with the specified field

exclude_entries

Character vector. Entries to exclude from cleaning. Accepts specific entry names or wildcard patterns, in the same way as target_entries.

category

Character. If db is unified, specifies which category to clean.

recursive

Logical. Whether to search recursively through nested structures.

preview

Logical. If TRUE, shows what would be changed without making changes.

confirm

Logical. If TRUE, asks for confirmation before making changes.

quiet

Logical. If TRUE, suppresses non-essential messages.

Value

The modified database.

Examples

if (FALSE) { # \dontrun{
# Remove @, [, and ] from all references
unified_db <- boilerplate_batch_clean(
  db = unified_db,
  field = "reference",
  remove_chars = c("@", "[", "]"),
  category = "measures"
)

# Clean all entries EXCEPT specific ones
unified_db <- boilerplate_batch_clean(
  db = unified_db,
  field = "reference",
  remove_chars = c("@", "[", "]"),
  exclude_entries = c("forgiveness", "special_measure"),
  category = "measures"
)

# Clean specific entries only
unified_db <- boilerplate_batch_clean(
  db = unified_db,
  field = "reference",
  remove_chars = c("@", "[", "]"),
  target_entries = c("ban_hate_speech", "born_nz"),
  category = "measures"
)

# Clean all entries starting with "emp_" except "emp_special"
unified_db <- boilerplate_batch_clean(
  db = unified_db,
  field = "reference",
  remove_chars = c("@", "[", "]"),
  target_entries = "emp_*",
  exclude_entries = "emp_special",
  category = "measures"
)

# Replace characters and clean
unified_db <- boilerplate_batch_clean(
  db = unified_db,
  field = "reference",
  remove_chars = c("@", "[", "]"),
  replace_pairs = list(" " = "_", "." = ""),
  trim_whitespace = TRUE,
  category = "measures"
)

# Preview changes first
boilerplate_batch_clean(
  db = unified_db,
  field = "reference",
  remove_chars = c("@", "[", "]"),
  exclude_entries = "forgiveness",
  category = "measures",
  preview = TRUE
)
} # }