how-to-create-qnr-metadata-dta
How to create a questionnaire metadata data set from the JSON file
To create a questionnaire metadata data set, one must:
- Extract data from JSON
- Reshape data into rectangular form
- Extract data from reusable categories
- Reshape that data into desired form
- Combine JSON and reusable categories data
- Select data relevant for questions and variables
- Write data to a Stata file
At present, this can be achieved with the R script below.
In the future, this will likely be done through a simple function in the {susometa} package,
and potentially through a `selector` command that wraps this function.
# =============================================================================
# Set up paths
# =============================================================================
# NOTE: provide paths with / rather than \
# json_dir: directory that contains `document.json` and the `Categories` folder
# out_dir: directory where the Stata (.dta) file will be written
json_dir <- ""
out_dir <- ""
# =============================================================================
# Install required packages
# =============================================================================
# for package installation
# use requireNamespace() rather than require(): it checks availability
# without attaching the package to the search path
if (!requireNamespace("pak", quietly = TRUE)) {
  install.packages("pak")
}

# install required packages
# dplyr and tidyselect are listed explicitly because this script calls them
# directly through `dplyr::` and `tidyselect::`
required_packages <- c(
  "dplyr",
  "tidyselect",
  "stringr",
  "lsms-worldbank/susometa",
  "fs",
  "haven"
)
pak::pak(required_packages)
# =============================================================================
# Ingest JSON questionnaire metadata
# =============================================================================
# parse the questionnaire JSON file found in `json_dir` into a data frame
qnr_df <- susometa::parse_questionnaire(
  path = fs::path(json_dir, "document.json")
)
# =============================================================================
# Add reusable categories
# =============================================================================
# parse the reusable categories stored in the `Categories` sub-directory
categories_df_raw <- susometa::parse_categories(
  dir = fs::path(json_dir, "Categories")
)
# reshape the parsed categories before joining
categories_df <- susometa::reshape_categories(
  categories_df = categories_df_raw
)
# combine the questionnaire metadata with the reusable categories
qnr_df <- susometa::join_categories(
  qnr_json_df = qnr_df,
  categories_df = categories_df
)
# =============================================================================
# Make the metadata data frame Stata-friendly in form and content
# =============================================================================
# function to convert Boolean values to integer
#
# x: a vector of Boolean(-like) values
# returns: an integer vector of the same length as `x`, with
#   1L where `x == TRUE`, 0L where `x == FALSE`, and NA otherwise
from_bool_to_int <- function(x) {
  # start from all-missing so anything that is neither TRUE nor FALSE stays NA
  int_values <- rep(NA_integer_, length(x))
  # which() drops NA comparisons, so missing inputs are left as NA
  int_values[which(x == TRUE)] <- 1L
  int_values[which(x == FALSE)] <- 0L
  int_values
}
# create a Stata-friendly version of the data frame
# - replace `.` with `_` in column names
# - strip newline characters from character columns
# - convert `is_*` columns from Boolean to integer
# - drop the `text` column
# the native pipe is used so that magrittr's `%>%` need not be attached
stata_qnr_df <- qnr_df |>
  # rename columns with illegal names
  dplyr::rename_with(
    .fn = \(col_names) stringr::str_replace_all(
      string = col_names,
      pattern = "\\.",
      replacement = "_"
    )
  ) |>
  dplyr::mutate(
    # remove newline from string variables
    dplyr::across(
      .cols = tidyselect::where(is.character),
      .fns = \(col) stringr::str_replace_all(
        string = col,
        pattern = "\n",
        replacement = ""
      )
    ),
    # convert Boolean flag columns to integer
    dplyr::across(
      .cols = tidyselect::starts_with("is_"),
      .fns = \(col) from_bool_to_int(col)
    )
  ) |>
  # remove columns with stubborn newline characters
  dplyr::select(-text)
# =============================================================================
# Extract data for questions in the export data
# =============================================================================
# get question metadata
q_df <- stata_qnr_df |>
  # keep only questions and relevant metadata attributes
  susometa::get_questions() |>
  # remove columns that may have newline characters
  dplyr::select(
    -dplyr::matches("_expression") # filter, enablement, and validation expressions
  )
# get variable metadata
var_df <- susometa::get_variables(qnr_df = stata_qnr_df) |>
  # populate `varname` column with `name_variable`
  dplyr::mutate(varname = name_variable)
# combine question and variable metadata
# rows are stacked; bind_rows() fills columns missing from one input with NA
qv_df <- dplyr::bind_rows(q_df, var_df)
# =============================================================================
# Save metadata to a Stata file
# =============================================================================
# question and variable metadata
# written to `question_metadata.dta` inside `out_dir`
haven::write_dta(
  data = qv_df,
  path = fs::path(out_dir, "question_metadata.dta")
)