Title: | Facilitates Analysis of CDC NHANES Data |
---|---|
Description: | Tools for downloading and analyzing CDC NHANES data, with a focus on analytical laboratory data. |
Authors: | Herb Susmann [cre, aut], Silent Spring Institute [cph] |
Maintainer: | Herb Susmann <[email protected]> |
License: | Apache License 2.0 | file LICENSE |
Version: | 1.1.1 |
Built: | 2024-10-31 18:38:12 UTC |
Source: | https://github.com/silentspringinstitute/rnhanes |
RNHANES simplifies downloading and analyzing NHANES data.
Translates cycle years into the correct demography filename suffix, e.g. '2001-2002' returns 'B'
demography_filename(year)
demography_filename(year)
year |
NHANES cycle, e.g. "2001-2002" |
suffix character e.g. "B"
Download an NHANES data file from a given cycle
download_nhanes_file( file_name, year, destination = tempdir(), cache = TRUE, method = "auto" )
download_nhanes_file( file_name, year, destination = tempdir(), cache = TRUE, method = "auto" )
file_name |
file name |
year |
NHANES cycle |
destination |
directory to download the file into |
cache |
whether to cache the file |
method |
download method passed to download.file |
path to the downloaded file
Returns the NHANES file suffix for the given year
file_suffix(year)
file_suffix(year)
year |
NHANES cycle year (e.g. "2001-2002") |
suffix character (e.g. "B" or "C")
Download an NHANES description file
load_nhanes_description( file_name, year, destination = tempdir(), cache = FALSE, method = "auto" )
load_nhanes_description( file_name, year, destination = tempdir(), cache = FALSE, method = "auto" )
file_name |
file name |
year |
NHANES cycle |
destination |
directory to download the file into |
cache |
whether to cache the file |
method |
download method passed to download.file |
data frame containing the file description
Compute quantiles from NHANES weighted survey data
nhanes_analyze( analysis_fun, nhanes_data, column, comment_column = "", weights_column = "", filter = NULL )
nhanes_analyze( analysis_fun, nhanes_data, column, comment_column = "", weights_column = "", filter = NULL )
analysis_fun |
function to use to analyze each variable |
nhanes_data |
data frame containing NHANES data |
column |
column name of the variable to compute quantiles for |
comment_column |
comment column name of the variable |
weights_column |
name of the weights column |
filter |
logical expression used to subset the data |
a data frame
List the valid NHANES cycle years
nhanes_cycle_years()
nhanes_cycle_years()
vector of NHANES cycle years
List the NHANES data files
nhanes_data_files( components = "all", destination = tempfile(), cache = TRUE, method = "auto" )
nhanes_data_files( components = "all", destination = tempfile(), cache = TRUE, method = "auto" )
components |
one of "all", "demographics", "dietary", "examination", "laboratory", "questionnaire" |
destination |
destination to save the file lists |
cache |
whether to cache the downloaded file lists so they don't have to be re-downloaded every time |
method |
download.file method |
data frame of NHANES data files available to download
## Not run: # Download a data frame of all the NHANES data files files <- nhanes_data_files() # Download a data frame of just the laboratory files lab_files <- nhanes_data_files(component = "laboratory") ## End(Not run)
## Not run: # Download a data frame of all the NHANES data files files <- nhanes_data_files() # Download a data frame of just the laboratory files lab_files <- nhanes_data_files(component = "laboratory") ## End(Not run)
Compute detection frequencies of NHANES data
nhanes_detection_frequency( nhanes_data, column, comment_column, weights_column = "", filter = NULL )
nhanes_detection_frequency( nhanes_data, column, comment_column, weights_column = "", filter = NULL )
nhanes_data |
data frame containing NHANES data |
column |
column names of the variables to compute detection frequencies for |
comment_column |
comment column names of the variables to compute detection frequencies for |
weights_column |
sample weight column |
filter |
logical expression used to subset the data |
named vector of detection frequencies
## Not run: dat <- nhanes_load_data("UHG_G", "2011-2012", demographics = TRUE) # Compute detection frequency nhanes_detection_frequency(dat, c("URXUHG"), c("URDUHGLC")) ## End(Not run)
## Not run: dat <- nhanes_load_data("UHG_G", "2011-2012", demographics = TRUE) # Compute detection frequency nhanes_detection_frequency(dat, c("URXUHG"), c("URDUHGLC")) ## End(Not run)
Compute geometric means from NHANES weighted survey data
nhanes_geometric_mean(nhanes_data, column, weights_column = "", filter = NULL)
nhanes_geometric_mean(nhanes_data, column, weights_column = "", filter = NULL)
nhanes_data |
data frame containing NHANES data |
column |
column name of the variable to compute geometric means for |
weights_column |
name of the weights column |
filter |
logical expression used to subset the data |
a data frame
## Not run: dat <- nhanes_load_data("UHG_G", "2011-2012", demographics = TRUE) nhanes_geometric_mean(dat, "URXUHG", "URDUHGLC", "WTSA2YR") ## End(Not run)
## Not run: dat <- nhanes_load_data("UHG_G", "2011-2012", demographics = TRUE) nhanes_geometric_mean(dat, "URXUHG", "URDUHGLC", "WTSA2YR") ## End(Not run)
Plot a weighted histogram of an NHANES variable
nhanes_hist( nhanes_data, column, comment_column, weights_column = "", filter = "", transform = "", ... )
nhanes_hist( nhanes_data, column, comment_column, weights_column = "", filter = "", transform = "", ... )
nhanes_data |
data frame containing NHANES data |
column |
column name of the variable to plot |
comment_column |
comment column of the variable to plot |
weights_column |
name of the weights column |
filter |
logical expression used to subset the data |
transform |
transformation to apply to the column. Accepts any function name, for example: "log" |
... |
parameters passed through to svyhist function |
a data frame
## Not run: dat <- nhanes_load_data("PFC_G", "2011-2012", demographics = TRUE) nhanes_hist(dat, "LBXPFOA") ## End(Not run)
## Not run: dat <- nhanes_load_data("PFC_G", "2011-2012", demographics = TRUE) nhanes_hist(dat, "LBXPFOA") ## End(Not run)
Download NHANES data files.
nhanes_load_data( file_name, year, destination = tempdir(), demographics = FALSE, cache = TRUE, recode = FALSE, recode_data = FALSE, recode_demographics = FALSE, allow_duplicate_files = FALSE, method = "auto" )
nhanes_load_data( file_name, year, destination = tempdir(), demographics = FALSE, cache = TRUE, recode = FALSE, recode_data = FALSE, recode_demographics = FALSE, allow_duplicate_files = FALSE, method = "auto" )
file_name |
NHANES file name (e.g. "EPH") or a vector of filenames (e.g. c("EPH", "GHB")) |
year |
NHANES cycle year (e.g. "2007-2008") or a vector of cycle years |
destination |
directory to download the files to |
demographics |
include demographics data into the dataset |
cache |
whether to cache the file to disk |
recode |
whether to recode the data and demographics (overrides other parameters) |
recode_data |
whether to recode just the data |
recode_demographics |
whether to recode just the demographics |
allow_duplicate_files |
how to handle a request that has duplicate file names/cycle years. By default duplicates will be removed. |
method |
download method passed to download.file |
If you supply vectors for both file_name and year, then the vectors are paired and each file_name/year pair is downloaded. For example, file_name = c("EPH", "GHB"), year = c("2009-2010", "2011-2012") will download "EPH_F.XPT" and "GHB_G.XPT". In other words, the function does not download every possible combination of file_name and year.
You can specify file names in several formats. In order of specificity: You can supply the complete filename: "EPH_F.XPT" You can supply the filename without an extension: "EPH_F" You can supply the filename without a suffix: "EPH", year = "2009-2010"
If you are loading the same file across multiple years, you must supply the filename without a suffix so that the correct suffix for each year can be used.
This function returns either a list or a data frame. If you load multiple files, the return value will always be a list. This is because the columns may not match in between files. If you load one file, the result will be a data frame.
If file_name or year is a vector, returns a list containing a data frame for each file_name. If file_name and year are both singletons, then a data frame is returned.
## Not run: nhanes_load_data("UHG", "2011-2012") # Load data with demographics nhanes_load_data("UHG", "2011-2012", demographics = TRUE) # Download to /tmp directory and overwrite the file if it already exists nhanes_load_data("HDL_E", "2007-2008", destination = "/tmp", cache = FALSE) ## End(Not run)
## Not run: nhanes_load_data("UHG", "2011-2012") # Load data with demographics nhanes_load_data("UHG", "2011-2012", demographics = TRUE) # Download to /tmp directory and overwrite the file if it already exists nhanes_load_data("HDL_E", "2007-2008", destination = "/tmp", cache = FALSE) ## End(Not run)
Download NHANES demography files for a specific cycle.
nhanes_load_demography_data( year, destination = tempdir(), cache = FALSE, method = "auto" )
nhanes_load_demography_data( year, destination = tempdir(), cache = FALSE, method = "auto" )
year |
NHANES cycle year (e.g. "2011-2012") |
destination |
directory to download the file to |
cache |
whether to load the file if it already exists on disk |
method |
download method passed to download.file |
## Not run: nhanes_load_demography_data("2011-2012") ## End(Not run)
## Not run: nhanes_load_demography_data("2011-2012") ## End(Not run)
Compute quantiles from NHANES weighted survey data
nhanes_quantile( nhanes_data, column, comment_column = "", weights_column = "", quantiles = seq(0, 1, 0.25), filter = NULL, ... )
nhanes_quantile( nhanes_data, column, comment_column = "", weights_column = "", quantiles = seq(0, 1, 0.25), filter = NULL, ... )
nhanes_data |
data frame containing NHANES data |
column |
column name of the variable to compute quantiles for |
comment_column |
comment column name of the variable for checking if computed quantiles are below the LOD |
weights_column |
name of the weights column |
quantiles |
a single numeric value or a numeric vector of quantiles to compute |
filter |
logical expression used to subset the data |
... |
additional arguments passed to svyquantile |
a data frame
## Not run: dat <- nhanes_load_data("UHG_G", "2011-2012", demographics = TRUE) # Compute 50th, 95th, and 99th quantiles nhanes_quantile(dat, "URXUHG", "URDUHGLC", "WTSA2YR", c(0.5, 0.95, 0.99)) ## End(Not run)
## Not run: dat <- nhanes_load_data("UHG_G", "2011-2012", demographics = TRUE) # Compute 50th, 95th, and 99th quantiles nhanes_quantile(dat, "URXUHG", "URDUHGLC", "WTSA2YR", c(0.5, 0.95, 0.99)) ## End(Not run)
Compute the sample size of NHANES data
nhanes_sample_size( nhanes_data, column, comment_column = "", weights_column = "", filter = NULL )
nhanes_sample_size( nhanes_data, column, comment_column = "", weights_column = "", filter = NULL )
nhanes_data |
data frame containing NHANES data |
column |
column name of the variable to compute the sample size for |
comment_column |
comment column name of the variable for checking if computed quantiles are below the LOD |
weights_column |
name of the weights column |
filter |
logical expression used to subset the data |
a data frame
## Not run: dat <- nhanes_load_data("UHG_G", "2011-2012", demographics = TRUE) nhanes_sample_size(dat, "URXUHG", "URDUHGLC") ## End(Not run)
## Not run: dat <- nhanes_load_data("UHG_G", "2011-2012", demographics = TRUE) nhanes_sample_size(dat, "URXUHG", "URDUHGLC") ## End(Not run)
Search the results from nhanes_variables or nhanes_data_files
nhanes_search( nhanes_data, query, ..., fuzzy = FALSE, ignore_case = TRUE, max_distance = 0.2 )
nhanes_search( nhanes_data, query, ..., fuzzy = FALSE, ignore_case = TRUE, max_distance = 0.2 )
nhanes_data |
nhanes variable list, from nhanes_variables function, or data file list, from nhanes_data_files |
query |
regular expression search query |
... |
additional arguments to pass to dplyr::filter |
fuzzy |
whether to use fuzzy string matching for search (based on edit distances) |
ignore_case |
whether to ignore case when matching the search query (TRUE makes the search case-insensitive) |
max_distance |
parameter for tuning fuzzy string matching, 0-1 |
data frame filtered by search query
## Not run: nhanes_files <- nhanes_data_files() # Search for data files about pesticides nhanes_search(nhanes_files, "pesticides") ## End(Not run)
## Not run: nhanes_files <- nhanes_data_files() # Search for data files about pesticides nhanes_search(nhanes_files, "pesticides") ## End(Not run)
Apply a function from the survey package to NHANES data
nhanes_survey( survey_fun, nhanes_data, column, comment_column = "", weights_column = "", filter = NULL, analyze = "values", callback = NULL, ... )
nhanes_survey( survey_fun, nhanes_data, column, comment_column = "", weights_column = "", filter = NULL, analyze = "values", callback = NULL, ... )
survey_fun |
the survey package function (e.g. svyquantile or svymean) |
nhanes_data |
data frame containing NHANES data |
column |
column name of the variable to apply the survey function to |
comment_column |
comment column name of the variable |
weights_column |
name of the weights column |
filter |
logical expression used to subset the data |
analyze |
one of "values" or "comments", whether to apply the survey function to the value or comment column. |
callback |
optional function to execute on each row of the dataframe |
... |
other arguments to pass to the survey function |
This function provides a generic way to apply any function from the survey package to NHANES data. RNHANES provides specific wrappers for computing quantiles (nhanes_quantile) and detection frequencies (nhanes_detection_frequency), and this function provides a general way to use any survey function.
a data frame
## Not run: library(survey) nhanes_data <- nhanes_load_data("EPH", "2011-2012", demographics = TRUE) # Compute the mean of triclosan using the svymean function nhanes_survey(svymean, nhanes_data, "URXTRS", "URDTRSLC", na.rm = TRUE) # Compute the variance using svyvar nhanes_survey(svyvar, nhanes_data, "URXTRS", "URDTRSLC", na.rm = TRUE) ## End(Not run)
## Not run: library(survey) nhanes_data <- nhanes_load_data("EPH", "2011-2012", demographics = TRUE) # Compute the mean of triclosan using the svymean function nhanes_survey(svymean, nhanes_data, "URXTRS", "URDTRSLC", na.rm = TRUE) # Compute the variance using svyvar nhanes_survey(svyvar, nhanes_data, "URXTRS", "URDTRSLC", na.rm = TRUE) ## End(Not run)
Build survey objects for NHANES data
nhanes_survey_design(nhanes_data, weights_column = "")
nhanes_survey_design(nhanes_data, weights_column = "")
nhanes_data |
data frame containing NHANES data |
weights_column |
name of the weights column |
a survey design object
## Not run: dat <- nhanes_load_data("UHG_G", "2011-2012", demographics = TRUE) design <- nhanes_survey_design(dat, "WTSA2YR") svymean(~RIDAGEYR, design) svyglm(URXUHG ~ RIDAGEYR + RIAGENDR, design) ## End(Not run)
## Not run: dat <- nhanes_load_data("UHG_G", "2011-2012", demographics = TRUE) design <- nhanes_survey_design(dat, "WTSA2YR") svymean(~RIDAGEYR, design) svyglm(URXUHG ~ RIDAGEYR + RIAGENDR, design) ## End(Not run)
Load the NHANES comprehensive variable list
nhanes_variables( components = "all", destination = tempfile(), cache = TRUE, method = "auto" )
nhanes_variables( components = "all", destination = tempfile(), cache = TRUE, method = "auto" )
components |
one of "all", "demographics", "dietary", "examination", "laboratory", "questionnaire" |
destination |
where to save the variable list |
cache |
whether to cache the downloaded variable list so it doesn't have to be re-downloaded every time |
method |
download.file method. (Helper function for nhanes_variables function.) |
dat
## Not run: # Download the comprehensive NHANES variable list variables <- nhanes_variables() # Download the variable list and cache it in a specific file variables <- nhanes_variables(destination = "./nhanes_data") ## End(Not run)
## Not run: # Download the comprehensive NHANES variable list variables <- nhanes_variables() # Download the variable list and cache it in a specific file variables <- nhanes_variables(destination = "./nhanes_data") ## End(Not run)
Extract variance/covariance matrix from parameters of svymean
nhanes_vcov(nhanes_data, columns, weights_column = "", filter = "")
nhanes_vcov(nhanes_data, columns, weights_column = "", filter = "")
nhanes_data |
data frame containing NHANES data |
columns |
columns to include in the svymean computation |
weights_column |
name of the weights column |
filter |
logical expression used to subset the data |
a data frame
## Not run: dat <- nhanes_load_data("PFC_G", "2011-2012", demographics = TRUE) nhanes_vcov(dat, c("LBXPFOA", "LBXPFOS")) ## End(Not run)
## Not run: dat <- nhanes_load_data("PFC_G", "2011-2012", demographics = TRUE) nhanes_vcov(dat, c("LBXPFOA", "LBXPFOS")) ## End(Not run)
Processes a file name to make sure it is valid and has the correct suffix and extension. File names with an extension (e.g. ".XPT") are not altered.
process_file_name(file_name, year, extension = ".XPT")
process_file_name(file_name, year, extension = ".XPT")
file_name |
name of the file |
year |
NHANES cycle year |
extension |
file extension |
Check that the year is in the correct format, e.g. '2001-2002' is correct and returns TRUE, while '2001' is not correct and returns FALSE.
validate_year(year, throw_error = TRUE)
validate_year(year, throw_error = TRUE)
year |
the year or years to validate |
throw_error |
whether to throw an error if the year is invalid |