Skip to content

Commit 0d11a71

Browse files
committed
Incorporated Claude and Codex suggestions - added more tests and Divio vignettes, improved medication function regex, logic, and vectorizations, and fixed income and family history function names; improved informativeness of pkgdown website
1 parent 25c327d commit 0d11a71

File tree

92 files changed

+4495
-5448
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+4495
-5448
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ packrat.lock
7474

7575
# If you use RStudio
7676
.Rproj.user/
77+
.Rproj
7778

7879
# If you use lintr for code linting, ignore these files
7980
.R/lintr_cache/

DESCRIPTION

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@ Package: chmsflow
22
Type: Package
33
Title: Transforming and Harmonizing CHMS Variables
44
Version: 0.1.0
5+
Author: See Authors@R
6+
Maintainer: Rafidul Islam <raislam@ohri.ca>
57
Authors@R: c(
68
person("Rafidul", "Islam", email = "raislam@ohri.ca", role = c("aut", "cre")),
7-
person("Douglas", "Manuel", email = "dmanuel@ohri.ca", role = c("aut")))
9+
person("Douglas", "Manuel", email = "dmanuel@ohri.ca", role = c("aut")),
10+
person("Therese", "Chan", email = "TChan@bruyere.org", role = c("aut")))
811
Description: Supporting the use of the Canadian Health Measures Survey (CHMS)
912
by transforming variables from each cycle into harmonized, consistent
1013
versions that span survey cycles 1-6. This package uses rec_with_table(),
@@ -16,6 +19,7 @@ Depends:
1619
Imports: haven, logger
1720
License: MIT + file LICENSE
1821
URL: https://github.com/Big-Life-Lab/chmsflow, https://big-life-lab.github.io/chmsflow/
22+
BugReports: https://github.com/Big-Life-Lab/chmsflow/issues
1923
Encoding: UTF-8
2024
Roxygen: list(markdown = TRUE)
2125
RoxygenNote: 7.3.2

NAMESPACE

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
export(adjust_DBP)
44
export(adjust_SBP)
55
export(calculate_GFR)
6-
export(calculate_Hhld_Income)
76
export(calculate_WHR)
7+
export(calculate_hhld_income)
88
export(calculate_nonHDL)
99
export(categorize_GFR_to_CKD)
1010
export(categorize_income)
@@ -18,8 +18,8 @@ export(cycles1to2_diabetes_drugs)
1818
export(cycles1to2_diuretics)
1919
export(cycles1to2_nsaid)
2020
export(cycles1to2_other_antiHTN_meds)
21-
export(determine_CVD_Family_History)
22-
export(determine_CVD_Personal_History)
21+
export(determine_CVD_family_history)
22+
export(determine_CVD_personal_history)
2323
export(determine_adjusted_hypertension)
2424
export(determine_controlled_adjusted_hypertension)
2525
export(determine_controlled_hypertension)
@@ -29,7 +29,7 @@ export(determine_inclusive_diabetes)
2929
export(find_totalFV_cycles1and2)
3030
export(find_totalFV_cycles3to6)
3131
export(find_week_accelerometer_average)
32-
export(in_lowest_income_qunitle)
32+
export(in_lowest_income_quintile)
3333
export(is_NSAID)
3434
export(is_ace_inhibitor)
3535
export(is_any_antiHTN_med)

NEWS.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
# chmsflow 0.1.0
2-
2024-11-19
1+
# chmsflow 0.1.0 (First Version)
32

43
## Features
4+
5+
- Added variables.csv that contains list of CHMS variables in chmsflow.
6+
- Added variable-details.csv that maps variables across CHMS cycles 1-6
7+
- Added Vignettes to describe the process of using rec_with_table() (part of recodeflow) to combine the data

R/family-history.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717
#' @examples
1818
#'
1919
#' # Determine CVD personal history for a person with heart disease (CCC_61 = 1).
20-
#' determine_CVD_Personal_History(CCC_61 = 1, CCC_63 = 2, CCC_81 = 2)
20+
#' determine_CVD_personal_history(CCC_61 = 1, CCC_63 = 2, CCC_81 = 2)
2121
#' # Output: 1 (CVD personal history is "Yes" as heart disease is present).
2222
#'
2323
#' @export
24-
determine_CVD_Personal_History <- function(CCC_61, CCC_63, CCC_81) {
24+
determine_CVD_personal_history <- function(CCC_61, CCC_63, CCC_81) {
2525
cardiov <- haven::tagged_na("b")
2626

2727
if (is.na(CCC_61) && is.na(CCC_63) && is.na(CCC_81)) {
@@ -64,11 +64,11 @@ determine_CVD_Personal_History <- function(CCC_61, CCC_63, CCC_81) {
6464
#'
6565
#' @examples
6666
#' # Example 1: Premature CVD due to heart disease diagnosis at age 50
67-
#' determine_CVD_Family_History(FMH_11 = 1, FMH_12 = 50, FMH_13 = 2, FMH_14 = NA)
67+
#' determine_CVD_family_history(FMH_11 = 1, FMH_12 = 50, FMH_13 = 2, FMH_14 = NA)
6868
#' # Output: 1
6969
#'
7070
#' @export
71-
determine_CVD_Family_History <- function(FMH_11, FMH_12, FMH_13, FMH_14) {
71+
determine_CVD_family_history <- function(FMH_11, FMH_12, FMH_13, FMH_14) {
7272
famheart60 <- 0
7373
famstroke60 <- 0
7474
famCVD60 <- haven::tagged_na("b")

R/income.R

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,23 @@
2020
#' @examples
2121
#'
2222
#' # Example 1: Respondent with $50,000 income and a household size of 3.
23-
#' calculate_Hhld_Income(THI_01 = 50000, DHHDHSZ = 3)
23+
#' calculate_hhld_income(THI_01 = 50000, DHHDHSZ = 3)
2424
#' # Output: 29411.76
2525
#'
2626
#' # Example 2: Respondent with $75000 income and a household size of 2.
27-
#' calculate_Hhld_Income(THI_01 = 75000, DHHDHSZ = 2)
27+
#' calculate_hhld_income(THI_01 = 75000, DHHDHSZ = 2)
2828
#' # Output: 53571.43
2929
#'
3030
#' # Example 3: Respondent with $90000 income and a household size of 1.
31-
#' calculate_Hhld_Income(THI_01 = 90000, DHHDHSZ = 1)
31+
#' calculate_hhld_income(THI_01 = 90000, DHHDHSZ = 1)
3232
#' # Output: 90000
3333
#'
3434
#' @export
35-
calculate_Hhld_Income <- function(THI_01, DHHDHSZ) {
35+
calculate_hhld_income <- function(THI_01, DHHDHSZ) {
3636
# Step 1 - derive household adjustment based on household size
3737
hh_size_wt <- 0
3838

39-
if (is.na(DHHDHSZ) || DHHDHSZ < 0) {
39+
if (is.na(DHHDHSZ) || DHHDHSZ <= 0) {
4040
return(haven::tagged_na("b"))
4141
}
4242

@@ -110,21 +110,21 @@ categorize_income <- function(adj_hh_inc) {
110110
#' @param incq Categorical value indicating the income category as defined by the categorize_income function.
111111
#'
112112
#' @return A categorical value indicating whether the individual is in the lowest income quintile:
113-
#' - 1: In the lowest income quartile
114-
#' - 2: Not in the lowest income quartile
113+
#' - 1: In the lowest income quintile
114+
#' - 2: Not in the lowest income quintile
115115
#' - NA(b): Missing or invalid input
116116
#'
117117
#' @examples
118118
#' # Example 1: Check if an income category of 3 (between $35,000-50,000) is in the lowest quintile
119-
#' in_lowest_income_qunitle(3)
119+
#' in_lowest_income_quintile(3)
120120
#' # Output: 2
121121
#'
122122
#' # Example 2: Check if an income category of 1 (below or equal to $21,500) is in the lowest quintile
123-
#' in_lowest_income_qunitle(1)
123+
#' in_lowest_income_quintile(1)
124124
#' # Output: 1
125125
#'
126126
#' @export
127-
in_lowest_income_qunitle <- function(incq) {
127+
in_lowest_income_quintile <- function(incq) {
128128
incq1 <- haven::tagged_na("b")
129129

130130
if (is.na(incq) || (!is.na(incq) && incq == "NA(b)") || incq < 0) {

R/kidney.R

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,13 @@
33
#' @description This function calculates the estimated glomerular filtration rate (GFR) according to Finlay's formula,
44
#' where serum creatinine is in mg/dL. The calculation takes into account the respondent's ethnicity, sex, and age.
55
#'
6-
#' @param LAB_BCRE Blood creatine (µmol/L). It should be a numeric value.
7-
#' @param PGDCGT Ethnicity (13 categories). It should be an integer value.
8-
#' @param CLC_SEX Sex (Male = 1, Female = 2). It should be an integer value.
9-
#' @param CLC_AGE Age (years). It should be a numeric value.
6+
#' @param LAB_BCRE Blood creatinine (µmol/L). It should be a numeric value between 14 and 785.
7+
#' @param PGDCGT Ethnicity (13 categories). It should be an integer value between 1 and 13.
8+
#' @param CLC_SEX Sex (Male = 1, Female = 2). It should be an integer value of either 1 or 2.
9+
#' @param CLC_AGE Age (years). It should be a numeric value between 3 and 79.
1010
#'
1111
#' @return The calculated GFR as a numeric value. If any of the input parameters (LAB_BCRE, PGDCGT, CLC_SEX, CLC_AGE)
12-
#' are non-response values (LAB_BCRE >= 996, PGDCGT >= 96, CLC_SEX >= 6, CLC_AGE >= 996), the GFR will be NA(b)
13-
#' (Not Available).
12+
#' are non-response values (LAB_BCRE >= 996, PGDCGT >= 96, CLC_SEX >= 6, CLC_AGE >= 996) or out of bounds, the GFR will be NA(b).
1413
#'
1514
#' @details The function uses the serum creatinine level (LAB_BCRE) in µmol/L to calculate the estimated GFR. First, it
1615
#' checks if any of the input parameters are non-response values. If any non-response values are found, the GFR
@@ -42,7 +41,7 @@ calculate_GFR <- function(LAB_BCRE, PGDCGT, CLC_SEX, CLC_AGE) {
4241
GFR <- 0
4342
serumcreat <- 0
4443

45-
if (any(!LAB_BCRE %in% 0:9995) || (any(!CLC_SEX %in% c(1, 2)) || any(!PGDCGT %in% 1:13)) || any(!CLC_AGE %in% 0:995)) {
44+
if (any(!LAB_BCRE %in% 14:785) || (any(!CLC_SEX %in% c(1, 2)) || any(!PGDCGT %in% 1:13)) || any(!CLC_AGE %in% 3:79)) {
4645
GFR <- haven::tagged_na("b") # GFR is NA if any non-responses found
4746
} else {
4847
serumcreat <- LAB_BCRE / 88.4 # Proceeds without non-responses

R/medications.R

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -72,21 +72,22 @@ is_taking_drug_class <- function(df, class_var_name, med_vars, last_taken_vars,
7272

7373
logger::log_info(paste0("Adding variable '", class_var_name, "' to the data frame."))
7474

75-
# Initialize the class variable column
76-
df[[class_var_name]] <- 0
77-
78-
# Apply the condition function to each pair of med and last_taken vars using a loop
79-
for (i in seq_along(med_vars)) {
80-
med_values <- df[[med_vars[i]]]
81-
last_taken_values <- df[[last_taken_vars[i]]]
82-
class_values <- numeric(nrow(df))
83-
84-
for (j in seq_along(med_values)) {
85-
class_values[j] <- class_condition_fun(med_values[j], last_taken_values[j])
86-
}
87-
88-
df[[class_var_name]] <- df[[class_var_name]] + class_values
89-
}
75+
# Apply the condition function to each pair of med and last_taken vars using mapply
76+
class_values <- mapply(
77+
FUN = function(med_var, last_taken_var) {
78+
mapply(class_condition_fun, med_var, last_taken_var)
79+
},
80+
df[med_vars],
81+
df[last_taken_vars],
82+
SIMPLIFY = FALSE
83+
)
84+
85+
# Sum the results for each row
86+
df[[class_var_name]] <- rowSums(do.call(cbind, class_values), na.rm = TRUE)
87+
88+
# Handle cases where all values for a row are NA
89+
all_na <- Reduce(`&`, lapply(class_values, is.na))
90+
df[[class_var_name]][all_na] <- haven::tagged_na("b")
9091

9192
return(df)
9293
}
@@ -297,7 +298,7 @@ is_any_antiHTN_med <- function(MEUCATC, NPI_25B) {
297298
return(haven::tagged_na("b"))
298299
}
299300

300-
as.numeric(grepl("^C0[2, 3, 7, 8, 9]", MEUCATC) && !(MEUCATC %in% c("C07AA07", "C07AA12", "C07AG02", "C03BA08", "C03CA01", "C02KX01")) && NPI_25B <= 4)
301+
as.numeric(grepl("^(C02|C03|C07|C08|C09)", MEUCATC) && !(MEUCATC %in% c("C07AA07", "C07AA12", "C07AG02", "C03BA08", "C03CA01", "C02KX01")) && NPI_25B <= 4)
301302
}
302303

303304
#' @title Non-steroidal anti-inflammatory drugs (NSAIDs)
@@ -568,7 +569,7 @@ cycles1to2_beta_blockers <- function(
568569

569570
bbmed <- 0
570571

571-
if (bb$BBmed > 0 %in% TRUE) {
572+
if (any(bb$BBmed > 0, na.rm = TRUE)) {
572573
bbmed <- 1
573574
} else if (all(is.na(bb$BBmed))) {
574575
bbmed <- haven::tagged_na("b")
@@ -778,7 +779,7 @@ cycles1to2_ace_inhibitors <- function(
778779

779780
acemed <- 0
780781

781-
if (ace$ACEmed > 0 %in% TRUE) {
782+
if (any(ace$ACEmed > 0, na.rm = TRUE)) {
782783
acemed <- 1
783784
} else if (all(is.na(ace$ACEmed))) {
784785
acemed <- haven::tagged_na("b")
@@ -988,7 +989,7 @@ cycles1to2_diuretics <- function(
988989

989990
diurmed <- 0
990991

991-
if (diur$DIURmed > 0 %in% TRUE) {
992+
if (any(diur$DIURmed > 0, na.rm = TRUE)) {
992993
diurmed <- 1
993994
} else if (all(is.na(diur$DIURmed))) {
994995
diurmed <- haven::tagged_na("b")
@@ -1198,9 +1199,8 @@ cycles1to2_calcium_channel_blockers <- function(
11981199

11991200
ccbmed <- 0
12001201

1201-
if (ccb$CCBmed > 0 %in% TRUE) {
1202+
if (any(ccb$CCBmed > 0, na.rm = TRUE)) {
12021203
ccbmed <- 1
1203-
return(ccbmed)
12041204
} else if (all(is.na(ccb$CCBmed))) {
12051205
ccbmed <- haven::tagged_na("b")
12061206
}
@@ -1409,7 +1409,7 @@ cycles1to2_other_antiHTN_meds <- function(
14091409

14101410
miscmed <- 0
14111411

1412-
if (misc$MISCmed > 0 %in% TRUE) {
1412+
if (any(misc$MISCmed > 0, na.rm = TRUE)) {
14131413
miscmed <- 1
14141414
} else if (all(is.na(misc$MISCmed))) {
14151415
miscmed <- haven::tagged_na("b")
@@ -1619,7 +1619,7 @@ cycles1to2_any_antiHTN_meds <- function(
16191619

16201620
anymed <- 0
16211621

1622-
if (anyHTN$ANYmed > 0 %in% TRUE) {
1622+
if (any(anyHTN$ANYmed > 0, na.rm = TRUE)) {
16231623
anymed <- 1
16241624
} else if (all(is.na(anyHTN$ANYmed))) {
16251625
anymed <- haven::tagged_na("b")
@@ -1829,7 +1829,7 @@ cycles1to2_nsaid <- function(
18291829

18301830
nsaid_drug <- 0
18311831

1832-
if (nsaid$NSAID > 0 %in% TRUE) {
1832+
if (any(nsaid$NSAID > 0, na.rm = TRUE)) {
18331833
nsaid_drug <- 1
18341834
} else if (all(is.na(nsaid$NSAID))) {
18351835
nsaid_drug <- haven::tagged_na("b")
@@ -2039,7 +2039,7 @@ cycles1to2_diabetes_drugs <- function(
20392039

20402040
diab_drug <- 0
20412041

2042-
if (diab$diabetes_drug > 0 %in% TRUE) {
2042+
if (any(diab$diabetes_drug > 0, na.rm = TRUE)) {
20432043
diab_drug <- 1
20442044
} else if (all(is.na(diab$diabetes_drug))) {
20452045
diab_drug <- haven::tagged_na("b")

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
<!-- badges: end -->
88

9+
# WARNING: NOT RELEASED TO THE PUBLIC AND NOT FOR USE YET
10+
911
*chmsflow* supports the use of the Canadian Health Measures Survey (CHMS) by transforming variables from each cycle into harmonized, consistent versions that span survey cycles 1-6 (2007 to 2019).
1012

1113
The CHMS is a cross-sectional survey administered by Statistics Canada that collects questionnaire and directly measured health information from community-dwelling individuals aged 3 to 79 living in the 10 provinces. There are approximately 5,700 respondents per cycle. Studies use multiple CHMS cycles to examine trends over time and increase sample size to examine sub-groups that are too small to examine in a single cycle. CHMS data is not available to the public, but at Research Data Centres (RDCs) managed by Statistics Canada. Information about the survey is found [here](https://www.statcan.gc.ca/en/survey/household/5071), while information about accessing data at the RDC is found [here](https://crdcn.ca/publications-data/access-crdcn-data/).
@@ -85,7 +87,7 @@ Please follow [this guide](https://github.com/Big-Life-Lab/chmsflow/blob/dev/CON
8587

8688
We encourage PRs for additional variable transformations and derived variables that you believe may be helpful to the broad CHMS community.
8789

88-
Currently, *chmsflow* supports R through the `rec_with_table()` function of *recodeflow*. The CHMS community commonly uses SAS, Stata and other statistical packages. Please feel free to contribute to `chmsflow` by making a PR that creates versions of `rec_with_table()` for other statistical and programming languages.
90+
Currently, *chmsflow* supports R by wrapping the `rec_with_table()` function of *recodeflow*. The CHMS community commonly uses SAS, Stata and other statistical packages. Please feel free to contribute to `chmsflow` by making a PR that creates versions of `rec_with_table()` for other statistical and programming languages.
8991

9092
## Statistics Canada Attribution
9193

_pkgdown.yml

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
url: https://big-life-lab.github.io/chmsflow/
22
destination: docs
33
template:
4+
bootstrap: 5
45
params:
56
bootswatch: cosmo
67
development:
@@ -21,7 +22,7 @@ navbar:
2122
components:
2223
get_started:
2324
text: Getting started
24-
href: articles/getting-started.html
25+
href: articles/getting_started.html
2526
articles:
2627
text: Articles
2728
menu:
@@ -31,6 +32,12 @@ navbar:
3132
href: articles/variable_details.html
3233
- text: Recoding medications
3334
href: articles/recoding-medications.html
35+
- text: Derived variables
36+
href: articles/derived_variables.html
37+
- text: How to add variables
38+
href: articles/how_to_add_variables.html
39+
- text: How to use tagged_na
40+
href: articles/tagged_na_usage.html
3441
reference:
3542
text: Reference
3643
href: reference/index.html
@@ -49,7 +56,7 @@ reference:
4956
- adjust_DBP
5057
- adjust_SBP
5158
- calculate_GFR
52-
- calculate_Hhld_Income
59+
- calculate_hhld_income
5360
- calculate_WHR
5461
- calculate_nonHDL
5562
- categorize_GFR_to_CKD
@@ -64,8 +71,8 @@ reference:
6471
- cycles1to2_diuretics
6572
- cycles1to2_nsaid
6673
- cycles1to2_other_antiHTN_meds
67-
- determine_CVD_Family_History
68-
- determine_CVD_Personal_History
74+
- determine_CVD_family_history
75+
- determine_CVD_personal_history
6976
- determine_adjusted_hypertension
7077
- determine_controlled_adjusted_hypertension
7178
- determine_controlled_hypertension
@@ -75,7 +82,7 @@ reference:
7582
- find_totalFV_cycles1and2
7683
- find_totalFV_cycles3to6
7784
- find_week_accelerometer_average
78-
- in_lowest_income_qunitle
85+
- in_lowest_income_quintile
7986
- is_NSAID
8087
- is_ace_inhibitor
8188
- is_any_antiHTN_med
@@ -97,4 +104,4 @@ reference:
97104
- cycle3
98105
- cycle3_meds
99106
- cycle4
100-
- cycle5
107+
- cycle5

0 commit comments

Comments
 (0)