Big-Life-Lab
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 5 additions & 1 deletion b/‎DESCRIPTION‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎NAMESPACE‎
Lines changed: 4 additions & 4 deletions b/‎NAMESPACE‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎NEWS.md‎
Lines changed: 5 additions & 2 deletions b/‎NEWS.md‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎R/family-history.R‎
Lines changed: 4 additions & 4 deletions b/‎R/family-history.R‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎R/income.R‎
Lines changed: 10 additions & 10 deletions b/‎R/income.R‎
Lines changed: 10 additions & 10 deletions
diff --git a/‎R/kidney.R‎
Lines changed: 6 additions & 7 deletions b/‎R/kidney.R‎
Lines changed: 6 additions & 7 deletions
diff --git a/‎R/medications.R‎
Lines changed: 25 additions & 25 deletions b/‎R/medications.R‎
Lines changed: 25 additions & 25 deletions
diff --git a/‎README.md‎
Lines changed: 3 additions & 1 deletion b/‎README.md‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎_pkgdown.yml‎
Lines changed: 13 additions & 6 deletions b/‎_pkgdown.yml‎
Lines changed: 13 additions & 6 deletions
@@ -74,6 +74,7 @@ packrat.lock
 
 # If you use RStudio
 .Rproj.user/
+.Rproj
 
 # If you use lintr for code linting, ignore these files
 .R/lintr_cache/
@@ -2,9 +2,12 @@ Package: chmsflow
 Type: Package
 Title: Transforming and Harmonizing CHMS Variables
 Version: 0.1.0
+Author: See Authors@R
+Maintainer: Rafidul Islam <raislam@ohri.ca>
 Authors@R: c(
     person("Rafidul", "Islam", email = "raislam@ohri.ca", role = c("aut", "cre")),
-    person("Douglas", "Manuel", email = "dmanuel@ohri.ca", role = c("aut")))
+    person("Douglas", "Manuel", email = "dmanuel@ohri.ca", role = c("aut")),    
+    person("Therese", "Chan", email = "TChan@bruyere.org", role = c("aut")))
 Description: Supporting the use of the Canadian Health Measures Survey (CHMS)
     by transforming variables from each cycle into harmonized, consistent 
     versions that span survey cycles 1-6. This package uses rec_with_table(), 
@@ -16,6 +19,7 @@ Depends:
 Imports: haven, logger
 License: MIT + file LICENSE
 URL: https://github.com/Big-Life-Lab/chmsflow, https://big-life-lab.github.io/chmsflow/
+BugReports: https://github.com/Big-Life-Lab/chmsflow/issues
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2
 
@@ -3,8 +3,8 @@
 export(adjust_DBP)
 export(adjust_SBP)
 export(calculate_GFR)
-export(calculate_Hhld_Income)
 export(calculate_WHR)
+export(calculate_hhld_income)
 export(calculate_nonHDL)
 export(categorize_GFR_to_CKD)
 export(categorize_income)
@@ -18,8 +18,8 @@ export(cycles1to2_diabetes_drugs)
 export(cycles1to2_diuretics)
 export(cycles1to2_nsaid)
 export(cycles1to2_other_antiHTN_meds)
-export(determine_CVD_Family_History)
-export(determine_CVD_Personal_History)
+export(determine_CVD_family_history)
+export(determine_CVD_personal_history)
 export(determine_adjusted_hypertension)
 export(determine_controlled_adjusted_hypertension)
 export(determine_controlled_hypertension)
@@ -29,7 +29,7 @@ export(determine_inclusive_diabetes)
 export(find_totalFV_cycles1and2)
 export(find_totalFV_cycles3to6)
 export(find_week_accelerometer_average)
-export(in_lowest_income_qunitle)
+export(in_lowest_income_quintile)
 export(is_NSAID)
 export(is_ace_inhibitor)
 export(is_any_antiHTN_med)
 
@@ -1,4 +1,7 @@
-# chmsflow 0.1.0
-2024-11-19
+# chmsflow 0.1.0 (First Version)
 
 ## Features
+
+- Added variables.csv, which contains the list of CHMS variables in chmsflow.
+- Added variable-details.csv, which maps variables across CHMS cycles 1-6.
+- Added vignettes describing how to use rec_with_table() (part of recodeflow) to combine the data.
@@ -17,11 +17,11 @@
 #' @examples
 #'
 #' # Determine CVD personal history for a person with heart disease (CCC_61 = 1).
-#' determine_CVD_Personal_History(CCC_61 = 1, CCC_63 = 2, CCC_81 = 2)
+#' determine_CVD_personal_history(CCC_61 = 1, CCC_63 = 2, CCC_81 = 2)
 #' # Output: 1 (CVD personal history is "Yes" as heart disease is present).
 #'
 #' @export
-determine_CVD_Personal_History <- function(CCC_61, CCC_63, CCC_81) {
+determine_CVD_personal_history <- function(CCC_61, CCC_63, CCC_81) {
   cardiov <- haven::tagged_na("b")
 
   if (is.na(CCC_61) && is.na(CCC_63) && is.na(CCC_81)) {
@@ -64,11 +64,11 @@ determine_CVD_Personal_History <- function(CCC_61, CCC_63, CCC_81) {
 #'
 #' @examples
 #' # Example 1: Premature CVD due to heart disease diagnosis at age 50
-#' determine_CVD_Family_History(FMH_11 = 1, FMH_12 = 50, FMH_13 = 2, FMH_14 = NA)
+#' determine_CVD_family_history(FMH_11 = 1, FMH_12 = 50, FMH_13 = 2, FMH_14 = NA)
 #' # Output: 1
 #'
 #' @export
-determine_CVD_Family_History <- function(FMH_11, FMH_12, FMH_13, FMH_14) {
+determine_CVD_family_history <- function(FMH_11, FMH_12, FMH_13, FMH_14) {
   famheart60 <- 0
   famstroke60 <- 0
   famCVD60 <- haven::tagged_na("b")
 
@@ -20,23 +20,23 @@
 #' @examples
 #'
 #' # Example 1: Respondent with $50,000 income and a household size of 3.
-#' calculate_Hhld_Income(THI_01 = 50000, DHHDHSZ = 3)
+#' calculate_hhld_income(THI_01 = 50000, DHHDHSZ = 3)
 #' # Output: 29411.76
 #'
 #' # Example 2: Respondent with $75000 income and a household size of 2.
-#' calculate_Hhld_Income(THI_01 = 75000, DHHDHSZ = 2)
+#' calculate_hhld_income(THI_01 = 75000, DHHDHSZ = 2)
 #' # Output: 53571.43
 #'
 #' # Example 3: Respondent with $90000 income and a household size of 1.
-#' calculate_Hhld_Income(THI_01 = 90000, DHHDHSZ = 1)
+#' calculate_hhld_income(THI_01 = 90000, DHHDHSZ = 1)
 #' # Output: 90000
 #'
 #' @export
-calculate_Hhld_Income <- function(THI_01, DHHDHSZ) {
+calculate_hhld_income <- function(THI_01, DHHDHSZ) {
   # Step 1 - derive household adjustment based on household size
   hh_size_wt <- 0
 
-  if (is.na(DHHDHSZ) || DHHDHSZ < 0) {
+  if (is.na(DHHDHSZ) || DHHDHSZ <= 0) {
     return(haven::tagged_na("b"))
   }
 
@@ -110,21 +110,21 @@ categorize_income <- function(adj_hh_inc) {
 #' @param incq Categorical value indicating the income category as defined by the categorize_income function.
 #'
 #' @return A categorical value indicating whether the individual is in the lowest income quintile:
-#'   - 1: In the lowest income quartile
-#'   - 2: Not in the lowest income quartile
+#'   - 1: In the lowest income quintile
+#'   - 2: Not in the lowest income quintile
 #'   - NA(b): Missing or invalid input
 #'
 #' @examples
 #' # Example 1: Check if an income category of 3 (between $35,000-50,000) is in the lowest quintile
-#' in_lowest_income_qunitle(3)
+#' in_lowest_income_quintile(3)
 #' # Output: 2
 #'
 #' # Example 2: Check if an income category of 1 (below or equal to $21,500) is in the lowest quintile
-#' in_lowest_income_qunitle(1)
+#' in_lowest_income_quintile(1)
 #' # Output: 1
 #'
 #' @export
-in_lowest_income_qunitle <- function(incq) {
+in_lowest_income_quintile <- function(incq) {
   incq1 <- haven::tagged_na("b")
 
   if (is.na(incq) || (!is.na(incq) && incq == "NA(b)") || incq < 0) {
 
@@ -3,14 +3,13 @@
 #' @description This function calculates the estimated glomerular filtration rate (GFR) according to Finlay's formula,
 #'              where serum creatinine is in mg/dL. The calculation takes into account the respondent's ethnicity, sex, and age.
 #'
-#' @param LAB_BCRE Blood creatine (µmol/L). It should be a numeric value.
-#' @param PGDCGT Ethnicity (13 categories). It should be an integer value.
-#' @param CLC_SEX Sex (Male = 1, Female = 2). It should be an integer value.
-#' @param CLC_AGE Age (years). It should be a numeric value.
+#' @param LAB_BCRE Blood creatinine (µmol/L). It should be a numeric value between 14 and 785.
+#' @param PGDCGT Ethnicity (13 categories). It should be an integer value between 1 and 13.
+#' @param CLC_SEX Sex (Male = 1, Female = 2). It should be an integer value of either 1 or 2.
+#' @param CLC_AGE Age (years). It should be a numeric value between 3 and 79.
 #'
 #' @return The calculated GFR as a numeric value. If any of the input parameters (LAB_BCRE, PGDCGT, CLC_SEX, CLC_AGE)
-#'         are non-response values (LAB_BCRE >= 996, PGDCGT >= 96, CLC_SEX >= 6, CLC_AGE >= 996), the GFR will be NA(b)
-#'         (Not Available).
+#'         are non-response values (LAB_BCRE >= 996, PGDCGT >= 96, CLC_SEX >= 6, CLC_AGE >= 996) or out of bounds, the GFR will be NA(b).
 #'
 #' @details The function uses the serum creatinine level (LAB_BCRE) in µmol/L to calculate the estimated GFR. First, it
 #'          checks if any of the input parameters are non-response values. If any non-response values are found, the GFR
@@ -42,7 +41,7 @@ calculate_GFR <- function(LAB_BCRE, PGDCGT, CLC_SEX, CLC_AGE) {
   GFR <- 0
   serumcreat <- 0
 
-  if (any(!LAB_BCRE %in% 0:9995) || (any(!CLC_SEX %in% c(1, 2)) || any(!PGDCGT %in% 1:13)) || any(!CLC_AGE %in% 0:995)) {
+  if (any(!LAB_BCRE %in% 14:785) || (any(!CLC_SEX %in% c(1, 2)) || any(!PGDCGT %in% 1:13)) || any(!CLC_AGE %in% 3:79)) {
     GFR <- haven::tagged_na("b") # GFR is NA if any non-responses found
   } else {
     serumcreat <- LAB_BCRE / 88.4 # Proceeds without non-responses
 
@@ -72,21 +72,22 @@ is_taking_drug_class <- function(df, class_var_name, med_vars, last_taken_vars,
 
   logger::log_info(paste0("Adding variable '", class_var_name, "' to the data frame."))
 
-  # Initialize the class variable column
-  df[[class_var_name]] <- 0
-
-  # Apply the condition function to each pair of med and last_taken vars using a loop
-  for (i in seq_along(med_vars)) {
-    med_values <- df[[med_vars[i]]]
-    last_taken_values <- df[[last_taken_vars[i]]]
-    class_values <- numeric(nrow(df))
-
-    for (j in seq_along(med_values)) {
-      class_values[j] <- class_condition_fun(med_values[j], last_taken_values[j])
-    }
-
-    df[[class_var_name]] <- df[[class_var_name]] + class_values
-  }
+  # Apply the condition function to each pair of med and last_taken vars using mapply
+  class_values <- mapply(
+    FUN = function(med_var, last_taken_var) {
+      mapply(class_condition_fun, med_var, last_taken_var)
+    },
+    df[med_vars],
+    df[last_taken_vars],
+    SIMPLIFY = FALSE
+  )
+
+  # Sum the results for each row
+  df[[class_var_name]] <- rowSums(do.call(cbind, class_values), na.rm = TRUE)
+
+  # Handle cases where all values for a row are NA
+  all_na <- Reduce(`&`, lapply(class_values, is.na))
+  df[[class_var_name]][all_na] <- haven::tagged_na("b")
 
   return(df)
 }
@@ -297,7 +298,7 @@ is_any_antiHTN_med <- function(MEUCATC, NPI_25B) {
     return(haven::tagged_na("b"))
   }
 
-  as.numeric(grepl("^C0[2, 3, 7, 8, 9]", MEUCATC) && !(MEUCATC %in% c("C07AA07", "C07AA12", "C07AG02", "C03BA08", "C03CA01", "C02KX01")) && NPI_25B <= 4)
+  as.numeric(grepl("^(C02|C03|C07|C08|C09)", MEUCATC) && !(MEUCATC %in% c("C07AA07", "C07AA12", "C07AG02", "C03BA08", "C03CA01", "C02KX01")) && NPI_25B <= 4)
 }
 
 #' @title Non-steroidal anti-inflammatory drugs (NSAIDs)
@@ -568,7 +569,7 @@ cycles1to2_beta_blockers <- function(
 
   bbmed <- 0
 
-  if (bb$BBmed > 0 %in% TRUE) {
+  if (any(bb$BBmed > 0, na.rm = TRUE)) {
     bbmed <- 1
   } else if (all(is.na(bb$BBmed))) {
     bbmed <- haven::tagged_na("b")
@@ -778,7 +779,7 @@ cycles1to2_ace_inhibitors <- function(
 
   acemed <- 0
 
-  if (ace$ACEmed > 0 %in% TRUE) {
+  if (any(ace$ACEmed > 0, na.rm = TRUE)) {
     acemed <- 1
   } else if (all(is.na(ace$ACEmed))) {
     acemed <- haven::tagged_na("b")
@@ -988,7 +989,7 @@ cycles1to2_diuretics <- function(
 
   diurmed <- 0
 
-  if (diur$DIURmed > 0 %in% TRUE) {
+  if (any(diur$DIURmed > 0, na.rm = TRUE)) {
     diurmed <- 1
   } else if (all(is.na(diur$DIURmed))) {
     diurmed <- haven::tagged_na("b")
@@ -1198,9 +1199,8 @@ cycles1to2_calcium_channel_blockers <- function(
 
   ccbmed <- 0
 
-  if (ccb$CCBmed > 0 %in% TRUE) {
+  if (any(ccb$CCBmed > 0, na.rm = TRUE)) {
     ccbmed <- 1
-    return(ccbmed)
   } else if (all(is.na(ccb$CCBmed))) {
     ccbmed <- haven::tagged_na("b")
   }
@@ -1409,7 +1409,7 @@ cycles1to2_other_antiHTN_meds <- function(
 
   miscmed <- 0
 
-  if (misc$MISCmed > 0 %in% TRUE) {
+  if (any(misc$MISCmed > 0, na.rm = TRUE)) {
     miscmed <- 1
   } else if (all(is.na(misc$MISCmed))) {
     miscmed <- haven::tagged_na("b")
@@ -1619,7 +1619,7 @@ cycles1to2_any_antiHTN_meds <- function(
 
   anymed <- 0
 
-  if (anyHTN$ANYmed > 0 %in% TRUE) {
+  if (any(anyHTN$ANYmed > 0, na.rm = TRUE)) {
     anymed <- 1
   } else if (all(is.na(anyHTN$ANYmed))) {
     anymed <- haven::tagged_na("b")
@@ -1829,7 +1829,7 @@ cycles1to2_nsaid <- function(
 
   nsaid_drug <- 0
 
-  if (nsaid$NSAID > 0 %in% TRUE) {
+  if (any(nsaid$NSAID > 0, na.rm = TRUE)) {
     nsaid_drug <- 1
   } else if (all(is.na(nsaid$NSAID))) {
     nsaid_drug <- haven::tagged_na("b")
@@ -2039,7 +2039,7 @@ cycles1to2_diabetes_drugs <- function(
 
   diab_drug <- 0
 
-  if (diab$diabetes_drug > 0 %in% TRUE) {
+  if (any(diab$diabetes_drug > 0, na.rm = TRUE)) {
     diab_drug <- 1
   } else if (all(is.na(diab$diabetes_drug))) {
     diab_drug <- haven::tagged_na("b")
 
@@ -6,6 +6,8 @@
 
 <!-- badges: end -->
 
+# WARNING: NOT RELEASED TO THE PUBLIC AND NOT FOR USE YET
+
 *chmsflow* supports the use of the Canadian Health Measures Survey (CHMS) by transforming variables from each cycle into harmonized, consistent versions that span survey cycles 1-6 (2007 to 2019).
 
 The CHMS is a cross-sectional survey administered by Statistics Canada that collects questionnaire and directly measured health information from community-dwelling individuals aged 3 to 79 living in the 10 provinces. There are approximately 5,700 respondents per cycle. Studies use multiple CHMS cycles to examine trends over time and to increase sample size so that sub-groups too small to examine in a single cycle can be studied. CHMS data is not available to the public, but can be accessed at Research Data Centres (RDCs) managed by Statistics Canada. Information about the survey is found [here](https://www.statcan.gc.ca/en/survey/household/5071), while information about accessing data at the RDC is found [here](https://crdcn.ca/publications-data/access-crdcn-data/).
@@ -85,7 +87,7 @@ Please follow [this guide](https://github.com/Big-Life-Lab/chmsflow/blob/dev/CON
 
 We encourage PRs for additional variable transformations and derived variables that you believe may be helpful to the broad CHMS community.
 
-Currently, *chmsflow* supports R through the `rec_with_table()` function of *recodeflow*. The CHMS community commonly uses SAS, Stata and other statistical packages. Please feel free to contribute to `chmsflow` by making a PR that creates versions of `rec_with_table()` for other statistical and programming languages.
+Currently, *chmsflow* supports R by wrapping the `rec_with_table()` function of *recodeflow*. The CHMS community commonly uses SAS, Stata and other statistical packages. Please feel free to contribute to `chmsflow` by making a PR that creates versions of `rec_with_table()` for other statistical and programming languages.
 
 ## Statistics Canada Attribution
 
 
@@ -1,6 +1,7 @@
 url: https://big-life-lab.github.io/chmsflow/
 destination: docs
 template:
+  bootstrap: 5
   params:
     bootswatch: cosmo
 development:
@@ -21,7 +22,7 @@ navbar:
   components:
     get_started:
       text: Getting started
-      href: articles/getting-started.html
+      href: articles/getting_started.html
     articles:
       text: Articles
       menu:
@@ -31,6 +32,12 @@ navbar:
         href: articles/variable_details.html
       - text: Recoding medications
         href: articles/recoding-medications.html
+      - text: Derived variables
+        href: articles/derived_variables.html
+      - text: How to add variables
+        href: articles/how_to_add_variables.html
+      - text: How to use tagged_na
+        href: articles/tagged_na_usage.html
     reference:
       text: Reference
       href: reference/index.html
@@ -49,7 +56,7 @@ reference:
   - adjust_DBP
   - adjust_SBP
   - calculate_GFR
-  - calculate_Hhld_Income
+  - calculate_hhld_income
   - calculate_WHR
   - calculate_nonHDL
   - categorize_GFR_to_CKD
@@ -64,8 +71,8 @@ reference:
   - cycles1to2_diuretics
   - cycles1to2_nsaid
   - cycles1to2_other_antiHTN_meds
-  - determine_CVD_Family_History
-  - determine_CVD_Personal_History
+  - determine_CVD_family_history
+  - determine_CVD_personal_history
   - determine_adjusted_hypertension
   - determine_controlled_adjusted_hypertension
   - determine_controlled_hypertension
@@ -75,7 +82,7 @@ reference:
   - find_totalFV_cycles1and2
   - find_totalFV_cycles3to6
   - find_week_accelerometer_average
-  - in_lowest_income_qunitle
+  - in_lowest_income_quintile
   - is_NSAID
   - is_ace_inhibitor
   - is_any_antiHTN_med
@@ -97,4 +104,4 @@ reference:
   - cycle3
   - cycle3_meds
   - cycle4
-  - cycle5
+  - cycle5