PEG form clarification tracking

Author

Yufan Gong

Published

October 25, 2024

1 Introduction

This document is to track any form that needs to be clarified/cleaned before being entered into the database. For this project, I used clarification_ and incomplete_ variables in the main swichboard table to track the clarification status.

Notes:

The following code is assuming that you are using Windows OS on a HIPAA compliant device.

  • Make sure you have read this post before you run the code.
  • If you are using Mac OS, please check this link for .accdb connection, and please make sure the .accdb file doesn’t have a password if you are using Mac OS.
  • Alternatively, you can also connect to the database using pyodbc package if you prefer using Python.
  • For other database types, please refer to this blog.

2 Data processing

2.1 Load packages and check directory

Code
# Load libraries ----------------------------------------------------------


library(readxl)
library(here)
library(rlang)
library(magrittr)
library(arsenal)
library(lubridate)
library(RODBC)
library(labelled)
library(haven)
library(glue)
library(ggpubr)
library(ggthemes)
library(DT)
library(tidyverse)


`%notin%` <- Negate(`%in%`)

getwd()
[1] "C:/Users/ivangong/github/peg_form_clarification/qmd"

2.2 Connect to database

Code
# Connect to the database -------------------------------------------------


list("case_contact_manager", "case_entry1", 
     "hhctrl_contact_manager", "hhctrl_entry1", 
     "popctrl_contact_manager", "popctrl_entry1") %>% 
  map(function(data){
    odbcConnect(dsn = data, pwd = "PD_08")
  }) %>% 
  set_names("channel_peg3case_cm", "channel_peg3case_e1",
            "channel_peg3hhctrl_cm", "channel_peg3hhctrl_e1",
            "channel_peg3popctrl_cm", "channel_peg3popctrl_e1") %>% 
  list2env(.GlobalEnv) %>% 
  invisible()

2.3 Read in data

Code
#grab tbls interested

tbl_drop_e1 <- c(glue("(?i)main_part_{letters[2:9]}"), "part_e_2", "part_f2",
              "tbllist", "list_", "list -", "DSQlogin", "backup",
              "scopa", "5min", "_old", "navigation", "meddx", "medm", "medw",
              "medo", "supplement", "test", "sub", "antibiotics_1", 
              "antibiotics_2", "mini mental", "pd_screening")

list(channel_peg3case_e1, channel_peg3hhctrl_e1, channel_peg3popctrl_e1) %>% 
  map(function(channel){
    sqlTables(channel)$TABLE_NAME %>% 
      grep("switch", x=., 
           value = TRUE, ignore.case = TRUE) %>% 
      discard(~str_detect(.x, paste(tbl_drop_e1, collapse = "|(?i)")))
  }) %>% 
  set_names("list_tbl_case_e1", "list_tbl_hhctrl_e1",
            "list_tbl_popctrl_e1") %>% 
  list2env(.GlobalEnv) %>% 
  invisible()

list(channel_peg3case_cm, channel_peg3hhctrl_cm, channel_peg3popctrl_cm) %>% 
  map(function(channel){
    sqlTables(channel)$TABLE_NAME %>% 
      grep("patientdetail|ctrldetail|checkra|screening|recruitment|demo|visit2", x = ., 
           value = TRUE, ignore.case = TRUE) %>% 
      discard(~str_detect(
        .x, "(?i)copy|checkra2|tbllist|stool|_0|sample|old|tracking|test|diff|patients_cpdr"))
  }) %>% 
  set_names("list_tbl_case_cm", "list_tbl_hhctrl_cm",
            "list_tbl_popctrl_cm") %>% 
  list2env(.GlobalEnv) %>% 
  invisible()


#read in all tbls in the database and do basic cleaning

var_keep <- c("pegid", "mailid", "interviewer", 
              "mail_status", "mailstatus", "consent_date", "clarification", 
              "incomplete", 
              "enroll_date", "screeningstatus", "finalstatus",
              "parkinsondisease", "msastatus", "peg3status1", "ra_consent", 
              "RA_MainLifeHistoryQuestion", "RA_MoCA_status", "RA_PDMedNo",
              "RA_PDMedLastDose", "sex",
              "RA_PDMedCheck_status", "RA_MedicalCheck_status",
              "RA_GDS_status", "RA_QualityofLife_status", "RA_Timeline_status",
              "RA_PatientQuestion_status", "RA_Constipation_status", 
              "RA_Bristol_status", "RA_Antibiotics_status", "RA_Diet_status",
              "RA_Zarit_status", "RA_Blood", "RA_Stool", "IncidentID_Index",
              "bloodstatus", "stoolstatus", "phys_apptdate_status", "a3")
var_drop <- c("collected1", "date1", "initials1", "pdmsastatus", 
              "entered", "entry", "enter", "collect", "clarification_date",
              "initial", "inputby", "1_date", "1_initial",
              "bloodcenter", "abletoenter", "wgs", "sleep_enter", 
              "cleaned_data", "sleep_collected_date", "hscore_collected_date",
              "phys_apptdate_status2", "mainlifehistoryquestion_date", 
              "lastdose_", "blooddate", "bloodno", "stooldate", "stoolno",
              "mail_status_date", "returned", "refused", "interested", "reply",
              "reason", "mailstatus1date", "mailstatus2date", "mailstatus3date")
# entryinitial_todetect <- c("entered_initial", "enteredby", "entry_initial")
# entrydate_todetect <- c("entered_date", "date_entered", "entry_date")
# collectdate_todetect <- c("collected_date", "date_collected", "collect_date", 
#                           "bs_check_date")


list(
  list(list_tbl_case_e1, list_tbl_hhctrl_e1, list_tbl_popctrl_e1, 
       list_tbl_case_cm, list_tbl_hhctrl_cm, list_tbl_popctrl_cm),
  list(channel_peg3case_e1, channel_peg3hhctrl_e1, channel_peg3popctrl_e1,
       channel_peg3case_cm, channel_peg3hhctrl_cm, channel_peg3popctrl_cm),
  list("case_e1", "hhctrl_e1", "popctrl_e1",
       "case_cm", "hhctrl_cm", "popctrl_cm")
) %>% 
  pmap(function(data1, data2, data3){
    data1 %>% 
      map(function(data){
        sqlQuery(data2,
                 glue::glue("select * from [{data}];")) %>% 
          as_tibble() %>% 
          select(-matches(paste(var_drop, sep = "|"),
                          ignore.case = TRUE)) %>%
          select(matches(paste(var_keep, sep = "|"),
                         ignore.case = TRUE)) %>%
          # modify_at(vars(contains("date")), ymd) %>% 
          mutate_at(vars(matches("date", ignore.case = TRUE)), as.character) %>%
          #rename(pegid = PEGID) %>%
          rename_all(str_to_lower) %>% 
          # rename_with(~replace(.x, str_detect(.x, 
          #                                     paste(entryinitial_todetect, 
          #                                           collapse = "(?i)|")), 
          #                      "entry_initial"), 
          #             contains(entryinitial_todetect)) %>%
          # rename_with(~replace(.x, str_detect(.x, 
          #                                     paste(entrydate_todetect, 
          #                                           collapse = "(?i)|")), 
          #                      "entry_date"), 
          #             contains(entrydate_todetect)) %>%
          # rename_with(~replace(.x, str_detect(.x, 
          #                                     paste(collectdate_todetect, 
          #                                           collapse = "(?i)|")), 
          #                      "collect_date"), 
          #             contains(collectdate_todetect)) %>%
          rename_with(~replace(.x, str_detect(.x, "(?i)mainlifehistory"), 
                               "ra_mainlifehistory_status"), 
                      contains("mainlifehistory")) %>%
          rename_with(~replace(.x, str_detect(.x, "(?i)pdmedno"), 
                               "ra_pdmedno_status"), 
                      contains("pdmedno")) %>%
          rename_with(~replace(.x, str_detect(.x, "(?i)pdmedlastdose"), 
                               "ra_pdmedlastdose_status"), 
                      contains("pdmedlastdose")) %>%
          mutate_at(vars(starts_with("ra_")), as.character) %>%
          # mutate_if(is.character, ~ str_remove_all(.x," ")) %>%
          # mutate_if(is.character, toupper) %>% 
          mutate_if(is.character, ~ replace(.x, .x %in% list(""), NA)) %>% 
          mutate_at(vars(starts_with("ra_")), ~ case_when(. == "0" ~ NA,
                                . == "1" ~ "Completed",
                                TRUE ~ as.character(.))) %>% 
          # filter(pegid %notin% c("G00001SA", "G00001SB", "G00001SC", 
          #                        "1XXXXXXXX", "G10000SW", "G10007SW",
          #                        "g10051LL70", "G20381DD32",
          #                        "777", "456", "123")) %>% 
          # filter(str_length(pegid) > 3) %>%
          distinct()
      }) %>%
      set_names(paste(data1, data3, sep = "_")) %>% 
      list2env(.,envir = .GlobalEnv)
  }) %>% 
  invisible()

2.4 Clean data

Code
list(tbl_mainswitchboard_case_e1, 
     tbl_mainswitchboard_hhctrl_e1,
     tbl_mainswitchboard_popctrl_e1) %>% 
  map(function(data){
    data %>% 
      mutate_all(
        ~ case_when(.x %in% c("0", "No") ~ NA_character_,
                    .x == "Yes" ~ "2",
                    TRUE ~ as.character(.x))
      ) %>% 
      mutate_at(
        vars(starts_with("clarification")), 
        ~ case_when(.x == "1" ~ "Completed",
                    .x == "2" ~ "Pending",
                    .x == "3" ~ "Cannot complete",
                    TRUE ~ as.character(.x))
        ) %>% 
      mutate_at(
        vars(starts_with("incomplete")),
        ~ case_when(.x == 1 ~ "Incomplete and need clarification",
                    .x == 0 ~ NA_character_,
                    TRUE ~ as.character(.x))
      )
  }) %>% 
  set_names(
    "tbl_peg3_mainswitchboard_clean_case_e1",
    "tbl_peg3_mainswitchboard_clean_hhctrl_e1",
    "tbl_peg3_mainswitchboard_clean_popctrl_e1"
  ) %>%
  list2env(.GlobalEnv) %>% 
  invisible()

# create a final clarification tracking table
list(
  tbl_peg3_mainswitchboard_clean_case_e1,
  tbl_peg3_mainswitchboard_clean_hhctrl_e1,
  tbl_peg3_mainswitchboard_clean_popctrl_e1
) %>% 
  map(function(data){
    list(
      list("clarification", "incomplete"),
      list("clarification_status", "clarification")
    ) %>% 
      pmap(function(var, value){
        data %>% 
          select(pegid, starts_with(var)) %>% 
          pivot_longer(
            cols = -pegid,
            names_to = "form",
            values_to = value
          ) %>%
          mutate(form = str_remove(form, paste(var, "_", sep = ""))) %>% 
          mutate_at(vars(form), ~ replace(.x, .x %in% c("main"), "main interview"))
      }) %>% 
      reduce(left_join, by = c("pegid", "form")) %>% 
      filter(!(is.na(clarification_status) & is.na(clarification))
             & clarification_status %notin% c("Completed")
             & form %notin% c("scopa") 
             & pegid %notin% c("1XXXXXXXX", "G10002YG96"))
  }) %>% 
  set_names(
    "tbl_peg3_need_clarification_case_e1",
    "tbl_peg3_need_clarification_hhctrl_e1",
    "tbl_peg3_need_clarification_popctrl_e1"
  ) %>%
  list2env(.GlobalEnv) %>% 
  invisible()

3 Detail tracking tables for those who need clarification

3.1 Cases

Code
datatable(tbl_peg3_need_clarification_case_e1,
          rownames = FALSE,
          extensions = "Buttons",
          options = list(
            dom = "Bfrtip",
            buttons = c("copy", "csv", "excel", "pdf", "print"),
            scrollX = TRUE, 
                         paging=TRUE,
                         # fixedHeader=TRUE,
                         className = 'dt-center', targets = 0:4),
          caption = "Table: Data clarification tracking information for cases")

3.2 Household controls

Code
datatable(tbl_peg3_need_clarification_hhctrl_e1,
          rownames = FALSE,
          extensions = "Buttons",
          options = list(
            dom = "Bfrtip",
            buttons = c("copy", "csv", "excel", "pdf", "print"),
            scrollX = TRUE, 
                         paging=TRUE,
                         # fixedHeader=TRUE,
                         className = 'dt-center', targets = 0:4),
          caption = "Table: Data clarification tracking information for household controls")

3.3 Population controls

Code
datatable(tbl_peg3_need_clarification_popctrl_e1,
          rownames = FALSE,
          extensions = "Buttons",
          options = list(
            dom = "Bfrtip",
            buttons = c("copy", "csv", "excel", "pdf", "print"),
            scrollX = TRUE, 
                         paging=TRUE,
                         # fixedHeader=TRUE,
                         className = 'dt-center', targets = 0:4),
          caption = "Table: Data clarification tracking information for population controls")
Code
odbcCloseAll()