About Me

More

Goals for Today

Toward Better Documentation and Communication of Analysis

Opportunities

Benefits

Challenges

What is REDCap

REDCap API

Wanted for Dynamic Reports

Tools

REDCap

R Steps

GitHub Steps

What To Include in Reports

Useful R Packages

Bringing REDCap Data into R

Getting the Data with REDCapAPI

Langone Remote Login

Successful VPN Connection

R Code to Export Records from REDCap

library(redcapAPI)
library(openxlsx)
library(data.table)
tkns <- read.xlsx("c:/chuck/NYU/NoART/REDCap/tkns.xlsx")
rcon <- redcapConnection(url = 'https://openredcap.nyumc.org/apps/redcap/api/', 
                         token=tkns[4,3])
hth <- data.table(exportRecords(rcon, batch.size = 700))

Data in R, Now What?

R Markdown

Minimal R Markdown Example

---
title: "My Minimal Example"
author: "Chuck Cleland"
date: "March 18, 2018"
output: html_document
---
```{r setup, include=FALSE}

knitr::opts_chunk$set(echo = TRUE)

```
# This is a top-level header

This is an R Markdown document. Markdown is a simple formatting syntax  
for authoring HTML, PDF, and MS Word documents. For more details on  
using R Markdown see <http://rmarkdown.rstudio.com>.

## This is a second-level header

```{r cars}

summary(mtcars$mpg)

```
Some comments about these data:

- These are data on 32 cars
- Variables include miles per gallon, cylinders, and weight
- Heavier cars go fewer miles per gallon of fuel

Rendering R Markdown


Minimal Example Result

YAML Header


---
title: "Weekly Project Report"
author: "Chuck Cleland"
date: "March 23, 2018"
output:
  html_document:
    highlight: tango
    theme: cerulean
    toc: yes
    toc_depth: 5
---

R Code Chunks

```{r}

summary(mtcars)

Model_one <- lm(mpg ~ wt, data = mtcars)

summary(Model_one)

```

Tip

Inline R Expressions

Examples:

Figures

Eligibility Rate Over Time

Day <- seq(as.Date("2014/5/1"), as.Date("2015/4/20"), "days")

Scr_Data <- data.frame(Date = sample(Day, 950, replace = TRUE),
                       Scr_ELG = sample(c('Eligible','Ineligible'), 
                                        950, replace = TRUE)) %>%
  arrange(Date) %>%
  mutate(Day = cut(Date, breaks = "days"),
         Month = cut(Date, breaks = "months"),
         Total = row_number(),
         Eligible = cumsum(Scr_ELG == "Eligible"),
         Inelgible = cumsum(Scr_ELG == "Ineligible"),
         Cummulative = Eligible / Total)

head(Scr_Data) %>% 
  knitr::kable()
Date Scr_ELG Day Month Total Eligible Inelgible Cummulative
2014-05-02 Eligible 2014-05-02 2014-05-01 1 1 0 1.0000000
2014-05-02 Eligible 2014-05-02 2014-05-01 2 2 0 1.0000000
2014-05-03 Ineligible 2014-05-03 2014-05-01 3 2 1 0.6666667
2014-05-03 Ineligible 2014-05-03 2014-05-01 4 2 2 0.5000000
2014-05-03 Ineligible 2014-05-03 2014-05-01 5 2 3 0.4000000
2014-05-04 Eligible 2014-05-04 2014-05-01 6 3 3 0.5000000

library(tidyverse)

p1 <- Scr_Data %>%
  ggplot(aes(x = Date, y = Cummulative)) + 
  geom_point(pch=19, color = "blue", size = 3, alpha = .33) +
  labs(x = "", y = "Proportion Eligible") +
  scale_y_continuous(limits=c(0,1)) +
  theme_minimal()

How Long for Blood Results?

Day1 <- sample(seq(as.Date("2014/5/1"), as.Date("2015/4/20"), "days"), 
               950, replace = TRUE)
Day2 <- Day1 + round(rchisq(950, df = 9))

Scr_Data <- data.frame(Day1, Day2)

head(Scr_Data)
        Day1       Day2
1 2014-05-29 2014-05-30
2 2015-02-16 2015-03-04
3 2014-08-14 2014-08-25
4 2014-11-09 2014-11-25
5 2015-03-29 2015-04-09
6 2014-06-24 2014-07-03

Scr_Data %>%
  mutate(Days_Between = difftime(Day2, Day1, units = "days")) %>%
  ggplot(aes(x = Days_Between)) +
  geom_density(alpha = .3, fill="#4169E1") +
  labs(x = "Days Between Start of Screening and Blood Results") +
  theme_minimal()

Time to Complete Interview

head(DF)
                Start                 End
1 2016-08-04 14:26:00 2016-08-04 15:11:38
2 2016-07-17 12:05:00 2016-07-17 12:41:46
3 2017-11-21 13:21:00 2017-11-21 14:17:59
4 2016-03-23 16:29:00 2016-03-23 17:35:46
5 2017-12-20 16:53:00 2017-12-20 17:58:41
6 2016-04-08 16:17:00 2016-04-08 17:05:03

DF %>% 
  mutate(Minutes = as.numeric(difftime(End, Start, units = "mins"))) %>%
  ggplot(aes(x = Minutes)) + 
  geom_density(fill="#4169E1", alpha = .33) +
  theme_minimal()

Tables

Frequency Table

library(tidyverse)
library(pander)
library(descr)

panderOptions('table.split.table', Inf)

with(iris, 
     freq(Species, plot = FALSE)) %>%
  pander(digits = 1)
  Frequency Percent
setosa 50 33
versicolor 50 33
virginica 50 33
Total 150 100

Crosstabulation

library(tidyverse)
library(pander)
library(descr)

panderOptions('table.split.table', Inf)

with(iris, 
     CrossTable(Species, Sepal.Width > 3,
                prop.chisq = FALSE,
                prop.t = FALSE,
                prop.c = FALSE)) %>%
  pander(digits = 1)
 
Species
Sepal.Width > 3
FALSE
 
TRUE
 
Total
setosa
N
Row(%)
 
8
16.0%
 
42
84.0%
 
50
33.3%
versicolor
N
Row(%)
 
42
84.0%
 
8
16.0%
 
50
33.3%
virginica
N
Row(%)
 
33
66.0%
 
17
34.0%
 
50
33.3%
Total 83 67 150

Descriptives for Multiple Variables

library(tidyverse)

iris %>%
  gather(Variable, Value, Sepal.Length:Petal.Width) %>%
  group_by(Variable) %>%
  summarize(n = n(), 
            Mean = round(mean(Value), 1),
            SD = round(sd(Value), 1),
            Median = median(Value),
            IQR = IQR(Value),
            Min = min(Value),
            Max = max(Value)) %>%
  knitr::kable()
Variable n Mean SD Median IQR Min Max
Petal.Length 150 3.8 1.8 4.35 3.5 1.0 6.9
Petal.Width 150 1.2 0.8 1.30 1.5 0.1 2.5
Sepal.Length 150 5.8 0.8 5.80 1.3 4.3 7.9
Sepal.Width 150 3.1 0.4 3.00 0.5 2.0 4.4

Descriptives By Grouping Variable

iris %>%
  gather(Variable, Value, 
         Sepal.Length:Petal.Width) %>%
  group_by(Variable, Species) %>%
  summarize(n = n(), 
            Mean = round(mean(Value), 1),
            SD = round(sd(Value), 1),
            Median = median(Value),
            IQR = IQR(Value),
            Min = min(Value),
            Max = max(Value)) %>%
  knitr::kable()
Variable Species n Mean SD Median IQR Min Max
Petal.Length setosa 50 1.5 0.2 1.50 0.175 1.0 1.9
Petal.Length versicolor 50 4.3 0.5 4.35 0.600 3.0 5.1
Petal.Length virginica 50 5.6 0.6 5.55 0.775 4.5 6.9
Petal.Width setosa 50 0.2 0.1 0.20 0.100 0.1 0.6
Petal.Width versicolor 50 1.3 0.2 1.30 0.300 1.0 1.8
Petal.Width virginica 50 2.0 0.3 2.00 0.500 1.4 2.5
Sepal.Length setosa 50 5.0 0.4 5.00 0.400 4.3 5.8
Sepal.Length versicolor 50 5.9 0.5 5.90 0.700 4.9 7.0
Sepal.Length virginica 50 6.6 0.6 6.50 0.675 4.9 7.9
Sepal.Width setosa 50 3.4 0.4 3.40 0.475 2.3 4.4
Sepal.Width versicolor 50 2.8 0.3 2.80 0.475 2.0 3.4
Sepal.Width virginica 50 3.0 0.3 3.00 0.375 2.2 3.8

Follow-Up Rates


Further Customizing Tables

library(knitr)
library(tidyverse)
library(kableExtra)

Table 1

dt <- mtcars[1:5, 1:6]

kable(dt, "html") %>%
  kable_styling("striped", full_width = F) %>%
  column_spec(5:7, bold = T) %>%
  row_spec(3:5, bold = T, color = "white", background = "#D7261E")
mpg cyl disp hp drat wt
Mazda RX4 21.0 6 160 110 3.90 2.620
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875
Datsun 710 22.8 4 108 93 3.85 2.320
Hornet 4 Drive 21.4 6 258 110 3.08 3.215
Hornet Sportabout 18.7 8 360 175 3.15 3.440

Table 2

iris[1:10, ] %>%
  mutate_if(is.numeric, function(x) {
    cell_spec(x, "html", bold = T, 
              color = spec_color(x, end = 0.9),
              font_size = spec_font_size(x))}) %>%
  mutate(Species = cell_spec(
    Species, "html", color = "white", bold = T,
    background = spec_color(1:10, end = 0.9, 
                            option = "A", direction = -1))) %>%
  kable("html", escape = F, align = "c") %>%
  kable_styling("striped", full_width = F)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
5.1 3.5 1.4 0.2 setosa
4.9 3 1.4 0.2 setosa
4.7 3.2 1.3 0.2 setosa
4.6 3.1 1.5 0.2 setosa
5 3.6 1.4 0.2 setosa
5.4 3.9 1.7 0.4 setosa
4.6 3.4 1.4 0.3 setosa
5 3.4 1.5 0.2 setosa
4.4 2.9 1.4 0.2 setosa
4.9 3.1 1.5 0.1 setosa

CONSORT Flow Diagram

library(igraph)

DF <- data.frame(from = c('Screening\nInitiated','Screening\nInitiated',
                          'Eligible','Eligible','Enrolled','Enrolled'),
                 to = c('Eligible','Not Eligible','Enrolled','Declined',
                        'Intervention','Control'))

mynodes <- data.frame(Fixed_Name = c('Screening\nInitiated',
                                     'Eligible','Not Eligible',
                                     'Enrolled','Declined',
                                     'Intervention','Control'),
                      Count = c(500, 300, 194, 268, 32, 130, 138))

mynodes$Label = paste(mynodes$Fixed_Name, 
                      "\n(n=", mynodes$Count, ")", 
                      sep = "")

my_ig <- graph.data.frame(DF, vertices = mynodes)

plot(my_ig, layout=layout_as_tree, vertex.shape = "circle", 
     vertex.label = V(my_ig)$Label, 
     vertex.color = "#EAE5EB",
     vertex.size=60, asp = 1.2, margin = c(0.2,0.2,0.2,0.2))

CONSORT Flow Diagram Result

Scoring Instruments

head(DF)
  V1 V2 V3 V4 V5 V6 V7 V8 V9 PID
1 SA  N  N  A  N  A  D SA  N   1
2  N  N  N  N  D  N  N  N  N   2
3 SA  A  D  N  D  A  D SA SA   3
4 SA SA SD SA SD SA  D SA SA   4
5  A  N  D  A  D  N  D  A  A   5
6 SA SA  D  A  D SA  D  A  A   6

Wrangling Instrument Items

DF %>%
  gather(Item, Response, -PID) %>%
  mutate(Response = ordered(Response, 
                            levels = c('SD','D','N','A','SA')),
   Response = as.numeric(Response) - 1,
   Response = ifelse(Item %in% c('V3','V5','V7'),
  4 - Response, Response)) %>%
  spread(Item, Response) %>%
  head()
  PID V1 V2 V3 V4 V5 V6 V7 V8 V9
1   1  4  2  2  3  2  3  3  4  2
2   2  2  2  2  2  3  2  2  2  2
3   3  4  3  3  2  3  3  3  4  4
4   4  4  4  4  4  4  4  3  4  4
5   5  3  2  3  3  3  2  3  3  3
6   6  4  4  3  3  3  4  3  3  3

Calculate Reliability

myalpha <- DF %>%
  gather(Item, Response, -PID) %>%
  mutate(Response = ordered(Response, 
                            levels = c('SD','D','N','A','SA')),
         Response = as.numeric(Response) - 1,
         Response = ifelse(Item %in% c('V3','V5','V7'), 
                           4 - Response, Response)) %>%
  spread(Item, Response) %>%
  select(-PID) %>%
  psych::alpha()

Show Overall Reliability and Item Statistics

myalpha[1:2] %>% pander()

Sharing the Report Via GitHub

Repositories

One Repository

Upload Files and Commit Changes

Interactive Reports

Plotly

library(ggplot2)
library(plotly)
p <- ggplot(data = diamonds, aes(x = cut, fill = clarity)) +
            geom_bar(position = "dodge")
ggplotly(p)

DataTable

library(DT)

datatable(iris, options = list(pageLength = 5))

Can I Do This with Other Tools?

Resources

Questions

Feel free to email me with any questions (cmc13@nyu.edu)