Statistics review 11: Assessing risk¶

R code accompanying paper

Key learning points¶

Relative Risk
Odds Ratio
Measuring the impact of exposure to a risk factor
Measures of the success of a treatment

suppressPackageStartupMessages(library(tidyverse))

# Set the repr.plot.width / repr.plot.height options (plot size used when the
# notebook displays figures).
options(repr.plot.width=4, repr.plot.height=3)

Data¶

The study investigated the association between surfactant protein B and acute respiratory distress syndrome (ARDS). Patients were classified according to their cytosine/thymine (C/T) gene coding, and patients with the C allele present (genotype CC or CT) were compared with those with genotype TT.

# 2x2 table of counts for the ARDS example: one row per genotype group
# (CC/CT, TT), one column per outcome (ARDS, NoARDS).
ARDS <- c(11, 1)
NoARDS <- c(208, 182)
df <- data.frame(ARDS, NoARDS, row.names = c("CC/CT", "TT"))
df

	ARDS	NoARDS
CC/CT	11	208
TT	1	182

Relative risk¶

# Add each row's total count and its risk (ARDS count divided by the total).
df1 <- df %>%
    mutate(
        Total = ARDS + NoARDS,
        risk = ARDS / Total
    )
df1

ARDS	NoARDS	Total	risk
11	208	219	0.050228311
1	182	183	0.005464481

# Relative risk: risk in the first row divided by risk in the second row,
# shown to 2 decimal places.
rr <- df1[["risk"]][1] / df1[["risk"]][2]
round(rr, 2)

9.19

# Relative risk of a 2x2 table of counts.
#
# tbl: table indexed as tbl[row, col]; each row is a group, column 1 holds the
#      event counts and column 2 the non-event counts.
# Returns the row-1 risk divided by the row-2 risk, rounded to 2 decimals,
# where a row's risk is its column-1 count over the row total.
rr <- function(tbl) {
    risk_row1 <- tbl[1,1] / (tbl[1,1] + tbl[1,2])
    risk_row2 <- tbl[2,1] / (tbl[2,1] + tbl[2,2])
    round(risk_row1 / risk_row2, 2)
}

# Confidence interval for the relative risk of a 2x2 table of counts, built on
# the log scale and transformed back with exp().
#
# tbl:   table indexed as tbl[row, col]; column 1 holds the event counts.
# alpha: significance level; the normal quantile used is qnorm(1 - alpha/2),
#        so the default 0.05 gives a 95% interval.
# Returns c(lower, upper), rounded to 2 decimals.
rr_ci <- function(tbl, alpha=0.05) {
    events1 <- tbl[1,1]
    total1 <- tbl[1,1] + tbl[1,2]
    events2 <- tbl[2,1]
    total2 <- tbl[2,1] + tbl[2,2]

    # Standard error of log(RR).
    se_log <- sqrt(1/events1 - 1/total1 + 1/events2 - 1/total2)
    log_rr <- log((events1/total1)/(events2/total2))
    z <- qnorm(1 - alpha/2)

    round(exp(log_rr + c(-1, 1) * z * se_log), 2)
}

# Relative risk for the ARDS table (row 1 risk over row 2 risk).
rr(df)

9.19

# Confidence interval for that relative risk at the default alpha = 0.05.
rr_ci(df)

1.2
70.53

Odds ratio¶

# Add each row's odds: ARDS count divided by non-ARDS count.
df2 <- mutate(df, odds = ARDS / NoARDS)
df2

ARDS	NoARDS	odds
11	208	0.052884615
1	182	0.005494505

# Odds ratio: odds in the first row divided by odds in the second row.
or <- df2[["odds"]][1] / df2[["odds"]][2]
or

9.625

# Odds ratio of a 2x2 table of counts.
#
# tbl: table indexed as tbl[row, col]; each row is a group, column 1 holds the
#      event counts and column 2 the non-event counts.
# Returns the row-1 odds divided by the row-2 odds, rounded to 2 decimals,
# where a row's odds is its column-1 count over its column-2 count.
or <- function(tbl) {
    odds_row1 <- tbl[1,1] / tbl[1,2]
    odds_row2 <- tbl[2,1] / tbl[2,2]
    round(odds_row1 / odds_row2, 2)
}

# Confidence interval for the odds ratio of a 2x2 table of counts, built on
# the log scale and transformed back with exp().
#
# tbl:   table indexed as tbl[row, col]; each row is a group, column 1 holds
#        the event counts and column 2 the non-event counts.
# alpha: significance level; the normal quantile used is qnorm(1 - alpha/2),
#        so the default 0.05 gives a 95% interval.
# Returns c(lower, upper), rounded to 2 decimals.
or_ci <- function(tbl, alpha=0.05) {
    a <- tbl[1,1]
    b <- tbl[1,2]
    c <- tbl[2,1]
    d <- tbl[2,2]
    # The standard error of log(OR) is the square root of the SUM of the
    # reciprocals of all four cell counts; every term is added.
    log.se <- sqrt(1/a + 1/b + 1/c + 1/d)
    log.or <- log((a/b)/(c/d))
    k <- qnorm(1-alpha/2)
    log.ci <- c(log.or - k*log.se, log.or + k*log.se)
    round(exp(log.ci), 2)
}

# Odds ratio for the ARDS table (row 1 odds over row 2 odds).
or(df)

9.62

# Confidence interval for that odds ratio at the default alpha = 0.05.
or_ci(df)

1.24
74.5

Advantages of odds ratio¶

The OR can be estimated in a case-control study.
The OR is a symmetric ratio, in that the OR for the disease given the risk factor is the same as the OR for the risk factor given the disease.
ORs form part of the output when carrying out logistic regression.

Attributable risk¶

The proportion of cases in a population that could be prevented if the risk factor were to be eliminated. The AR is the difference between the actual number of cases in a sample and the number of cases that would be expected if exposure to the risk factor were eliminated, expressed as a proportion of the former.

# Attributable risk of a 2x2 table of counts, as a percentage.
#
# tbl: table indexed as tbl[row, col]; column 1 holds the case counts and
#      row 2 is the group whose case rate is used as the baseline.
# Returns (observed cases - expected cases) / observed cases, times 100 and
# rounded to 2 decimals, where "expected" applies the row-2 case rate to the
# whole sample.
ar <- function(tbl) {
    n <- tbl[1,1] + tbl[1,2] + tbl[2,1] + tbl[2,2]
    observed <- tbl[1,1] + tbl[2,1]
    expected <- (n * tbl[2,1]) / (tbl[2,1] + tbl[2,2])
    round(100 * ((observed - expected) / observed), 2)
}

# Confidence interval for the attributable risk of a 2x2 table of counts,
# as percentages.
#
# tbl:   table indexed as tbl[row, col]; column 1 holds the case counts.
# alpha: significance level; the normal quantile used is qnorm(1 - alpha/2),
#        so the default 0.05 gives a 95% interval.
# Returns c(lo, hi) times 100, rounded to 2 decimals, where lo uses exp(-u)
# and hi uses exp(u).
ar_ci <- function(tbl, alpha=0.05) {
    a <- tbl[1,1]
    b <- tbl[1,2]
    c <- tbl[2,1]
    d <- tbl[2,2]
    n <- a + b + c + d
    z <- qnorm(1-alpha/2)

    # Cross-product difference; it appears in u and in both bounds.
    delta <- a*d - b*c
    u <- (z*(a+c)*(c+d))/delta * sqrt(((a*d*(n-c) + c^2*b)/(n*c*(a+c)*(c+d))))

    # Bound of the interval for a given multiplier exp(u) or exp(-u).
    bound <- function(mult) (delta*mult)/(n*c + delta*mult)

    round(100*base::c(bound(exp(-u)), bound(exp(u))), 2)
}

# Attributable risk (as a percentage) for the ARDS table.
ar(df)

81.69

# Confidence interval (as percentages) for that attributable risk at the
# default alpha = 0.05.
ar_ci(df)

31.16
97.78

Risk measurements in clinical trials¶

# 2x2 table of counts for the clinical-trial example: one row per treatment
# arm (early, standard), one column per outcome (survived, died).
df3 <- as.data.frame(matrix(
    c(79, 60, 38, 59),
    nrow = 2,
    dimnames = list(c("early", "standard"), c("survived", "died"))
))
df3

	survived	died
early	79	38
standard	60	59

# rr() uses column 1 as the event; in df3 that column is `survived`, so this
# is the ratio of the survival proportions (early over standard).
rr(df3)

1.34

# or() uses column 1 over column 2; in df3 that is the odds of survival
# (survived / died), early over standard.
or(df3)

2.04

# ar() counts column 1 as the cases; in df3 that column is `survived`.
ar(df3)

14.39

Risk difference¶

# Absolute risk reduction of a 2x2 table of counts, in percentage points.
#
# tbl: table indexed as tbl[row, col]; each row is a group and column 2 holds
#      the event counts (unlike rr/or/ar, which use column 1).
# Returns the row-2 event proportion minus the row-1 event proportion,
# times 100 and rounded to 2 decimals.
arr <- function(tbl) {
    rate_row1 <- tbl[1,2] / (tbl[1,1] + tbl[1,2])
    rate_row2 <- tbl[2,2] / (tbl[2,1] + tbl[2,2])
    round(100 * (rate_row2 - rate_row1), 2)
}

# Difference, in percentage points, between the two arms' proportions in
# column 2 of df3 (`died`): standard minus early.
arr(df3)

17.1

Number needed to treat¶

# Number needed to treat for a 2x2 table of counts.
#
# df: table indexed as df[row, col]; each row is a group and column 2 holds
#     the event counts (same layout as arr()).
# Returns 1 / ARR as a whole number, where ARR is the row-2 event proportion
# minus the row-1 event proportion. The result is rounded UP in magnitude,
# following the convention that an NNT is never rounded down. A negative
# value means the row-1 group had the higher event rate; equal rates give Inf.
nnt <- function(df) {
    # Use the unrounded risk difference: inverting a value that was already
    # rounded to 0.01 percentage points compounds the rounding error.
    rate_row1 <- df[1,2] / (df[1,1] + df[1,2])
    rate_row2 <- df[2,2] / (df[2,1] + df[2,2])
    raw <- 1 / (rate_row2 - rate_row1)

    # round(., 8) removes floating-point fuzz so that an exact value such as
    # 5 is not pushed up to 6 by ceiling().
    sign(raw) * ceiling(round(abs(raw), 8))
}

# Number needed to treat for the clinical-trial table df3.
nnt(df3)

6

Exercise¶

1. Write a function to calculate the confidence intervals of the absolute risk reduction (ARR) given a \(2 \times 2\) table of outcomes. What is the 90% CI for the data given in df3?

2. Write a function to calculate the confidence intervals for the number needed to treat (NNT) given a \(2 \times 2\) table of outcomes. What is the 90% CI for the data given in df3?