library(gtsummary)
library(gt)
패키지 소개
데이터 요약 & 회귀분석 결과 테이블 만들기
필요한 패키지 설치
1. Baseline characteristics
- data의 일반 특성 요약
- 집단 별 통계량 표시
기본 테이블
library(gtsummary)
library(gt)
library(survival)
library(flextable)
# Baseline-characteristics table: summarise every column of `trial`,
# split by treatment arm (`trt`), prepend an "Overall" column, and
# convert the result to a flextable for rendering.
trial |>
  tbl_summary(by = trt) |>
  add_overall() |>
  as_flex_table()
Characteristic | Overall, N = 2001 | Drug A, N = 981 | Drug B, N = 1021 |
---|---|---|---|
Age | 47 (38, 57) | 46 (37, 59) | 48 (39, 56) |
Unknown | 11 | 7 | 4 |
Marker Level (ng/mL) | 0.64 (0.22, 1.39) | 0.84 (0.24, 1.57) | 0.52 (0.19, 1.20) |
Unknown | 10 | 6 | 4 |
T Stage | |||
T1 | 53 (27%) | 28 (29%) | 25 (25%) |
T2 | 54 (27%) | 25 (26%) | 29 (28%) |
T3 | 43 (22%) | 22 (22%) | 21 (21%) |
T4 | 50 (25%) | 23 (23%) | 27 (26%) |
Grade | |||
I | 68 (34%) | 35 (36%) | 33 (32%) |
II | 68 (34%) | 32 (33%) | 36 (35%) |
III | 64 (32%) | 31 (32%) | 33 (32%) |
Tumor Response | 61 (32%) | 28 (29%) | 33 (34%) |
Unknown | 7 | 3 | 4 |
Patient Died | 112 (56%) | 52 (53%) | 60 (59%) |
Months to Death/Censor | 22.4 (16.0, 24.0) | 23.5 (17.4, 24.0) | 21.2 (14.6, 24.0) |
1Median (IQR); n (%) |
통계량 변경
테이블에 표시되는 통계량의 형식을 변경할 수 있습니다. 예를 들어, 연속형(continuous) 변수는 기본적으로 median (IQR)로 표시되지만, `tbl_summary()`의 `statistic` 인자를 통해 `mean ± sd` 형태로 표시할 수 있습니다.
주의해야 할 점은 통계량 이름을 항상 중괄호(`{}`)로 감싸고, 전체를 따옴표(`""`)로 묶은 문자열 형태로 지정해야 한다는 것입니다. 예: `"{mean} ± {sd}"`
# Summary table by treatment arm (`trt`) with custom statistics:
# continuous variables as "mean ± sd", categorical variables as "n (p)".
tbl_summary(
  data = trial,
  by = trt,
  include = c(age, marker, stage, grade, response, death, ttdeath),
  # Force stage and grade to be summarised as categorical variables.
  type = list(c(stage, grade) ~ "categorical"),
  # Each statistic is a string template; the names inside {} are
  # replaced by the corresponding computed statistic.
  statistic = list(
    all_continuous() ~ "{mean} ± {sd}",
    all_categorical() ~ "{n} ({p})"
  ),
  digits = list(
    # One decimal place for continuous statistics.
    all_continuous() ~ 1,
    # Counts with 0 decimals, percentages with 1 decimal.
    all_categorical() ~ c(0, 1)
  ),
  # No "Unknown" (missing) rows in the table. The trailing comma that
  # followed this argument was removed: it passed an empty argument.
  missing = "no"
) |>
  add_overall() |>
  as_flex_table()
Characteristic | Overall, N = 2001 | Drug A, N = 981 | Drug B, N = 1021 |
---|---|---|---|
Age | 47.2 ± 14.3 | 47.0 ± 14.7 | 47.4 ± 14.0 |
Marker Level (ng/mL) | 0.9 ± 0.9 | 1.0 ± 0.9 | 0.8 ± 0.8 |
T Stage | |||
T1 | 53 (26.5) | 28 (28.6) | 25 (24.5) |
T2 | 54 (27.0) | 25 (25.5) | 29 (28.4) |
T3 | 43 (21.5) | 22 (22.4) | 21 (20.6) |
T4 | 50 (25.0) | 23 (23.5) | 27 (26.5) |
Grade | |||
I | 68 (34.0) | 35 (35.7) | 33 (32.4) |
II | 68 (34.0) | 32 (32.7) | 36 (35.3) |
III | 64 (32.0) | 31 (31.6) | 33 (32.4) |
Tumor Response | 61 (31.6) | 28 (29.5) | 33 (33.7) |
Patient Died | 112 (56.0) | 52 (53.1) | 60 (58.8) |
Months to Death/Censor | 19.6 ± 5.3 | 20.2 ± 5.0 | 19.0 ± 5.5 |
1Mean ± SD; n (%) |
P-value
# Summary table by treatment arm (`trt`) with an added p-value column
# comparing the arms.
tbl_summary(
  data = trial,
  by = trt,
  include = c(age, marker, stage, grade, response, death, ttdeath),
  statistic = list(
    all_continuous() ~ "{mean} ± {sd}",
    # Disabled alternative kept from the original:
    # statistic = "{median} ({p25}-{p75})",
    all_categorical() ~ "{n} ({p})"
  ),
  digits = list(
    # One decimal place for continuous statistics.
    all_continuous() ~ 1,
    # Counts with 0 decimals, percentages with 1 decimal.
    all_categorical() ~ c(0, 1)
  ),
  # No "Unknown" (missing) rows in the table. The trailing comma that
  # followed this argument was removed: it passed an empty argument.
  missing = "no"
) |>
  add_overall() |>
  add_p(
    # Test used per variable type.
    test = list(
      all_continuous() ~ "t.test",
      all_categorical() ~ "chisq.test"
    ),
    # Format p-values with 3 digits; an explicit function replaces the
    # `~ style_pvalue(., digits = 3)` formula shorthand.
    pvalue_fun = \(x) style_pvalue(x, digits = 3)
  ) |>
  as_flex_table()
Characteristic | Overall, N = 2001 | Drug A, N = 981 | Drug B, N = 1021 | p-value2 |
---|---|---|---|---|
Age | 47.2 ± 14.3 | 47.0 ± 14.7 | 47.4 ± 14.0 | 0.834 |
Marker Level (ng/mL) | 0.9 ± 0.9 | 1.0 ± 0.9 | 0.8 ± 0.8 | 0.116 |
T Stage | 0.866 | |||
T1 | 53 (26.5) | 28 (28.6) | 25 (24.5) | |
T2 | 54 (27.0) | 25 (25.5) | 29 (28.4) | |
T3 | 43 (21.5) | 22 (22.4) | 21 (20.6) | |
T4 | 50 (25.0) | 23 (23.5) | 27 (26.5) | |
Grade | 0.871 | |||
I | 68 (34.0) | 35 (35.7) | 33 (32.4) | |
II | 68 (34.0) | 32 (32.7) | 36 (35.3) | |
III | 64 (32.0) | 31 (31.6) | 33 (32.4) | |
Tumor Response | 61 (31.6) | 28 (29.5) | 33 (33.7) | 0.637 |
Patient Died | 112 (56.0) | 52 (53.1) | 60 (58.8) | 0.498 |
Months to Death/Censor | 19.6 ± 5.3 | 20.2 ± 5.0 | 19.0 ± 5.5 | 0.108 |
1Mean ± SD; n (%) | ||||
2Welch Two Sample t-test; Pearson's Chi-squared test |
2. Regression table 만들기
단순 회귀분석(Univariable regression)
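단순 회귀분석의 경우, `tbl_uvregression()` 함수에 데이터와 모형(`method`), 반응 변수(`y`)를 지정하면 각 변수에 대한 단순 회귀분석 결과를 하나의 테이블로 만들 수 있습니다.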
library(survival)
# Univariable Cox regression table: one model per covariate, each with
# Surv(ttdeath, death) as the outcome. Stored in `uni_tbl` so it can be
# reused later (e.g. merged with another table).
uni_tbl <- tbl_uvregression(
  # NOTE(review): no library(dplyr) call is visible in this file; select()
  # is presumably the dplyr verb re-exported by gtsummary -- confirm.
  data = trial |> select(age, marker, stage, grade, response, death, ttdeath),
  method = coxph,
  y = Surv(ttdeath, death),
  # Report exponentiated coefficients (shown as HR in the rendered table).
  exponentiate = TRUE,
  # Do not show the N column.
  hide_n = TRUE
) |>
  # Requests removal of footnotes. NOTE(review): the rendered output still
  # shows the abbreviation footnote -- confirm this is the intended result.
  modify_footnote(everything() ~ NA)

# Render the stored table as a flextable.
uni_tbl |>
  as_flex_table()
Characteristic | HR1 | 95% CI1 | p-value |
---|---|---|---|
Age | 1.01 | 0.99, 1.02 | 0.3 |
Marker Level (ng/mL) | 0.91 | 0.72, 1.15 | 0.4 |
T Stage | |||
T1 | — | — | |
T2 | 1.18 | 0.68, 2.04 | 0.6 |
T3 | 1.23 | 0.69, 2.20 | 0.5 |
T4 | 2.48 | 1.49, 4.14 | <0.001 |
Grade | |||
I | — | — | |
II | 1.28 | 0.80, 2.05 | 0.3 |
III | 1.69 | 1.07, 2.66 | 0.024 |
Tumor Response | 0.50 | 0.31, 0.78 | 0.003 |
1HR = Hazard Ratio, CI = Confidence Interval |
다변량 회귀분석(Multivariable regression)
다중 회귀분석 테이블을 만드는 함수는 `tbl_regression()`입니다. 단순 회귀분석 테이블을 만드는 `tbl_uvregression()`과 다르게, 먼저 회귀 모형을 적합한 뒤 그 모형 객체를 `tbl_regression()`의 인자로 전달해야 합니다.
# Multivariable Cox model: all covariates enter a single model, which is
# fitted first and then passed to tbl_regression().
fit <- coxph(
  Surv(ttdeath, death) ~ age + marker + stage + grade + response,
  data = trial
)

# Regression table for the fitted model, stored in `mult_tbl` for reuse.
mult_tbl <- tbl_regression(
  x = fit,
  # Report exponentiated coefficients.
  exponentiate = TRUE,
  # p-values with 3 digits, estimates with 2 digits; explicit functions
  # replace the `~ f(., ...)` formula shorthand.
  pvalue_fun = \(x) style_pvalue(x, digits = 3),
  estimate_fun = \(x) style_ratio(x, digits = 2)
) |>
  bold_p() |>
  # Disabled alternative kept from the original:
  # modify_column_merge(pattern = "{estimate} ({conf.low}-{conf.high})",
  #                     rows = !is.na(estimate)) |>
  # Append significance stars to the estimate; hide the CI and SE columns.
  add_significance_stars(hide_ci = TRUE, hide_se = TRUE) |>
  modify_header(
    label = "**Variable**",
    # NOTE(review): the model is a Cox regression and the CI column is
    # hidden above, so "OR (95% CI)" looks mislabeled (the rendered footnote
    # reads "HR = Hazard Ratio"). Kept unchanged to match the rendered
    # tables; consider "**HR**".
    estimate = "**OR (95% CI)**",
    p.value = "**P value**"
  ) |>
  # Requests removal of footnotes. NOTE(review): the rendered output still
  # shows the abbreviation footnote -- confirm this is the intended result.
  modify_footnote(everything() ~ NA)

# Render the stored table as a flextable.
mult_tbl |>
  as_flex_table()
Variable | OR (95% CI)1 | P value |
---|---|---|
Age | 1.02* | 0.023 |
Marker Level (ng/mL) | 0.89 | 0.398 |
T Stage | ||
T1 | — | |
T2 | 1.33 | 0.376 |
T3 | 1.60 | 0.183 |
T4 | 3.65*** | <0.001 |
Grade | ||
I | — | |
II | 1.27 | 0.395 |
III | 1.75* | 0.025 |
Tumor Response | 0.41*** | <0.001 |
1HR = Hazard Ratio |
두 개의 테이블 합치기
이제 단순 회귀분석 테이블과 다중 회귀분석 테이블을 합쳐보도록 하겠습니다.
# Merge the univariable and multivariable tables side by side; each gets
# its own spanning header, in the same order as the list of tables.
tbl_merge(
  list(uni_tbl, mult_tbl),
  tab_spanner = c("**Univariable**", "**Multivariable**")
) |>
  as_flex_table()
| Univariable | Multivariable | |||
---|---|---|---|---|---|
Characteristic | HR1 | 95% CI1 | p-value | OR (95% CI)1 | P value |
Age | 1.01 | 0.99, 1.02 | 0.3 | 1.02* | 0.023 |
Marker Level (ng/mL) | 0.91 | 0.72, 1.15 | 0.4 | 0.89 | 0.398 |
T Stage | |||||
T1 | — | — | — | ||
T2 | 1.18 | 0.68, 2.04 | 0.6 | 1.33 | 0.376 |
T3 | 1.23 | 0.69, 2.20 | 0.5 | 1.60 | 0.183 |
T4 | 2.48 | 1.49, 4.14 | <0.001 | 3.65*** | <0.001 |
Grade | |||||
I | — | — | — | ||
II | 1.28 | 0.80, 2.05 | 0.3 | 1.27 | 0.395 |
III | 1.69 | 1.07, 2.66 | 0.024 | 1.75* | 0.025 |
Tumor Response | 0.50 | 0.31, 0.78 | 0.003 | 0.41*** | <0.001 |
1HR = Hazard Ratio, CI = Confidence Interval |
참고자료
https://www.danieldsjoberg.com/gtsummary/reference/tbl_summary.html