Complete Example 2

Program

“Complete Example 1” showed how to create a simple demographics table using the fmtr package and Tidyverse. Here is the same table created with only Base R and the sassy system of packages.

The data for this example has been included in the fmtr package as an external data file. It may be accessed using the system.file() function as shown below, or downloaded directly from the fmtr GitHub site here

library(sassy)

# Prepare Log -------------------------------------------------------------


options("logr.autolog" = TRUE,
        "logr.notes" = FALSE)

# Get temp location for log and report output
tmp <- tempdir()

# Open log
lf <- log_open(file.path(tmp, "example2.log"))


# Load and Prepare Data ---------------------------------------------------

sep("Prepare Data")

# Get path to sample data
pkg <- system.file("extdata", package = "fmtr")

# Define data library
libname(sdtm, pkg, "csv") 

# Prepare data
put("Subset DM dataset")
dm_mod <- subset(sdtm$DM, ARM != "SCREEN FAILURE", 
                 v(USUBJID, SEX, AGE, ARM)) |> put()

put("Get ARM population counts")
arm_pop <- proc_freq(dm_mod, tables = ARM, 
                     output = long,
                     options = v(nocum, nopercent, nonobs))
  

# Create Format Catalog --------------------------------------------------
sep("Create format catalog")

fmts <- fcat(AGECAT = value(condition(x >= 18 & x <= 24, "18 to 24"),
                            condition(x >= 25 & x <= 44, "25 to 44"),
                            condition(x >= 45 & x <= 64, "45 to 64"),
                            condition(x >= 65, ">= 65")),
             SEX = value(condition(is.na(x), "Missing", order = 3),
                         condition(x == "M", "Male", order = 1),
                         condition(x == "F", "Female", order = 2)),
             VAR = c("AGE" = "Age", 
                     "AGECAT" = "Age Group", 
                     "SEX" = "Sex"))

numfmts <- fcat(N = "%d", MEAN = "%.1f", STD = "(%04.2f)", MEDIAN = "%d", Q1 = "%.1f",
                Q3 = "%.1f", MIN = "%d", MAX = "%d", CNT = "%d", PCT = "(%4.1f%%)")

numlbls <- c(N = "n", MEANSD = "Mean (SD)", MEDIAN = "Median", Q1Q3 = "Q1 - Q3",
             MINMAX = "Min - Max")

# Age Summary Block -------------------------------------------------------

sep("Create summary statistics for age")

age_block <- proc_means(dm_mod, stats = v(n, mean, std, median, q1, q3, min, max),
                        class = ARM, options = v(nway, notype, nofreq)) |> 
  datastep(format = numfmts,
           keep = v(CLASS, VAR, N, MEANSD, MEDIAN, Q1Q3, MINMAX),
           {
             
             MEANSD <- fapply2(MEAN, STD)
             Q1Q3 <- fapply2(Q1, Q3, sep = " - ")
             MINMAX <- fapply2(MIN, MAX, sep = " - ")
           }) |> 
  proc_transpose(id = CLASS, copy = VAR, 
                 var = v(N, MEANSD, MEDIAN, Q1Q3, MINMAX),
                 name = "LABEL") |> 
  datastep({LABEL <- fapply(LABEL, numlbls)})
    
  
# Age Group Block ----------------------------------------------------------

sep("Create frequency counts for Age Group")

put("Create age group frequency counts")
ageg_block <- dm_mod |> 
  datastep({AGECAT <- fapply(AGE, fmts$AGECAT)}) |> 
  proc_freq(tables = AGECAT, by = ARM,
            options = nonobs) |> 
  datastep(format = numfmts,
           keep = v(VAR, BY, LABEL, CNTPCT),
           { 
             LABEL <- CAT
             CNTPCT <- fapply2(CNT, PCT)
           }) |> 
  proc_transpose(var = CNTPCT, by = LABEL, copy = VAR, id = BY, options = noname)

put("Sort age groups as desired")
ageg_block$LABEL <- factor(ageg_block$LABEL, levels = levels(fmts$AGECAT))
ageg_block <- proc_sort(ageg_block, by = LABEL, as.character = TRUE)


# Sex Block ---------------------------------------------------------------

sep("Create frequency counts for SEX")

put("Create sex frequency counts")
sex_block <- dm_mod |> 
  datastep({SEX <- fapply(SEX, fmts$SEX)}) |> 
  proc_freq(tables = SEX, by = ARM,
            options = nonobs) |> 
  datastep(format = numfmts,
           keep = v(VAR, BY, LABEL, CNTPCT),
           { 
             LABEL <- CAT
             CNTPCT <- fapply2(CNT, PCT)
           }) |> 
  proc_transpose(var = CNTPCT, by = LABEL, copy = VAR, id = BY, options = noname)

put("Sort age groups as desired")
sex_block$LABEL <- factor(sex_block$LABEL, levels = levels(fmts$SEX))
sex_block <- proc_sort(sex_block, by = LABEL, as.character = TRUE)


put("Combine blocks into final data frame")
final <- bind_rows(age_block, ageg_block, sex_block) |> put()

# Report ------------------------------------------------------------------


sep("Create and print report")

# Create Table
tbl <- create_table(final, first_row_blank = TRUE, borders = c("top", "bottom")) |> 
  column_defaults(from = `ARM A`, to = `ARM D`, align = "center", width = 1.25) |> 
  stub(vars = v(VAR, LABEL), "Variable", width = 2.5) |> 
  define(VAR, blank_after = TRUE, dedupe = TRUE, label = "Variable",
         format = fmts$VAR,label_row = TRUE) |> 
  define(LABEL, indent = .25, label = "Demographic Category") |> 
  define(`ARM A`,  label = "Treatment Group 1", n = arm_pop["ARM A"]) |> 
  define(`ARM B`,  label = "Treatment Group 2", n = arm_pop["ARM B"]) |> 
  define(`ARM C`,  label = "Treatment Group 3", n = arm_pop["ARM C"]) |> 
  define(`ARM D`,  label = "Treatment Group 4", n = arm_pop["ARM D"]) 

rpt <- create_report(file.path(tmp, "output/example2.rtf"), 
                     output_type = "RTF", font = "Arial") |> 
  set_margins(top = 1, bottom = 1) |> 
  page_header("Sponsor: Company", "Study: ABC") |> 
  titles("Table 1.0", bold = TRUE, blank_row = "none") |> 
  titles("Analysis of Demographic Characteristics", 
         "Safety Population") |> 
  add_content(tbl) |> 
  footnotes("Program: DM_Table.R",
            "NOTE: Denominator based on number of non-missing responses.") |> 
  page_footer(paste0("Date Produced: ", fapply(Sys.time(), "%d%b%y %H:%M")), 
              right = "Page [pg] of [tpg]")

res <- write_report(rpt)


# Clean Up ----------------------------------------------------------------
sep("Clean Up")

# Close log
log_close()

# View report
# file.show(res$modified_path)

# View log
# file.show(lf)

Output

Here is the report produced by the above sample program:

Log

Here is the log produced by the above sample program:

=========================================================================
Log Path: C:/Users/dbosa/AppData/Local/Temp/RtmpKc23Rz/log/example2.log
Program Path: C:/Users/dbosa/Documents/.active-rstudio-document
Working Directory: C:/Projects/Archytas/Sassy/Code
User Name: dbosa
R Version: 4.3.2 (2023-10-31 ucrt)
Machine: SOCRATES x86-64
Operating System: Windows 10 x64 build 22621
Base Packages: stats graphics grDevices utils datasets methods base Other
Packages: tidylog_1.0.2 reporter_1.4.4 logr_1.3.5 sassy_1.2.1 lubridate_1.9.2
forcats_1.0.0 stringr_1.5.0 purrr_1.0.1 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
ggplot2_3.4.4 tidyverse_2.0.0 libr_1.2.8 dplyr_1.1.3 fmtr_1.6.2 procs_1.0.4
common_1.1.1
Log Start Time: 2024-01-07 18:26:25.636328
=========================================================================

=========================================================================
Prepare Data
=========================================================================

# library 'sdtm': 1 items
- attributes: csv not loaded
- path: C:/Users/dbosa/AppData/Local/R/win-library/4.3/fmtr/extdata
- items:
  Name Extension Rows Cols    Size        LastModified
1   DM       csv   87   24 45.5 Kb 2023-12-16 23:10:51

Subset DM dataset

# A tibble: 85 × 4
   USUBJID    SEX     AGE ARM  
   <chr>      <chr> <dbl> <chr>
 1 ABC-01-049 M        39 ARM D
 2 ABC-01-050 M        47 ARM B
 3 ABC-01-051 M        34 ARM A
 4 ABC-01-052 F        45 ARM C
 5 ABC-01-053 F        26 ARM B
 6 ABC-01-054 M        44 ARM D
 7 ABC-01-055 F        47 ARM C
 8 ABC-01-056 M        31 ARM A
 9 ABC-01-113 M        74 ARM D
10 ABC-01-114 F        72 ARM B
# ℹ 75 more rows
# ℹ Use `print(n = ...)` to see more rows

Get ARM population counts

proc_freq: input data set 85 rows and 4 columns
           tables: ARM
           output: long
           view: TRUE
           output: 1 datasets

# A tibble: 1 × 6
  VAR   STAT  `ARM A` `ARM B` `ARM C` `ARM D`
  <chr> <chr>   <dbl>   <dbl>   <dbl>   <dbl>
1 ARM   CNT        20      21      21      23

=========================================================================
Create format catalog
=========================================================================

# A user-defined format: 4 conditions
  Name Type        Expression    Label Order
1  obj    U x >= 18 & x <= 24 18 to 24    NA
2  obj    U x >= 25 & x <= 44 25 to 44    NA
3  obj    U x >= 45 & x <= 64 45 to 64    NA
4  obj    U           x >= 65    >= 65    NA

# A user-defined format: 3 conditions
  Name Type Expression   Label Order
1  obj    U   is.na(x) Missing     3
2  obj    U   x == "M"    Male     1
3  obj    U   x == "F"  Female     2

# A format catalog: 3 formats
- $AGECAT: type U, 4 conditions
- $SEX: type U, 3 conditions
- $VAR: type V, 3 elements

# A format catalog: 10 formats
- $N: type S, "%d"
- $MEAN: type S, "%.1f"
- $STD: type S, "(%04.2f)"
- $MEDIAN: type S, "%d"
- $Q1: type S, "%.1f"
- $Q3: type S, "%.1f"
- $MIN: type S, "%d"
- $MAX: type S, "%d"
- $CNT: type S, "%d"
- $PCT: type S, "(%4.1f%%)"

=========================================================================
Create summary statistics for age
=========================================================================

proc_means: input data set 85 rows and 4 columns
            class: ARM
            var: AGE
            stats: n mean std median q1 q3 min max
            view: TRUE
            output: 1 datasets

  CLASS VAR  N     MEAN      STD MEDIAN   Q1 Q3 MIN MAX
1 ARM A AGE 20 53.15000 11.89991   52.5 47.5 60  31  73
2 ARM B AGE 21 47.38095 16.25877   46.0 35.0 61  22  73
3 ARM C AGE 21 45.71429 14.41923   46.0 38.0 53  19  71
4 ARM D AGE 23 49.73913 14.32486   48.0 39.0 62  21  75

datastep: columns decreased from 10 to 7

  CLASS VAR  N       MEANSD MEDIAN        Q1Q3  MINMAX
1 ARM A AGE 20 53.1 (11.90)   52.5 47.5 - 60.0 31 - 73
2 ARM B AGE 21 47.4 (16.26)   46.0 35.0 - 61.0 22 - 73
3 ARM C AGE 21 45.7 (14.42)   46.0 38.0 - 53.0 19 - 71
4 ARM D AGE 23 49.7 (14.32)   48.0 39.0 - 62.0 21 - 75

proc_transpose: input data set 4 rows and 7 columns
                var: N MEANSD MEDIAN Q1Q3 MINMAX
                id: CLASS
                copy: VAR
                name: LABEL
                output dataset 5 rows and 6 columns

  VAR  LABEL        ARM A        ARM B        ARM C        ARM D
1 AGE      N           20           21           21           23
2 AGE MEANSD 53.1 (11.90) 47.4 (16.26) 45.7 (14.42) 49.7 (14.32)
3 AGE MEDIAN         52.5         46.0         46.0         48.0
4 AGE   Q1Q3  47.5 - 60.0  35.0 - 61.0  38.0 - 53.0  39.0 - 62.0
5 AGE MINMAX      31 - 73      22 - 73      19 - 71      21 - 75

datastep: columns started with 6 and ended with 6

  VAR     LABEL        ARM A        ARM B        ARM C        ARM D
1 AGE         n           20           21           21           23
2 AGE Mean (SD) 53.1 (11.90) 47.4 (16.26) 45.7 (14.42) 49.7 (14.32)
3 AGE    Median         52.5         46.0         46.0         48.0
4 AGE   Q1 - Q3  47.5 - 60.0  35.0 - 61.0  38.0 - 53.0  39.0 - 62.0
5 AGE Min - Max      31 - 73      22 - 73      19 - 71      21 - 75

=========================================================================
Create frequency counts for Age Group
=========================================================================

Create age group frequency counts

datastep: columns increased from 4 to 5

# A tibble: 85 × 5
   USUBJID    SEX     AGE ARM   AGECAT  
   <chr>      <chr> <dbl> <chr> <chr>   
 1 ABC-01-049 M        39 ARM D 25 to 44
 2 ABC-01-050 M        47 ARM B 45 to 64
 3 ABC-01-051 M        34 ARM A 25 to 44
 4 ABC-01-052 F        45 ARM C 45 to 64
 5 ABC-01-053 F        26 ARM B 25 to 44
 6 ABC-01-054 M        44 ARM D 25 to 44
 7 ABC-01-055 F        47 ARM C 45 to 64
 8 ABC-01-056 M        31 ARM A 25 to 44
 9 ABC-01-113 M        74 ARM D >= 65   
10 ABC-01-114 F        72 ARM B >= 65   
# ℹ 75 more rows
# ℹ Use `print(n = ...)` to see more rows

proc_freq: input data set 85 rows and 5 columns
           tables: AGECAT
           by: ARM
           view: TRUE
           output: 1 datasets

# A tibble: 16 × 5
   BY    VAR    CAT        CNT   PCT
   <chr> <chr>  <chr>    <dbl> <dbl>
 1 ARM A AGECAT >= 65        3 15   
 2 ARM A AGECAT 18 to 24     0  0   
 3 ARM A AGECAT 25 to 44     4 20   
 4 ARM A AGECAT 45 to 64    13 65   
 5 ARM B AGECAT >= 65        5 23.8 
 6 ARM B AGECAT 18 to 24     1  4.76
 7 ARM B AGECAT 25 to 44     8 38.1 
 8 ARM B AGECAT 45 to 64     7 33.3 
 9 ARM C AGECAT >= 65        2  9.52
10 ARM C AGECAT 18 to 24     3 14.3 
11 ARM C AGECAT 25 to 44     4 19.0 
12 ARM C AGECAT 45 to 64    12 57.1 
13 ARM D AGECAT >= 65        3 13.0 
14 ARM D AGECAT 18 to 24     1  4.35
15 ARM D AGECAT 25 to 44     7 30.4 
16 ARM D AGECAT 45 to 64    12 52.2 

datastep: columns decreased from 5 to 4

# A tibble: 16 × 4
   VAR    BY    LABEL    CNTPCT    
   <chr>  <chr> <chr>    <chr>     
 1 AGECAT ARM A >= 65    3 (15.0%) 
 2 AGECAT ARM A 18 to 24 0 ( 0.0%) 
 3 AGECAT ARM A 25 to 44 4 (20.0%) 
 4 AGECAT ARM A 45 to 64 13 (65.0%)
 5 AGECAT ARM B >= 65    5 (23.8%) 
 6 AGECAT ARM B 18 to 24 1 ( 4.8%) 
 7 AGECAT ARM B 25 to 44 8 (38.1%) 
 8 AGECAT ARM B 45 to 64 7 (33.3%) 
 9 AGECAT ARM C >= 65    2 ( 9.5%) 
10 AGECAT ARM C 18 to 24 3 (14.3%) 
11 AGECAT ARM C 25 to 44 4 (19.0%) 
12 AGECAT ARM C 45 to 64 12 (57.1%)
13 AGECAT ARM D >= 65    3 (13.0%) 
14 AGECAT ARM D 18 to 24 1 ( 4.3%) 
15 AGECAT ARM D 25 to 44 7 (30.4%) 
16 AGECAT ARM D 45 to 64 12 (52.2%)

proc_transpose: input data set 16 rows and 4 columns
                by: LABEL
                var: CNTPCT
                id: BY
                copy: VAR
                name: NAME
                output dataset 4 rows and 6 columns

# A tibble: 4 × 6
  VAR    LABEL    `ARM A`    `ARM B`   `ARM C`    `ARM D`   
  <chr>  <chr>    <chr>      <chr>     <chr>      <chr>     
1 AGECAT >= 65    3 (15.0%)  5 (23.8%) 2 ( 9.5%)  3 (13.0%) 
2 AGECAT 18 to 24 0 ( 0.0%)  1 ( 4.8%) 3 (14.3%)  1 ( 4.3%) 
3 AGECAT 25 to 44 4 (20.0%)  8 (38.1%) 4 (19.0%)  7 (30.4%) 
4 AGECAT 45 to 64 13 (65.0%) 7 (33.3%) 12 (57.1%) 12 (52.2%)

Sort age groups as desired

proc_sort: input data set 4 rows and 6 columns
           by: LABEL
           keep: VAR LABEL ARM A ARM B ARM C ARM D
           order: a
           output data set 4 rows and 6 columns

# A tibble: 4 × 6
  VAR    LABEL    `ARM A`    `ARM B`   `ARM C`    `ARM D`   
  <chr>  <chr>    <chr>      <chr>     <chr>      <chr>     
1 AGECAT 18 to 24 0 ( 0.0%)  1 ( 4.8%) 3 (14.3%)  1 ( 4.3%) 
2 AGECAT 25 to 44 4 (20.0%)  8 (38.1%) 4 (19.0%)  7 (30.4%) 
3 AGECAT 45 to 64 13 (65.0%) 7 (33.3%) 12 (57.1%) 12 (52.2%)
4 AGECAT >= 65    3 (15.0%)  5 (23.8%) 2 ( 9.5%)  3 (13.0%) 

=========================================================================
Create frequency counts for SEX
=========================================================================

Create sex frequency counts

datastep: columns started with 4 and ended with 4

# A tibble: 85 × 4
   USUBJID    SEX      AGE ARM  
   <chr>      <chr>  <dbl> <chr>
 1 ABC-01-049 Male      39 ARM D
 2 ABC-01-050 Male      47 ARM B
 3 ABC-01-051 Male      34 ARM A
 4 ABC-01-052 Female    45 ARM C
 5 ABC-01-053 Female    26 ARM B
 6 ABC-01-054 Male      44 ARM D
 7 ABC-01-055 Female    47 ARM C
 8 ABC-01-056 Male      31 ARM A
 9 ABC-01-113 Male      74 ARM D
10 ABC-01-114 Female    72 ARM B
# ℹ 75 more rows
# ℹ Use `print(n = ...)` to see more rows

proc_freq: input data set 85 rows and 4 columns
           tables: SEX
           by: ARM
           view: TRUE
           output: 1 datasets

# A tibble: 8 × 5
  BY    VAR   CAT      CNT   PCT
  <chr> <chr> <chr>  <dbl> <dbl>
1 ARM A SEX   Female     5  25  
2 ARM A SEX   Male      15  75  
3 ARM B SEX   Female    11  52.4
4 ARM B SEX   Male      10  47.6
5 ARM C SEX   Female     9  42.9
6 ARM C SEX   Male      12  57.1
7 ARM D SEX   Female     7  30.4
8 ARM D SEX   Male      16  69.6

datastep: columns decreased from 5 to 4

# A tibble: 8 × 4
  VAR   BY    LABEL  CNTPCT    
  <chr> <chr> <chr>  <chr>     
1 SEX   ARM A Female 5 (25.0%) 
2 SEX   ARM A Male   15 (75.0%)
3 SEX   ARM B Female 11 (52.4%)
4 SEX   ARM B Male   10 (47.6%)
5 SEX   ARM C Female 9 (42.9%) 
6 SEX   ARM C Male   12 (57.1%)
7 SEX   ARM D Female 7 (30.4%) 
8 SEX   ARM D Male   16 (69.6%)

proc_transpose: input data set 8 rows and 4 columns
                by: LABEL
                var: CNTPCT
                id: BY
                copy: VAR
                name: NAME
                output dataset 2 rows and 6 columns

# A tibble: 2 × 6
  VAR   LABEL  `ARM A`    `ARM B`    `ARM C`    `ARM D`   
  <chr> <chr>  <chr>      <chr>      <chr>      <chr>     
1 SEX   Female 5 (25.0%)  11 (52.4%) 9 (42.9%)  7 (30.4%) 
2 SEX   Male   15 (75.0%) 10 (47.6%) 12 (57.1%) 16 (69.6%)

Sort age groups as desired

proc_sort: input data set 2 rows and 6 columns
           by: LABEL
           keep: VAR LABEL ARM A ARM B ARM C ARM D
           order: a
           output data set 2 rows and 6 columns

# A tibble: 2 × 6
  VAR   LABEL  `ARM A`    `ARM B`    `ARM C`    `ARM D`   
  <chr> <chr>  <chr>      <chr>      <chr>      <chr>     
1 SEX   Male   15 (75.0%) 10 (47.6%) 12 (57.1%) 16 (69.6%)
2 SEX   Female 5 (25.0%)  11 (52.4%) 9 (42.9%)  7 (30.4%) 

Combine blocks into final data frame

      VAR     LABEL        ARM A        ARM B        ARM C        ARM D
1     AGE         n           20           21           21           23
2     AGE Mean (SD) 53.1 (11.90) 47.4 (16.26) 45.7 (14.42) 49.7 (14.32)
3     AGE    Median         52.5         46.0         46.0         48.0
4     AGE   Q1 - Q3  47.5 - 60.0  35.0 - 61.0  38.0 - 53.0  39.0 - 62.0
5     AGE Min - Max      31 - 73      22 - 73      19 - 71      21 - 75
6  AGECAT  18 to 24    0 ( 0.0%)    1 ( 4.8%)    3 (14.3%)    1 ( 4.3%)
7  AGECAT  25 to 44    4 (20.0%)    8 (38.1%)    4 (19.0%)    7 (30.4%)
8  AGECAT  45 to 64   13 (65.0%)    7 (33.3%)   12 (57.1%)   12 (52.2%)
9  AGECAT     >= 65    3 (15.0%)    5 (23.8%)    2 ( 9.5%)    3 (13.0%)
10    SEX      Male   15 (75.0%)   10 (47.6%)   12 (57.1%)   16 (69.6%)
11    SEX    Female    5 (25.0%)   11 (52.4%)    9 (42.9%)    7 (30.4%)

=========================================================================
Create and print report
=========================================================================

# A report specification: 1 pages
- file_path: 'C:\Users\dbosa\AppData\Local\Temp\RtmpKc23Rz/output/example2.rtf'
- output_type: RTF
- units: inches
- orientation: landscape
- margins: top 1 bottom 1 left 1 right 1
- line size/count: 9/36
- page_header: left=Sponsor: Company right=Study: ABC
- title 1: 'Table 1.0'
- title 2: 'Analysis of Demographic Characteristics'
- title 3: 'Safety Population'
- footnote 1: 'Program: DM_Table.R'
- footnote 2: 'NOTE: Denominator based on number of non-missing responses.'
- page_footer: left=Date Produced: 07Jan24 18:26 center= right=Page [pg] of [tpg]
- content: 
# A table specification:
- data: data.frame 'final' 11 rows 6 cols
- show_cols: all
- use_attributes: all
- stub: VAR LABEL 'Variable' width=2.5 align='left' 
- define: VAR 'Variable' dedupe='TRUE' 
- define: LABEL 'Demographic Category' 
- define: ARM A 'Treatment Group 1' 
- define: ARM B 'Treatment Group 2' 
- define: ARM C 'Treatment Group 3' 
- define: ARM D 'Treatment Group 4' 

=========================================================================
Clean Up
=========================================================================

=========================================================================
Log End Time: 2024-01-07 18:26:27.426253
Log Elapsed Time: 0 00:00:01
=========================================================================

mirror server hosted at Truenetwork, Russian Federation.