# Install and load necessary packages (if needed)
if ( ! require( dplyr) ) { install.packages ( "dplyr" ) }
if ( ! require( ggplot2) ) { install.packages ( "ggplot2" ) }
if ( ! require( DescTools) ) { install.packages ( "DescTools" ) }
library( dplyr)
library( ggplot2)
library( DescTools)
# Assuming your dataset is named 'surgery_data'
# (Adapt this to your actual dataset name)
# Example dataset using built in:
surgery_data <- airquality
surgery_data$mort30 <- sample( c( 0 , 1 ) , nrow( surgery_data) , replace = T)
surgery_data$categorical1 <- sample( c( "A" , "B" , "C" ) , nrow( surgery_data) , replace = T)
surgery_data$categorical2 <- sample( c( "X" , "Y" ) , nrow( surgery_data) , replace = T)
# Get variable names
variable_names <- names( surgery_data)
# Remove the outcome variable mort30 from the list and any categorical variables that should not be included
variable_names <- variable_names[ variable_names != "mort30" ]
categorical_variables_exclude <- c( "categorical1" , "categorical2" )
# Loop through each variable
for ( variable in variable_names) {
cat( "\n ---------------------------------------------------\n " )
cat( "Analyzing variable:" , variable, "\n " )
# Check if the variable is categorical (character or factor)
if ( is.character ( surgery_data[ [ variable] ] ) || is.factor ( surgery_data[ [ variable] ] ) || variable % in% categorical_variables_exclude) {
cat( " Type: Categorical\n " )
# Calculate proportions of mort30 within each category
prop <- surgery_data %>%
group_by( .data [ [ variable] ] ) %>%
summarize(
mort30_prop = mean( mort30, na.rm = TRUE) , # Proportion of mort30=1
n = n( ) # Number of records in each category
)
print( prop)
# Visualize using a bar chart (optional)
p <- ggplot( prop, aes( x = .data [ [ variable] ] , y= mort30_prop) ) +
geom_col( ) +
labs ( title
= paste
( "Proportion of mort30 for " , variable
, "Categories" ) , x = variable,
y = "Proportion mort30:1" )
print( p)
# perform Chi-square test
xtab <- table( surgery_data[ [ variable] ] , surgery_data$mort30)
if ( min( xtab) >= 10 ) {
chi_test <- chisq.test ( xtab)
print( chi_test)
} else if ( min( xtab) > 0 ) {
print( fisher.test ( xtab) )
} else {
print( "Not sufficient data to perform association test" )
}
} else if ( is.numeric ( surgery_data[ [ variable] ] ) ) { # Check if the variable is continuous
cat( " Type: Continuous\n " )
# Calculate summary statistics related to mort30=0 and mort30=1
summ_by_mort<- surgery_data %>%
group_by( mort30) %>%
summarize(
mean = mean( .data [ [ variable] ] , na.rm = TRUE) ,
median = median( .data [ [ variable] ] , na.rm = TRUE) ,
sd = sd( .data [ [ variable] ] , na.rm = TRUE) ,
min = min( .data [ [ variable] ] , na.rm = TRUE) ,
max = max( .data [ [ variable] ] , na.rm = TRUE) ,
IQR= IQR( .data [ [ variable] ] , na.rm = TRUE) ,
n = n( )
)
print( summ_by_mort)
# Visualize using boxplots
p <- ggplot( surgery_data, aes( x = factor( mort30) , y = .data [ [ variable] ] ) ) +
geom_boxplot( ) +
labs ( title
= paste
( "Box Plot of" , variable
, "by mort30" ) , x = 'mort30' ,
y = variable)
print( p)
# perform T or Mann-Whitney test
if ( nrow( surgery_data[ surgery_data$mort30 == 0 , ] ) > 10 &&
nrow( surgery_data[ surgery_data$mort30 == 1 , ] ) > 10 ) {
normality <- shapiro.test ( surgery_data[ [ variable] ] )
if ( normality$p.value > 0.05 ) {
t_test <- t.test ( .data [ [ variable] ] ~mort30, data = surgery_data)
print( t_test)
} else {
mw_test<- wilcox.test ( .data [ [ variable] ] ~mort30, data = surgery_data)
print( mw_test)
}
} else {
print( "Not sufficient data to perform association tests for continuous variables" )
}
} else {
cat( " Type: Not Categorical or Continuous, check data\n " )
}
}
IyBJbnN0YWxsIGFuZCBsb2FkIG5lY2Vzc2FyeSBwYWNrYWdlcyAoaWYgbmVlZGVkKQppZighcmVxdWlyZShkcGx5cikpe2luc3RhbGwucGFja2FnZXMoImRwbHlyIil9CmlmKCFyZXF1aXJlKGdncGxvdDIpKXtpbnN0YWxsLnBhY2thZ2VzKCJnZ3Bsb3QyIil9CmlmKCFyZXF1aXJlKERlc2NUb29scykpe2luc3RhbGwucGFja2FnZXMoIkRlc2NUb29scyIpfQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoRGVzY1Rvb2xzKQoKCiMgQXNzdW1pbmcgeW91ciBkYXRhc2V0IGlzIG5hbWVkICdzdXJnZXJ5X2RhdGEnIAojIChBZGFwdCB0aGlzIHRvIHlvdXIgYWN0dWFsIGRhdGFzZXQgbmFtZSkKIyBFeGFtcGxlIGRhdGFzZXQgdXNpbmcgYnVpbHQgaW46CnN1cmdlcnlfZGF0YSA8LSBhaXJxdWFsaXR5CnN1cmdlcnlfZGF0YSRtb3J0MzAgPC0gc2FtcGxlKGMoMCwxKSxucm93KHN1cmdlcnlfZGF0YSksIHJlcGxhY2UgPSBUKQpzdXJnZXJ5X2RhdGEkY2F0ZWdvcmljYWwxIDwtIHNhbXBsZShjKCJBIiwiQiIsIkMiKSwgbnJvdyhzdXJnZXJ5X2RhdGEpLCByZXBsYWNlID0gVCkKc3VyZ2VyeV9kYXRhJGNhdGVnb3JpY2FsMiA8LSBzYW1wbGUoYygiWCIsIlkiKSwgbnJvdyhzdXJnZXJ5X2RhdGEpLCByZXBsYWNlID0gVCkKCgojIEdldCB2YXJpYWJsZSBuYW1lcyAKdmFyaWFibGVfbmFtZXMgPC0gbmFtZXMoc3VyZ2VyeV9kYXRhKQoKIyBSZW1vdmUgdGhlIG91dGNvbWUgdmFyaWFibGUgbW9ydDMwIGZyb20gdGhlIGxpc3QgYW5kIGFueSBjYXRlZ29yaWNhbCB2YXJpYWJsZXMgdGhhdCBzaG91bGQgbm90IGJlIGluY2x1ZGVkCnZhcmlhYmxlX25hbWVzIDwtIHZhcmlhYmxlX25hbWVzW3ZhcmlhYmxlX25hbWVzICE9ICJtb3J0MzAiIF0gIApjYXRlZ29yaWNhbF92YXJpYWJsZXNfZXhjbHVkZSA8LSBjKCJjYXRlZ29yaWNhbDEiLCAiY2F0ZWdvcmljYWwyIikKCgoKIyBMb29wIHRocm91Z2ggZWFjaCB2YXJpYWJsZQpmb3IodmFyaWFibGUgaW4gdmFyaWFibGVfbmFtZXMpewogIGNhdCgiXG4tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS1cbiIpCiAgY2F0KCJBbmFseXppbmcgdmFyaWFibGU6IiwgdmFyaWFibGUsICJcbiIpCiAgCiAgIyBDaGVjayBpZiB0aGUgdmFyaWFibGUgaXMgY2F0ZWdvcmljYWwgKGNoYXJhY3RlciBvciBmYWN0b3IpCiAgaWYoaXMuY2hhcmFjdGVyKHN1cmdlcnlfZGF0YVtbdmFyaWFibGVdXSkgfHwgaXMuZmFjdG9yKHN1cmdlcnlfZGF0YVtbdmFyaWFibGVdXSkgfHwgdmFyaWFibGUgJWluJSBjYXRlZ29yaWNhbF92YXJpYWJsZXNfZXhjbHVkZSl7IAogICAgY2F0KCIgIFR5cGU6IENhdGVnb3JpY2FsXG4iKQogICAgCiAgICAgICMgQ2FsY3VsYXRlIHByb3BvcnRpb25zIG9mIG1vcnQzMCB3aXRoaW4gZWFjaCBjYXRlZ29yeQogICAgICBwcm9wIDwtIHN1cmdlcnlfZGF0YSAlPiUKICAgICAgICBncm91cF9ieSguZGF0YVtbdmFyaWFibGVdXSkgJT4lCiAgICAgICAgc3VtbWFyaXplKAogICAgICAgICAgbW9ydDMwX3Byb3AgPSBtZWFuKG1vcnQzMCwgbmEucm0gPSBUUlVFKSwgIyBQcm9wb3J0aW9uIG9mIG1vcnQzMD0xCiAgICAgICAgICBuID0gbigpICMgTnVtYmVyIG9mIHJlY29yZHMgaW4gZWFjaCBjYXRlZ29yeQogICAgICAgICkKCiAgICAgICAgcHJpbnQocHJvcCkKCiAgICAgICAgIyBWaXN1YWxpemUgdXNpbmcgYSBiYXIgY2hhcnQgKG9wdGlvbmFsKQogICAgICAgIHAgPC0gZ2dwbG90KHByb3AsIGFlcyh4ID0gLmRhdGFbW3ZhcmlhYmxlXV0sIHk9IG1vcnQzMF9wcm9wKSkgKyAKICAgICAgICAgICAgICAgZ2VvbV9jb2woKSArIAogICAgICAgICAgICAgICAgbGFicyh0aXRsZT0gcGFzdGUoIlByb3BvcnRpb24gb2YgbW9ydDMwIGZvciAiLCB2YXJpYWJsZSwgIkNhdGVnb3JpZXMiKSwKICAgICAgICAgICAgICAgICAgICB4ID0gdmFyaWFibGUsCiAgICAgICAgICAgICAgICAgICAgeSA9ICJQcm9wb3J0aW9uIG1vcnQzMDoxIikgCiAgICAgICAgcHJpbnQocCkgCiAgICAgICAKICAgICAgICAjIHBlcmZvcm0gQ2hpLXNxdWFyZSB0ZXN0CiAgICAgICAgeHRhYiA8LSB0YWJsZShzdXJnZXJ5X2RhdGFbW3ZhcmlhYmxlXV0sIHN1cmdlcnlfZGF0YSRtb3J0MzApCiAgICAgICAgaWYobWluKHh0YWIpID49MTApewogICAgICAgICAgY2hpX3Rlc3QgPC0gY2hpc3EudGVzdCh4dGFiKQogICAgICAgICAgcHJpbnQoY2hpX3Rlc3QpCiAgICAgICAgfSBlbHNlIGlmKG1pbih4dGFiKT4wKXsKICAgICAgICAgICAgICBwcmludChmaXNoZXIudGVzdCh4dGFiKSkKICAgICAgICB9IGVsc2V7CiAgICAgICAgICBwcmludCgiTm90IHN1ZmZpY2llbnQgZGF0YSB0byBwZXJmb3JtIGFzc29jaWF0aW9uIHRlc3QiKQoKICAgICAgICB9CgogIH0gZWxzZSBpZihpcy5udW1lcmljKHN1cmdlcnlfZGF0YVtbdmFyaWFibGVdXSkpeyAjIENoZWNrIGlmIHRoZSB2YXJpYWJsZSBpcyBjb250aW51b3VzCiAgICBjYXQoIiAgVHlwZTogQ29udGludW91c1xuIikKICAgICMgQ2FsY3VsYXRlIHN1bW1hcnkgc3RhdGlzdGljcyByZWxhdGVkIHRvIG1vcnQzMD0wIGFuZCBtb3J0MzA9MQogICAgICAgIHN1bW1fYnlfbW9ydDwtIHN1cmdlcnlfZGF0YSAlPiUgCiAgICAgICAgICBncm91cF9ieShtb3J0MzApICU+JSAKICAgICAgICAgIHN1bW1hcml6ZSgKICAgICAgICAgICAgbWVhbiA9IG1lYW4oLmRhdGFbW3ZhcmlhYmxlXV0sIG5hLnJtID0gVFJVRSksCiAgICAgICAgICAgIG1lZGlhbiA9IG1lZGlhbiguZGF0YVtbdmFyaWFibGVdXSwgbmEucm0gPSBUUlVFKSwKICAgICAgICAgICAgc2QgPSBzZCguZGF0YVtbdmFyaWFibGVdXSwgbmEucm0gPSBUUlVFKSwKICAgICAgICAgICAgbWluID0gbWluKC5kYXRhW1t2YXJpYWJsZV1dLCBuYS5ybSA9IFRSVUUpLAogICAgICAgICAgICBtYXggPSBtYXgoLmRhdGFbW3ZhcmlhYmxlXV0sIG5hLnJtID0gVFJVRSksCiAgICAgICAgICAgIElRUj0gSVFSKC5kYXRhW1t2YXJpYWJsZV1dLCBuYS5ybSA9IFRSVUUpLAogICAgICAgICAgICAgIG4gPW4oKQogICAgICAgICAgKQogICAgICAgIHByaW50KHN1bW1fYnlfbW9ydCkKICAgIAogICAgICAgICMgVmlzdWFsaXplIHVzaW5nIGJveHBsb3RzCiAgICAgICBwIDwtIGdncGxvdChzdXJnZXJ5X2RhdGEsIGFlcyh4ID0gZmFjdG9yKG1vcnQzMCksIHkgPSAuZGF0YVtbdmFyaWFibGVdXSkpICsKICAgICAgICBnZW9tX2JveHBsb3QoKSArCiAgICAgICAgbGFicyh0aXRsZSA9IHBhc3RlKCJCb3ggUGxvdCBvZiIsIHZhcmlhYmxlLCAiYnkgbW9ydDMwIiksCiAgICAgICAgICAgICB4ID0gJ21vcnQzMCcsCiAgICAgICAgICAgICB5ID0gdmFyaWFibGUpCiAgICAgICAgcHJpbnQocCkKCiAgICAgICAgIyBwZXJmb3JtIFQgb3IgTWFubi1XaGl0bmV5IHRlc3QKICAgICAgICBpZihucm93KHN1cmdlcnlfZGF0YVtzdXJnZXJ5X2RhdGEkbW9ydDMwID09MCxdKSA+MTAgJiYKICAgICAgICAgICBucm93KHN1cmdlcnlfZGF0YVtzdXJnZXJ5X2RhdGEkbW9ydDMwID09MSxdKSA+MTApeyAgIAogICAgICAgICAgIG5vcm1hbGl0eSA8LSBzaGFwaXJvLnRlc3Qoc3VyZ2VyeV9kYXRhW1t2YXJpYWJsZV1dKQogICAgICAgICAgICBpZihub3JtYWxpdHkkcC52YWx1ZSA+MC4wNSl7CiAgICAgICAgICAgICB0X3Rlc3QgPC0gdC50ZXN0KC5kYXRhW1t2YXJpYWJsZV1dfm1vcnQzMCwgZGF0YSA9IHN1cmdlcnlfZGF0YSkKICAgICAgICAgICAgICAgIHByaW50KHRfdGVzdCkKICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgIG13X3Rlc3Q8LSB3aWxjb3gudGVzdCguZGF0YVtbdmFyaWFibGVdXX5tb3J0MzAsIGRhdGEgPSBzdXJnZXJ5X2RhdGEpCiAgICAgICAgICAgICAgcHJpbnQobXdfdGVzdCkKICAgICAgICAgICAgfQogICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICBwcmludCgiTm90IHN1ZmZpY2llbnQgZGF0YSB0byBwZXJmb3JtIGFzc29jaWF0aW9uIHRlc3RzIGZvciBjb250aW51b3VzIHZhcmlhYmxlcyIpCiAgICAgICAgICB9CiAgICAgICAgCiAgfWVsc2UgewogICAgY2F0KCIgIFR5cGU6IE5vdCBDYXRlZ29yaWNhbCBvciBDb250aW51b3VzLCBjaGVjayBkYXRhXG4iKQogIH0KfQ==