# Install necessary packages if not already installed
install.packages(c("reticulate", "data.table", "dplyr"))
library(reticulate)
library(data.table)
library(dplyr)
# 1. Ensure 'autogluon' is installed in your Python environment
# You can check which python executable path is used by:
# py_config()
# To target a specific environment, pass `envname` (and `pip = TRUE` to force pip), e.g.:
# reticulate::py_install("autogluon", envname = "r-reticulate", pip = TRUE)
# Here we install into the default environment.
reticulate::py_install("autogluon")
# 2. Create or Load Your Data
# Example: Using the Iris dataset
# Convert to a data.table and rename the target column to "label"
data("iris")
dt_iris <- as.data.table(iris)
setnames(dt_iris, "Species", "label")
# Create a temporary directory to hold the data and the Python script
temp_dir <- tempdir()
# Write the training data to CSV so the Python subprocess can read it
train_file <- file.path(temp_dir, "train.csv")
fwrite(dt_iris, train_file)
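# Optional (sketch, not used by the script below): hold out some rows so the
# model can later be scored on data it never saw. The held-out file could be
# passed to the Python script as an extra command-line argument.
# set.seed(42)
# test_idx <- sample(nrow(dt_iris), size = 30)
# fwrite(dt_iris[test_idx],  file.path(temp_dir, "test.csv"))
# fwrite(dt_iris[-test_idx], train_file)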
# 3. Define the Python script that uses AutoGluon
python_code <- '
from autogluon.tabular import TabularPredictor
import pandas as pd
import os
import sys
# Get arguments passed from R
train_file = sys.argv[1]
# Prepare data
train_data = pd.read_csv(train_file)
label = "label"
# `path` controls where AutoGluon persists the trained models (an arbitrary
# directory name, relative to the working directory of the subprocess)
predictor = TabularPredictor(label=label, path="autogluon_models")
predictor.fit(train_data, presets="best_quality")
# Print predictions and the model leaderboard so R can capture them from stdout
predictions = predictor.predict(train_data)
leaderboard = predictor.leaderboard(train_data)
print("Predictions:")
print(predictions)
print("Leaderboard:")
print(leaderboard)
# The predictor is persisted automatically under `path` during fit;
# save() makes that explicit
predictor.save()
'
# Save the Python code to a temporary file
python_file <- file.path(temp_dir, "autogluon_script.py")
writeLines(python_code, con = python_file)
# construct python argument list
python_args <- c(train_file)
# 4. Call the Python script as a subprocess, using the same interpreter that
#    reticulate is configured with (reticulate::py_exe())
res <- system2(command = reticulate::py_exe(),
               args = shQuote(c(python_file, python_args)),  # quote paths in case they contain spaces
               stdout = TRUE,
               stderr = TRUE)
# Print output
print(res)
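# system2(stdout = TRUE) attaches a "status" attribute to the result when the
# command exits non-zero; checking it makes failures easier to spot than
# scanning the captured log text.
status <- attr(res, "status")
if (!is.null(status) && status != 0) {
  warning("AutoGluon script exited with status ", status)
}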
# 5. Parse the captured stdout
# The Python script prints two labelled sections; locate the headers and split
# the output on them. This parsing is deliberately simple and can be made more
# robust (see the CSV-based alternative sketched below).
pred_start  <- grep("^Predictions:", res)[1]
board_start <- grep("^Leaderboard:", res)[1]
pred_lines  <- res[(pred_start + 1):(board_start - 1)]
board_lines <- res[(board_start + 1):length(res)]
# pandas prints a Series as "<row index> <value>" lines (plus a trailing
# "Name:/dtype:" line and possibly a "..." truncation marker), so only the
# rows that were actually printed can be recovered here
data_lines   <- grep("^\\s*\\d+\\s+", pred_lines, value = TRUE)
predicted_df <- data.frame(predicted = sub("^\\s*\\d+\\s+", "", data_lines))
print("------- Predictions ------")
print(predicted_df)
# The leaderboard is printed as a whitespace-separated pandas DataFrame;
# fread() can usually parse it, but the layout may differ between
# AutoGluon/pandas versions, hence the tryCatch()
board_table <- tryCatch(
  fread(paste(board_lines, collapse = "\n")),
  error = function(cond) {
    message("Issue parsing leaderboard, try upgrading AutoGluon: ", cond)
    NULL
  }
)
if (!is.null(board_table)) {
  print("------- Leaderboard ------")
  print(board_table)
} else {
  print("Not able to parse leaderboard; inspect the raw output in `res`.")
}
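# A more robust alternative to parsing stdout (a sketch only, not wired into
# the script above): have the Python side write its results to CSV, e.g.
#   predictions.to_csv(os.path.join(out_dir, "predictions.csv"), index=False)
#   leaderboard.to_csv(os.path.join(out_dir, "leaderboard.csv"), index=False)
# where `out_dir` would be a hypothetical extra command-line argument, and
# then read the files back in R:
# predictions_df <- fread(file.path(temp_dir, "predictions.csv"))
# leaderboard_df <- fread(file.path(temp_dir, "leaderboard.csv"))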
# optional: load the trained predictor directly with reticulate (uncomment).
# "autogluon_models" is the `path` set in the Python script above; reticulate
# converts data.frames to pandas DataFrames automatically.
#
# ag <- reticulate::import("autogluon.tabular")
# predictor <- ag$TabularPredictor$load("autogluon_models")
# predictions <- predictor$predict(as.data.frame(dt_iris[, 1:4]))
# predictions
# leaderboard <- predictor$leaderboard(as.data.frame(dt_iris))  # label column is needed for scoring
# leaderboard
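# If the predictor was reloaded as above, a quick in-sample sanity check
# (sketch) is to cross-tabulate predicted against true species:
# table(actual = dt_iris$label, predicted = as.character(predictions))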
# Clean up the temporary files created above (tempdir() itself is managed by
# R, so remove only the files we wrote rather than the whole directory)
unlink(c(train_file, python_file))