-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrandomized_data_set.R
34 lines (27 loc) · 1.13 KB
/
randomized_data_set.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Seeding is currently disabled, so every run draws a different sample.
# Uncomment the next line to make the output reproducible.
# set.seed(123)

# Capture command-line arguments ----
# Expected positional arguments (anything after the 7th is ignored):
#   1. commit_hash           tag embedded in the output file name
#   2. left_prob_0           sampling weight for left == 0
#   3. left_prob_1           sampling weight for left == 1
#   4. work_accident_prob_0  sampling weight for Work_accident == 0
#   5. work_accident_prob_1  sampling weight for Work_accident == 1
#   6. sat_level             amount added to satisfaction_level
#   7. eval_level            amount subtracted from last_evaluation
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 7L) {
  stop(
    "Expected 7 arguments (commit_hash, left_prob_0, left_prob_1, ",
    "work_accident_prob_0, work_accident_prob_1, sat_level, eval_level) ",
    "but received ", length(args), ".",
    call. = FALSE
  )
}

# Convert one raw argument to a finite number, failing with a message that
# names the offending argument instead of silently producing NA.
parse_numeric_arg <- function(value, name) {
  # The coercion warning is replaced by the explicit error below.
  parsed <- suppressWarnings(as.numeric(value))
  if (!is.finite(parsed)) {
    stop(
      "Argument '", name, "' must be a finite number, got '", value, "'.",
      call. = FALSE
    )
  }
  parsed
}

commit_hash <- args[1]
left_prob_0 <- parse_numeric_arg(args[2], "left_prob_0")
left_prob_1 <- parse_numeric_arg(args[3], "left_prob_1")
work_accident_prob_0 <- parse_numeric_arg(args[4], "work_accident_prob_0")
work_accident_prob_1 <- parse_numeric_arg(args[5], "work_accident_prob_1")
sat_level <- parse_numeric_arg(args[6], "sat_level")
eval_level <- parse_numeric_arg(args[7], "eval_level")

# sample(prob = ) needs non-negative weights with at least one positive
# entry; check that here so the failure points at the bad argument.
sampling_weights <- c(
  left_prob_0, left_prob_1, work_accident_prob_0, work_accident_prob_1
)
if (any(sampling_weights < 0)) {
  stop("Sampling weights must be non-negative.", call. = FALSE)
}
if (left_prob_0 + left_prob_1 <= 0) {
  stop(
    "left_prob_0 and left_prob_1 must not both be zero.",
    call. = FALSE
  )
}
if (work_accident_prob_0 + work_accident_prob_1 <= 0) {
  stop(
    "work_accident_prob_0 and work_accident_prob_1 must not both be zero.",
    call. = FALSE
  )
}

# Print the commit hash for confirmation
cat("Using commit hash:", commit_hash, "\n")
# Load the necessary libraries
library(dplyr)
library(readxl)
# Read the first worksheet of the source workbook
original_data_set <- readxl::read_excel("./data_set.xlsx", sheet = 1)

# Build the modified copy ----
# Weight pairs for the two re-drawn 0/1 columns (weight for 0, weight for 1)
left_weights <- c(left_prob_0, left_prob_1)
work_accident_weights <- c(work_accident_prob_0, work_accident_prob_1)

# satisfaction_level is shifted up by sat_level and last_evaluation down by
# eval_level; left and Work_accident are replaced by independent weighted
# draws, one per row. left is drawn before Work_accident.
modified_data_set <- mutate(
  original_data_set,
  satisfaction_level = satisfaction_level + sat_level,
  last_evaluation = last_evaluation - eval_level,
  left = sample(
    c(0, 1),
    size = n(),
    replace = TRUE,
    prob = left_weights
  ),
  Work_accident = sample(
    c(0, 1),
    size = n(),
    replace = TRUE,
    prob = work_accident_weights
  )
)
# Save the modified dataset ----
# The file is written to ./data/modified_data_set_<commit_hash>.csv.
output_dir <- "./data"
# write.csv() does not create missing directories, so make sure the target
# folder exists first; this does nothing when it is already present.
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
output_path <- file.path(
  output_dir,
  paste0("modified_data_set_", commit_hash, ".csv")
)
write.csv(modified_data_set, output_path, row.names = FALSE)