diff --git a/probe_src/libprobe/generated/libc_hooks.c b/probe_src/libprobe/generated/libc_hooks.c index 38fc2716..bd316b85 100644 --- a/probe_src/libprobe/generated/libc_hooks.c +++ b/probe_src/libprobe/generated/libc_hooks.c @@ -1523,8 +1523,13 @@ char * mkdtemp(char *template) int execv(const char *filename, char * const argv[]) { maybe_init_thread(); - putenv_probe_vars(); - struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0}}, {0}, 0, 0}; + size_t argc = 0; + char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const *updated_env = update_env_with_probe_vars(environ, &envc); + char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); + struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; + op.data.exec.argc = argc; if (likely(prov_log_is_enabled())) { prov_log_try(op); @@ -1534,8 +1539,9 @@ int execv(const char *filename, char * const argv[]) { prov_log_save(); } - int ret = unwrapped_execv(filename, argv); + int ret = unwrapped_execve(filename, argv, updated_env); int saved_errno = errno; + free((char **) updated_env); if (likely(prov_log_is_enabled())) { assert(errno > 0); @@ -1549,8 +1555,22 @@ int execv(const char *filename, char * const argv[]) int execl(const char *filename, const char *arg0, ...) 
{ maybe_init_thread(); - putenv_probe_vars(); - struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0}}, {0}, 0, 0}; + size_t argc = COUNT_NONNULL_VARARGS(arg0); + char **argv = malloc((argc + 1) * (sizeof(char *))); + va_list ap; + va_start(ap, arg0); + for (size_t i = 0; i < argc; ++i) + { + argv[i] = va_arg(ap, __type_charp); + } + + va_end(ap); + argv[argc] = NULL; + char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const *updated_env = update_env_with_probe_vars(environ, &envc); + char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); + struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; if (likely(prov_log_is_enabled())) { prov_log_try(op); @@ -1560,9 +1580,10 @@ int execl(const char *filename, const char *arg0, ...) { prov_log_save(); } - size_t varargs_size = (sizeof(char *)) + ((COUNT_NONNULL_VARARGS(arg0) + 1) * (sizeof(char *))); - int ret = *((int *) __builtin_apply((void (*)()) unwrapped_execl, __builtin_apply_args(), varargs_size)); + int ret = unwrapped_execve(filename, argv, updated_env); int saved_errno = errno; + free((char **) updated_env); + free((char **) argv); if (likely(prov_log_is_enabled())) { assert(errno > 0); @@ -1576,8 +1597,12 @@ int execl(const char *filename, const char *arg0, ...) 
int execve(const char *filename, char * const argv[], char * const env[]) { maybe_init_thread(); - env = update_env_with_probe_vars(env); - struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0}}, {0}, 0, 0}; + size_t argc = 0; + char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const *updated_env = update_env_with_probe_vars(env, &envc); + char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); + struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; if (likely(prov_log_is_enabled())) { prov_log_try(op); @@ -1587,10 +1612,9 @@ int execve(const char *filename, char * const argv[], char * const env[]) { prov_log_save(); } - DEBUG("in Execve"); - int ret = unwrapped_execve(filename, argv, env); + int ret = unwrapped_execve(filename, argv, updated_env); int saved_errno = errno; - free((char **) env); + free((char **) updated_env); if (likely(prov_log_is_enabled())) { assert(errno > 0); @@ -1604,8 +1628,12 @@ int execve(const char *filename, char * const argv[], char * const env[]) int fexecve(int fd, char * const argv[], char * const env[]) { maybe_init_thread(); - env = update_env_with_probe_vars(env); - struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(fd, "", AT_EMPTY_PATH), .ferrno = 0}}, {0}, 0, 0}; + size_t argc = 0; + char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const *updated_env = update_env_with_probe_vars(env, &envc); + char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); + struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(fd, "", AT_EMPTY_PATH), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; if 
(likely(prov_log_is_enabled())) { prov_log_try(op); @@ -1615,9 +1643,9 @@ int fexecve(int fd, char * const argv[], char * const env[]) { prov_log_save(); } - int ret = unwrapped_fexecve(fd, argv, env); + int ret = unwrapped_fexecve(fd, argv, updated_env); int saved_errno = errno; - free((char **) env); + free((char **) updated_env); if (likely(prov_log_is_enabled())) { assert(errno > 0); @@ -1631,7 +1659,23 @@ int fexecve(int fd, char * const argv[], char * const env[]) int execle(const char *filename, const char *arg0, ...) { maybe_init_thread(); - struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0}}, {0}, 0, 0}; + size_t argc = COUNT_NONNULL_VARARGS(arg0) - 1; + char **argv = malloc((argc + 1) * (sizeof(char *))); + va_list ap; + va_start(ap, arg0); + for (size_t i = 0; i < argc; ++i) + { + argv[i] = va_arg(ap, __type_charp); + } + + argv[argc] = NULL; + char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + char **env = va_arg(ap, __type_charpp); + va_end(ap); + size_t envc = 0; + char * const *updated_env = update_env_with_probe_vars(env, &envc); + char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); + struct Op op = {exec_op_code, {.exec = {.path = create_path_lazy(0, filename, 0), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; if (likely(prov_log_is_enabled())) { prov_log_try(op); @@ -1642,21 +1686,10 @@ int execle(const char *filename, const char *arg0, ...) 
prov_log_save(); } ERROR("Not implemented; I need to figure out how to update the environment."); - size_t argc = COUNT_NONNULL_VARARGS(arg0); - char **arg_vec = malloc(argc * (sizeof(char *))); - va_list ap; - va_start(ap, arg0); - for (size_t i = 0; i < (argc - 1); ++i) - { - arg_vec[i] = va_arg(ap, __type_charp); - } - - char **env = va_arg(ap, __type_charpp); - va_end(ap); - char * const *updated_env = update_env_with_probe_vars(env); - int ret = unwrapped_execve(filename, arg_vec, updated_env); + int ret = unwrapped_execve(filename, argv, updated_env); int saved_errno = errno; free((char **) updated_env); + free((char **) argv); if (likely(prov_log_is_enabled())) { assert(errno > 0); @@ -1670,10 +1703,14 @@ int execle(const char *filename, const char *arg0, ...) int execvp(const char *filename, char * const argv[]) { maybe_init_thread(); - putenv_probe_vars(); char *bin_path = arena_calloc(get_data_arena(), PATH_MAX + 1, sizeof(char)); bool found = lookup_on_path(filename, bin_path); - struct Op op = {exec_op_code, {.exec = {.path = (found) ? (create_path_lazy(0, bin_path, 0)) : (null_path), .ferrno = 0}}, {0}, 0, 0}; + size_t argc = 0; + char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const *updated_env = update_env_with_probe_vars(environ, &envc); + char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); + struct Op op = {exec_op_code, {.exec = {.path = (found) ? 
(create_path_lazy(0, bin_path, 0)) : (null_path), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; if (likely(prov_log_is_enabled())) { prov_log_try(op); @@ -1683,8 +1720,9 @@ int execvp(const char *filename, char * const argv[]) { prov_log_save(); } - int ret = unwrapped_execvp(filename, argv); + int ret = unwrapped_execvpe(filename, argv, updated_env); int saved_errno = errno; + free((char **) updated_env); if (likely(prov_log_is_enabled())) { assert(errno > 0); @@ -1698,10 +1736,24 @@ int execvp(const char *filename, char * const argv[]) int execlp(const char *filename, const char *arg0, ...) { maybe_init_thread(); - putenv_probe_vars(); char *bin_path = arena_calloc(get_data_arena(), PATH_MAX + 1, sizeof(char)); bool found = lookup_on_path(filename, bin_path); - struct Op op = {exec_op_code, {.exec = {.path = (found) ? (create_path_lazy(0, bin_path, 0)) : (null_path), .ferrno = 0}}, {0}, 0, 0}; + size_t argc = COUNT_NONNULL_VARARGS(arg0); + char **argv = malloc((argc + 1) * (sizeof(char *))); + va_list ap; + va_start(ap, arg0); + for (size_t i = 0; i < argc; ++i) + { + argv[i] = va_arg(ap, __type_charp); + } + + argv[argc] = NULL; + va_end(ap); + char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const *updated_env = update_env_with_probe_vars(environ, &envc); + char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); + struct Op op = {exec_op_code, {.exec = {.path = (found) ? (create_path_lazy(0, bin_path, 0)) : (null_path), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; if (likely(prov_log_is_enabled())) { prov_log_try(op); @@ -1711,9 +1763,10 @@ int execlp(const char *filename, const char *arg0, ...) 
{ prov_log_save(); } - size_t varargs_size = (sizeof(char *)) + ((COUNT_NONNULL_VARARGS(arg0) + 1) * (sizeof(char *))); - int ret = *((int *) __builtin_apply((void (*)()) unwrapped_execlp, __builtin_apply_args(), varargs_size)); + int ret = unwrapped_execvpe(filename, argv, updated_env); int saved_errno = errno; + free((char **) updated_env); + free((char **) argv); if (likely(prov_log_is_enabled())) { assert(errno > 0); @@ -1727,10 +1780,14 @@ int execlp(const char *filename, const char *arg0, ...) int execvpe(const char *filename, char * const argv[], char * const envp[]) { maybe_init_thread(); - envp = update_env_with_probe_vars(envp); char *bin_path = arena_calloc(get_data_arena(), PATH_MAX + 1, sizeof(char)); bool found = lookup_on_path(filename, bin_path); - struct Op op = {exec_op_code, {.exec = {.path = (found) ? (create_path_lazy(0, bin_path, 0)) : (null_path), .ferrno = 0}}, {0}, 0, 0}; + size_t argc = 0; + char * const *copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const *updated_env = update_env_with_probe_vars(envp, &envc); + char * const *copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); + struct Op op = {exec_op_code, {.exec = {.path = (found) ? 
(create_path_lazy(0, bin_path, 0)) : (null_path), .ferrno = 0, .argc = argc, .argv = copied_argv, .envc = envc, .env = copied_updated_env}}, {0}, 0, 0}; if (likely(prov_log_is_enabled())) { prov_log_try(op); @@ -1740,9 +1797,9 @@ int execvpe(const char *filename, char * const argv[], char * const envp[]) { prov_log_save(); } - int ret = unwrapped_execvpe(filename, argv, envp); + int ret = unwrapped_execvpe(filename, argv, updated_env); int saved_errno = errno; - free((char **) envp); + free((char **) updated_env); if (likely(prov_log_is_enabled())) { assert(errno > 0); diff --git a/probe_src/libprobe/generator/libc_hooks_source.c b/probe_src/libprobe/generator/libc_hooks_source.c index 890a7247..4d5d4828 100644 --- a/probe_src/libprobe/generator/libc_hooks_source.c +++ b/probe_src/libprobe/generator/libc_hooks_source.c @@ -1607,17 +1607,26 @@ char * mkdtemp (char *template) { } /* Need: We need this because exec kills all global variables, we need to dump our tables before continuing */ int execv (const char *filename, char *const argv[]) { void* pre_call = ({ - putenv_probe_vars(); + size_t argc = 0; /* 0 asks arena_copy_argv to count argv itself; any other value is trusted as the length */ + char * const* copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const* updated_env = update_env_with_probe_vars(environ, &envc); + char * const* copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); struct Op op = { exec_op_code, {.exec = { .path = create_path_lazy(0, filename, 0), .ferrno = 0, + .argc = argc, + .argv = copied_argv, + .envc = envc, + .env = copied_updated_env, }}, {0}, 0, 0, }; + op.data.exec.argc = argc; if (likely(prov_log_is_enabled())) { prov_log_try(op); prov_log_save(); @@ -1625,6 +1634,9 @@ int execv (const char *filename, char *const argv[]) { prov_log_save(); } }); + void* call = ({ + int ret = unwrapped_execve(filename, argv, updated_env); /* execv takes a pathname and must not search PATH, so forward to execve rather than execvpe */ + }); void* post_call = ({ /* * If exec is successful { @@ -1641,6 +1653,7 @@ int execv (const char *filename, char *const argv[]) { * 
There must have been an error * } * */ + free((char**) updated_env); if (likely(prov_log_is_enabled())) { assert(errno > 0); op.data.exec.ferrno = saved_errno; @@ -1650,12 +1663,28 @@ int execv (const char *filename, char *const argv[]) { } int execl (const char *filename, const char *arg0, ...) { void* pre_call = ({ - putenv_probe_vars(); + size_t argc = COUNT_NONNULL_VARARGS(arg0); + char** argv = malloc((argc + 1) * sizeof(char*)); + va_list ap; + va_start(ap, arg0); + for (size_t i = 0; i < argc; ++i) { + argv[i] = va_arg(ap, __type_charp); + } + va_end(ap); + argv[argc] = NULL; + char * const* copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const* updated_env = update_env_with_probe_vars(environ, &envc); + char * const* copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); struct Op op = { exec_op_code, {.exec = { .path = create_path_lazy(0, filename, 0), .ferrno = 0, + .argc = argc, + .argv = copied_argv, + .envc = envc, + .env = copied_updated_env, }}, {0}, 0, @@ -1668,7 +1697,12 @@ int execl (const char *filename, const char *arg0, ...) { prov_log_save(); } }); + void* call = ({ + int ret = unwrapped_execve(filename, argv, updated_env); /* execl takes a pathname and must not search PATH, so forward to execve rather than execvpe */ + }); void* post_call = ({ + free((char**) updated_env); + free((char**) argv); if (likely(prov_log_is_enabled())) { assert(errno > 0); op.data.exec.ferrno = saved_errno; @@ -1679,12 +1713,20 @@ int execl (const char *filename, const char *arg0, ...) 
{ } int execve (const char *filename, char *const argv[], char *const env[]) { void* pre_call = ({ - env = update_env_with_probe_vars(env); + size_t argc = 0; + char * const* copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const* updated_env = update_env_with_probe_vars(env, &envc); + char * const* copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); struct Op op = { exec_op_code, {.exec = { .path = create_path_lazy(0, filename, 0), .ferrno = 0, + .argc = argc, + .argv = copied_argv, + .envc = envc, + .env = copied_updated_env, }}, {0}, 0, @@ -1696,10 +1738,12 @@ int execve (const char *filename, char *const argv[], char *const env[]) { } else { prov_log_save(); } - DEBUG("in Execve"); + }); + void* call = ({ + int ret = unwrapped_execve(filename, argv, updated_env); /* keep execve semantics: no PATH search, only the environment is swapped for the probe-augmented copy */ }); void* post_call = ({ - free((char**) env); // This is our own malloc from update_env_with_probe_vars, so it should be safe to free + free((char**) updated_env); if (likely(prov_log_is_enabled())) { assert(errno > 0); op.data.exec.ferrno = saved_errno; @@ -1709,12 +1753,20 @@ int execve (const char *filename, char *const argv[], char *const env[]) { } int fexecve (int fd, char *const argv[], char *const env[]) { void* pre_call = ({ - env = update_env_with_probe_vars(env); + size_t argc = 0; + char * const* copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const* updated_env = update_env_with_probe_vars(env, &envc); + char * const* copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); struct Op op = { exec_op_code, {.exec = { .path = create_path_lazy(fd, "", AT_EMPTY_PATH), .ferrno = 0, + .argc = argc, + .argv = copied_argv, + .envc = envc, + .env = copied_updated_env, }}, {0}, 0, @@ -1727,8 +1779,11 @@ int fexecve (int fd, char *const argv[], char *const env[]) { prov_log_save(); } }); + void* call = ({ + int ret = unwrapped_fexecve(fd, argv, updated_env); + }); void* 
post_call = ({ - free((char**) env); // This is our own malloc from update_env_with_probe_vars, so it should be safe to free + free((char**) updated_env); if (likely(prov_log_is_enabled())) { assert(errno > 0); op.data.exec.ferrno = saved_errno; @@ -1738,11 +1793,29 @@ int fexecve (int fd, char *const argv[], char *const env[]) { } int execle (const char *filename, const char *arg0, ...) { void* pre_call = ({ + size_t argc = COUNT_NONNULL_VARARGS(arg0) - 1; + char** argv = malloc((argc + 1) * sizeof(char*)); + va_list ap; + va_start(ap, arg0); + for (size_t i = 0; i < argc; ++i) { + argv[i] = va_arg(ap, __type_charp); + } + argv[argc] = NULL; + char * const* copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + char** env = va_arg(ap, __type_charpp); + va_end(ap); + size_t envc = 0; + char * const* updated_env = update_env_with_probe_vars(env, &envc); + char * const* copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); struct Op op = { exec_op_code, {.exec = { .path = create_path_lazy(0, filename, 0), .ferrno = 0, + .argc = argc, + .argv = copied_argv, + .envc = envc, + .env = copied_updated_env, }}, {0}, 0, @@ -1757,20 +1830,11 @@ int execle (const char *filename, const char *arg0, ...) 
{ ERROR("Not implemented; I need to figure out how to update the environment."); }); void* call = ({ - size_t argc = COUNT_NONNULL_VARARGS(arg0); - char** arg_vec = malloc(argc * sizeof(char*)); - va_list ap; - va_start(ap, arg0); - for (size_t i = 0; i < argc - 1; ++i) { - arg_vec[i] = va_arg(ap, __type_charp); - } - char** env = va_arg(ap, __type_charpp); - va_end(ap); - char * const* updated_env = update_env_with_probe_vars(env); - int ret = unwrapped_execve(filename, arg_vec, updated_env); + int ret = unwrapped_execve(filename, argv, updated_env); /* execle takes a pathname and must not search PATH, so forward to execve rather than execvpe */ }); void* post_call = ({ free((char**)updated_env); + free((char**)argv); if (likely(prov_log_is_enabled())) { assert(errno > 0); op.data.exec.ferrno = saved_errno; @@ -1781,9 +1845,13 @@ int execle (const char *filename, const char *arg0, ...) { } int execvp (const char *filename, char *const argv[]) { void* pre_call = ({ - putenv_probe_vars(); char* bin_path = arena_calloc(get_data_arena(), PATH_MAX + 1, sizeof(char)); bool found = lookup_on_path(filename, bin_path); + size_t argc = 0; + char * const* copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const* updated_env = update_env_with_probe_vars(environ, &envc); + char * const* copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); struct Op op = { exec_op_code, {.exec = { @@ -1792,6 +1860,10 @@ int execvp (const char *filename, char *const argv[]) { * */ .path = found ? 
create_path_lazy(0, bin_path, 0) : null_path, .ferrno = 0, + .argc = argc, + .argv = copied_argv, + .envc = envc, + .env = copied_updated_env, }}, {0}, 0, @@ -1804,7 +1876,11 @@ int execvp (const char *filename, char *const argv[]) { prov_log_save(); } }); + void* call = ({ + int ret = unwrapped_execvpe(filename, argv, updated_env); + }); void* post_call = ({ + free((char**) updated_env); if (likely(prov_log_is_enabled())) { assert(errno > 0); op.data.exec.ferrno = saved_errno; @@ -1815,9 +1891,21 @@ int execvp (const char *filename, char *const argv[]) { int execlp (const char *filename, const char *arg0, ...) { size_t varargs_size = sizeof(char*) + (COUNT_NONNULL_VARARGS(arg0) + 1) * sizeof(char*); void* pre_call = ({ - putenv_probe_vars(); char* bin_path = arena_calloc(get_data_arena(), PATH_MAX + 1, sizeof(char)); bool found = lookup_on_path(filename, bin_path); + size_t argc = COUNT_NONNULL_VARARGS(arg0); + char** argv = malloc((argc + 1) * sizeof(char*)); + va_list ap; + va_start(ap, arg0); + for (size_t i = 0; i < argc; ++i) { + argv[i] = va_arg(ap, __type_charp); + } + argv[argc] = NULL; + va_end(ap); + char * const* copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const* updated_env = update_env_with_probe_vars(environ, &envc); + char * const* copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); struct Op op = { exec_op_code, {.exec = { @@ -1826,6 +1914,10 @@ int execlp (const char *filename, const char *arg0, ...) { * */ .path = found ? create_path_lazy(0, bin_path, 0) : null_path, .ferrno = 0, + .argc = argc, + .argv = copied_argv, + .envc = envc, + .env = copied_updated_env, }}, {0}, 0, @@ -1838,7 +1930,12 @@ int execlp (const char *filename, const char *arg0, ...) 
{ prov_log_save(); } }); + void* call = ({ + int ret = unwrapped_execvpe(filename, argv, updated_env); + }); void* post_call = ({ + free((char**) updated_env); + free((char**) argv); if (likely(prov_log_is_enabled())) { assert(errno > 0); op.data.exec.ferrno = saved_errno; @@ -1850,9 +1947,13 @@ int execlp (const char *filename, const char *arg0, ...) { /* Docs: https://linux.die.net/man/3/execvpe1 */ int execvpe(const char *filename, char *const argv[], char *const envp[]) { void* pre_call = ({ - envp = update_env_with_probe_vars(envp); char* bin_path = arena_calloc(get_data_arena(), PATH_MAX + 1, sizeof(char)); bool found = lookup_on_path(filename, bin_path); + size_t argc = 0; + char * const* copied_argv = arena_copy_argv(get_data_arena(), argv, &argc); + size_t envc = 0; + char * const* updated_env = update_env_with_probe_vars(envp, &envc); + char * const* copied_updated_env = arena_copy_argv(get_data_arena(), updated_env, &envc); struct Op op = { exec_op_code, {.exec = { @@ -1861,6 +1962,10 @@ int execvpe(const char *filename, char *const argv[], char *const envp[]) { * */ .path = found ? 
create_path_lazy(0, bin_path, 0) : null_path, .ferrno = 0, + .argc = argc, + .argv = copied_argv, + .envc = envc, + .env = copied_updated_env, }}, {0}, 0, @@ -1873,8 +1978,11 @@ int execvpe(const char *filename, char *const argv[], char *const envp[]) { prov_log_save(); } }); + void* call = ({ + int ret = unwrapped_execvpe(filename, argv, updated_env); + }); void* post_call = ({ - free((char**) envp); // This is our own malloc from update_env_with_probe_vars, so it should be safe to free + free((char**) updated_env); // This is our own malloc from update_env_with_probe_vars, so it should be safe to free if (likely(prov_log_is_enabled())) { assert(errno > 0); op.data.exec.ferrno = saved_errno; diff --git a/probe_src/libprobe/include/libprobe/prov_ops.h b/probe_src/libprobe/include/libprobe/prov_ops.h index 9e0764a5..674f5487 100644 --- a/probe_src/libprobe/include/libprobe/prov_ops.h +++ b/probe_src/libprobe/include/libprobe/prov_ops.h @@ -52,6 +52,10 @@ struct ChdirOp { struct ExecOp { struct Path path; int ferrno; + size_t argc; + char * const* argv; + size_t envc; + char * const* env; }; enum TaskType { diff --git a/probe_src/libprobe/src/global_state.c b/probe_src/libprobe/src/global_state.c index 2a7ebe1b..64472869 100644 --- a/probe_src/libprobe/src/global_state.c +++ b/probe_src/libprobe/src/global_state.c @@ -210,7 +210,7 @@ static void reinit_thread_global_state() { init_log_arena(); } -static char* const* update_env_with_probe_vars(char* const* user_env) { +static char* const* update_env_with_probe_vars(char* const* user_env, size_t* updated_env_size) { /* Define env vars we care about */ const char* probe_vars[] = { is_proc_root_env_var, @@ -254,15 +254,15 @@ static char* const* update_env_with_probe_vars(char* const* user_env) { } /* Allocate a new env, based on the user's requested env, with our probe vars */ - char** new_env = malloc((user_env_size + probe_var_count + 1) * sizeof(char*)); - if (!new_env) { + char** updated_env = malloc((user_env_size + 
probe_var_count + 1) * sizeof(char*)); + if (!updated_env) { ERROR("Out of mem"); } /* Copy user's env to new env * Clear out existence of probe_vars, if they happen to exist in the user's requested env. * */ - size_t new_env_size = 0; + *updated_env_size = 0; for (char* const* ep = user_env; *ep; ++ep) { bool is_probe_var = false; for (size_t i = 0; i < probe_var_count; ++i) { @@ -272,8 +272,8 @@ static char* const* update_env_with_probe_vars(char* const* user_env) { } } if (!is_probe_var) { - new_env[new_env_size] = *ep; - new_env_size++; + updated_env[*updated_env_size] = *ep; + (*updated_env_size)++; } } @@ -281,40 +281,12 @@ static char* const* update_env_with_probe_vars(char* const* user_env) { * Now add our _desired_ versions of the probe vars we care about. */ for (size_t i = 0; i < probe_var_count; ++i) { - new_env[new_env_size + i] = probe_entries[i]; + updated_env[*updated_env_size] = probe_entries[i]; + (*updated_env_size)++; } /* Top it off with a NULL */ - new_env[new_env_size + probe_var_count] = NULL; + updated_env[*updated_env_size] = NULL; - return new_env; -} - -static void putenv_probe_vars() { - /* TODO: We shouldn't doo this. - * Because it makes observable changes to the parent process. - * Instead, we should turn execv into execve and use update_env_with_probe_vars(copy(environ)). 
*/ - - /* Define env vars we care about */ - const char* probe_vars[] = { - is_proc_root_env_var, - exec_epoch_env_var, - pid_env_var, - probe_dir_env_var, - }; - char exec_epoch_str[unsigned_int_string_size]; - CHECK_SNPRINTF(exec_epoch_str, unsigned_int_string_size, "%d", get_exec_epoch()); - char pid_str[unsigned_int_string_size]; - CHECK_SNPRINTF(pid_str, unsigned_int_string_size, "%d", getpid()); - const char* probe_vals[] = { - "0", - exec_epoch_str, - pid_str, - __probe_dir, - }; - const size_t probe_var_count = sizeof(probe_vars) / sizeof(char*); - - for (size_t i = 0; i < probe_var_count; ++i) { - setenv(probe_vars[i], probe_vals[i], 1 /* overwrite*/); - } + return updated_env; } diff --git a/probe_src/libprobe/src/lib.c b/probe_src/libprobe/src/lib.c index 9a808474..846919af 100644 --- a/probe_src/libprobe/src/lib.c +++ b/probe_src/libprobe/src/lib.c @@ -58,16 +58,16 @@ static __thread bool __thread_inited = false; #include "prov_enable.c" +#define ARENA_USE_UNWRAPPED_LIBC +#define ARENA_PERROR +#include "../arena/include/arena.h" + #include "util.c" /* #include "fd_table.c" */ #include "../include/libprobe/prov_ops.h" -#define ARENA_USE_UNWRAPPED_LIBC -#define ARENA_PERROR -#include "../arena/include/arena.h" - #include "global_state.c" #include "prov_ops.c" diff --git a/probe_src/libprobe/src/util.c b/probe_src/libprobe/src/util.c index fb207b39..ff0e8b3c 100644 --- a/probe_src/libprobe/src/util.c +++ b/probe_src/libprobe/src/util.c @@ -159,12 +159,12 @@ extern char** environ; static const char* getenv_copy(const char* name) { /* Validate input */ - assert(name != NULL); + assert(name); assert(strchr(name, '=') == NULL); - assert(name[0] != '\0'); + assert(name[0]); assert(environ); size_t name_len = strlen(name); - for (char **ep = environ; *ep != NULL; ++ep) { + for (char **ep = environ; *ep; ++ep) { if (unlikely(strncmp(name, *ep, name_len) == 0) && likely((*ep)[name_len] == '=')) { return *ep + name_len + 1; } @@ -226,7 +226,7 @@ static OWNED const 
char* dirfd_path(int dirfd) { * -1 is never a valid fd because it's the error value for syscalls that return fds, so we can do the same. */ static int try_dirfd(BORROWED DIR* dirp) { - return (dirp != NULL) ? (dirfd(dirp)) : (-1); + return dirp ? (dirfd(dirp)) : (-1); } #ifndef NDEBUG @@ -242,7 +242,7 @@ static void listdir(const char* name, int indent) { if (!(dir = unwrapped_opendir(name))) return; - while ((entry = unwrapped_readdir(dir)) != NULL) { + while ((entry = unwrapped_readdir(dir))) { if (entry->d_type == DT_DIR) { char path[1024]; if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) @@ -262,10 +262,10 @@ static void listdir(const char* name, int indent) { /* strtol in libc is not totally static; * It is defined itself as a static function, but that static code calls some dynamically loaded function. * This would be fine, except some older versions of glibc may not have the deynamic function. */ -unsigned long my_strtoul(const char *restrict string, char **restrict string_end, int base) { +static unsigned long my_strtoul(const char *restrict string, char **restrict string_end, int base) { unsigned long accumulator = 0; const char* ptr = string; - while (*ptr != '\0') { + while (*ptr) { if ('0' <= *ptr && *ptr < ('0' + base)) { accumulator = accumulator * base + (*ptr - '0'); } else { @@ -278,3 +278,30 @@ unsigned long my_strtoul(const char *restrict string, char **restrict string_end } return accumulator; } + +/* Copy char* const argv[] into the arena. + * If argc argument is 0, compute argc and store there (if the size actually was zero, this is no bug). + * If argc argument is positive, assume that is the argc. 
+ * */ +static char* const* arena_copy_argv(struct ArenaDir* arena_dir, char * const * argv, size_t* argc) { + if (*argc == 0) { + /* Compute argc and store in *argc */ + for (char * const* argv_p = argv; *argv_p; ++argv_p) { + (*argc)++; + } + } + + char** argv_copy = arena_calloc(arena_dir, *argc + 1, sizeof(char*)); + + for (size_t i = 0; i < *argc; ++i) { + size_t length = strlen(argv[i]); + argv_copy[i] = arena_calloc(arena_dir, length + 1, sizeof(char)); + memcpy(argv_copy[i], argv[i], length + 1); + assert(!argv_copy[i][length]); + } + + assert(!argv[*argc]); + argv_copy[*argc] = NULL; + + return argv_copy; +} diff --git a/probe_src/probe_frontend/cli/src/dump.rs b/probe_src/probe_frontend/cli/src/dump.rs index c7d3aaf8..ddffaeef 100644 --- a/probe_src/probe_frontend/cli/src/dump.rs +++ b/probe_src/probe_frontend/cli/src/dump.rs @@ -1,4 +1,5 @@ use std::{ + ffi::CString, fs::File, io::{Read, Write}, path::Path, @@ -139,6 +140,18 @@ trait Dump { fn dump(&self) -> String; } +impl Dump for Vec { + fn dump(&self) -> String { + let mut ret = "[ ".to_owned(); + for cstr in self { + ret.push_str(&format!("\"{}\", ", cstr.to_string_lossy())) + } + ret.push(']'); + + ret + } +} + impl Dump for ops::StatxTimestamp { fn dump(&self) -> String { match DateTime::from_timestamp(self.sec, self.nsec) { @@ -285,7 +298,13 @@ impl Dump for ops::ChdirOp { impl Dump for ops::ExecOp { fn dump(&self) -> String { - format!("[ path={}, errno={} ]", self.path.dump(), self.ferrno,) + format!( + "[ path={}, errno={}, argv={}, env={} ]", + self.path.dump(), + self.ferrno, + self.argv.dump(), + self.env.dump() + ) } } diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs index f4c28660..08d26507 100644 --- a/probe_src/probe_frontend/lib/src/ops.rs +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -7,6 +7,7 @@ use crate::transcribe::ArenaContext; use probe_macros::{MakeRustOp, PygenDataclass}; use serde::{Deserialize, Serialize}; use 
std::ffi::CString; +use std::vec::Vec; /// Specialized version of [`std::convert::From`] for working with libprobe arena structs. /// @@ -20,6 +21,8 @@ use std::ffi::CString; /// /// - `*mut i8` and `*const i8` can (try to) be converted to [`CString`]s by looking up the /// pointers in the [`ArenaContext`], +/// - `*const *mut i8` can (try to) be converted into [`Vec`]s by looking up the pointers +/// in the [`ArenaContext`], /// - Any type implementing [`Copy`], this base case just returns itself. pub trait FfiFrom { fn ffi_from(value: &T, ctx: &ArenaContext) -> Result @@ -45,6 +48,32 @@ impl FfiFrom<*mut i8> for CString { try_cstring(*value, ctx) } } +impl FfiFrom<*const *mut i8> for Vec { + fn ffi_from(value: &*const *mut i8, ctx: &ArenaContext) -> Result { + let ptr = match ctx.try_get_slice(*value as usize) { + Some(x) => x, + None => return Err(ProbeError::InvalidPointer(*value as usize)), + }; + + let array = unsafe { + core::slice::from_raw_parts( + ptr.as_ptr() as *const *const i8, + // integer division truncates, which will implicitly align the byte array to the + // size of a pointer + ptr.len() / std::mem::size_of::<*const i8>(), + ) + }; + + array + .iter() + // the entire slice is valid memory (no risk of segfaults), but try_get_slice() just + // returns all the bytes from the pointer to the end of the arena, but only the data up + // until the first null pointer are valid *const i8 pointers. + .take_while(|ptr| !(**ptr).is_null()) + .map(|ptr| try_cstring(*ptr, ctx)) + .collect::>>() + } +} /// Specialized version of [`std::convert::Into`] for working with libprobe arena structs. /// diff --git a/probe_src/probe_frontend/lib/src/transcribe.rs b/probe_src/probe_frontend/lib/src/transcribe.rs index a87d6c89..28247b21 100644 --- a/probe_src/probe_frontend/lib/src/transcribe.rs +++ b/probe_src/probe_frontend/lib/src/transcribe.rs @@ -257,6 +257,7 @@ fn filename_numeric>(dir: P) -> Result { } /// this struct represents a `/data` probe record directory. 
+#[derive(Debug)] pub struct ArenaContext(pub Vec); impl ArenaContext { @@ -271,6 +272,7 @@ impl ArenaContext { } /// This struct represents a single `data/*.dat` file from a probe record directory. +#[derive(Debug)] pub struct DataArena { header: ArenaHeader, raw: Vec, diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs index df9cf0d6..7fbe9d49 100644 --- a/probe_src/probe_frontend/macros/src/lib.rs +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -20,10 +20,10 @@ type MacroResult = Result; /// - contain only types that implement `FfiFrom` (defined in probe_frontend, see ops module for /// details). /// -/// In will generate a struct with the following characteristics: +/// It will generate a struct with the following characteristics: /// /// - same name, but without the `C_` prefix, and converted from snake_case to PascalCase. -/// - any field in the original struct starting with `__` is ignored. +/// - any field in the original struct starting with `__spare` or `__reserved` is ignored. /// - any field in the original struct starting with `ru_`, `tv_`, or `stx_` will have that prefix /// removed. /// - derives serde's `Serialize`, `Deserialize` traits. @@ -175,14 +175,20 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream { fn convert_bindgen_type(ty: &syn::Type) -> MacroResult { match ty { - syn::Type::Ptr(_inner) => Ok(parse_quote!(::std::ffi::CString)), + // single pointers are treated as null-terminated byte-strings (as CString), + // double pointers are treated as null-terminated arrays of null-terminated byte-strings + // (as Vec<CString>).
+ syn::Type::Ptr(inner) => Ok(match inner.elem.as_ref() { + syn::Type::Ptr(_inner) => parse_quote!(::std::vec::Vec<::std::ffi::CString>), + _ => parse_quote!(::std::ffi::CString), + }), syn::Type::Array(inner) => { let mut new = inner.clone(); new.elem = Box::new(convert_bindgen_type(&new.elem)?); Ok(Type::Array(new)) } syn::Type::Path(inner) => { - if let Some(name) = type_basename(inner)?.to_string().strip_prefix("C_") { + if let Some(name) = type_basename(inner)?.ident.to_string().strip_prefix("C_") { let name = snake_case_to_pascal(name); let name = Ident::new(&name, Span::mixed_site()); Ok(parse_quote!(#name)) @@ -198,7 +204,7 @@ fn convert_bindgen_type(ty: &syn::Type) -> MacroResult { } } -fn type_basename(ty: &syn::TypePath) -> MacroResult<&syn::Ident> { +fn type_basename(ty: &syn::TypePath) -> MacroResult<&syn::PathSegment> { if let Some(qself) = &ty.qself { return Err(quote_spanned! { qself.span() => @@ -208,7 +214,7 @@ fn type_basename(ty: &syn::TypePath) -> MacroResult<&syn::Ident> { } match ty.path.segments.last() { - Some(x) => Ok(&x.ident), + Some(x) => Ok(x), None => Err(quote_spanned! 
{ ty.path.segments.span() => compile_error!("Type path has no segments"); diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs index 6008b8f3..6b972750 100644 --- a/probe_src/probe_frontend/macros/src/pygen.rs +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -147,7 +147,15 @@ fn convert_to_pytype(ty: &syn::Type) -> MacroResult { match ty { syn::Type::Array(inner) => Ok(format!("list[{}]", convert_to_pytype(inner.elem.as_ref())?)), syn::Type::Path(inner) => { - let name = crate::type_basename(inner)?.to_string(); + let inner_ty = crate::type_basename(inner)?; + let name = inner_ty.ident.to_string(); + let generics = match &inner_ty.arguments { + syn::PathArguments::None => Vec::new(), + syn::PathArguments::AngleBracketed(inner) => { + inner.args.iter().cloned().collect::>() + } + syn::PathArguments::Parenthesized(_) => Vec::new(), + }; Ok(match name.as_str() { // that's a lot of ways to say "int", python ints are bigints so we don't have to // care about size @@ -168,7 +176,51 @@ fn convert_to_pytype(ty: &syn::Type) -> MacroResult { // bool types are basically the same everywhere "bool" => name, - _ => name, + _ => { + // no generics means we just pass it through verbatim, this includes things + // like dataclass types + if generics.is_empty() { + name + } else { + let generic_types = generics + .iter() + .filter_map(|generic| match generic { + syn::GenericArgument::Type(ty) => Some(convert_to_pytype(ty)), + _ => None, + }) + .collect::, _>>()?; + + if generic_types.is_empty() { + // if there are no type generics on a type, that means that it has + // constant or lifetime generics that aren't applicable to python, so + // we just ignore them + name + } else { + // NOTE: this does result in generic types in the generated file(s) + // having a trailing comma, which *will* show up in type annotations + // for things like class constructors, as those are stored as strings. 
+ let py_generics = + generic_types + .iter() + .fold(String::new(), |mut acc, generic| { + acc.push_str(generic); + acc.push(','); + acc.push(' '); + acc + }); + + match name.as_str() { + // a rust vec is basically a list in python + "Vec" => format!("list[{}]", py_generics), + + // Any type we don't know about gets passed through verbatim. + // FIXME: this is really fragile, consider throwing a compiler + // error instead. + _ => format!("{}[{}]", name, py_generics), + } + } + } + } }) } _ => Err(quote_spanned! { diff --git a/probe_src/probe_frontend/python/probe_py/generated/ops.py b/probe_src/probe_frontend/python/probe_py/generated/ops.py index 03bed957..caa9b9aa 100644 --- a/probe_src/probe_frontend/python/probe_py/generated/ops.py +++ b/probe_src/probe_frontend/python/probe_py/generated/ops.py @@ -129,6 +129,10 @@ class ChdirOp: class ExecOp: path: Path ferrno: int + argc: int + argv: list[bytes, ] + envc: int + env: list[bytes, ] @dataclass(init=True, frozen=True) diff --git a/probe_src/probe_frontend/python/probe_py/generated/parser.py b/probe_src/probe_frontend/python/probe_py/generated/parser.py index 2e912531..3ce06948 100644 --- a/probe_src/probe_frontend/python/probe_py/generated/parser.py +++ b/probe_src/probe_frontend/python/probe_py/generated/parser.py @@ -77,8 +77,11 @@ def op_hook(json_map: typing.Dict[str, typing.Any]) -> typing.Any: constructor = ops.__dict__[ty] + # HACK: convert jsonlines' lists of integers into python byte types for ident, ty in constructor.__annotations__.items(): if ty == "bytes" and ident in json_map: json_map[ident] = bytes(json_map[ident]) + if ty == "list[bytes,]" and ident in json_map: + json_map[ident] = [bytes(x) for x in json_map[ident]] return constructor(**json_map)