Skip to content

Commit

Permalink
Work on C side of capturing symlinks
Browse files Browse the repository at this point in the history
  • Loading branch information
charmoniumQ committed Jan 21, 2025
1 parent 7841783 commit 7576929
Show file tree
Hide file tree
Showing 11 changed files with 123 additions and 48 deletions.
2 changes: 1 addition & 1 deletion cli-wrapper/lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ log = "0.4.21"
machine-info = "1.0.9"
probe_macros = { path = "../macros" }
rayon = "1.10.0"
serde = { version = "1.0.203", features = ["serde_derive"] }
serde = { version = "1.0.203", features = ["serde_derive", "rc"] }
serde_json = "1.0.118"
thiserror = "1.0.61"

Expand Down
7 changes: 7 additions & 0 deletions cli-wrapper/lib/src/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use probe_macros::{MakeRustOp, PygenDataclass};
use serde::{Deserialize, Serialize};
use std::ffi::CString;
use std::vec::Vec;
use std::boxed::Box;

/// Specialized version of [`std::convert::From`] for working with libprobe arena structs.
///
Expand Down Expand Up @@ -74,6 +75,11 @@ impl FfiFrom<*const *mut i8> for Vec<CString> {
.collect::<Result<Vec<_>>>()
}
}
// Converts a raw `*const C_Path` coming from libprobe into an owned, heap-allocated `Path`.
// The pointee is converted with `Path::ffi_from` and the resulting value is moved into a `Box`;
// any error from that inner conversion is propagated to the caller.
//
// NOTE(review): the pointer is dereferenced without a null check. If the C side can leave this
// pointer NULL (presumably the case for paths that are not symlinks — confirm against the C
// struct initializer), the dereference below is undefined behaviour; an `Option<Box<Path>>`
// mapping or an explicit null-pointer error may be needed.
impl FfiFrom<*const C_Path> for Box<Path> {
fn ffi_from(value: &*const C_Path, ctx: &ArenaContext) -> Result<Self> {
// SAFETY: assumes `*value` is non-null, properly aligned, and points to a live `C_Path`
// for the duration of this call — TODO confirm that every caller guarantees this.
Ok(Box::new(unsafe {Path::ffi_from(&**value, ctx)}?))
}
}

/// Specialized version of [`std::convert::Into`] for working with libprobe arena structs.
///
Expand Down Expand Up @@ -108,6 +114,7 @@ fn try_cstring(str: *const i8, ctx: &ArenaContext) -> Result<CString> {
}
}


// Bindings are generated by `../build.sh` and the MakeRustOp proc-macro
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

Expand Down
2 changes: 1 addition & 1 deletion cli-wrapper/macros/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ proc-macro = true
parking_lot = "0.12.3"
proc-macro2 = "1.0.86"
quote = "1.0.36"
syn = "2.0.68"
syn = { version = "2.0.68", features = ["extra-traits"] }

[lints]
workspace = true
26 changes: 17 additions & 9 deletions cli-wrapper/macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,14 +174,26 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream {
}

fn convert_bindgen_type(ty: &syn::Type) -> MacroResult<syn::Type> {
let ty_str = format!("Bindgen unsupported type: {:?}", ty);
let error = quote_spanned! {
ty.span() => compile_error!(#ty_str);
};
match ty {
// single pointers are recorded as null-terminated byte-strings (as CString),
// double pointers are treated as null-terminated arrays of null-terminated byte-strings
// (as Vec<CString>).
syn::Type::Ptr(inner) => Ok(match inner.elem.as_ref() {
syn::Type::Ptr(_inner) => parse_quote!(::std::vec::Vec<::std::ffi::CString>),
_ => parse_quote!(::std::ffi::CString),
}),
syn::Type::Ptr(inner) => match inner.elem.as_ref() {
syn::Type::Ptr(_inner) => Ok(parse_quote!(::std::vec::Vec<::std::ffi::CString>)),
syn::Type::Path(path) => match path.path.segments.iter().last() {
None => Err(error.into()),
Some(last_path_seg) => match last_path_seg.ident.to_string().as_str() {
"c_char" => Ok(parse_quote!(::std::ffi::CString)),
"C_Path" => Ok(parse_quote!(::std::boxed::Box<Path>)),
_ => Err(error.into()),
},
},
_ => Err(error.into()),
},
syn::Type::Array(inner) => {
let mut new = inner.clone();
new.elem = Box::new(convert_bindgen_type(&new.elem)?);
Expand All @@ -196,11 +208,7 @@ fn convert_bindgen_type(ty: &syn::Type) -> MacroResult<syn::Type> {
Ok(Type::Path(inner.clone()))
}
}
_ => Err(quote_spanned! {
ty.span() =>
compile_error!("Unable to convert bindgen type");
}
.into()),
_ => Err(error.into()),
}
}

Expand Down
23 changes: 17 additions & 6 deletions cli-wrapper/macros/src/pygen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ fn basic_dataclass(name: String, pairs: &[(String, String)]) -> Dataclass {
}

fn convert_to_pytype(ty: &syn::Type) -> MacroResult<String> {
let msg = format!("Pygen unsupported type type: {:?}", ty);
let error = quote_spanned! {
ty.span() =>
compile_error!(#msg);
};
match ty {
syn::Type::Array(inner) => Ok(format!("list[{}]", convert_to_pytype(inner.elem.as_ref())?)),
syn::Type::Path(inner) => {
Expand Down Expand Up @@ -222,12 +227,18 @@ fn convert_to_pytype(ty: &syn::Type) -> MacroResult<String> {
}
}
})
}
_ => Err(quote_spanned! {
ty.span() =>
compile_error!("Unsupported type type");
}
.into()),
},
syn::Type::Ptr(inner) => match inner.elem.as_ref() {
syn::Type::Path(path) => match path.path.segments.iter().last() {
None => Err(error.into()),
Some(last_path_seg) => match last_path_seg.ident.to_string().as_str() {
"C_Path" => Ok("Path".to_owned()),
_ => Err(error.into()),
},
},
_ => Err(error.into()),
},
_ => Err(error.into()),
}
}

Expand Down
25 changes: 24 additions & 1 deletion libprobe/generator/gen_libc_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def wrapper_func_body(func: ParsedFunc) -> typing.Sequence[Node]:
pycparser.c_ast.FuncCall(
name=pycparser.c_ast.ID(name="DEBUG"),
args=pycparser.c_ast.ExprList(exprs=[
pycparser.c_ast.Constant(type="string", value='"' + func.name + '(...)"'),
pycparser.c_ast.Constant(type="string", value='"vvv ' + func.name + '(...)"'),
]),
),
]
Expand Down Expand Up @@ -401,13 +401,36 @@ def wrapper_func_body(func: ParsedFunc) -> typing.Sequence[Node]:
else:
call_stmts = expect_type(Compound, call_stmts_block.init).block_items

call_stmts.insert(0, pycparser.c_ast.FuncCall(
name=pycparser.c_ast.ID(name="DEBUG"),
args=pycparser.c_ast.ExprList(exprs=[
pycparser.c_ast.Constant(type="string", value='"vvv ' + func.name + ' calling real %p"'),
pycparser.c_ast.ID(name=func_prefix + func.name),
]),
))
call_stmts.append(pycparser.c_ast.FuncCall(
name=pycparser.c_ast.ID(name="DEBUG"),
args=pycparser.c_ast.ExprList(exprs=[
pycparser.c_ast.Constant(type="string", value='"^^^ ' + func.name + ' calling real"'),
]),
))

save_errno = define_var(c_ast_int, "saved_errno", pycparser.c_ast.ID(name="errno"))
restore_errno = Assignment(
op='=',
lvalue=pycparser.c_ast.ID(name="errno"),
rvalue=pycparser.c_ast.ID(name="saved_errno"),
)

post_call_stmts.append(pycparser.c_ast.FuncCall(
name=pycparser.c_ast.ID(name="DEBUG"),
args=pycparser.c_ast.ExprList(exprs=[
pycparser.c_ast.Constant(type="string", value='"^^^ ' + func.name + '(...)"'),
]),
))

# If we are doing post call operations,
# make sure to save/restore errno so caller doesn't know
if post_call_stmts:
post_call_stmts.insert(0, save_errno)
post_call_stmts.append(restore_errno)
Expand Down
16 changes: 16 additions & 0 deletions libprobe/generator/libc_hooks_source.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,14 @@ int fclose (FILE *stream) {
prov_log_try(op);
}
});
void* call = ({
int ret;
if (fileno(stream) != 2 /* Keep debug log open */) {
ret = unwrapped_fclose(stream);
} else {
ret = 0;
}
});
void* post_call = ({
if (likely(prov_log_is_enabled())) {
op.data.close.ferrno = ret == 0 ? 0 : errno;
Expand Down Expand Up @@ -296,6 +304,14 @@ int close (int filedes) {
prov_log_try(op);
}
});
void* call = ({
int ret;
if (filedes != 2 /* keep debug log open */) {
ret = unwrapped_close(filedes);
} else {
ret = 0;
}
});
void* post_call = ({
if (likely(prov_log_is_enabled())) {
op.data.close.ferrno = ret == 0 ? 0 : errno;
Expand Down
28 changes: 21 additions & 7 deletions libprobe/include/libprobe/prov_ops.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
struct SymlinkInfo;

/* TODO: rename this PathMetadata */
struct Path {
int32_t dirfd_minus_at_fdcwd;
const char* path; /* path valid if non-null */
Expand All @@ -10,15 +13,26 @@ struct Path {
size_t size;
bool stat_valid;
bool dirfd_valid;
const char* symlink_content;
const struct Path* symlink_dst;
};

static const struct Path null_path = {
.dirfd_minus_at_fdcwd = -1,
.path = NULL,
.mode_and_type = -1,
.device_major = -1,
.device_minor = -1,
.inode = -1,
.mtime = {0},
.ctime = {0},
.size = 0,
.dirfd_valid = false,
.stat_valid = false,
.symlink_content = NULL,
.symlink_dst = NULL,
};

struct SymlinkInfo {
const Path src;
const char* content;
const Path dst;
}

static const struct Path null_path = {-1, NULL, -1, -1, -1, -1, {0}, {0}, 0, false, false};
/* We don't need to free paths since I switched to the Arena allocator */
/* static void free_path(struct Path path); */

Expand Down
6 changes: 0 additions & 6 deletions libprobe/src/global_state.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,6 @@ static int get_inodes_dirfd() {

static __thread struct ArenaDir __op_arena = { 0 };
static __thread struct ArenaDir __data_arena = { 0 };
static __thread struct ArenaDir __symlink_arena = { 0 };
static const size_t prov_log_arena_size = 64 * 1024;
static void init_log_arena() {
assert(!arena_is_initialized(&__op_arena));
Expand All @@ -220,7 +219,6 @@ static void init_log_arena() {
int thread_dirfd = mkdir_and_descend(get_epoch_dirfd(), NULL, my_gettid(), true, false);
EXPECT( == 0, arena_create(&__op_arena , thread_dirfd, "ops" , prov_log_arena_size));
EXPECT( == 0, arena_create(&__data_arena , thread_dirfd, "data" , prov_log_arena_size));
EXPECT( == 0, arena_create(&__symlink_arena, thread_dirfd, "symlinks", prov_log_arena_size));
}
static struct ArenaDir* get_op_arena() {
assert(arena_is_initialized(&__op_arena));
Expand All @@ -230,10 +228,6 @@ static struct ArenaDir* get_data_arena() {
assert(arena_is_initialized(&__data_arena));
return &__data_arena;
}
static struct ArenaDir* get_symlink_arena() {
assert(arena_is_initialized(&__symlink_arena));
return &__symlink_arena;
}

char* _DEFAULT_PATH = NULL;

Expand Down
3 changes: 3 additions & 0 deletions libprobe/src/prov_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ static void prov_log_try(struct Op op) {
if (op.op_code == exec_op_code) {
prov_log_record(op);
}
DEBUG("prov_log_try");

const struct Path* path = op_to_path(&op);
if (should_copy_files() && path->path && path->stat_valid) {
Expand Down Expand Up @@ -92,12 +93,14 @@ static void prov_log_try(struct Op op) {
}
}
}
DEBUG("prov_log_try ^^^");
}

/*
* Call this to indicate that the process did something (successful or not).
*/
static void prov_log_record(struct Op op) {
DEBUG("prov_log_record vvv");
#ifdef DEBUG_LOG
char str[PATH_MAX * 2];
op_to_human_readable(str, PATH_MAX * 2, &op);
Expand Down
33 changes: 16 additions & 17 deletions libprobe/src/prov_ops.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
static void link_deref(int dirfd, BORROWED struct Path* path);

static struct Path create_path_lazy(int dirfd, BORROWED const char* path, int flags) {
if (likely(prov_log_is_enabled())) {
struct Path ret = null_path;
Expand Down Expand Up @@ -31,8 +33,8 @@ static struct Path create_path_lazy(int dirfd, BORROWED const char* path, int fl
ret.ctime = statx_buf.stx_ctime;
ret.size = statx_buf.stx_size;
ret.stat_valid = true;
if (statx_buf.stx_type & S_IFMT == S_IFLNK) {
link_deref(dirfd, path, &ret);
if ((statx_buf.stx_mode & S_IFMT) == S_IFLNK) {
link_deref(dirfd, &ret);
}
} else {
DEBUG("Stat of %d,%s is not valid", dirfd, path);
Expand All @@ -44,26 +46,23 @@ static struct Path create_path_lazy(int dirfd, BORROWED const char* path, int fl
}
}

void link_deref(int dirfd, BORROWED const char* pathname, const Path* path) {
static void link_deref(int dirfd, BORROWED struct Path* path) {
/* Some magic symlinks under (for example) /proc and /sys
report 'st_size' as zero. In that case, take PATH_MAX as
a "good enough" estimate. */
size_t symlink_size = path->size == 0 ? PATH_MAX : path->size + 1;
char* referent_pathname = EXPECT_NONNULL(arena_calloc(get_data_arena(), symlink_size, 1));
ssize_t readlink_ret = unwrapped_readlinkat(dirfd, path, referent_pathname, symlink_size);
assert(readlink_ret < symlink_size);
referent_pathname[readlink_ret] = '\0';
Path referent_path = create_path_lazy(dirfd, path, flags);
SymlinkInfo* info = EXPECT_NONNULL(arena_calloc(get_symlink_arena(), SymlinkInfo, 1));
/*
** TODO: this path gets copied twice; once in ops and once in symlinks
*/
info->src = path;
info->content = referent_pathname;
info->dst = referent_path;
/* ssize_t symlink_size = path->size == 0 ? PATH_MAX : path->size + 1; */
/* char* content = EXPECT_NONNULL(arena_calloc(get_data_arena(), symlink_size, 1)); */
/* ssize_t readlink_ret = unwrapped_readlinkat(dirfd, path->path, content, symlink_size); */
/* assert(readlink_ret < symlink_size); */
/* content[readlink_ret] = '\0'; */
/* path->symlink_content = (const char*) content; */
/* struct Path symlink_dst = create_path_lazy(dirfd, content, 0); */
/* struct Path* symlink_dst_copy = EXPECT_NONNULL(arena_calloc(get_data_arena(), sizeof(struct Path), 1)); */
/* memcpy(symlink_dst_copy, &symlink_dst, sizeof(struct Path)); */
/* path->symlink_dst = symlink_dst_copy; */
}

void path_to_id_string(const struct Path* path, BORROWED char* string) {
static void path_to_id_string(const struct Path* path, BORROWED char* string) {
CHECK_SNPRINTF(
string,
PATH_MAX,
Expand Down

0 comments on commit 7576929

Please sign in to comment.