Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deterministic serialisation for cross binary communication #4567

Merged
merged 5 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions packages/serialise/serialise.pony
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@ invariants. However, if only "trusted" data (i.e. data produced by Pony
serialisation from the same binary) is deserialised, it will always maintain a
well-formed heap and all object invariants.

Note that serialised data is not usable between different Pony binaries. This is
due to the use of type identifiers rather than a heavy-weight self-describing
serialisation schema. This also means it isn't safe to deserialise something
serialised by the same program compiled for a different platform.
Note that serialised data can be used between binaries compiled with the same
version of the Pony compiler. Cross-binary serialisation only works between
binaries of the same bit width (32-bit vs 64-bit), data model (ilp32, lp64, or
llp64), and endianness (big endian or little endian), but it is not limited to
a single platform (for example, x86_64 Linux and aarch64 Linux binaries can be
mixed because they share the same bit width, data model, and endianness).

The [Serialise.signature](serialise-Serialise.md#signature) method is provided
for the purposes of comparing communicating Pony binaries to determine if they
are the same. Confirming this before deserialising data can help mitigate the
risk of accidental serialisation across different Pony binaries, but does not on
its own address the security issues of accepting data from untrusted sources.
are compatible. Confirming this before deserialising data can help mitigate the
risk of accidental serialisation across incompatible Pony binaries, but does not
on its own address the security issues of accepting data from untrusted sources.
"""

use @"internal.signature"[Array[U8] val]()
Expand Down
3 changes: 3 additions & 0 deletions src/libponyc/ast/ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -2013,6 +2013,7 @@ static pony_type_t ast_signature_pony =
sizeof(ast_signature_t),
0,
0,
0,
NULL,
NULL,
ast_signature_serialise_trace,
Expand Down Expand Up @@ -2092,6 +2093,7 @@ static pony_type_t ast_nominal_pkg_id_signature_pony =
sizeof(ast_signature_t),
0,
0,
0,
NULL,
NULL,
ast_nominal_pkg_id_signature_serialise_trace,
Expand Down Expand Up @@ -2355,6 +2357,7 @@ static pony_type_t ast_pony =
sizeof(ast_t),
0,
0,
0,
NULL,
NULL,
ast_serialise_trace,
Expand Down
1 change: 1 addition & 0 deletions src/libponyc/ast/source.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ static pony_type_t source_pony =
sizeof(source_t),
0,
0,
0,
NULL,
NULL,
source_serialise_trace,
Expand Down
2 changes: 2 additions & 0 deletions src/libponyc/ast/stringtab.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ static __pony_thread_local struct _pony_type_t string_pony =
0,
0,
0,
0,
NULL,
NULL,
NULL,
Expand Down Expand Up @@ -220,6 +221,7 @@ static pony_type_t strlist_pony =
sizeof(strlist_t),
0,
0,
0,
NULL,
NULL,
strlist_serialise_trace,
Expand Down
1 change: 1 addition & 0 deletions src/libponyc/ast/symtab.c
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ static pony_type_t symbol_pony =
sizeof(symbol_t),
0,
0,
0,
NULL,
NULL,
symbol_serialise_trace,
Expand Down
3 changes: 3 additions & 0 deletions src/libponyc/ast/token.c
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ static pony_type_t token_signature_pony =
sizeof(token_signature_t),
0,
0,
0,
NULL,
NULL,
token_signature_serialise_trace,
Expand Down Expand Up @@ -458,6 +459,7 @@ static pony_type_t token_docstring_signature_pony =
sizeof(token_signature_t),
0,
0,
0,
NULL,
NULL,
token_docstring_signature_serialise_trace,
Expand Down Expand Up @@ -546,6 +548,7 @@ static pony_type_t token_pony =
sizeof(token_t),
0,
0,
0,
NULL,
NULL,
token_serialise_trace,
Expand Down
26 changes: 26 additions & 0 deletions src/libponyc/codegen/codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "../../libponyrt/mem/pool.h"
#include "ponyassert.h"

#include <blake2.h>
#include <platform.h>
#include <llvm-c/DebugInfo.h>
#include <llvm-c/Initialization.h>
Expand Down Expand Up @@ -202,6 +203,13 @@ static void init_runtime(compile_t* c)
c->msg_type = LLVMStructCreateNamed(c->context, "__message");
LLVMStructSetBody(c->msg_type, params, 2, false);

// descriptor_offset_lookup
// uint32_t (*)(size_t)
params[0] = target_is_ilp32(c->opt->triple) ? c->i32 : c->i64;
c->descriptor_offset_lookup_type = LLVMFunctionType(c->i32, params, 1, false);
c->descriptor_offset_lookup_fn =
LLVMPointerType(c->descriptor_offset_lookup_type, 0);

// trace
// void (*)(i8*, __object*)
params[0] = c->ptr;
Expand Down Expand Up @@ -856,6 +864,21 @@ bool codegen_pass_init(pass_opt_t* opt)
else
opt->cpu = LLVMGetHostCPUName();

opt->serialise_id_hash_key = (unsigned char*)ponyint_pool_alloc_size(16);

const char* version = "pony-" PONY_VERSION;
const char* data_model = target_is_ilp32(opt->triple) ? "ilp32" : (target_is_lp64(opt->triple) ? "lp64" : (target_is_llp64(opt->triple) ? "llp64" : "unknown"));
const char* endian = target_is_bigendian(opt->triple) ? "be" : "le";

printbuf_t* target_version_buf = printbuf_new();
printbuf(target_version_buf, "%s-%s-%s", version, data_model, endian);

int status = blake2b(opt->serialise_id_hash_key, 16, target_version_buf->m, target_version_buf->offset, NULL, 0);
(void)status;
pony_assert(status == 0);

printbuf_free(target_version_buf);

return true;
}

Expand All @@ -872,6 +895,9 @@ void codegen_pass_cleanup(pass_opt_t* opt)
opt->triple = NULL;
opt->cpu = NULL;
opt->features = NULL;

ponyint_pool_free_size(16, opt->serialise_id_hash_key);
opt->serialise_id_hash_key = NULL;
}

bool codegen(ast_t* program, pass_opt_t* opt)
Expand Down
3 changes: 3 additions & 0 deletions src/libponyc/codegen/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ typedef struct compile_t
LLVMValueRef primitives_init;
LLVMValueRef primitives_final;
LLVMValueRef desc_table;
LLVMValueRef desc_table_offset_lookup_fn;
LLVMValueRef numeric_sizes;

LLVMTypeRef void_type;
Expand All @@ -187,6 +188,8 @@ typedef struct compile_t

LLVMTypeRef ptr;
LLVMTypeRef descriptor_type;
LLVMTypeRef descriptor_offset_lookup_type;
LLVMTypeRef descriptor_offset_lookup_fn;
LLVMTypeRef field_descriptor;
LLVMTypeRef object_type;
LLVMTypeRef msg_type;
Expand Down
92 changes: 74 additions & 18 deletions src/libponyc/codegen/gendesc.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,25 @@

#define DESC_ID 0
#define DESC_SIZE 1
#define DESC_FIELD_COUNT 2
#define DESC_FIELD_OFFSET 3
#define DESC_INSTANCE 4
#define DESC_TRACE 5
#define DESC_SERIALISE_TRACE 6
#define DESC_SERIALISE 7
#define DESC_DESERIALISE 8
#define DESC_CUSTOM_SERIALISE_SPACE 9
#define DESC_CUSTOM_DESERIALISE 10
#define DESC_DISPATCH 11
#define DESC_FINALISE 12
#define DESC_EVENT_NOTIFY 13
#define DESC_MIGHT_REFERENCE_ACTOR 14
#define DESC_TRAITS 15
#define DESC_FIELDS 16
#define DESC_VTABLE 17

#define DESC_LENGTH 18
#define DESC_SERIALISEID 2
#define DESC_FIELD_COUNT 3
#define DESC_FIELD_OFFSET 4
#define DESC_INSTANCE 5
#define DESC_TRACE 6
#define DESC_SERIALISE_TRACE 7
#define DESC_SERIALISE 8
#define DESC_DESERIALISE 9
#define DESC_CUSTOM_SERIALISE_SPACE 10
#define DESC_CUSTOM_DESERIALISE 11
#define DESC_DISPATCH 12
#define DESC_FINALISE 13
#define DESC_EVENT_NOTIFY 14
#define DESC_MIGHT_REFERENCE_ACTOR 15
#define DESC_TRAITS 16
#define DESC_FIELDS 17
#define DESC_VTABLE 18

#define DESC_LENGTH 19

static LLVMValueRef make_unbox_function(compile_t* c, reach_type_t* t,
reach_method_t* m)
Expand Down Expand Up @@ -330,6 +331,7 @@ void gendesc_basetype(compile_t* c, LLVMTypeRef desc_type)

params[DESC_ID] = c->i32;
params[DESC_SIZE] = c->i32;
params[DESC_SERIALISEID] = target_is_ilp32(c->opt->triple) ? c->i32 : c->i64;
params[DESC_FIELD_COUNT] = c->i32;
params[DESC_FIELD_OFFSET] = c->i32;
params[DESC_INSTANCE] = c->ptr;
Expand Down Expand Up @@ -377,6 +379,7 @@ void gendesc_type(compile_t* c, reach_type_t* t)

params[DESC_ID] = c->i32;
params[DESC_SIZE] = c->i32;
params[DESC_SERIALISEID] = target_is_ilp32(c->opt->triple) ? c->i32 : c->i64;
params[DESC_FIELD_COUNT] = c->i32;
params[DESC_FIELD_OFFSET] = c->i32;
params[DESC_INSTANCE] = c->ptr;
Expand Down Expand Up @@ -414,6 +417,7 @@ void gendesc_init(compile_t* c, reach_type_t* t)
LLVMValueRef args[DESC_LENGTH];
args[DESC_ID] = LLVMConstInt(c->i32, t->type_id, false);
args[DESC_SIZE] = LLVMConstInt(c->i32, c_t->abi_size, false);
args[DESC_SERIALISEID] = LLVMConstInt(target_is_ilp32(c->opt->triple) ? c->i32 : c->i64, t->serialise_id, false);
args[DESC_FIELD_COUNT] = make_field_count(c, t);
args[DESC_FIELD_OFFSET] = make_field_offset(c, t);
args[DESC_INSTANCE] = make_desc_ptr(c, c_t->instance);
Expand Down Expand Up @@ -478,6 +482,58 @@ void gendesc_table(compile_t* c)
ponyint_pool_free_size(size, args);
}

// Emits the function `__DescOffsetLookupFn` (C calling convention, external
// linkage). Its single parameter is a serialise ID; the body is one switch
// with a case per reachable type that is neither a trait nor a struct, each
// case returning that type's type_id as an i32. Any ID without a case falls
// through to the default block, which returns (uint32_t)-1.
// The value produced by make_desc_ptr for the generated function is stored in
// c->desc_table_offset_lookup_fn.
void gendesc_table_lookup(compile_t* c)
{
  LLVMValueRef lookup_fn = codegen_addfun(c, "__DescOffsetLookupFn",
    c->descriptor_offset_lookup_type, false);
  codegen_startfun(c, lookup_fn, NULL, NULL, NULL, false);
  LLVMSetFunctionCallConv(lookup_fn, LLVMCCallConv);
  LLVMSetLinkage(lookup_fn, LLVMExternalLinkage);

  LLVMBasicBlockRef fallback_block = codegen_block(c, "unreachable");

  // Dispatch on the incoming serialise ID. The switch starts with no cases;
  // they are attached one by one in the loop below.
  LLVMValueRef id_param = LLVMGetParam(lookup_fn, 0);
  LLVMValueRef id_switch = LLVMBuildSwitch(c->builder, id_param,
    fallback_block, 0);

  // Default target: not expected to be hit for a known serialise ID, so it
  // hands back the sentinel (uint32_t)-1.
  LLVMPositionBuilderAtEnd(c->builder, fallback_block);
  LLVMValueRef sentinel = LLVMConstInt(c->i32, (uint32_t)-1, false);
  LLVMBuildRet(c->builder, sentinel);

  size_t iter = HASHMAP_BEGIN;

  for(reach_type_t* type = reach_types_next(&c->reach->types, &iter);
    type != NULL;
    type = reach_types_next(&c->reach->types, &iter))
  {
    // Traits and structs get no case in the switch.
    if(type->is_trait || (type->underlying == TK_STRUCT))
      continue;

    pony_assert(type->serialise_id != (uint64_t)-1);

    const char* block_name = genname_type_with_id(type->name,
      type->serialise_id);
    LLVMBasicBlockRef case_block = codegen_block(c, block_name);

    // The case constant is i32 on ilp32 targets and i64 otherwise.
    LLVMTypeRef id_type = target_is_ilp32(c->opt->triple) ? c->i32 : c->i64;
    LLVMValueRef case_value = LLVMConstInt(id_type, type->serialise_id, false);
    LLVMAddCase(id_switch, case_value, case_block);

    // Each case simply returns the type_id of the matched type.
    LLVMPositionBuilderAtEnd(c->builder, case_block);
    LLVMValueRef type_id = LLVMConstInt(c->i32, type->type_id, false);
    LLVMBuildRet(c->builder, type_id);
  }

  // Leave the builder positioned at the end of the default block before
  // finishing the function.
  LLVMPositionBuilderAtEnd(c->builder, fallback_block);

  codegen_finishfun(c);

  c->desc_table_offset_lookup_fn = make_desc_ptr(c, lookup_fn);
}

static LLVMValueRef desc_field(compile_t* c, LLVMValueRef desc, int index)
{
LLVMTypeRef field_type = LLVMStructGetTypeAtIndex(c->descriptor_type, index);
Expand Down
2 changes: 2 additions & 0 deletions src/libponyc/codegen/gendesc.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ void gendesc_init(compile_t* c, reach_type_t* t);

void gendesc_table(compile_t* c);

void gendesc_table_lookup(compile_t* c);

LLVMValueRef gendesc_fetch(compile_t* c, LLVMValueRef object);

LLVMValueRef gendesc_typeid(compile_t* c, LLVMValueRef desc);
Expand Down
10 changes: 8 additions & 2 deletions src/libponyc/codegen/genexe.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,16 @@ static LLVMValueRef make_lang_features_init(compile_t* c)
boolean = c->i8;

uint32_t desc_table_size = reach_max_type_id(c->reach);
LLVMValueRef desc_table_lookup_fn = c->desc_table_offset_lookup_fn;

LLVMTypeRef f_params[4];
LLVMTypeRef f_params[5];
f_params[0] = boolean;
f_params[1] = boolean;
f_params[2] = c->ptr;
f_params[3] = c->intptr;
f_params[4] = c->descriptor_offset_lookup_fn;

LLVMTypeRef lfi_type = LLVMStructTypeInContext(c->context, f_params, 4,
LLVMTypeRef lfi_type = LLVMStructTypeInContext(c->context, f_params, 5,
false);

LLVMBasicBlockRef this_block = LLVMGetInsertBlock(c->builder);
Expand Down Expand Up @@ -89,6 +91,10 @@ static LLVMValueRef make_lang_features_init(compile_t* c)
LLVMBuildStore(c->builder, LLVMConstInt(c->intptr, desc_table_size, false),
field);

field = LLVMBuildStructGEP2(c->builder, lfi_type, lfi_object, 4, "");
LLVMBuildStore(c->builder, LLVMBuildBitCast(c->builder, desc_table_lookup_fn,
c->descriptor_offset_lookup_fn, ""), field);

return lfi_object;
}

Expand Down
7 changes: 7 additions & 0 deletions src/libponyc/codegen/genname.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,10 @@ const char* genname_program_fn(const char* program, const char* name)
{
return stringtab_two(program, name);
}

// Formats "<type>_<type_id>" (the ID printed as an unsigned decimal) into a
// newly created print buffer and returns the string that stringtab_buf
// produces for that buffer.
const char* genname_type_with_id(const char* type, uint64_t type_id)
{
  printbuf_t* name_buf = printbuf_new();
  printbuf(name_buf, "%s_%" PRIu64, type, type_id);

  const char* interned = stringtab_buf(name_buf);
  return interned;
}
2 changes: 2 additions & 0 deletions src/libponyc/codegen/genname.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ const char* genname_unsafe(const char* name);

const char* genname_program_fn(const char* program, const char* name);

const char* genname_type_with_id(const char* type, uint64_t type_id);

PONY_EXTERN_C_END

#endif
8 changes: 4 additions & 4 deletions src/libponyc/codegen/genprim.c
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@ void genprim_array_serialise(compile_t* c, reach_type_t* t)
LLVMValueRef offset_addr = LLVMBuildInBoundsGEP2(c->builder, c->i8, addr,
&offset, 1, "");

genserialise_typeid(c, t, offset_addr);
genserialise_serialiseid(c, t, offset_addr);

// Don't serialise our contents if we are opaque.
LLVMBasicBlockRef body_block = codegen_block(c, "body");
Expand Down Expand Up @@ -890,7 +890,7 @@ void genprim_array_deserialise(compile_t* c, reach_type_t* t)

LLVMValueRef ctx = LLVMGetParam(c_t->deserialise_fn, 0);
LLVMValueRef object = LLVMGetParam(c_t->deserialise_fn, 1);
gendeserialise_typeid(c, c_t, object);
gendeserialise_serialiseid(c, c_t, object);

// Deserialise the array contents.
LLVMValueRef alloc = field_value(c, c_t->structure, object, 2);
Expand Down Expand Up @@ -1010,7 +1010,7 @@ void genprim_string_serialise(compile_t* c, reach_type_t* t)
LLVMValueRef offset_addr = LLVMBuildInBoundsGEP2(c->builder, c->i8, addr,
&offset, 1, "");

genserialise_typeid(c, t, offset_addr);
genserialise_serialiseid(c, t, offset_addr);

// Don't serialise our contents if we are opaque.
LLVMBasicBlockRef body_block = codegen_block(c, "body");
Expand Down Expand Up @@ -1069,7 +1069,7 @@ void genprim_string_deserialise(compile_t* c, reach_type_t* t)
LLVMValueRef ctx = LLVMGetParam(c_t->deserialise_fn, 0);
LLVMValueRef object = LLVMGetParam(c_t->deserialise_fn, 1);

gendeserialise_typeid(c, c_t, object);
gendeserialise_serialiseid(c, c_t, object);

// Deserialise the string contents.
LLVMValueRef alloc = field_value(c, c_t->structure, object, 2);
Expand Down
Loading
Loading