Skip to content

Commit

Permalink
[metadata] Break up mono_metadata_parse_type_internal (#60725)
Browse files Browse the repository at this point in the history
* [metadata] Break up mono_metadata_parse_type_internal

Separate the type allocation and cleanup logic from the main decoding logic and
from the cached instance lookup.

Also free the allocated memory, if any, in case of decoding errors.
  • Loading branch information
lambdageek authored Oct 26, 2021
1 parent 13d6458 commit bf885a3
Showing 1 changed file with 211 additions and 99 deletions.
310 changes: 211 additions & 99 deletions src/mono/mono/metadata/metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -1970,6 +1970,188 @@ mono_metadata_init (void)
mono_metadata_update_init ();
}

/*
* Make a pass over the metadata signature blob starting at \p tmp_ptr and count the custom modifiers.
*/
static int
count_custom_modifiers (MonoImage *m, const char *tmp_ptr)
{
int count = 0;
gboolean found = TRUE;
while (found) {
switch (*tmp_ptr) {
case MONO_TYPE_PINNED:
case MONO_TYPE_BYREF:
++tmp_ptr;
break;
case MONO_TYPE_CMOD_REQD:
case MONO_TYPE_CMOD_OPT:
count ++;
mono_metadata_parse_custom_mod (m, NULL, tmp_ptr, &tmp_ptr);
break;
default:
found = FALSE;
}
}
return count;
}

/*
* Decode the (expected \p count, possibly 0) custom modifiers as well as the "byref" and "pinned"
* markers from the metadata stream \p ptr and put them into \p cmods
*
* Sets \p rptr past the end of the parsed metadata. Sets \p pinned and \p byref if those modifiers
* were present.
*/
static void
decode_custom_modifiers (MonoImage *m, MonoCustomModContainer *cmods, int count, const char *ptr, const char **rptr, gboolean *pinned, gboolean *byref)
{
gboolean found = TRUE;
/* cmods are encoded in reverse order from how we normally see them.
* "int32 modopt (Foo) modopt (Bar)" is encoded as "cmod_opt [typedef_or_ref "Bar"] cmod_opt [typedef_or_ref "Foo"] I4"
*/
while (found) {
switch (*ptr) {
case MONO_TYPE_PINNED:
*pinned = TRUE;
++ptr;
break;
case MONO_TYPE_BYREF:
*byref = TRUE;
++ptr;
break;
case MONO_TYPE_CMOD_REQD:
case MONO_TYPE_CMOD_OPT:
g_assert (count > 0);
mono_metadata_parse_custom_mod (m, &(cmods->modifiers [--count]), ptr, &ptr);
break;
default:
found = FALSE;
}
}

// either there were no cmods, or else we iterated through all of cmods backwards to populate it.
g_assert (count == 0);
*rptr = ptr;
}

/*
* Allocate the memory necessary to hold a \c MonoType with \p count custom modifiers.
* If \p transient is true, allocate from the heap, otherwise allocate from the mempool of image \p m
*/
static MonoType *
alloc_type_with_cmods (MonoImage *m, gboolean transient, int count)
{
g_assert (count > 0);
MonoType *type;
size_t size = mono_sizeof_type_with_mods (count, FALSE);
type = transient ? (MonoType *)g_malloc0 (size) : (MonoType *)mono_image_alloc0 (m, size);
type->has_cmods = TRUE;

MonoCustomModContainer *cmods = mono_type_get_cmods (type);
cmods->count = count;
cmods->image = m;

return type;
}

/*
* If \p transient is true, free \p type, otherwise no-op
*/
static void
free_parsed_type (MonoType *type, gboolean transient)
{
if (transient)
mono_metadata_free_type (type);
}

/*
* Try to find a pre-allocated version of the given \p type.
* Returns true and sets \p canonical_type if found, otherwise return false.
*
* For classes and valuetypes, this returns their embedded byval_arg or
* this_arg types. For base types, it returns the global versions.
*/
static gboolean
try_get_canonical_type (MonoType *type, MonoType **canonical_type)
{
/* Note: If the type has any attribtues or modifiers the function currently returns false,
* although there's no fundamental reason we can't have cached copies in those instances (or
* indeed cached arrays, pointers or some generic instances). However in that case there's
* limited utility in returning a cached copy because the parsing code in
* do_mono_metadata_parse_type could have allocated some mempool or heap memory already.
*
* This function should be kept closely in sync with mono_metadata_free_type so that it
* doesn't try to free canonical MonoTypes (which might not even be heap allocated).
*/
g_assert (!type->has_cmods);
if ((type->type == MONO_TYPE_CLASS || type->type == MONO_TYPE_VALUETYPE) && !type->pinned && !type->attrs) {
MonoType *ret = m_type_is_byref (type) ? m_class_get_this_arg (type->data.klass) : m_class_get_byval_arg (type->data.klass);

/* Consider the case:
class Foo<T> { class Bar {} }
class Test : Foo<Test>.Bar {}
When Foo<Test> is being expanded, 'Test' isn't yet initialized. It's actually in
a really pristine state: it doesn't even know whether 'Test' is a reference or a value type.
We ensure that the MonoClass is in a state that we can canonicalize to:
klass->_byval_arg.data.klass == klass
klass->this_arg.data.klass == klass
If we can't canonicalize 'type', it doesn't matter, since later users of 'type' will do it.
LOCKING: even though we don't explicitly hold a lock, in the problematic case 'ret' is a field
of a MonoClass which currently holds the loader lock. 'type' is local.
*/
if (ret->data.klass == type->data.klass) {
*canonical_type = ret;
return TRUE;
}
}

/* Maybe it's one of the globaly-known basic types */
MonoType *cached;
/* No need to use locking since nobody is modifying the hash table */
if ((cached = (MonoType *)g_hash_table_lookup (type_cache, type))) {
*canonical_type = cached;
return TRUE;
}

return FALSE;
}

/*
* Fill in \p type (expecting \p cmod_count custom modifiers) by parsing it from the metadata stream pointed at by \p ptr.
*
* On success returns true and sets \p rptr past the parsed stream data. On failure return false and sets \p error.
*/
static gboolean
do_mono_metadata_parse_type_with_cmods (MonoType *type, int cmod_count, MonoImage *m, MonoGenericContainer *container,
short opt_attrs, gboolean transient, const char *ptr, const char **rptr, MonoError *error)
{
gboolean byref= FALSE;
gboolean pinned = FALSE;

error_init (error);

/* Iterate again, but now parse pinned, byref and custom modifiers */
decode_custom_modifiers (m, mono_type_get_cmods (type), cmod_count, ptr, &ptr, &pinned, &byref);

type->attrs = opt_attrs;
type->byref__ = byref;
type->pinned = pinned ? 1 : 0;

if (!do_mono_metadata_parse_type (type, m, container, transient, ptr, &ptr, error))
return FALSE;

if (rptr)
*rptr = ptr;
return TRUE;
}

/**
* mono_metadata_parse_type:
* \param m metadata context
Expand Down Expand Up @@ -1999,16 +2181,22 @@ static MonoType*
mono_metadata_parse_type_internal (MonoImage *m, MonoGenericContainer *container,
short opt_attrs, gboolean transient, const char *ptr, const char **rptr, MonoError *error)
{
MonoType *type, *cached;
MonoType *type;
MonoType stype;
gboolean byref = FALSE;
gboolean pinned = FALSE;
const char *tmp_ptr;
int count = 0; // Number of mod arguments
gboolean found;

gboolean allocated = FALSE;

error_init (error);

/*
* Q: What's going on with `stype` and `allocated`? A: A very common case is that we're
* parsing "int" or "string" or "Dictionary<K,V>" non-transiently. In that case we don't
* want to flood the mempool with millions of copies of MonoType 'int' (etc). So we parse
* it into a stack variable and try_get_canonical_type, below. As long as the type is
* normal, we will avoid having to make an extra copy in the mempool.
*/

/*
* According to the spec, custom modifiers should come before the byref
* flag, but the IL produced by ilasm from the following signature:
Expand All @@ -2022,121 +2210,43 @@ mono_metadata_parse_type_internal (MonoImage *m, MonoGenericContainer *container
*/

/* Count the modifiers first */
tmp_ptr = ptr;
found = TRUE;
while (found) {
switch (*tmp_ptr) {
case MONO_TYPE_PINNED:
case MONO_TYPE_BYREF:
++tmp_ptr;
break;
case MONO_TYPE_CMOD_REQD:
case MONO_TYPE_CMOD_OPT:
count ++;
mono_metadata_parse_custom_mod (m, NULL, tmp_ptr, &tmp_ptr);
break;
default:
found = FALSE;
}
}

MonoCustomModContainer *cmods = NULL;
count = count_custom_modifiers (m, ptr);

if (count) { // There are mods, so the MonoType will be of nonstandard size.
allocated = TRUE;
if (count > 64) {
mono_error_set_bad_image (error, m, "Invalid type with more than 64 modifiers");
return NULL;
}

size_t size = mono_sizeof_type_with_mods (count, FALSE);
type = transient ? (MonoType *)g_malloc0 (size) : (MonoType *)mono_image_alloc0 (m, size);
type->has_cmods = TRUE;

cmods = mono_type_get_cmods (type);
cmods->count = count;
cmods->image = m;
type = alloc_type_with_cmods (m, transient, count);
} else { // The type is of standard size, so we can allocate it on the stack.
type = &stype;
memset (type, 0, MONO_SIZEOF_TYPE);
}

/* Iterate again, but now parse pinned, byref and custom modifiers */
found = TRUE;
/* cmods are encoded in reverse order from how we normally see them.
* "int32 modopt (Foo) modopt (Bar)" is encoded as "cmod_opt [typedef_or_ref "Bar"] cmod_opt [typedef_or_ref "Foo"] I4"
*/
while (found) {
switch (*ptr) {
case MONO_TYPE_PINNED:
pinned = TRUE;
++ptr;
break;
case MONO_TYPE_BYREF:
byref = TRUE;
++ptr;
break;
case MONO_TYPE_CMOD_REQD:
case MONO_TYPE_CMOD_OPT:
mono_metadata_parse_custom_mod (m, &(cmods->modifiers [--count]), ptr, &ptr);
break;
default:
found = FALSE;
}
}

// either there were no cmods, or else we iterated through all of cmods backwards to populate it.
g_assert (count == 0);

type->attrs = opt_attrs;
type->byref__ = byref;
type->pinned = pinned ? 1 : 0;

if (!do_mono_metadata_parse_type (type, m, container, transient, ptr, &ptr, error))
if (!do_mono_metadata_parse_type_with_cmods (type, count, m, container, opt_attrs, transient, ptr, rptr, error)) {
if (allocated)
free_parsed_type (type, transient);
return NULL;

if (rptr)
*rptr = ptr;
}


// Possibly we can return an already-allocated type instead of the one we decoded
if (!type->has_cmods && !transient) {
if (!allocated && !transient) {
/* no need to free type here, because it is on the stack */
if ((type->type == MONO_TYPE_CLASS || type->type == MONO_TYPE_VALUETYPE) && !type->pinned && !type->attrs) {
MonoType *ret = m_type_is_byref (type) ? m_class_get_this_arg (type->data.klass) : m_class_get_byval_arg (type->data.klass);

/* Consider the case:
class Foo<T> { class Bar {} }
class Test : Foo<Test>.Bar {}
When Foo<Test> is being expanded, 'Test' isn't yet initialized. It's actually in
a really pristine state: it doesn't even know whether 'Test' is a reference or a value type.
We ensure that the MonoClass is in a state that we can canonicalize to:
klass->_byval_arg.data.klass == klass
klass->this_arg.data.klass == klass
If we can't canonicalize 'type', it doesn't matter, since later users of 'type' will do it.
LOCKING: even though we don't explicitly hold a lock, in the problematic case 'ret' is a field
of a MonoClass which currently holds the loader lock. 'type' is local.
*/
if (ret->data.klass == type->data.klass) {
return ret;
}
}
/* No need to use locking since nobody is modifying the hash table */
if ((cached = (MonoType *)g_hash_table_lookup (type_cache, type))) {
return cached;
}
MonoType *ret_type = NULL;
if (try_get_canonical_type (type, &ret_type))
return ret_type;
}

/* printf ("%x %x %c %s\n", type->attrs, type->num_mods, type->pinned ? 'p' : ' ', mono_type_full_name (type)); */

if (type == &stype) { // Type was allocated on the stack, so we need to copy it to safety

// Otherwise return the type we decoded
if (!allocated) { // Type was allocated on the stack, so we need to copy it to safety
type = transient ? (MonoType *)g_malloc (MONO_SIZEOF_TYPE) : (MonoType *)mono_image_alloc (m, MONO_SIZEOF_TYPE);
memcpy (type, &stype, MONO_SIZEOF_TYPE);
}
g_assert (type != &stype);
return type;
}

Expand Down Expand Up @@ -3977,6 +4087,8 @@ do_mono_metadata_parse_type (MonoType *type, MonoImage *m, MonoGenericContainer
void
mono_metadata_free_type (MonoType *type)
{
/* Note: keep in sync with do_mono_metadata_parse_type and try_get_canonical_type which
* allocate memory or try to avoid allocating memory. */
if (type >= builtin_types && type < builtin_types + NBUILTIN_TYPES ())
return;

Expand Down

0 comments on commit bf885a3

Please sign in to comment.