Skip to content

Commit

Permalink
gf: improve ordering of operations based on performance estimates (#3…
Browse files Browse the repository at this point in the history
…6436)

In the last commit, I didn't expect NamedTuple to hammer our
performance, but was starting to notice performance problems with trying
to call constructors. It's somewhat violating our best-practices in two
common cases (both the constructor and structdiff functions are
dispatching on non-leaf types instead of values). That was putting a lot
of strain on the cache, and so it forms a good test case. Keeping those
cases out of the cache, and searching the cache in a more suitable order
(significant mainly for constructors because there are always so many of
them), offsets that--and seems to possibly make us slightly faster
overall as a bonus because of that effect!
  • Loading branch information
vtjnash authored Jun 26, 2020
1 parent ab520c7 commit d762e8c
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 8 deletions.
35 changes: 27 additions & 8 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1139,8 +1139,11 @@ static jl_method_instance_t *cache_method(

jl_typemap_entry_t *newentry = jl_typemap_alloc(cachett, simplett, guardsigs, (jl_value_t*)newmeth, min_valid, max_valid);
temp = (jl_value_t*)newentry;
if (mt && cachett == tt && jl_svec_len(guardsigs) == 0) {
if (!jl_has_free_typevars((jl_value_t*)tt) && jl_lookup_cache_type_(tt) == NULL) {
if (mt && cachett == tt && jl_svec_len(guardsigs) == 0 && tt->hash && !tt->hasfreetypevars) {
// we check `tt->hash` exists, since otherwise the NamedTuple
// constructor and `structdiff` method pollutes this lookup with a lot
// of garbage in the linear table search
if (jl_lookup_cache_type_(tt) == NULL) {
// if this type isn't normally in the cache, force it in there now
// anyways so that we can depend on it as a token (especially since
// we just cached it in memory as this method signature anyways)
Expand Down Expand Up @@ -1948,6 +1951,11 @@ jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs
return jl_inst_arg_tuple_type(arg1, args, nargs, 1);
}

jl_tupletype_t *lookup_arg_type_tuple(jl_value_t *arg1 JL_PROPAGATES_ROOT, jl_value_t **args, size_t nargs)
{
return jl_lookup_arg_tuple_type(arg1, args, nargs, 1);
}

jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world)
{
assert(nargs > 0 && "expected caller to handle this case");
Expand Down Expand Up @@ -2449,14 +2457,25 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
// if no method was found in the associative cache, check the full cache
JL_TIMING(METHOD_LOOKUP_FAST);
mt = jl_gf_mtable(F);
entry = jl_typemap_assoc_exact(mt->cache, F, args, nargs, jl_cachearg_offset(mt), world);
jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
entry = NULL;
if (leafcache != (jl_array_t*)jl_an_empty_vec_any && jl_typeis(mt->cache, jl_typemap_level_type)) {
// hashing args is expensive, but looking at mt->cache is probably even more expensive
tt = lookup_arg_type_tuple(F, args, nargs);
if (tt != NULL)
entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world);
}
if (entry == NULL) {
last_alloc = jl_options.malloc_log ? jl_gc_diff_total_bytes() : 0;
tt = arg_type_tuple(F, args, nargs);
jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world);
entry = jl_typemap_assoc_exact(mt->cache, F, args, nargs, jl_cachearg_offset(mt), world);
if (entry == NULL) {
last_alloc = jl_options.malloc_log ? jl_gc_diff_total_bytes() : 0;
if (tt == NULL) {
tt = arg_type_tuple(F, args, nargs);
entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world);
}
}
}
if (entry && entry->isleafsig && entry->simplesig == (void*)jl_nothing && entry->guardsigs == jl_emptysvec) {
if (entry != NULL && entry->isleafsig && entry->simplesig == (void*)jl_nothing && entry->guardsigs == jl_emptysvec) {
// put the entry into the cache if it's valid for a leafsig lookup,
// using pick_which to slightly randomize where it ends up
call_cache[cache_idx[++pick_which[cache_idx[0]] & 3]] = entry;
Expand Down
5 changes: 5 additions & 0 deletions src/jltypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1513,6 +1513,11 @@ jl_datatype_t *jl_inst_concrete_tupletype_v(jl_value_t **p, size_t np)
return (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, NULL, p, np, 1, NULL, NULL);
}

jl_tupletype_t *jl_lookup_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size_t nargs, int leaf)
{
return (jl_datatype_t*)lookup_typevalue(jl_tuple_typename, arg1, args, nargs, leaf);
}

jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size_t nargs, int leaf)
{
jl_tupletype_t *tt = (jl_datatype_t*)lookup_typevalue(jl_tuple_typename, arg1, args, nargs, leaf);
Expand Down
1 change: 1 addition & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,7 @@ int jl_has_concrete_subtype(jl_value_t *typ);
jl_datatype_t *jl_inst_concrete_tupletype_v(jl_value_t **p, size_t np) JL_ALWAYS_LEAFTYPE;
jl_datatype_t *jl_inst_concrete_tupletype(jl_svec_t *p) JL_ALWAYS_LEAFTYPE;
jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size_t nargs, int leaf);
jl_tupletype_t *jl_lookup_arg_tuple_type(jl_value_t *arg1 JL_PROPAGATES_ROOT, jl_value_t **args, size_t nargs, int leaf);
JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype);
jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_args_t fptr) JL_GC_DISABLED;
jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, int *issubty);
Expand Down

7 comments on commit d762e8c

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily benchmark build, I will reply here when finished:

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here. cc @ararslan

@vtjnash
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, that’s an unexpected lot of zeros!

@vtjnash
Copy link
Member Author

@vtjnash vtjnash commented on d762e8c Jun 27, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To put a couple of those in absolute terms (b15f6ad / ab520c7 / d762e8c), assuming I didn't just break the benchmarking tool somehow on a couple of these, lol:

["shootout"]["meteor_contest"] 1.517s 7.014s 1.513s
["sort"]["issorted"]
  ("reverse", "ascending") = 3.361μs 7.250μs 115ns
  ("reverse", "random") => 3.441μs 7.205μs 108ns
["collection"]["deletion"]
  ("Set", "Any", "pop!") => 12.494ms 29.153ms 104.961μs
["dates"]["parse"]
  ("Date", "DateFormat") => 14.981μs 78.495μs 16.246μs
["io"]["array_limit"]
  ("display", "Array{Float64,1}(100000000,)") => 318.152μs 129.989μs 52.161μs
  ("display", "Array{Float64,2}(100000000, 1)") => 316.363μs 129.568μs 52.492μs
  ("display", "Array{Float64,2}(10000, 10000)") => 2.300ms 1.026ms 396.583μs
["parallel"]["remotecall"]
  ("identity", 2) => 134.320μs 213.661μs 86.221μs
["array"]["cat"]
  "4467" => 4.069μs 14.642μs 1.419μs

@vtjnash
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nanosoldier runbenchmarks(ALL, vs="@52c55d7934f71c5b2d9f6e6fa98cb48817def57c")

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here. cc @ararslan

Please sign in to comment.