Skip to content

Commit

Permalink
[interp] Replace ldloca + ldfld/stfld pairs with ldloc/stloc (#33952)
Browse files Browse the repository at this point in the history
The replacement is done only if the loaded/stored field is the only field of the valuetype. This is mainly meant to optimize IntPtr accesses: IntPtr contains a single void* field and is heavily used in Unsafe intrinsics.

Co-authored-by: BrzVlad <[email protected]>
  • Loading branch information
monojenkins and BrzVlad authored Mar 24, 2020
1 parent 3d8073d commit 2c4fd80
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 23 deletions.
1 change: 1 addition & 0 deletions src/mono/mono/mini/interp/interp-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ typedef struct {
gint32 movlocs;
gint32 copy_propagations;
gint32 constant_folds;
gint32 ldlocas_removed;
gint32 killed_instructions;
gint32 emitted_instructions;
gint32 super_instructions;
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/mini/interp/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -7649,6 +7649,7 @@ register_interp_stats (void)
mono_counters_register ("Copy propagations", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.copy_propagations);
mono_counters_register ("Added pop count", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.added_pop_count);
mono_counters_register ("Constant folds", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.constant_folds);
mono_counters_register ("Ldlocas removed", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.ldlocas_removed);
mono_counters_register ("Super instructions", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.super_instructions);
mono_counters_register ("Killed instructions", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.killed_instructions);
mono_counters_register ("Emitted instructions", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.emitted_instructions);
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/mini/interp/mintops.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ typedef enum {
/*
 * Opcode range predicates. Each macro is true when `op` lies inside the
 * inclusive range delimited by the two opcodes named in its expansion.
 * `op` is expanded twice, so pass an expression without side effects.
 */
#define MINT_IS_LDLOCFLD(op) (MINT_LDLOCFLD_I1 <= (op) && (op) <= MINT_LDLOCFLD_O)
#define MINT_IS_STLOCFLD(op) (MINT_STLOCFLD_I1 <= (op) && (op) <= MINT_STLOCFLD_O)
#define MINT_IS_LOCUNOP(op) (MINT_LOCADD1_I4 <= (op) && (op) <= MINT_LOCSUB1_I8)
#define MINT_IS_LDFLD(op) (MINT_LDFLD_I1 <= (op) && (op) <= MINT_LDFLD_O)


#define MINT_POP_ALL -2
Expand Down
81 changes: 60 additions & 21 deletions src/mono/mono/mini/interp/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -842,6 +842,7 @@ create_interp_local (TransformData *td, MonoType *type)
td->locals [td->locals_size].type = type;
td->locals [td->locals_size].mt = mint_type (type);
td->locals [td->locals_size].flags = 0;
td->locals [td->locals_size].indirects = 0;
td->locals [td->locals_size].offset = -1;
td->locals_size++;
return td->locals_size - 1;
Expand Down Expand Up @@ -2899,6 +2900,7 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
imethod->local_offsets [i] = offset;
td->locals [i].offset = offset;
td->locals [i].flags = 0;
td->locals [i].indirects = 0;
td->locals [i].type = header->locals [i];
td->locals [i].mt = mint_type (header->locals [i]);
offset += size;
Expand Down Expand Up @@ -3571,7 +3573,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
int loc_n = arg_locals [n];
interp_add_ins (td, MINT_LDLOCA_S);
td->last_ins->data [0] = loc_n;
td->locals [loc_n].flags |= INTERP_LOCAL_FLAG_INDIRECT;
td->locals [loc_n].indirects++;
}
PUSH_SIMPLE_TYPE(td, STACK_TYPE_MP);
td->ip += 2;
Expand Down Expand Up @@ -3601,7 +3603,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
if (inlining)
loc_n = local_locals [loc_n];
td->last_ins->data [0] = loc_n;
td->locals [loc_n].flags |= INTERP_LOCAL_FLAG_INDIRECT;
td->locals [loc_n].indirects++;
PUSH_SIMPLE_TYPE(td, STACK_TYPE_MP);
td->ip += 2;
break;
Expand Down Expand Up @@ -4738,7 +4740,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
store_local (td, local);
interp_add_ins (td, MINT_LDLOCA_S);
td->last_ins->data [0] = local;
td->locals [local].flags |= INTERP_LOCAL_FLAG_INDIRECT;
td->locals [local].indirects++;
PUSH_SIMPLE_TYPE (td, STACK_TYPE_MP);
} else {
interp_add_ins (td, MINT_UNBOX);
Expand Down Expand Up @@ -6109,7 +6111,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
int loc_n = arg_locals [n];
interp_add_ins (td, MINT_LDLOCA_S);
td->last_ins->data [0] = loc_n;
td->locals [loc_n].flags |= INTERP_LOCAL_FLAG_INDIRECT;
td->locals [loc_n].indirects++;
}
PUSH_SIMPLE_TYPE(td, STACK_TYPE_MP);
td->ip += 3;
Expand Down Expand Up @@ -6139,7 +6141,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
if (inlining)
loc_n = local_locals [loc_n];
td->last_ins->data [0] = loc_n;
td->locals [loc_n].flags |= INTERP_LOCAL_FLAG_INDIRECT;
td->locals [loc_n].indirects++;
PUSH_SIMPLE_TYPE(td, STACK_TYPE_MP);
td->ip += 3;
break;
Expand Down Expand Up @@ -6710,8 +6712,9 @@ interp_local_deadce (TransformData *td, int *local_ref_count)

for (int i = 0; i < td->locals_size; i++) {
g_assert (local_ref_count [i] >= 0);
g_assert (td->locals [i].indirects >= 0);
if (!local_ref_count [i] &&
(td->locals [i].flags & INTERP_LOCAL_FLAG_INDIRECT) == 0 &&
!td->locals [i].indirects &&
(td->locals [i].flags & INTERP_LOCAL_FLAG_DEAD) == 0) {
needs_dce = TRUE;
// If we do another deadce iteration over the code, make sure we don't try
Expand All @@ -6729,19 +6732,19 @@ interp_local_deadce (TransformData *td, int *local_ref_count)
// Kill instructions that don't use stack and are storing into dead locals
for (ins = td->first_ins; ins != NULL; ins = ins->next) {
if (MINT_IS_STLOC_NP (ins->opcode)) {
if (!local_ref_count [ins->data [0]] && (td->locals [ins->data [0]].flags & INTERP_LOCAL_FLAG_INDIRECT) == 0) {
if (!local_ref_count [ins->data [0]] && !td->locals [ins->data [0]].indirects) {
interp_clear_ins (td, ins);
mono_interp_stats.killed_instructions++;
// We killed an instruction that makes use of the stack. This might uncover new optimizations
needs_cprop = TRUE;
}
} else if (MINT_IS_MOVLOC (ins->opcode)) {
if (!local_ref_count [ins->data [1]] && (td->locals [ins->data [1]].flags & INTERP_LOCAL_FLAG_INDIRECT) == 0) {
if (!local_ref_count [ins->data [1]] && !td->locals [ins->data [1]].indirects) {
interp_clear_ins (td, ins);
mono_interp_stats.killed_instructions++;
}
} else if (MINT_IS_STLOC (ins->opcode) && ins->opcode != MINT_STLOC_VT) {
if (!local_ref_count [ins->data [0]] && (td->locals [ins->data [0]].flags & INTERP_LOCAL_FLAG_INDIRECT) == 0) {
if (!local_ref_count [ins->data [0]] && !td->locals [ins->data [0]].indirects) {
// We store to a dead stloc, we can replace it with a POP to save local space
ins->opcode = MINT_POP;
mono_interp_stats.added_pop_count++;
Expand Down Expand Up @@ -7066,7 +7069,7 @@ interp_cprop (TransformData *td)
if (replace_op) {
int stored_local = prev_ins->data [0];
sp->ins = NULL;
if (sp->val.type == STACK_VALUE_NONE && !(td->locals [stored_local].flags & INTERP_LOCAL_FLAG_INDIRECT)) {
if (sp->val.type == STACK_VALUE_NONE && !td->locals [stored_local].indirects) {
// We know what local is on the stack now. Track it
sp->val.type = STACK_VALUE_LOCAL;
sp->val.local = stored_local;
Expand All @@ -7086,12 +7089,12 @@ interp_cprop (TransformData *td)
}
}
} else if (locals [loaded_local].type == STACK_VALUE_LOCAL) {
g_assert (!(td->locals [loaded_local].flags & INTERP_LOCAL_FLAG_INDIRECT));
g_assert (!td->locals [loaded_local].indirects);
// do copy propagation of the original source
mono_interp_stats.copy_propagations++;
local_ref_count [loaded_local]--;
// We can't propagate a local that has its address taken
g_assert (!(td->locals [locals [loaded_local].local].flags & INTERP_LOCAL_FLAG_INDIRECT));
g_assert (!td->locals [locals [loaded_local].local].indirects);
ins->data [0] = locals [loaded_local].local;
local_ref_count [ins->data [0]]++;
if (td->verbose_level) {
Expand All @@ -7100,7 +7103,7 @@ interp_cprop (TransformData *td)
}
} else if (locals [loaded_local].type == STACK_VALUE_I4 || locals [loaded_local].type == STACK_VALUE_I8) {
gboolean is_i4 = locals [loaded_local].type == STACK_VALUE_I4;
g_assert (!(td->locals [loaded_local].flags & INTERP_LOCAL_FLAG_INDIRECT));
g_assert (!td->locals [loaded_local].indirects);
if (is_i4)
ins = interp_get_ldc_i4_from_const (td, ins, locals [loaded_local].i);
else
Expand All @@ -7120,7 +7123,7 @@ interp_cprop (TransformData *td)
// Save the ldloc on the stack if it wasn't optimized away
// For simplicity we don't track locals that have their address taken
// since it is hard to detect instructions that change the local value.
if (td->locals [loaded_local].flags & INTERP_LOCAL_FLAG_INDIRECT) {
if (td->locals [loaded_local].indirects) {
sp->val.type = STACK_VALUE_NONE;
} else {
sp->val.type = STACK_VALUE_LOCAL;
Expand All @@ -7138,7 +7141,7 @@ interp_cprop (TransformData *td)
// The locals have the same type. We can propagate the value
int vtsize = (ins->opcode == MINT_STLOC_VT) ? ins->data [1] : 0;

if (!(td->locals [dest_local].flags & INTERP_LOCAL_FLAG_INDIRECT)) {
if (!td->locals [dest_local].indirects) {
// Track what exactly is stored into local
locals [dest_local].type = STACK_VALUE_LOCAL;
locals [dest_local].local = src_local;
Expand Down Expand Up @@ -7168,7 +7171,7 @@ interp_cprop (TransformData *td)
locals [dest_local].type = STACK_VALUE_NONE;
} else {
g_assert (sp->val.type == STACK_VALUE_I4 || sp->val.type == STACK_VALUE_I8);
if (!(td->locals [dest_local].flags & INTERP_LOCAL_FLAG_INDIRECT))
if (!td->locals [dest_local].indirects)
locals [dest_local] = sp->val;
}
clear_stack_content_info_for_local (stack, sp, dest_local);
Expand Down Expand Up @@ -7206,7 +7209,7 @@ interp_cprop (TransformData *td)
int src_local = ins->data [0];
int dest_local = ins->data [1];
local_ref_count [src_local]++;
if (!(td->locals [dest_local].flags & INTERP_LOCAL_FLAG_INDIRECT)) {
if (!td->locals [dest_local].indirects) {
if (locals [src_local].type != STACK_VALUE_NONE) {
locals [dest_local] = locals [src_local];
} else {
Expand All @@ -7221,7 +7224,7 @@ interp_cprop (TransformData *td)
// Prevent optimizing away the instruction that pushed the value on the stack
sp [-1].ins = NULL;
// The local contains the value of the top of stack
if (!(td->locals [dest_local].flags & INTERP_LOCAL_FLAG_INDIRECT)) {
if (!td->locals [dest_local].indirects) {
locals [dest_local] = sp [-1].val;
clear_stack_content_info_for_local (stack, sp, dest_local);
clear_local_content_info_for_local (locals, locals + td->locals_size, dest_local);
Expand Down Expand Up @@ -7299,10 +7302,46 @@ interp_cprop (TransformData *td)
} else if (MINT_IS_BINOP (ins->opcode)) {
ins = interp_fold_binop (td, sp, ins);
sp--;
} else if (ins->opcode >= MINT_STFLD_I1 && ins->opcode <= MINT_STFLD_O && (mono_interp_opt & INTERP_OPT_SUPER_INSTRUCTIONS)) {
} else if (ins->opcode == MINT_LDLOCA_S && MINT_IS_LDFLD (ins->next->opcode) &&
td->locals [ins->data [0]].mt == (ins->next->opcode - MINT_LDFLD_I1) &&
ins->next->data [0] == 0) {
int mt = ins->next->opcode - MINT_LDFLD_I1;
int local = ins->data [0];
// Replace LDLOCA + LDFLD with LDLOC, when the loaded field represents
// the entire local. This is the case with loading the only field of
// an IntPtr. We don't handle value type loads.
ins->next->opcode = MINT_LDLOC_I1 + mt;
ins->next->data [0] = local;
td->locals [local].indirects--;
interp_clear_ins (td, ins);
mono_interp_stats.killed_instructions++;
mono_interp_stats.ldlocas_removed++;
if (td->verbose_level) {
g_print ("Replace ldloca/ldfld pair :\n\t");
dump_interp_inst_newline (ins->next);
}
} else if (ins->opcode >= MINT_STFLD_I1 && ins->opcode <= MINT_STFLD_O) {
StackContentInfo *src = &sp [-2];
if (src->ins) {
if (src->val.type == STACK_VALUE_LOCAL) {
if (src->ins->opcode == MINT_LDLOCA_S && td->locals [src->ins->data [0]].mt == (ins->opcode - MINT_STFLD_I1) &&
ins->data [0] == 0) {
int mt = ins->opcode - MINT_STFLD_I1;
int local = src->ins->data [0];
interp_clear_ins (td, src->ins);
ins->opcode = MINT_STLOC_I1 + mt;
ins->data [0] = local;
td->locals [local].indirects--;
mono_interp_stats.killed_instructions++;
mono_interp_stats.ldlocas_removed++;
// FIXME Update stack contents for stloc, we currently rely on cprop running again.
clear_stack_content_info_for_local (stack, sp, local);
clear_local_content_info_for_local (locals, locals + td->locals_size, local);

if (td->verbose_level) {
g_print ("Replace ldloca/stfld pair (off %p) :\n\t", src->ins->il_offset);
dump_interp_inst_newline (ins);
}
} else if (src->val.type == STACK_VALUE_LOCAL && (mono_interp_opt & INTERP_OPT_SUPER_INSTRUCTIONS)) {
int loc_index = src->val.local;
int fld_offset = ins->data [0];
int mt = ins->opcode - MINT_STFLD_I1;
Expand All @@ -7314,7 +7353,7 @@ interp_cprop (TransformData *td)
interp_clear_ins (td, src->ins);
mono_interp_stats.super_instructions++;
mono_interp_stats.killed_instructions++;
} else if (src->val.type == STACK_VALUE_ARG) {
} else if (src->val.type == STACK_VALUE_ARG && (mono_interp_opt & INTERP_OPT_SUPER_INSTRUCTIONS)) {
int arg_index = src->val.arg;
int fld_offset = ins->data [0];
int mt = ins->opcode - MINT_STFLD_I1;
Expand Down
4 changes: 2 additions & 2 deletions src/mono/mono/mini/interp/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
#define INTERP_INST_FLAG_SEQ_POINT_NESTED_CALL 8
#define INTERP_INST_FLAG_RECORD_CALL_PATCH 16

#define INTERP_LOCAL_FLAG_INDIRECT 1
#define INTERP_LOCAL_FLAG_DEAD 2
#define INTERP_LOCAL_FLAG_DEAD 1

typedef struct InterpInst InterpInst;

Expand Down Expand Up @@ -91,6 +90,7 @@ typedef struct {
MonoType *type;
int mt;
int flags;
int indirects;
int offset;
} InterpLocal;

Expand Down

0 comments on commit 2c4fd80

Please sign in to comment.