#include "moar.h" /* Checks if we have any existing specialization of this. */ static MVMint32 have_existing_specialization(MVMThreadContext *tc, MVMStaticFrame *sf, MVMCallsite *cs, MVMSpeshStatsType *type_tuple) { MVMStaticFrameSpesh *sfs = sf->body.spesh; MVMuint32 i; for (i = 0; i < sfs->body.num_spesh_candidates; i++) { if (sfs->body.spesh_candidates[i]->body.cs == cs) { /* Callsite matches. Is it a matching certain specialization? */ MVMSpeshStatsType *cand_type_tuple = sfs->body.spesh_candidates[i]->body.type_tuple; if (type_tuple == NULL && cand_type_tuple == NULL) { /* Yes, so we're done. */ return 1; } else if (type_tuple != NULL && cand_type_tuple != NULL) { /* Typed specialization, so compare the tuples. */ size_t tt_size = cs->flag_count * sizeof(MVMSpeshStatsType); if (memcmp(type_tuple, cand_type_tuple, tt_size) == 0) return 1; } } } return 0; } /* Adds a planned specialization, provided it doesn't already exist (this may * happen due to further data suggesting it being logged while it was being * produced). */ static void add_planned(MVMThreadContext *tc, MVMSpeshPlan *plan, MVMSpeshPlannedKind kind, MVMStaticFrame *sf, MVMSpeshStatsByCallsite *cs_stats, MVMSpeshStatsType *type_tuple, MVMSpeshStatsByType **type_stats, MVMuint32 num_type_stats) { MVMSpeshPlanned *p; if (sf->body.bytecode_size > MVM_SPESH_MAX_BYTECODE_SIZE || have_existing_specialization(tc, sf, cs_stats->cs, type_tuple)) { /* Clean up allocated memory. * NB - the only caller is plan_for_cs, which means that we could do the * allocations in here, except that we need the type tuple for the * lookup already. So this is messy but it works. */ MVM_free(type_stats); MVM_free(type_tuple); return; } if (plan->num_planned == plan->alloc_planned) { plan->alloc_planned += 16; plan->planned = MVM_realloc(plan->planned, plan->alloc_planned * sizeof(MVMSpeshPlanned)); } p = &(plan->planned[plan->num_planned++]); p->kind = kind; p->sf = sf; p->cs_stats = cs_stats; p->type_tuple = type_tuple; p->type_stats = type_stats; p->num_type_stats = num_type_stats; if (num_type_stats) { MVMuint32 i; p->max_depth = type_stats[0]->max_depth; for (i = 1; i < num_type_stats; i++) if (type_stats[i]->max_depth > p->max_depth) p->max_depth = type_stats[i]->max_depth; } else { p->max_depth = cs_stats->max_depth; } } /* Makes a copy of an argument type tuple. */ MVMSpeshStatsType * MVM_spesh_plan_copy_type_tuple(MVMThreadContext *tc, MVMCallsite *cs, MVMSpeshStatsType *to_copy) { size_t stats_size = cs->flag_count * sizeof(MVMSpeshStatsType); MVMSpeshStatsType *result = MVM_malloc(stats_size); memcpy(result, to_copy, stats_size); return result; } /* Used to track counts of types in a given parameter position, for the purpose * of deciding what to specialize on. */ typedef struct { MVMSpeshStatsType type; MVMuint32 count; } ParamTypeCount; // TODO: rename, move #define PERCENT_RELEVANT 40 /* Considers the statistics of a given callsite + static frame pairing and * plans specializations to produce for it. */ static void plan_for_cs(MVMThreadContext *tc, MVMSpeshPlan *plan, MVMStaticFrame *sf, MVMSpeshStatsByCallsite *by_cs, MVMuint64 *in_certain_specialization, MVMuint64 *in_observed_specialization, MVMuint64 *in_osr_specialization) { /* First, make sure it even is possible to specialize something by type * in this code. */ MVMuint32 specializations = 0; if (sf->body.specializable && by_cs->cs) { /* It is. We'll try and produce some specializations, looping until * no tuples that remain give us anything significant. 
         */
        MVMuint32 required_hits = (PERCENT_RELEVANT * (by_cs->hits + by_cs->osr_hits)) / 100;
        MVMuint8 *tuples_used = MVM_calloc(by_cs->num_by_type, 1);
        MVMuint32 num_obj_args = 0, i;
        for (i = 0; i < by_cs->cs->flag_count; i++)
            if (by_cs->cs->arg_flags[i] & MVM_CALLSITE_ARG_OBJ)
                num_obj_args++;
        while (specializations < by_cs->num_by_type) {
            /* Here, we'll look through the incoming argument tuples to try
             * to produce a tuple to specialize on. In some cases, we'll find a
             * high degree of stability in all arguments, e.g. the Int candidate
             * of infix:<+> will probably just always have (Int,Int). In others,
             * we'll find only some arguments are stable, e.g. `push` is likely in
             * any non-trivial program to have little variance in the first
             * argument, but a lot in the second. */
            MVMSpeshStatsType *chosen_tuple = MVM_calloc(by_cs->cs->flag_count,
                sizeof(MVMSpeshStatsType));
            MVMuint8 *chosen_position = MVM_calloc(by_cs->cs->flag_count, 1);
            MVMuint32 param_idx, j, k, have_chosen;
            MVM_VECTOR_DECL(ParamTypeCount, type_counts);
            MVM_VECTOR_INIT(type_counts, by_cs->num_by_type);
            for (param_idx = 0; param_idx < by_cs->cs->flag_count; param_idx++) {
                /* Skip over non-object types. */
                if (!(by_cs->cs->arg_flags[param_idx] & MVM_CALLSITE_ARG_OBJ))
                    continue;

                /* Sum up the counts of this parameter's types. */
                MVM_VECTOR_CLEAR(type_counts);
                for (j = 0; j < by_cs->num_by_type; j++) {
                    /* Make sure that the prefix matches what we've already decided
                     * to focus on, and that the tuple wasn't already covered. */
                    MVMSpeshStatsByType *by_type = &(by_cs->by_type[j]);
                    MVMint32 found, valid;
                    if (tuples_used[j])
                        continue;
                    valid = 1;
                    for (k = 0; k < param_idx; k++) {
                        if (chosen_position[k] && memcmp(&(by_type->arg_types[k]),
                                &(chosen_tuple[k]), sizeof(MVMSpeshStatsType)) != 0) {
                            valid = 0;
                            break;
                        }
                    }
                    if (!valid)
                        continue;

                    /* Otherwise, assemble its counts. */
                    found = 0;
                    for (k = 0; k < MVM_VECTOR_ELEMS(type_counts); k++) {
                        if (memcmp(&(type_counts[k].type), &(by_type->arg_types[param_idx]),
                                sizeof(MVMSpeshStatsType)) == 0) {
                            type_counts[k].count += by_type->hits + by_type->osr_hits;
                            found = 1;
                            break;
                        }
                    }
                    if (!found) {
                        ParamTypeCount ptc;
                        ptc.type = by_type->arg_types[param_idx];
                        ptc.count = by_type->hits + by_type->osr_hits;
                        MVM_VECTOR_PUSH(type_counts, ptc);
                    }
                }

                /* Go through those counts and see if any entry meets the
                 * threshold needed to specialize on it. */
                for (j = 0; j < MVM_VECTOR_ELEMS(type_counts); j++) {
                    if (type_counts[j].count >= required_hits) {
                        /* Yes, found; copy into the type tuple and mark chosen. */
                        chosen_tuple[param_idx] = type_counts[j].type;
                        chosen_position[param_idx] = 1;
                        break;
                    }
                }
            }
            MVM_VECTOR_DESTROY(type_counts);

            /* By this point, we may well have a tuple to specialize on. Check
             * if that is the case. */
            have_chosen = 0;
            for (j = 0; j < by_cs->cs->flag_count; j++) {
                if (chosen_position[j]) {
                    have_chosen = 1;
                    break;
                }
            }
            if (have_chosen || num_obj_args == 0) {
                /* Yes, we have a decision. Gather all tuples that provide
                 * evidence for the choice. */
                MVM_VECTOR_DECL(MVMSpeshStatsByType *, evidence);
                MVM_VECTOR_INIT(evidence, 4);
                for (j = 0; j < by_cs->num_by_type; j++) {
                    MVMint32 matching = 1;
                    for (k = 0; k < by_cs->cs->flag_count; k++) {
                        if (chosen_position[k] && memcmp(&(by_cs->by_type[j].arg_types[k]),
                                &(chosen_tuple[k]), sizeof(MVMSpeshStatsType)) != 0) {
                            matching = 0;
                            break;
                        }
                    }
                    if (matching) {
                        MVM_VECTOR_PUSH(evidence, &(by_cs->by_type[j]));
                        tuples_used[j] = 1;
                    }
                }

                /* Add the specialization. */
                add_planned(tc, plan, MVM_VECTOR_ELEMS(evidence) == 1 ?
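                    /* A single supporting tuple means the chosen types were
                     * directly observed; several mean they were derived from
                     * partially matching tuples. */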
                    MVM_SPESH_PLANNED_OBSERVED_TYPES : MVM_SPESH_PLANNED_DERIVED_TYPES,
                    sf, by_cs, chosen_tuple, evidence, MVM_VECTOR_ELEMS(evidence));
                specializations++;

                /* Clean up and look for the next tuple worth specializing on. */
                MVM_free(chosen_position);
            }
            else {
                /* No fitting tuple; clean up and leave the loop. */
                MVM_free(chosen_position);
                MVM_free(chosen_tuple);
                break;
            }
        }

        /* Clean up allocated memory. */
        MVM_free(tuples_used);
    }

    /* If we get here and found no specializations to produce, we can add
     * a certain specialization instead. */
    if (!specializations)
        add_planned(tc, plan, MVM_SPESH_PLANNED_CERTAIN, sf, by_cs, NULL, NULL, 0);
}

/* Considers the statistics of a given static frame and plans specializations
 * to produce for it. */
static void plan_for_sf(MVMThreadContext *tc, MVMSpeshPlan *plan, MVMStaticFrame *sf,
        MVMuint64 *in_certain_specialization, MVMuint64 *in_observed_specialization,
        MVMuint64 *in_osr_specialization) {
    MVMSpeshStats *ss = sf->body.spesh->body.spesh_stats;
    MVMuint32 threshold = MVM_spesh_threshold(tc, sf);
    if (ss->hits >= threshold || ss->osr_hits >= MVM_SPESH_PLAN_SF_MIN_OSR) {
        /* The frame is hot enough; look through its callsites to see if any
         * of those are. */
        MVMuint32 i;
        for (i = 0; i < ss->num_by_callsite; i++) {
            MVMSpeshStatsByCallsite *by_cs = &(ss->by_callsite[i]);
            if (by_cs->hits >= threshold || by_cs->osr_hits >= MVM_SPESH_PLAN_CS_MIN_OSR)
                plan_for_cs(tc, plan, sf, by_cs, in_certain_specialization,
                    in_observed_specialization, in_osr_specialization);
        }
    }
}

/* Maximum stack depth is a decent heuristic for the order to specialize in,
 * but sometimes it's misleading, and we end up with a planned specialization
 * of a callee having a lower maximum than the caller. Boost the depth of any
 * callees in such a situation. */
static void twiddle_stack_depths(MVMThreadContext *tc, MVMSpeshPlanned *planned,
        MVMuint32 num_planned) {
    MVMuint32 i;
    if (num_planned < 2)
        return;
    for (i = 0; i < num_planned; i++) {
        /* For each planned specialization, look for its calls. */
        MVMSpeshPlanned *p = &(planned[i]);
        MVMuint32 j;
        for (j = 0; j < p->num_type_stats; j++) {
            MVMSpeshStatsByType *sbt = p->type_stats[j];
            MVMuint32 k;
            for (k = 0; k < sbt->num_by_offset; k++) {
                MVMSpeshStatsByOffset *sbo = &(sbt->by_offset[k]);
                MVMuint32 l;
                for (l = 0; l < sbo->num_invokes; l++) {
                    /* Found an invoke. If we plan a specialization for it,
                     * then bump its depth. */
                    MVMStaticFrame *invoked_sf = sbo->invokes[l].sf;
                    MVMuint32 m;
                    for (m = 0; m < num_planned; m++)
                        if (planned[m].sf == invoked_sf)
                            planned[m].max_depth = p->max_depth + 1;
                }
            }
        }
    }
}

/* Sorts the plan in descending order of maximum call depth. */
static void sort_plan(MVMThreadContext *tc, MVMSpeshPlanned *planned, MVMuint32 n) {
    if (n >= 2) {
        MVMSpeshPlanned pivot = planned[n / 2];
        MVMuint32 i, j;
        for (i = 0, j = n - 1; ; i++, j--) {
            MVMSpeshPlanned temp;
            while (planned[i].max_depth > pivot.max_depth)
                i++;
            while (planned[j].max_depth < pivot.max_depth)
                j--;
            if (i >= j)
                break;
            temp = planned[i];
            planned[i] = planned[j];
            planned[j] = temp;
        }
        sort_plan(tc, planned, i);
        sort_plan(tc, planned + i, n - i);
    }
}

/* Forms a specialization plan from considering all frames whose statistics
 * have changed.
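 * The resulting plan is sorted in descending order of maximum call depth.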
 */
MVMSpeshPlan * MVM_spesh_plan(MVMThreadContext *tc, MVMObject *updated_static_frames,
        MVMuint64 *in_certain_specialization, MVMuint64 *in_observed_specialization,
        MVMuint64 *in_osr_specialization) {
    MVMSpeshPlan *plan = MVM_calloc(1, sizeof(MVMSpeshPlan));
    MVMint64 updated = MVM_repr_elems(tc, updated_static_frames);
    MVMint64 i;
#if MVM_GC_DEBUG
    tc->in_spesh = 1;
#endif
    for (i = 0; i < updated; i++) {
        MVMObject *sf = MVM_repr_at_pos_o(tc, updated_static_frames, i);
        plan_for_sf(tc, plan, (MVMStaticFrame *)sf, in_certain_specialization,
            in_observed_specialization, in_osr_specialization);
    }
    twiddle_stack_depths(tc, plan->planned, plan->num_planned);
    sort_plan(tc, plan->planned, plan->num_planned);
#if MVM_GC_DEBUG
    tc->in_spesh = 0;
#endif
    return plan;
}

/* Marks garbage-collectable objects held in the spesh plan. */
void MVM_spesh_plan_gc_mark(MVMThreadContext *tc, MVMSpeshPlan *plan, MVMGCWorklist *worklist) {
    MVMuint32 i;
    if (!plan)
        return;
    for (i = 0; i < plan->num_planned; i++) {
        MVMSpeshPlanned *p = &(plan->planned[i]);
        MVM_gc_worklist_add(tc, worklist, &(p->sf));
        if (p->type_tuple) {
            MVMCallsite *cs = p->cs_stats->cs;
            MVMuint32 j;
            for (j = 0; j < cs->flag_count; j++) {
                if (cs->arg_flags[j] & MVM_CALLSITE_ARG_OBJ) {
                    MVM_gc_worklist_add(tc, worklist, &(p->type_tuple[j].type));
                    MVM_gc_worklist_add(tc, worklist, &(p->type_tuple[j].decont_type));
                }
            }
        }
    }
}

void MVM_spesh_plan_gc_describe(MVMThreadContext *tc, MVMHeapSnapshotState *ss, MVMSpeshPlan *plan) {
    MVMuint32 i;
    MVMuint64 cache_1 = 0;
    MVMuint64 cache_2 = 0;
    MVMuint64 cache_3 = 0;
    if (!plan)
        return;
    for (i = 0; i < plan->num_planned; i++) {
        MVMSpeshPlanned *p = &(plan->planned[i]);
        MVM_profile_heap_add_collectable_rel_const_cstr_cached(tc, ss,
            (MVMCollectable*)(p->sf), "staticframe", &cache_1);
        if (p->type_tuple) {
            MVMCallsite *cs = p->cs_stats->cs;
            MVMuint32 j;
            for (j = 0; j < cs->flag_count; j++) {
                if (cs->arg_flags[j] & MVM_CALLSITE_ARG_OBJ) {
                    MVM_profile_heap_add_collectable_rel_const_cstr_cached(tc, ss,
                        (MVMCollectable*)(p->type_tuple[j].type),
                        "argument type", &cache_2);
                    MVM_profile_heap_add_collectable_rel_const_cstr_cached(tc, ss,
                        (MVMCollectable*)(p->type_tuple[j].decont_type),
                        "argument decont type", &cache_3);
                }
            }
        }
    }
}

/* Frees all memory associated with a specialization plan. */
void MVM_spesh_plan_destroy(MVMThreadContext *tc, MVMSpeshPlan *plan) {
    MVMuint32 i;
    for (i = 0; i < plan->num_planned; i++) {
        MVM_free(plan->planned[i].type_stats);
        MVM_free(plan->planned[i].type_tuple);
    }
    MVM_free(plan->planned);
    MVM_free(plan);
}