#include "moar.h" /* This is where the spesh stuff all begins. The logic in here takes bytecode * and builds a spesh graph from it. This is a CFG in SSA form. Transforming * to SSA involves computing dominance frontiers, done by the algorithm found * in http://www.cs.rice.edu/~keith/EMBED/dom.pdf. The SSA algorithm itself is * from http://www.cs.utexas.edu/~pingali/CS380C/2010/papers/ssaCytron.pdf. */ #define GET_I8(pc, idx) *((MVMint8 *)((pc) + (idx))) #define GET_UI8(pc, idx) *((MVMuint8 *)((pc) + (idx))) #define GET_I16(pc, idx) *((MVMint16 *)((pc) + (idx))) #define GET_UI16(pc, idx) *((MVMuint16 *)((pc) + (idx))) MVM_STATIC_INLINE MVMint32 GET_I32(const MVMuint8 *pc, MVMint32 idx) { MVMint32 retval; memcpy(&retval, pc + idx, sizeof(retval)); return retval; } MVM_STATIC_INLINE MVMuint32 GET_UI32(const MVMuint8 *pc, MVMint32 idx) { MVMuint32 retval; memcpy(&retval, pc + idx, sizeof(retval)); return retval; } MVM_STATIC_INLINE MVMuint64 GET_UI64(const MVMuint8 *pc, MVMint32 idx) { MVMuint64 retval; memcpy(&retval, pc + idx, sizeof(retval)); return retval; } MVM_STATIC_INLINE MVMuint32 GET_N32(const MVMuint8 *pc, MVMint32 idx) { MVMnum32 retval; memcpy(&retval, pc + idx, sizeof(retval)); return retval; } /* Allocate a piece of memory from the spesh graph's region * allocator. Deallocated when the spesh graph is. */ void * MVM_spesh_alloc(MVMThreadContext *tc, MVMSpeshGraph *g, size_t bytes) { return MVM_region_alloc(tc, &g->region_alloc, bytes); } /* Grows the spesh graph's deopt table if it is already full, so that we have * space for 1 more entry. */ void MVM_spesh_graph_grow_deopt_table(MVMThreadContext *tc, MVMSpeshGraph *g) { if (g->num_deopt_addrs == g->alloc_deopt_addrs) { g->alloc_deopt_addrs += 8; if (g->deopt_addrs) g->deopt_addrs = MVM_realloc(g->deopt_addrs, g->alloc_deopt_addrs * sizeof(MVMint32) * 2); else g->deopt_addrs = MVM_malloc(g->alloc_deopt_addrs * sizeof(MVMint32) * 2); } } /* Records a de-optimization annotation and mapping pair. */ MVMint32 MVM_spesh_graph_add_deopt_annotation(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshIns *ins_node, MVMuint32 deopt_target, MVMint32 type) { /* Add an annotations. */ MVMSpeshAnn *ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); ann->type = type; ann->data.deopt_idx = g->num_deopt_addrs; ann->next = ins_node->annotations; ins_node->annotations = ann; /* Record PC in the deopt entries table. */ MVM_spesh_graph_grow_deopt_table(tc, g); g->deopt_addrs[2 * g->num_deopt_addrs] = deopt_target; g->num_deopt_addrs++; return ann->data.deopt_idx; } MVM_FORMAT(printf, 4, 5) MVM_PUBLIC void MVM_spesh_graph_add_comment(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshIns *ins, const char *fmt, ...) { size_t size; char *comment; va_list ap; MVMSpeshAnn *ann; if (!MVM_spesh_debug_enabled(tc)) return; va_start(ap, fmt); size = vsnprintf(NULL, 0, fmt, ap); comment = MVM_spesh_alloc(tc, g, ++size); va_end(ap); ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); ann->type = MVM_SPESH_ANN_COMMENT; ann->next = ins->annotations; ins->annotations = ann; ann->data.comment = comment; ann->order = g->next_annotation_idx++; va_start(ap, fmt); vsnprintf(comment, size, fmt, ap); va_end(ap); } /* Records the current bytecode position as a logged annotation. Used for * resolving logged values. */ static void add_logged_annotation(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshIns *ins_node, MVMuint8 *pc) { MVMSpeshAnn *ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); ann->type = MVM_SPESH_ANN_LOGGED; ann->data.bytecode_offset = pc - g->bytecode; ann->next = ins_node->annotations; ins_node->annotations = ann; } /* Records the current bytecode position as an inline cache annotation. Used for * resolving to the correct inline cache entry. */ static void add_cached_annotation(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshIns *ins_node, MVMuint8 *pc) { MVMSpeshAnn *ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); ann->type = MVM_SPESH_ANN_CACHED; ann->data.bytecode_offset = pc - g->bytecode; ann->next = ins_node->annotations; ins_node->annotations = ann; } /* Finds the linearly previous basic block (not cheap, but uncommon). */ MVMSpeshBB * MVM_spesh_graph_linear_prev(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB *search) { MVMSpeshBB *bb = g->entry; while (bb) { if (bb->linear_next == search) return bb; bb = bb->linear_next; } return NULL; } /* Checks if a handler is a catch handler or a control handler. */ static MVMint32 is_catch_handler(MVMThreadContext *tc, MVMSpeshGraph *g, MVMint32 handler_idx) { return g->handlers[handler_idx].category_mask & MVM_EX_CAT_CATCH; } /* Checks if a basic block already has a particular successor. */ static MVMint32 already_succs(MVMThreadContext *tc, MVMSpeshBB *bb, MVMSpeshBB *succ) { MVMint32 i = 0; for (i = 0; i < bb->num_succ; i++) if (bb->succ[i] == succ) return 1; return 0; } /* Checks if the op is one of the spesh dispatch ops. */ static MVMint32 spesh_dispatchy(MVMuint16 opcode) { return (opcode >= MVM_OP_sp_dispatch_v && opcode <= MVM_OP_sp_dispatch_o) || (opcode >= MVM_OP_sp_runbytecode_v && opcode <= MVM_OP_sp_runbytecode_o) || (opcode >= MVM_OP_sp_runcfunc_v && opcode <= MVM_OP_sp_runcfunc_o) || (opcode >= MVM_OP_sp_runnativecall_v && opcode <= MVM_OP_sp_runnativecall_o); } int MVM_spesh_graph_ins_ends_bb(MVMThreadContext *tc, const MVMOpInfo *info) { switch (info->opcode) { case MVM_OP_return_i: case MVM_OP_return_u: case MVM_OP_return_n: case MVM_OP_return_s: case MVM_OP_return_o: case MVM_OP_return: case MVM_OP_dispatch_v: case MVM_OP_dispatch_i: case MVM_OP_dispatch_u: case MVM_OP_dispatch_n: case MVM_OP_dispatch_s: case MVM_OP_dispatch_o: case MVM_OP_sp_dispatch_v: case MVM_OP_sp_dispatch_i: case MVM_OP_sp_dispatch_u: case MVM_OP_sp_dispatch_n: case MVM_OP_sp_dispatch_s: case MVM_OP_sp_dispatch_o: case MVM_OP_sp_runbytecode_v: case MVM_OP_sp_runbytecode_i: case MVM_OP_sp_runbytecode_n: case MVM_OP_sp_runbytecode_s: case MVM_OP_sp_runbytecode_o: case MVM_OP_sp_runcfunc_v: case MVM_OP_sp_runcfunc_i: case MVM_OP_sp_runcfunc_n: case MVM_OP_sp_runcfunc_s: case MVM_OP_sp_runcfunc_o: case MVM_OP_sp_runnativecall_v: case MVM_OP_sp_runnativecall_i: case MVM_OP_sp_runnativecall_n: case MVM_OP_sp_runnativecall_s: case MVM_OP_sp_runnativecall_o: return 1; default: if (info->jittivity & (MVM_JIT_INFO_THROWISH | MVM_JIT_INFO_INVOKISH)) { return 1; } break; } return 0; } /* Builds the control flow graph, populating the passed spesh graph structure * with it. This also makes nodes for all of the instruction. */ #define MVM_CFG_BB_START 1 #define MVM_CFG_BB_END 2 #define MVM_CFG_BB_JUMPLIST 4 static void build_cfg(MVMThreadContext *tc, MVMSpeshGraph *g, MVMStaticFrame *sf, MVMint32 *existing_deopts, MVMint32 num_existing_deopts, MVMint32 *existing_deopt_synths, MVMint32 num_existing_deopt_synths, MVMint32 *deopt_usage_info, MVMSpeshIns ***deopt_usage_ins_out) { MVMSpeshBB *cur_bb, *prev_bb; MVMSpeshIns *last_ins; MVMint64 i; MVMint32 bb_idx; /* Temporary array of all MVMSpeshIns we create (one per instruction). * Overestimate at size. Has the flat view, matching the bytecode. */ MVMSpeshIns **ins_flat = MVM_calloc(g->bytecode_size / 2, sizeof(MVMSpeshIns *)); /* Temporary array where each byte in the input bytecode gets a 32-bit * integer. This is used for two things: * A) When we make the MVMSpeshIns for an instruction starting at the * byte, we put the instruction index (into ins_flat) in the slot, * shifting it by 3 bits to the left. We will use this to do fixups. * B) The first bit is "I have an incoming branch" - that is, start of * a basic block. The second bit is "I can branch" - that is, end of * a basic block. It's possible to have both bits set. If it's part * of a jumplist, it gets the third bit set also. * Anything that's just a zero has no instruction starting there. */ MVMuint32 *byte_to_ins_flags = MVM_calloc(g->bytecode_size, sizeof(MVMuint32)); /* Instruction to basic block mapping. Initialized later. */ MVMSpeshBB **ins_to_bb = NULL; /* Which handlers are active; used for placing edges from blocks covered * by exception handlers. */ MVMuint8 *active_handlers = MVM_calloc(1, g->num_handlers); MVMint32 num_active_handlers = 0; /* Make first pass through the bytecode. In this pass, we make MVMSpeshIns * nodes for each instruction and set the start/end of block bits. Also * set handler targets as basic block starters. */ MVMCompUnit *cu = sf->body.cu; MVMuint8 *pc = g->bytecode; MVMuint8 *end = g->bytecode + g->bytecode_size; MVMuint32 ins_idx = 0; MVMuint8 next_bbs = 1; /* Next iteration (here, first) starts a BB. */ MVMuint32 num_osr_points = 0; MVMBytecodeAnnotation *ann_ptr = MVM_bytecode_resolve_annotation(tc, &sf->body, sf->body.bytecode - pc); for (i = 0; i < g->num_handlers; i++) { if (g->handlers[i].start_offset != (MVMuint32)-1 && g->handlers[i].goto_offset != (MVMuint32)-1) { byte_to_ins_flags[g->handlers[i].start_offset] |= MVM_CFG_BB_START; byte_to_ins_flags[g->handlers[i].end_offset] |= MVM_CFG_BB_START; byte_to_ins_flags[g->handlers[i].goto_offset] |= MVM_CFG_BB_START; } } while (pc < end) { /* Look up op info. */ MVMuint16 opcode = *(MVMuint16 *)pc; MVMuint8 *args = pc + 2; MVMuint16 arg_size = 0; const MVMOpInfo *info = MVM_bytecode_get_validated_op_info(tc, cu, opcode); /* Create an instruction node, add it, and record its position. */ MVMSpeshIns *ins_node = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshIns)); ins_flat[ins_idx] = ins_node; byte_to_ins_flags[pc - g->bytecode] |= ins_idx << 3; /* Did previous instruction end a basic block? */ if (next_bbs) { byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_START; next_bbs = 0; } /* Also check we're not already a BB start due to being a branch * target, in which case we should ensure our prior is marked as * a BB end. */ else { if (byte_to_ins_flags[pc - g->bytecode] & MVM_CFG_BB_START) { MVMuint32 hunt = pc - g->bytecode; while (!byte_to_ins_flags[--hunt]); byte_to_ins_flags[hunt] |= MVM_CFG_BB_END; } } /* Store opcode. */ ins_node->info = info; /* If this is a pre-instruction deopt point opcode, annotate. */ if (!existing_deopts && (info->deopt_point & MVM_DEOPT_MARK_ONE_PRE)) MVM_spesh_graph_add_deopt_annotation(tc, g, ins_node, pc - g->bytecode, MVM_SPESH_ANN_DEOPT_PRE_INS); /* Let's see if we have a line-number annotation. */ if (ann_ptr && pc - sf->body.bytecode == ann_ptr->bytecode_offset) { MVMSpeshAnn *lineno_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); lineno_ann->next = ins_node->annotations; lineno_ann->type = MVM_SPESH_ANN_LINENO; lineno_ann->data.lineno.filename_string_index = ann_ptr->filename_string_heap_index; lineno_ann->data.lineno.line_number = ann_ptr->line_number; ins_node->annotations = lineno_ann; MVM_bytecode_advance_annotation(tc, &sf->body, ann_ptr); } /* Dispatch ops have a variable number of arguments based on a callsite. */ if (MVM_op_get_mark(info->opcode)[1] == 'd' || (MVM_op_get_mark(info->opcode)[1] == 's' && spesh_dispatchy(info->opcode))) { /* Fake up an op info struct for the duration of this spesh graph's * life, so everything else can pretend it's got a fixed number of * arguments. */ MVMCallsite *callsite = MVM_spesh_disp_callsite_for_dispatch_op(opcode, args, cu); ins_node->info = MVM_spesh_alloc(tc, g, MVM_spesh_disp_dispatch_op_info_size( tc, info, callsite)); MVM_spesh_disp_initialize_dispatch_op_info(tc, info, callsite, (MVMOpInfo*)ins_node->info); info = ins_node->info; } /* The sp_resumption op is also var args. */ else if (info->opcode == MVM_OP_sp_resumption) { MVMSpeshResumeInit *resume_init = &(g->resume_inits[((MVMuint16 *)args)[1]]); MVMOpInfo *res_op_info = MVM_spesh_alloc(tc, g, MVM_spesh_disp_resumption_op_info_size( tc, resume_init->dp, resume_init->res_idx)); MVM_spesh_disp_initialize_resumption_op_info(tc, resume_init->dp, resume_init->res_idx, res_op_info); info = ins_node->info = res_op_info; } /* Go over operands. */ ins_node->operands = MVM_spesh_alloc(tc, g, info->num_operands * sizeof(MVMSpeshOperand)); for (i = 0; i < info->num_operands; i++) { MVMuint8 flags = info->operands[i]; MVMuint8 rw = flags & MVM_operand_rw_mask; switch (rw) { case MVM_operand_read_reg: case MVM_operand_write_reg: ins_node->operands[i].reg.orig = GET_UI16(args, arg_size); arg_size += 2; break; case MVM_operand_read_lex: case MVM_operand_write_lex: ins_node->operands[i].lex.idx = GET_UI16(args, arg_size); ins_node->operands[i].lex.outers = GET_UI16(args, arg_size + 2); arg_size += 4; break; case MVM_operand_literal: { MVMuint32 type = flags & MVM_operand_type_mask; switch (type) { case MVM_operand_int8: ins_node->operands[i].lit_i8 = GET_I8(args, arg_size); arg_size += 1; break; case MVM_operand_int16: ins_node->operands[i].lit_i16 = GET_I16(args, arg_size); arg_size += 2; break; case MVM_operand_uint16: ins_node->operands[i].lit_ui16 = GET_UI16(args, arg_size); arg_size += 2; break; case MVM_operand_int32: ins_node->operands[i].lit_i32 = GET_I32(args, arg_size); arg_size += 4; break; case MVM_operand_uint32: ins_node->operands[i].lit_ui32 = GET_UI32(args, arg_size); arg_size += 4; break; case MVM_operand_int64: ins_node->operands[i].lit_i64 = MVM_BC_get_I64(args, arg_size); arg_size += 8; break; case MVM_operand_uint64: ins_node->operands[i].lit_i64 = GET_UI64(args, arg_size); arg_size += 8; break; case MVM_operand_num32: ins_node->operands[i].lit_n32 = GET_N32(args, arg_size); arg_size += 4; break; case MVM_operand_num64: ins_node->operands[i].lit_n64 = MVM_BC_get_N64(args, arg_size); arg_size += 8; break; case MVM_operand_callsite: ins_node->operands[i].callsite_idx = GET_UI16(args, arg_size); arg_size += 2; break; case MVM_operand_coderef: ins_node->operands[i].coderef_idx = GET_UI16(args, arg_size); arg_size += 2; break; case MVM_operand_str: ins_node->operands[i].lit_str_idx = GET_UI32(args, arg_size); arg_size += 4; break; case MVM_operand_ins: { /* Stash instruction offset. */ MVMuint32 target = GET_UI32(args, arg_size); ins_node->operands[i].ins_offset = target; /* This is a branching instruction, so it's a BB end. */ byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; /* Its target is a BB start, and any previous instruction * we already passed needs marking as a BB end. */ byte_to_ins_flags[target] |= MVM_CFG_BB_START; if (target > 0 && target < pc - g->bytecode) { while (!byte_to_ins_flags[--target]); byte_to_ins_flags[target] |= MVM_CFG_BB_END; } /* Next instruction is also a BB start. */ next_bbs = 1; arg_size += 4; break; } case MVM_operand_spesh_slot: ins_node->operands[i].lit_i16 = GET_I16(args, arg_size); arg_size += 2; break; default: MVM_oops(tc, "Spesh: unknown operand type %d in graph building (op %s)", (int)type, ins_node->info->name); } break; } default: break; } } /* We specially handle the jumplist case, which needs to mark all of * the possible places we could jump to in the following instructions * as starts of basic blocks. It is, in itself, the end of one. Note * we jump to the instruction after the n jump points if none match, * so that is marked too. */ if (opcode == MVM_OP_jumplist) { MVMint64 n = MVM_BC_get_I64(args, 0); for (i = 0; i <= n; i++) byte_to_ins_flags[(pc - g->bytecode) + 12 + i * 6] |= MVM_CFG_BB_START | MVM_CFG_BB_JUMPLIST; byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; } /* Dispatch and return end a basic block. Anything that is marked as * invokish and throwish are also basic block ends. OSR points are * basic block starts. */ if (opcode == MVM_OP_osrpoint) { byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_START; if (pc - g->bytecode > 0) { MVMuint32 prev = pc - g->bytecode; while (!byte_to_ins_flags[--prev]); byte_to_ins_flags[prev] |= MVM_CFG_BB_END; } num_osr_points++; } else if (MVM_spesh_graph_ins_ends_bb(tc, info)) { byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; next_bbs = 1; } /* Final instruction is basic block end. */ if (pc + 2 + arg_size == end) byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; /* If the instruction is logged or uses the inline cache, store its * program counter so we can look up the cache state or log info. */ if (info->logged) add_logged_annotation(tc, g, ins_node, pc); if (info->uses_cache) add_cached_annotation(tc, g, ins_node, pc); /* Caculate next instruction's PC. */ pc += 2 + arg_size; /* If this is a post-instruction deopt point opcode... */ if (!existing_deopts && (info->deopt_point & MVM_DEOPT_MARK_ONE)) MVM_spesh_graph_add_deopt_annotation(tc, g, ins_node, pc - g->bytecode, MVM_SPESH_ANN_DEOPT_ONE_INS); if (!existing_deopts && (info->deopt_point & MVM_DEOPT_MARK_ALL)) MVM_spesh_graph_add_deopt_annotation(tc, g, ins_node, pc - g->bytecode, MVM_SPESH_ANN_DEOPT_ALL_INS); if (!existing_deopts && (info->deopt_point & MVM_DEOPT_MARK_OSR)) MVM_spesh_graph_add_deopt_annotation(tc, g, ins_node, pc - g->bytecode, MVM_SPESH_ANN_DEOPT_OSR); /* If it is post-logged, annotate it with the updated offset (the -2 * is a bit of a hack; it avoids conflicts with the following * instructions pre-logged offsets). */ if (info->post_logged) add_logged_annotation(tc, g, ins_node, pc - 2); /* Go to next instruction. */ ins_idx++; } /* Annotate instructions that are handler-significant. */ for (i = 0; i < g->num_handlers; i++) { /* Start or got may be -1 if the code the handler covered became * dead. If so, mark the handler as removed. Ditto if end is * before start (would never match). */ if (g->handlers[i].start_offset == (MVMuint32)-1 || g->handlers[i].goto_offset == (MVMuint32)-1 || g->handlers[i].start_offset > g->handlers[i].end_offset) { if (!g->unreachable_handlers) g->unreachable_handlers = MVM_spesh_alloc(tc, g, g->num_handlers); g->unreachable_handlers[i] = 1; } else { MVMSpeshIns *start_ins = ins_flat[byte_to_ins_flags[g->handlers[i].start_offset] >> 3]; MVMSpeshIns *end_ins = ins_flat[byte_to_ins_flags[g->handlers[i].end_offset] >> 3]; MVMSpeshAnn *start_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); MVMSpeshAnn *end_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); MVMSpeshIns *goto_ins = ins_flat[byte_to_ins_flags[g->handlers[i].goto_offset] >> 3]; MVMSpeshAnn *goto_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); start_ann->next = start_ins->annotations; start_ann->type = MVM_SPESH_ANN_FH_START; start_ann->data.frame_handler_index = i; start_ins->annotations = start_ann; end_ann->next = end_ins->annotations; end_ann->type = MVM_SPESH_ANN_FH_END; end_ann->data.frame_handler_index = i; end_ins->annotations = end_ann; goto_ann->next = goto_ins->annotations; goto_ann->type = MVM_SPESH_ANN_FH_GOTO; goto_ann->data.frame_handler_index = i; goto_ins->annotations = goto_ann; } } /* Annotate instructions that are inline start/end points. */ for (i = 0; i < g->num_inlines; i++) { if (!g->inlines[i].unreachable) { MVMSpeshIns *start_ins = ins_flat[byte_to_ins_flags[g->inlines[i].start] >> 3]; MVMSpeshIns *end_ins = ins_flat[byte_to_ins_flags[g->inlines[i].end] >> 3]; MVMSpeshAnn *start_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); MVMSpeshAnn *end_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); start_ann->next = start_ins->annotations; start_ann->type = MVM_SPESH_ANN_INLINE_START; start_ann->data.inline_idx = i; start_ins->annotations = start_ann; end_ann->next = end_ins->annotations; end_ann->type = MVM_SPESH_ANN_INLINE_END; end_ann->data.inline_idx = i; end_ins->annotations = end_ann; } } /* Now for the second pass, where we assemble the basic blocks. Also we * build a lookup table of instructions that start a basic block to that * basic block, for the final CFG construction. We make the entry block a * special one, containing a noop; it will have any catch exception * handler targets linked from it, so they show up in the graph. For any * control exceptions, we will insert */ g->entry = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshBB)); g->entry->first_ins = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshIns)); g->entry->first_ins->info = MVM_bytecode_get_validated_op_info(tc, cu, 0); g->entry->last_ins = g->entry->first_ins; g->entry->idx = 0; cur_bb = NULL; prev_bb = g->entry; last_ins = NULL; ins_to_bb = MVM_calloc(ins_idx, sizeof(MVMSpeshBB *)); ins_idx = 0; bb_idx = 1; for (i = 0; i < g->bytecode_size; i++) { MVMSpeshIns *cur_ins; /* Skip zeros; no instruction here. */ if (!byte_to_ins_flags[i]) continue; /* Get current instruction. */ cur_ins = ins_flat[byte_to_ins_flags[i] >> 3]; /* Start of a basic block? */ if (byte_to_ins_flags[i] & MVM_CFG_BB_START) { /* Should not already be in a basic block. */ if (cur_bb) { MVM_spesh_graph_destroy(tc, g); MVM_oops(tc, "Spesh: confused during basic block analysis (in block)"); } /* Create it, and set first instruction and index. */ cur_bb = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshBB)); cur_bb->first_ins = cur_ins; cur_bb->idx = bb_idx; cur_bb->initial_pc = i; cur_bb->jumplist = byte_to_ins_flags[i] & MVM_CFG_BB_JUMPLIST; bb_idx++; /* Record instruction -> BB start mapping. */ ins_to_bb[ins_idx] = cur_bb; /* Link it to the previous one. */ prev_bb->linear_next = cur_bb; } /* Should always be in a BB at this point. */ if (!cur_bb) { MVM_spesh_graph_destroy(tc, g); MVM_oops(tc, "Spesh: confused during basic block analysis (no block)"); } /* Add instruction into double-linked per-block instruction list. */ if (last_ins) { last_ins->next = cur_ins; cur_ins->prev = last_ins; } last_ins = cur_ins; /* End of a basic block? */ if (byte_to_ins_flags[i] & MVM_CFG_BB_END) { cur_bb->last_ins = cur_ins; prev_bb = cur_bb; cur_bb = NULL; last_ins = NULL; } ins_idx++; } g->num_bbs = bb_idx; /* Finally, link the basic blocks up to form a CFG. Along the way, any of * the instruction operands get the target BB stored. This is where we * link basic blocks covered by control exception handlers to the goto * block of the handler also. */ cur_bb = g->entry; while (cur_bb) { /* If it's the first block, it's a special case; successors are the * real successor, all catch exception handlers, and all OSR points. */ if (cur_bb == g->entry) { MVMint32 num_bbs = 1 + g->num_handlers + num_osr_points; MVMint32 insert_pos = 1; cur_bb->succ = MVM_spesh_alloc(tc, g, num_bbs * sizeof(MVMSpeshBB *)); cur_bb->handler_succ = MVM_spesh_alloc(tc, g, g->num_handlers * sizeof(MVMSpeshBB *)); cur_bb->succ[0] = cur_bb->linear_next; for (i = 0; i < g->num_handlers; i++) { if (is_catch_handler(tc, g, i)) { MVMuint32 offset = g->handlers[i].goto_offset; if (offset != (MVMuint32)-1) cur_bb->succ[insert_pos++] = ins_to_bb[byte_to_ins_flags[offset] >> 3]; } } if (num_osr_points > 0) { MVMSpeshBB *search_bb = cur_bb->linear_next; while (search_bb) { if (search_bb->first_ins->info->opcode == MVM_OP_osrpoint) cur_bb->succ[insert_pos++] = search_bb; search_bb = search_bb->linear_next; } } cur_bb->num_succ = insert_pos; } /* Otherwise, non-entry basic block. */ else { /* If this is the start of a frame handler that is not a catch, * mark it as an active handler. Unmark those where we see the * end of the handler. */ if (cur_bb->first_ins->annotations) { /* Process them in two passes in case we have two on the * same instruction and disordered. */ MVMuint32 has_end = 0; MVMSpeshAnn *ann = cur_bb->first_ins->annotations; while (ann) { switch (ann->type) { case MVM_SPESH_ANN_FH_START: active_handlers[ann->data.frame_handler_index] = 1; num_active_handlers++; break; case MVM_SPESH_ANN_FH_END: has_end = 1; break; } ann = ann->next; } if (has_end) { ann = cur_bb->first_ins->annotations; while (ann) { switch (ann->type) { case MVM_SPESH_ANN_FH_END: active_handlers[ann->data.frame_handler_index] = 0; num_active_handlers--; break; } ann = ann->next; } } } /* Consider the last instruction, to see how we leave the BB. */ switch (cur_bb->last_ins->info->opcode) { case MVM_OP_jumplist: { /* Jumplist, so successors are next N+1 basic blocks. */ MVMint64 jump_bbs = cur_bb->last_ins->operands[0].lit_i64 + 1; MVMint64 num_bbs = jump_bbs + num_active_handlers; MVMSpeshBB *bb_to_add = cur_bb->linear_next; cur_bb->succ = MVM_spesh_alloc(tc, g, num_bbs * sizeof(MVMSpeshBB *)); for (i = 0; i < jump_bbs; i++) { cur_bb->succ[i] = bb_to_add; bb_to_add = bb_to_add->linear_next; } cur_bb->num_succ = jump_bbs; } break; case MVM_OP_goto: { /* Unconditional branch, so one successor. */ MVMint64 num_bbs = 1 + num_active_handlers; MVMuint32 offset = cur_bb->last_ins->operands[0].ins_offset; MVMSpeshBB *tgt = ins_to_bb[byte_to_ins_flags[offset] >> 3]; cur_bb->succ = MVM_spesh_alloc(tc, g, num_bbs * sizeof(MVMSpeshBB *)); cur_bb->succ[0] = tgt; cur_bb->num_succ = 1; cur_bb->last_ins->operands[0].ins_bb = tgt; } break; default: { /* Probably conditional branch, so two successors: one from * the instruction, another from fall-through. Or may just be * a non-branch that exits for other reasons. */ MVMint64 num_bbs = 2 + num_active_handlers; cur_bb->succ = MVM_spesh_alloc(tc, g, num_bbs * sizeof(MVMSpeshBB *)); for (i = 0; i < cur_bb->last_ins->info->num_operands; i++) { if (cur_bb->last_ins->info->operands[i] == MVM_operand_ins) { MVMuint32 offset = cur_bb->last_ins->operands[i].ins_offset; cur_bb->succ[0] = ins_to_bb[byte_to_ins_flags[offset] >> 3]; cur_bb->num_succ++; cur_bb->last_ins->operands[i].ins_bb = cur_bb->succ[0]; } } if (cur_bb->num_succ > 1) { /* If we ever get instructions with multiple targets, this * area of the code needs an update. */ MVM_spesh_graph_destroy(tc, g); MVM_oops(tc, "Spesh: unhandled multi-target branch"); } if (cur_bb->linear_next) { cur_bb->succ[cur_bb->num_succ] = cur_bb->linear_next; cur_bb->num_succ++; } } break; } /* Attach this block to the goto block of any active handlers. */ if ( num_active_handlers && ( cur_bb->last_ins->info->jittivity & (MVM_JIT_INFO_THROWISH | MVM_JIT_INFO_INVOKISH) || MVM_op_get_mark(cur_bb->last_ins->info->opcode)[1] == 'd' || (MVM_op_get_mark(cur_bb->last_ins->info->opcode)[1] == 's' && spesh_dispatchy(cur_bb->last_ins->info->opcode)) ) ) { cur_bb->handler_succ = MVM_spesh_alloc(tc, g, num_active_handlers * sizeof(MVMSpeshBB *)); for (i = 0; i < g->num_handlers; i++) { if (active_handlers[i]) { MVMuint32 offset = g->handlers[i].goto_offset; MVMSpeshBB *target = ins_to_bb[byte_to_ins_flags[offset] >> 3]; if (!already_succs(tc, cur_bb, target)) { cur_bb->succ[cur_bb->num_succ] = target; cur_bb->num_succ++; cur_bb->handler_succ[cur_bb->num_handler_succ++] = target; } } } } else cur_bb->handler_succ = NULL; } /* Move on to next block. */ cur_bb = cur_bb->linear_next; } /* If we're building the graph for optimized bytecode, insert existing * deopt points. */ if (existing_deopts) { for (i = 0; i < num_existing_deopts; i ++) { if (existing_deopts[2 * i + 1] >= 0) { // check last bit which is abused as PRE_INS marker and set deopt_ins = post_ins if it is set MVMuint32 deopt = (MVMuint32)existing_deopts[2 * i + 1]; MVMuint32 bytecode_pos = MVM_spesh_deopt_bytecode_pos(deopt); MVMSpeshIns *post_ins = ins_flat[byte_to_ins_flags[bytecode_pos] >> 3]; MVMSpeshIns *deopt_ins; MVMSpeshAnn *deopt_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); if (deopt & 1) { /* MVM_SPESH_ANN_DEOPT_PRE_INS annotation */ deopt_ins = post_ins; deopt_ann->type = MVM_SPESH_ANN_DEOPT_PRE_INS; } else { deopt_ins = post_ins->prev ? post_ins->prev : MVM_spesh_graph_linear_prev(tc, g, ins_to_bb[byte_to_ins_flags[bytecode_pos] >> 3])->last_ins; deopt_ann->type = MVM_SPESH_ANN_DEOPT_INLINE; } deopt_ann->next = deopt_ins->annotations; deopt_ann->data.deopt_idx = i; deopt_ins->annotations = deopt_ann; } } } if (existing_deopt_synths) { for (i = 0; i < num_existing_deopt_synths; i ++) { if (existing_deopt_synths[2 * i + 1] >= 0) { MVMSpeshIns *post_ins = ins_flat[byte_to_ins_flags[existing_deopt_synths[2 * i + 1]] >> 3]; MVMSpeshIns *deopt_ins = post_ins->prev ? post_ins->prev : MVM_spesh_graph_linear_prev(tc, g, ins_to_bb[byte_to_ins_flags[existing_deopt_synths[2 * i + 1]] >> 3])->last_ins; MVMSpeshAnn *deopt_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); deopt_ann->next = deopt_ins->annotations; deopt_ann->type = MVM_SPESH_ANN_DEOPT_SYNTH; deopt_ann->data.deopt_idx = existing_deopt_synths[2 * i]; deopt_ins->annotations = deopt_ann; } } } /* We may also need to reconstruct deopt usage info, to allow optimizing of * inlinees. That is persisted using bytecode offsets, and this is the last * place we can map those into instructions, so do it here if needed. */ if (deopt_usage_ins_out && deopt_usage_info) { MVM_VECTOR_DECL(MVMSpeshIns *, usage_ins); MVMuint32 idx = 0; MVM_VECTOR_INIT(usage_ins, 32); while (1) { MVMint32 offset = deopt_usage_info[idx]; if (offset == -1) break; MVM_VECTOR_PUSH(usage_ins, ins_flat[byte_to_ins_flags[offset] >> 3]); idx++; idx += deopt_usage_info[idx] + 1; /* Skip over deopt indices */ } *deopt_usage_ins_out = usage_ins; } /* Clear up the temporary arrays. */ MVM_free(byte_to_ins_flags); MVM_free(ins_flat); MVM_free(ins_to_bb); MVM_free(ann_ptr); MVM_free(active_handlers); } /* Inserts nulling of object reigsters. A later stage of the optimizer will * throw out any that are unrequired, leaving only those that cover (rare) * "register read before assigned" cases. (We can thus just start off with * them NULL, since zeroed memory is cheaper than copying a VMNull in to * place). */ static MVMint32 is_handler_reg(MVMThreadContext *tc, MVMSpeshGraph *g, MVMuint16 reg) { MVMuint32 num_handlers = g->num_handlers; MVMuint32 i; for (i = 0; i < num_handlers; i++) { if (g->handlers[i].action == MVM_EX_ACTION_INVOKE) if (g->handlers[i].block_reg == reg) return 1; if (g->handlers[i].category_mask & MVM_EX_CAT_LABELED) if (g->handlers[i].label_reg == reg) return 1; } return 0; } static void insert_object_null_instructions(MVMThreadContext *tc, MVMSpeshGraph *g) { MVMSpeshBB *insert_bb = g->entry->linear_next; MVMuint16 *local_types = g->sf->body.local_types; MVMuint16 num_locals = g->sf->body.num_locals; MVMuint16 i; MVMSpeshIns *insert_after = NULL; if (insert_bb->first_ins && insert_bb->first_ins->info->opcode == MVM_OP_prof_enter) { insert_after = insert_bb->first_ins; } for (i = 0; i < num_locals; i++) { if (local_types[i] == MVM_reg_obj && !is_handler_reg(tc, g, i)) { MVMSpeshIns *null_ins = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshIns)); null_ins->info = MVM_op_get_op(MVM_OP_null); null_ins->operands = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshOperand)); null_ins->operands[0].reg.orig = i; MVM_spesh_manipulate_insert_ins(tc, insert_bb, insert_after, null_ins); insert_after = null_ins; } } } /* Annotates the control flow graph with predecessors. */ static void add_predecessors(MVMThreadContext *tc, MVMSpeshGraph *g) { MVMSpeshBB *cur_bb = g->entry; while (cur_bb) { MVMuint16 i; for (i = 0; i < cur_bb->num_succ; i++) { MVMSpeshBB *tgt = cur_bb->succ[i]; MVMSpeshBB **new_pred = MVM_spesh_alloc(tc, g, (tgt->num_pred + 1) * sizeof(MVMSpeshBB *)); if (tgt->num_pred) memcpy(new_pred, tgt->pred, tgt->num_pred * sizeof(MVMSpeshBB *)); new_pred[tgt->num_pred] = cur_bb; tgt->pred = new_pred; tgt->num_pred++; } cur_bb = cur_bb->linear_next; } } /* Produces an array of the basic blocks, sorted in reverse postorder from * the entry point. */ static void dfs(MVMSpeshBB **rpo, MVMint32 *insert_pos, MVMuint8 *seen, MVMSpeshBB *bb) { MVMint32 i; seen[bb->idx] = 1; for (i = 0; i < bb->num_succ; i++) { MVMSpeshBB *succ = bb->succ[i]; if (!seen[succ->idx]) dfs(rpo, insert_pos, seen, succ); } rpo[*insert_pos] = bb; bb->rpo_idx = *insert_pos; (*insert_pos)--; } MVMSpeshBB ** MVM_spesh_graph_reverse_postorder(MVMThreadContext *tc, MVMSpeshGraph *g) { MVMSpeshBB **rpo = MVM_calloc(g->num_bbs, sizeof(MVMSpeshBB *)); MVMuint8 *seen = MVM_calloc(g->num_bbs, 1); MVMint32 ins = g->num_bbs - 1; dfs(rpo, &ins, seen, g->entry); MVM_free(seen); if (ins != -1) { char *dump_msg = MVM_spesh_dump(tc, g); printf("%s", dump_msg); MVM_free(dump_msg); MVM_spesh_graph_destroy(tc, g); MVM_oops(tc, "Spesh: reverse postorder calculation failed"); } return rpo; } /* 2-finger intersection algorithm, to find new immediate dominator. */ static void iter_check(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB **rpo, MVMint32 *doms, MVMint32 iters) { if (iters > 100000) { #ifdef NDEBUG MVMuint32 k; char *dump_msg = MVM_spesh_dump(tc, g); printf("%s", dump_msg); MVM_free(dump_msg); printf("RPO: "); for (k = 0; k < g->num_bbs; k++) printf("%d, ", rpo[k]->idx); printf("\n"); printf("Doms: "); for (k = 0; k < g->num_bbs; k++) printf("%d (%d), ", doms[k], doms[k] >= 0 ? rpo[doms[k]]->idx : -1); printf("\n"); #endif MVM_spesh_graph_destroy(tc, g); MVM_oops(tc, "Spesh: dominator intersection went infinite"); } } static MVMint32 intersect(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB **rpo, MVMint32 *doms, MVMint32 finger1, MVMint32 finger2) { MVMint32 iters = 0; while (finger1 != finger2) { while (finger1 > finger2) { iter_check(tc, g, rpo, doms, iters++); finger1 = doms[finger1]; } while (finger2 > finger1) { iter_check(tc, g, rpo, doms, iters++); finger2 = doms[finger2]; } } return finger1; } /* Computes dominator information about the basic blocks. */ static MVMint32 * compute_dominators(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB **rpo) { MVMuint32 i, j, changed; /* Create result list, with all initialized to undefined (use -1, as it's * not a valid basic block index). Start node dominates itself. */ MVMint32 *doms = MVM_malloc(g->num_bbs * sizeof(MVMint32)); doms[0] = 0; for (i = 1; i < g->num_bbs; i++) doms[i] = -1; /* Iterate to fixed point. */ changed = 1; while (changed) { changed = 0; /* Visit all except the start node in reverse postorder. */ for (i = 1; i < g->num_bbs; i++) { MVMSpeshBB *b = rpo[i]; /* See if there's a better dominator. */ MVMint32 chosen_pred = -1; MVMint32 new_idom; for (j = 0; j < b->num_pred; j++) { new_idom = b->pred[j]->rpo_idx; if (doms[new_idom] != -1) { chosen_pred = j; break; } } if (chosen_pred == -1) { MVM_spesh_graph_destroy(tc, g); MVM_oops(tc, "Spesh: could not find processed initial dominator"); } for (j = 0; j < b->num_pred; j++) { if (j != (MVMuint32)chosen_pred) { MVMint32 p_idx = b->pred[j]->rpo_idx; if (doms[p_idx] != -1) new_idom = intersect(tc, g, rpo, doms, p_idx, new_idom); } } if (doms[i] != new_idom) { doms[i] = new_idom; changed = 1; } } } return doms; } /* Builds the dominance tree children lists for each node. */ static void add_child(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB *target, MVMSpeshBB *to_add) { MVMSpeshBB **new_children; MVMint32 i; /* Already in the child list? */ for (i = 0; i < target->num_children; i++) if (target->children[i] == to_add) return; /* Nope, so insert. */ new_children = MVM_spesh_alloc(tc, g, (target->num_children + 1) * sizeof(MVMSpeshBB *)); if (target->num_children) memcpy(new_children, target->children, target->num_children * sizeof(MVMSpeshBB *)); new_children[target->num_children] = to_add; target->children = new_children; target->num_children++; } static void add_children(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB **rpo, MVMint32 *doms) { MVMuint32 i; for (i = 0; i < g->num_bbs; i++) { MVMSpeshBB *bb = rpo[i]; MVMuint32 idom = doms[i]; if ((MVMuint32)idom != i) add_child(tc, g, rpo[idom], bb); } } /* Builds the dominance frontier set for each node. */ static void add_to_frontier_set(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB *target, MVMSpeshBB *to_add) { MVMSpeshBB **new_df; MVMint32 i; /* Already in the set? */ for (i = 0; i < target->num_df; i++) if (target->df[i] == to_add) return; /* Nope, so insert. */ new_df = MVM_spesh_alloc(tc, g, (target->num_df + 1) * sizeof(MVMSpeshBB *)); if (target->num_df) memcpy(new_df, target->df, target->num_df * sizeof(MVMSpeshBB *)); new_df[target->num_df] = to_add; target->df = new_df; target->num_df++; } static void add_dominance_frontiers(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB **rpo, MVMint32 *doms) { MVMint32 j; MVMSpeshBB *b = g->entry; while (b) { if (b->num_pred >= 2) { /* Thus it's a join point. */ for (j = 0; j < b->num_pred; j++) { MVMint32 runner = b->pred[j]->rpo_idx; MVMint32 finish_line = doms[b->rpo_idx]; while (runner != finish_line) { add_to_frontier_set(tc, g, rpo[runner], b); runner = doms[runner]; } } } b = b->linear_next; } } /* Per-local SSA info. */ typedef struct { /* Nodes that assign to the variable. */ MVMSpeshBB **ass_nodes; MVMuint16 num_ass_nodes; /* Count of processed assignments aka. C(V). */ MVMint32 count; /* Stack of integers aka. S(V). */ MVMint32 *stack; MVMint32 stack_top; MVMint32 stack_alloc; } SSAVarInfo; /* Creates an SSAVarInfo for each local, initializing it with a list of nodes * that assign to the local. */ static SSAVarInfo * initialize_ssa_var_info(MVMThreadContext *tc, MVMSpeshGraph *g) { SSAVarInfo *var_info = MVM_calloc(g->num_locals, sizeof(SSAVarInfo)); MVMint32 i; /* Visit all instructions, looking for local writes. */ MVMSpeshBB *bb = g->entry; while (bb) { MVMSpeshIns *ins = bb->first_ins; while (ins) { for (i = 0; i < ins->info->num_operands; i++) { if ((ins->info->operands[i] & MVM_operand_rw_mask) == MVM_operand_write_reg) { MVMuint16 written = ins->operands[i].reg.orig; MVMint32 found = 0; MVMint32 j; for (j = 0; j < var_info[written].num_ass_nodes; j++) if (var_info[written].ass_nodes[j] == bb) { found = 1; break; } if (!found) { if (var_info[written].num_ass_nodes % 8 == 0) { MVMint32 new_size = var_info[written].num_ass_nodes + 8; var_info[written].ass_nodes = MVM_realloc( var_info[written].ass_nodes, new_size * sizeof(MVMSpeshBB *)); } var_info[written].ass_nodes[var_info[written].num_ass_nodes] = bb; var_info[written].num_ass_nodes++; } } } ins = ins->next; } bb = bb->linear_next; } /* Set stack top to -1 sentinel for all nodes, and count = 1 (as we may * read the default value of a register). */ for (i = 0; i < g->num_locals; i++) { var_info[i].count = 1; var_info[i].stack_top = -1; } return var_info; } MVMOpInfo *MVM_spesh_graph_get_phi(MVMThreadContext *tc, MVMSpeshGraph *g, MVMuint32 nrargs) { MVMOpInfo *result = NULL; /* Check number of args to phi isn't huge. */ if (nrargs > 0xFFFF) MVM_panic(1, "Spesh: SSA calculation failed; cannot allocate enormous PHI node"); /* Up to 64 args, almost every number is represented, but after that * we have a sparse array through which we must search. */ if (nrargs - 1 < MVMPhiNodeCacheSparseBegin) { result = &g->phi_infos[nrargs - 1]; } else { MVMint32 cache_idx; for (cache_idx = MVMPhiNodeCacheSparseBegin; !result && cache_idx < MVMPhiNodeCacheSize; cache_idx++) { if (g->phi_infos[cache_idx].opcode == MVM_SSA_PHI) { if (g->phi_infos[cache_idx].num_operands == nrargs) { result = &g->phi_infos[cache_idx]; } } else { result = &g->phi_infos[cache_idx]; } } } if (result == NULL) { result = MVM_spesh_alloc(tc, g, sizeof(MVMOpInfo)); result->opcode = 0; } if (result->opcode != MVM_SSA_PHI) { result->opcode = MVM_SSA_PHI; result->name = "PHI"; result->num_operands = nrargs; } return result; } /* Inserts SSA phi functions at the required places in the graph. */ void MVM_spesh_graph_place_phi(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB *bb, MVMint32 n, MVMuint16 var) { MVMint32 i; MVMOpInfo *phi_op = MVM_spesh_graph_get_phi(tc, g, n + 1); MVMSpeshIns *ins = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshIns)); ins->info = phi_op; ins->operands = MVM_spesh_alloc(tc, g, phi_op->num_operands * sizeof(MVMSpeshOperand)); for (i = 0; i < phi_op->num_operands; i++) ins->operands[i].reg.orig = var; ins->next = bb->first_ins; bb->first_ins->prev = ins; bb->first_ins = ins; } static void insert_phi_functions(MVMThreadContext *tc, MVMSpeshGraph *g, SSAVarInfo *var_info) { MVMint32 *has_already = MVM_calloc(g->num_bbs, sizeof(MVMint32)); MVMint32 *work = MVM_calloc(g->num_bbs, sizeof(MVMint32)); MVMSpeshBB **worklist = MVM_calloc(g->num_bbs, sizeof(MVMSpeshBB *)); MVMint32 worklist_top = 0; MVMint32 iter_count = 0; /* Go over all locals. */ MVMint32 var, i, j, found; for (var = 0; var < g->num_locals; var++) { /* Move to next iteration. */ iter_count++; /* Add blocks assigning to this variable to the worklist. */ for (i = 0; i < var_info[var].num_ass_nodes; i++) { MVMSpeshBB *bb = var_info[var].ass_nodes[i]; work[bb->idx] = iter_count; worklist[worklist_top++] = bb; /* Algo unions, but ass_nodes unique. */ } /* Process the worklist. */ while (worklist_top) { MVMSpeshBB *x = worklist[--worklist_top]; for (i = 0; i < x->num_df; i++) { MVMSpeshBB *y = x->df[i]; if (has_already[y->idx] < iter_count) { /* Place phi function, and mark we have. */ MVM_spesh_graph_place_phi(tc, g, y, y->num_pred, var); has_already[y->idx] = iter_count; /* Add this block to worklist if needed. */ if (work[y->idx] < iter_count) { work[y->idx] = iter_count; found = 0; for (j = 0; j < worklist_top; j++) if (worklist[j] == y) { found = 1; break; } if (!found) worklist[worklist_top++] = y; } } } } } MVM_free(has_already); MVM_free(work); MVM_free(worklist); } /* Renames the local variables such that we end up with SSA form. */ static MVMint32 which_pred(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB *y, MVMSpeshBB *x) { MVMint32 i; for (i = 0; i < y->num_pred; i++) if (y->pred[i] == x) return i; MVM_spesh_graph_destroy(tc, g); MVM_oops(tc, "Spesh: which_pred failed to find x"); } static void rename_locals(MVMThreadContext *tc, MVMSpeshGraph *g, SSAVarInfo *var_info, MVMSpeshBB *x) { MVMint32 i; /* Visit instructions and do renames in normal (non-phi) instructions. */ MVMSpeshIns *a = x->first_ins; while (a) { /* Rename reads, provided it's not a PHI. */ MVMint32 is_phi = a->info->opcode == MVM_SSA_PHI; if (!is_phi) { for (i = 0; i < a->info->num_operands; i++) { if ((a->info->operands[i] & MVM_operand_rw_mask) == MVM_operand_read_reg) { MVMuint16 orig = a->operands[i].reg.orig; MVMint32 st = var_info[orig].stack_top; if (st >= 0) a->operands[i].reg.i = var_info[orig].stack[st]; else a->operands[i].reg.i = 0; } } } /* Rename writes. */ for (i = 0; i < a->info->num_operands; i++) { if (is_phi || (a->info->operands[i] & MVM_operand_rw_mask) == MVM_operand_write_reg) { MVMuint16 orig = a->operands[i].reg.orig; MVMint32 reg_i = var_info[orig].count; a->operands[i].reg.i = reg_i; if (var_info[orig].stack_top + 1 >= var_info[orig].stack_alloc) { if (var_info[orig].stack_alloc) var_info[orig].stack_alloc *= 2; else var_info[orig].stack_alloc = 8; var_info[orig].stack = MVM_realloc(var_info[orig].stack, var_info[orig].stack_alloc * sizeof(MVMint32)); } var_info[orig].stack[++var_info[orig].stack_top] = reg_i; var_info[orig].count++; } if (is_phi) break; } a = a->next; } /* Visit successors and update their phi functions. */ for (i = 0; i < x->num_succ; i++) { MVMSpeshBB *y = x->succ[i]; MVMint32 j = which_pred(tc, g, y, x); MVMSpeshIns *p = y->first_ins; while (p && p->info->opcode == MVM_SSA_PHI) { MVMuint16 orig = p->operands[j + 1].reg.orig; MVMint32 st = var_info[orig].stack_top; if (st >= 0) p->operands[j + 1].reg.i = var_info[orig].stack[st]; else p->operands[j + 1].reg.i = 0; p = p->next; } } /* Rename for all the children in the dominator tree. */ for (i = 0; i < x->num_children; i++) rename_locals(tc, g, var_info, x->children[i]); /* Go over assignments and pop new variable names. */ a = x->first_ins; while (a) { MVMint32 is_phi = a->info->opcode == MVM_SSA_PHI; for (i = 0; i < a->info->num_operands; i++) { if (is_phi || (a->info->operands[i] & MVM_operand_rw_mask) == MVM_operand_write_reg) { MVMuint16 orig = a->operands[i].reg.orig; var_info[orig].stack_top--; } if (is_phi) break; } a = a->next; } } /* Transforms a spesh graph into SSA form. After this, the graph will have all * register accesses given an SSA "version", and phi instructions inserted as * needed. */ static void ssa(MVMThreadContext *tc, MVMSpeshGraph *g) { SSAVarInfo *var_info; MVMint32 i, num_locals; /* Compute dominance frontiers. */ MVMSpeshBB **rpo = MVM_spesh_graph_reverse_postorder(tc, g); MVMint32 *doms = compute_dominators(tc, g, rpo); add_children(tc, g, rpo, doms); add_dominance_frontiers(tc, g, rpo, doms); MVM_free(rpo); MVM_free(doms); /* Initialize per-local data for SSA analysis. */ var_info = initialize_ssa_var_info(tc, g); /* Compute SSA itself. */ insert_phi_functions(tc, g, var_info); rename_locals(tc, g, var_info, g->entry); /* Allocate space for spesh facts for each local; clean up stacks while * we're at it. */ num_locals = g->num_locals; g->facts = MVM_spesh_alloc(tc, g, num_locals * sizeof(MVMSpeshFacts *)); g->fact_counts = MVM_spesh_alloc(tc, g, num_locals * sizeof(MVMuint16)); for (i = 0; i < num_locals; i++) { g->fact_counts[i] = var_info[i].count; g->facts[i] = MVM_spesh_alloc(tc, g, var_info[i].count * sizeof(MVMSpeshFacts)); if (var_info[i].stack_alloc) { MVM_free(var_info[i].stack); MVM_free(var_info[i].ass_nodes); } } MVM_free(var_info); } /* Takes a static frame and creates a spesh graph for it. */ MVMSpeshGraph * MVM_spesh_graph_create(MVMThreadContext *tc, MVMStaticFrame *sf, MVMuint32 cfg_only, MVMuint32 insert_object_nulls) { /* Create top-level graph object. */ MVMSpeshGraph *g = MVM_calloc(1, sizeof(MVMSpeshGraph)); g->sf = sf; g->bytecode = sf->body.bytecode; g->bytecode_size = sf->body.bytecode_size; g->handlers = sf->body.handlers; g->num_handlers = sf->body.num_handlers; g->num_locals = sf->body.num_locals; g->num_lexicals = sf->body.num_lexicals; g->phi_infos = MVM_spesh_alloc(tc, g, MVMPhiNodeCacheSize * sizeof(MVMOpInfo)); /* Ensure the frame is validated, since we'll rely on this. */ if (!sf->body.validated) { MVM_spesh_graph_destroy(tc, g); MVM_oops(tc, "Spesh: cannot build CFG from unvalidated frame"); } /* Build the CFG out of the static frame, and transform it to SSA. */ build_cfg(tc, g, sf, NULL, 0, NULL, 0, NULL, NULL); if (insert_object_nulls) insert_object_null_instructions(tc, g); if (!cfg_only) { MVM_spesh_eliminate_dead_bbs(tc, g, 0); add_predecessors(tc, g); ssa(tc, g); } /* Hand back the completed graph. */ return g; } /* Takes a static frame and creates a spesh graph for it. */ MVMSpeshGraph * MVM_spesh_graph_create_from_cand(MVMThreadContext *tc, MVMStaticFrame *sf, MVMSpeshCandidate *cand, MVMuint32 cfg_only, MVMSpeshIns ***deopt_usage_ins_out) { /* Create top-level graph object. */ MVMSpeshGraph *g = MVM_calloc(1, sizeof(MVMSpeshGraph)); g->sf = sf; g->bytecode = cand->body.bytecode; g->bytecode_size = cand->body.bytecode_size; g->handlers = cand->body.handlers; g->num_handlers = cand->body.num_handlers; g->num_locals = cand->body.num_locals; g->num_lexicals = cand->body.num_lexicals; g->inlines = cand->body.inlines; g->num_inlines = cand->body.num_inlines; g->resume_inits = cand->body.resume_inits; g->resume_inits_num = cand->body.num_resume_inits; g->resume_inits_alloc = cand->body.num_resume_inits; g->deopt_addrs = cand->body.deopts; g->num_deopt_addrs = cand->body.num_deopts; g->alloc_deopt_addrs = cand->body.num_deopts; g->deopt_named_used_bit_field = cand->body.deopt_named_used_bit_field; g->deopt_pea = cand->body.deopt_pea; g->local_types = cand->body.local_types; g->lexical_types = cand->body.lexical_types; g->num_spesh_slots = cand->body.num_spesh_slots; g->alloc_spesh_slots = cand->body.num_spesh_slots; g->phi_infos = MVM_spesh_alloc(tc, g, MVMPhiNodeCacheSize * sizeof(MVMOpInfo)); g->cand = cand; g->spesh_slots = MVM_malloc(g->alloc_spesh_slots * sizeof(MVMCollectable *)); memcpy(g->spesh_slots, cand->body.spesh_slots, sizeof(MVMCollectable *) * g->num_spesh_slots); /* Ensure the frame is validated, since we'll rely on this. */ if (!sf->body.validated) { MVM_spesh_graph_destroy(tc, g); MVM_oops(tc, "Spesh: cannot build CFG from unvalidated frame"); } /* Build the CFG out of the static frame, and transform it to SSA. */ build_cfg(tc, g, sf, cand->body.deopts, cand->body.num_deopts, cand->body.deopt_synths, cand->body.num_deopt_synths, cand->body.deopt_usage_info, deopt_usage_ins_out); if (!cfg_only) { MVM_spesh_eliminate_dead_bbs(tc, g, 0); add_predecessors(tc, g); ssa(tc, g); } /* Hand back the completed graph. */ return g; } /* Recomputes the dominance tree, after modifications to the CFG. */ void MVM_spesh_graph_recompute_dominance(MVMThreadContext *tc, MVMSpeshGraph *g) { MVMSpeshBB **rpo; MVMint32 *doms; /* First, clear away all existing dominance tree information; we also toss * out all of the predecessors, in case they got out of sync (should try * and fix things up to not need this in the future). */ MVMSpeshBB *cur_bb = g->entry; while (cur_bb) { cur_bb->children = NULL; cur_bb->num_children = 0; cur_bb->pred = NULL; cur_bb->num_pred = 0; cur_bb = cur_bb->linear_next; } /* Now form the new dominance tree. */ add_predecessors(tc, g); rpo = MVM_spesh_graph_reverse_postorder(tc, g); doms = compute_dominators(tc, g, rpo); add_children(tc, g, rpo, doms); MVM_free(rpo); MVM_free(doms); } /* Marks GCables held in a spesh graph. */ void MVM_spesh_graph_mark(MVMThreadContext *tc, MVMSpeshGraph *g, MVMGCWorklist *worklist) { MVMuint16 i, j, num_locals, num_facts, *local_types; /* Mark static frame. */ MVM_gc_worklist_add(tc, worklist, &g->sf); /* Mark facts. */ num_locals = g->num_locals; local_types = g->local_types ? g->local_types : g->sf->body.local_types; for (i = 0; i < num_locals; i++) { num_facts = g->fact_counts[i]; for (j = 0; j < num_facts; j++) { MVMint32 flags = g->facts[i][j].flags; if (flags & MVM_SPESH_FACT_KNOWN_TYPE) MVM_gc_worklist_add(tc, worklist, &(g->facts[i][j].type)); if (flags & MVM_SPESH_FACT_KNOWN_DECONT_TYPE) MVM_gc_worklist_add(tc, worklist, &(g->facts[i][j].decont_type)); if (flags & MVM_SPESH_FACT_KNOWN_VALUE) { if (local_types[i] == MVM_reg_obj) MVM_gc_worklist_add(tc, worklist, &(g->facts[i][j].value.o)); else if (local_types[i] == MVM_reg_str) MVM_gc_worklist_add(tc, worklist, &(g->facts[i][j].value.s)); } } } /* Mark spesh slots. */ for (i = 0; i < g->num_spesh_slots; i++) MVM_gc_worklist_add(tc, worklist, &(g->spesh_slots[i])); /* Mark inlines. */ for (MVMuint32 k = 0; k < g->num_inlines; k++) MVM_gc_worklist_add(tc, worklist, &(g->inlines[k].sf)); /* Mark spesh candidate. */ if (g->cand) MVM_gc_worklist_add(tc, worklist, &(g->cand)); } /* Describes GCables for a heap snapshot. */ void MVM_spesh_graph_describe(MVMThreadContext *tc, MVMSpeshGraph *g, MVMHeapSnapshotState *snapshot) { MVMuint16 i, j, num_locals, num_facts, *local_types; /* Mark static frame. */ MVM_profile_heap_add_collectable_rel_const_cstr(tc, snapshot, (MVMCollectable *)g->sf, "Static frame"); /* Mark facts. */ num_locals = g->num_locals; local_types = g->local_types ? g->local_types : g->sf->body.local_types; for (i = 0; i < num_locals; i++) { num_facts = g->fact_counts[i]; for (j = 0; j < num_facts; j++) { MVMint32 flags = g->facts[i][j].flags; if (flags & MVM_SPESH_FACT_KNOWN_TYPE) MVM_profile_heap_add_collectable_rel_const_cstr(tc, snapshot, (MVMCollectable *)g->facts[i][j].type, "Known Type"); if (flags & MVM_SPESH_FACT_KNOWN_DECONT_TYPE) MVM_profile_heap_add_collectable_rel_const_cstr(tc, snapshot, (MVMCollectable *)g->facts[i][j].decont_type, "Known Decont Type"); if (flags & MVM_SPESH_FACT_KNOWN_VALUE) { if (local_types[i] == MVM_reg_obj) MVM_profile_heap_add_collectable_rel_const_cstr(tc, snapshot, (MVMCollectable *)g->facts[i][j].value.o, "Known Value"); else if (local_types[i] == MVM_reg_str) MVM_profile_heap_add_collectable_rel_const_cstr(tc, snapshot, (MVMCollectable *)g->facts[i][j].value.s, "Known String Value"); } } } /* Mark spesh slots. */ for (i = 0; i < g->num_spesh_slots; i++) MVM_profile_heap_add_collectable_rel_idx(tc, snapshot, g->spesh_slots[i], i); /* Mark inlines. */ for (MVMuint32 k = 0; k < g->num_inlines; k++) MVM_profile_heap_add_collectable_rel_idx(tc, snapshot, (MVMCollectable *)g->inlines[k].sf, k); } /* Destroys a spesh graph, deallocating all its associated memory. */ void MVM_spesh_graph_destroy(MVMThreadContext *tc, MVMSpeshGraph *g) { /* Free all of the allocated node memory. */ MVM_region_destroy(tc, &g->region_alloc); /* If there is a candidate that we either generated or that this graph was * generated from, it has ownership of the malloc'd memory. If not, then we * need to clean up */ if (g->spesh_slots && (!g->cand || g->cand->body.spesh_slots != g->spesh_slots)) MVM_free(g->spesh_slots); if (g->deopt_addrs && (!g->cand || g->cand->body.deopts != g->deopt_addrs)) MVM_free(g->deopt_addrs); if (g->inlines && (!g->cand || g->cand->body.inlines != g->inlines)) MVM_free(g->inlines); if (g->resume_inits && (!g->cand || g->cand->body.resume_inits != g->resume_inits)) MVM_free(g->resume_inits); if (g->local_types && (!g->cand || g->cand->body.local_types != g->local_types)) MVM_free(g->local_types); if (g->lexical_types && (!g->cand || g->cand->body.lexical_types != g->lexical_types)) MVM_free(g->lexical_types); if (!g->cand) MVM_spesh_pea_destroy_deopt_info(tc, &(g->deopt_pea)); /* Handlers can come directly from static frame, from spesh candidate, and * from malloc/realloc. We only free it in the last case */ if (g->handlers && g->handlers != g->sf->body.handlers && (!g->cand || g->cand->body.handlers != g->handlers)) MVM_free(g->handlers); /* Free the graph itself. */ MVM_free(g); }