QEMU with hacks to log DMA reads & writes — see jcs.org/2018/11/12/vfio

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20190426' into staging

Add tcg_gen_extract2_*.
Deal with overflow of TranslationBlocks.
Respect access_type in io_readx.

# gpg: Signature made Fri 26 Apr 2019 18:17:01 BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20190426:
cputlb: Fix io_readx() to respect the access_type
tcg/arm: Restrict constant pool displacement to 12 bits
tcg/ppc: Allow the constant pool to overflow at 32k
tcg: Restart TB generation after out-of-line ldst overflow
tcg: Restart TB generation after constant pool overflow
tcg: Restart TB generation after relocation overflow
tcg: Restart after TB code generation overflow
tcg: Hoist max_insns computation to tb_gen_code
tcg/aarch64: Support INDEX_op_extract2_{i32,i64}
tcg/arm: Support INDEX_op_extract2_i32
tcg/i386: Support INDEX_op_extract2_{i32,i64}
tcg: Use extract2 in tcg_gen_deposit_{i32,i64}
tcg: Use deposit and extract2 in tcg_gen_shifti_i64
tcg: Add INDEX_op_extract2_{i32,i64}
tcg: Implement tcg_gen_extract2_{i32,i64}

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

+449 -307
+3 -2
accel/tcg/cputlb.c
··· 878 878 CPUTLBEntry *entry; 879 879 target_ulong tlb_addr; 880 880 881 - tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); 881 + tlb_fill(cpu, addr, size, access_type, mmu_idx, retaddr); 882 882 883 883 entry = tlb_entry(env, mmu_idx, addr); 884 - tlb_addr = entry->addr_read; 884 + tlb_addr = (access_type == MMU_DATA_LOAD ? 885 + entry->addr_read : entry->addr_code); 885 886 if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { 886 887 /* RAM access */ 887 888 uintptr_t haddr = addr + entry->addend;
+45 -8
accel/tcg/translate-all.c
··· 1674 1674 tb_page_addr_t phys_pc, phys_page2; 1675 1675 target_ulong virt_page2; 1676 1676 tcg_insn_unit *gen_code_buf; 1677 - int gen_code_size, search_size; 1677 + int gen_code_size, search_size, max_insns; 1678 1678 #ifdef CONFIG_PROFILER 1679 1679 TCGProfile *prof = &tcg_ctx->prof; 1680 1680 int64_t ti; ··· 1692 1692 cflags &= ~CF_CLUSTER_MASK; 1693 1693 cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT; 1694 1694 1695 + max_insns = cflags & CF_COUNT_MASK; 1696 + if (max_insns == 0) { 1697 + max_insns = CF_COUNT_MASK; 1698 + } 1699 + if (max_insns > TCG_MAX_INSNS) { 1700 + max_insns = TCG_MAX_INSNS; 1701 + } 1702 + if (cpu->singlestep_enabled || singlestep) { 1703 + max_insns = 1; 1704 + } 1705 + 1695 1706 buffer_overflow: 1696 1707 tb = tb_alloc(pc); 1697 1708 if (unlikely(!tb)) { ··· 1711 1722 tb->cflags = cflags; 1712 1723 tb->trace_vcpu_dstate = *cpu->trace_dstate; 1713 1724 tcg_ctx->tb_cflags = cflags; 1725 + tb_overflow: 1714 1726 1715 1727 #ifdef CONFIG_PROFILER 1716 1728 /* includes aborted translations because of exceptions */ ··· 1721 1733 tcg_func_start(tcg_ctx); 1722 1734 1723 1735 tcg_ctx->cpu = ENV_GET_CPU(env); 1724 - gen_intermediate_code(cpu, tb); 1736 + gen_intermediate_code(cpu, tb, max_insns); 1725 1737 tcg_ctx->cpu = NULL; 1726 1738 1727 1739 trace_translate_block(tb, tb->pc, tb->tc.ptr); ··· 1744 1756 ti = profile_getclock(); 1745 1757 #endif 1746 1758 1747 - /* ??? Overflow could be handled better here. In particular, we 1748 - don't need to re-do gen_intermediate_code, nor should we re-do 1749 - the tcg optimization currently hidden inside tcg_gen_code. All 1750 - that should be required is to flush the TBs, allocate a new TB, 1751 - re-initialize it per above, and re-do the actual code generation. 
*/ 1752 1759 gen_code_size = tcg_gen_code(tcg_ctx, tb); 1753 1760 if (unlikely(gen_code_size < 0)) { 1754 - goto buffer_overflow; 1761 + switch (gen_code_size) { 1762 + case -1: 1763 + /* 1764 + * Overflow of code_gen_buffer, or the current slice of it. 1765 + * 1766 + * TODO: We don't need to re-do gen_intermediate_code, nor 1767 + * should we re-do the tcg optimization currently hidden 1768 + * inside tcg_gen_code. All that should be required is to 1769 + * flush the TBs, allocate a new TB, re-initialize it per 1770 + * above, and re-do the actual code generation. 1771 + */ 1772 + goto buffer_overflow; 1773 + 1774 + case -2: 1775 + /* 1776 + * The code generated for the TranslationBlock is too large. 1777 + * The maximum size allowed by the unwind info is 64k. 1778 + * There may be stricter constraints from relocations 1779 + * in the tcg backend. 1780 + * 1781 + * Try again with half as many insns as we attempted this time. 1782 + * If a single insn overflows, there's a bug somewhere... 1783 + */ 1784 + max_insns = tb->icount; 1785 + assert(max_insns > 1); 1786 + max_insns /= 2; 1787 + goto tb_overflow; 1788 + 1789 + default: 1790 + g_assert_not_reached(); 1791 + } 1755 1792 } 1756 1793 search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size); 1757 1794 if (unlikely(search_size < 0)) {
+2 -13
accel/tcg/translator.c
··· 32 32 } 33 33 34 34 void translator_loop(const TranslatorOps *ops, DisasContextBase *db, 35 - CPUState *cpu, TranslationBlock *tb) 35 + CPUState *cpu, TranslationBlock *tb, int max_insns) 36 36 { 37 37 int bp_insn = 0; 38 38 ··· 42 42 db->pc_next = db->pc_first; 43 43 db->is_jmp = DISAS_NEXT; 44 44 db->num_insns = 0; 45 + db->max_insns = max_insns; 45 46 db->singlestep_enabled = cpu->singlestep_enabled; 46 - 47 - /* Instruction counting */ 48 - db->max_insns = tb_cflags(db->tb) & CF_COUNT_MASK; 49 - if (db->max_insns == 0) { 50 - db->max_insns = CF_COUNT_MASK; 51 - } 52 - if (db->max_insns > TCG_MAX_INSNS) { 53 - db->max_insns = TCG_MAX_INSNS; 54 - } 55 - if (db->singlestep_enabled || singlestep) { 56 - db->max_insns = 1; 57 - } 58 47 59 48 ops->init_disas_context(db, cpu); 60 49 tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
+2 -2
include/exec/exec-all.h
··· 40 40 41 41 #include "qemu/log.h" 42 42 43 - void gen_intermediate_code(CPUState *cpu, struct TranslationBlock *tb); 44 - void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb, 43 + void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns); 44 + void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb, 45 45 target_ulong *data); 46 46 47 47 void cpu_gen_init(void);
+2 -1
include/exec/translator.h
··· 123 123 * @db: Disassembly context. 124 124 * @cpu: Target vCPU. 125 125 * @tb: Translation block. 126 + * @max_insns: Maximum number of insns to translate. 126 127 * 127 128 * Generic translator loop. 128 129 * ··· 137 138 * - When too many instructions have been translated. 138 139 */ 139 140 void translator_loop(const TranslatorOps *ops, DisasContextBase *db, 140 - CPUState *cpu, TranslationBlock *tb); 141 + CPUState *cpu, TranslationBlock *tb, int max_insns); 141 142 142 143 void translator_loop_temp_check(DisasContextBase *db); 143 144
+2 -2
target/alpha/translate.c
··· 3049 3049 .disas_log = alpha_tr_disas_log, 3050 3050 }; 3051 3051 3052 - void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) 3052 + void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) 3053 3053 { 3054 3054 DisasContext dc; 3055 - translator_loop(&alpha_tr_ops, &dc.base, cpu, tb); 3055 + translator_loop(&alpha_tr_ops, &dc.base, cpu, tb, max_insns); 3056 3056 } 3057 3057 3058 3058 void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb,
+2 -2
target/arm/translate.c
··· 13756 13756 }; 13757 13757 13758 13758 /* generate intermediate code for basic block 'tb'. */ 13759 - void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) 13759 + void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) 13760 13760 { 13761 13761 DisasContext dc; 13762 13762 const TranslatorOps *ops = &arm_translator_ops; ··· 13770 13770 } 13771 13771 #endif 13772 13772 13773 - translator_loop(ops, &dc.base, cpu, tb); 13773 + translator_loop(ops, &dc.base, cpu, tb, max_insns); 13774 13774 } 13775 13775 13776 13776 void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
+1 -9
target/cris/translate.c
··· 3081 3081 */ 3082 3082 3083 3083 /* generate intermediate code for basic block 'tb'. */ 3084 - void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) 3084 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 3085 3085 { 3086 3086 CPUCRISState *env = cs->env_ptr; 3087 3087 uint32_t pc_start; ··· 3091 3091 uint32_t page_start; 3092 3092 target_ulong npc; 3093 3093 int num_insns; 3094 - int max_insns; 3095 3094 3096 3095 if (env->pregs[PR_VR] == 32) { 3097 3096 dc->decoder = crisv32_decoder; ··· 3137 3136 3138 3137 page_start = pc_start & TARGET_PAGE_MASK; 3139 3138 num_insns = 0; 3140 - max_insns = tb_cflags(tb) & CF_COUNT_MASK; 3141 - if (max_insns == 0) { 3142 - max_insns = CF_COUNT_MASK; 3143 - } 3144 - if (max_insns > TCG_MAX_INSNS) { 3145 - max_insns = TCG_MAX_INSNS; 3146 - } 3147 3139 3148 3140 gen_tb_start(tb); 3149 3141 do {
+2 -3
target/hppa/translate.c
··· 4312 4312 .disas_log = hppa_tr_disas_log, 4313 4313 }; 4314 4314 4315 - void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) 4316 - 4315 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 4317 4316 { 4318 4317 DisasContext ctx; 4319 - translator_loop(&hppa_tr_ops, &ctx.base, cs, tb); 4318 + translator_loop(&hppa_tr_ops, &ctx.base, cs, tb, max_insns); 4320 4319 } 4321 4320 4322 4321 void restore_state_to_opc(CPUHPPAState *env, TranslationBlock *tb,
+2 -2
target/i386/translate.c
··· 8590 8590 }; 8591 8591 8592 8592 /* generate intermediate code for basic block 'tb'. */ 8593 - void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) 8593 + void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) 8594 8594 { 8595 8595 DisasContext dc; 8596 8596 8597 - translator_loop(&i386_tr_ops, &dc.base, cpu, tb); 8597 + translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns); 8598 8598 } 8599 8599 8600 8600 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
+1 -9
target/lm32/translate.c
··· 1050 1050 } 1051 1051 1052 1052 /* generate intermediate code for basic block 'tb'. */ 1053 - void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) 1053 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 1054 1054 { 1055 1055 CPULM32State *env = cs->env_ptr; 1056 1056 LM32CPU *cpu = lm32_env_get_cpu(env); ··· 1058 1058 uint32_t pc_start; 1059 1059 uint32_t page_start; 1060 1060 int num_insns; 1061 - int max_insns; 1062 1061 1063 1062 pc_start = tb->pc; 1064 1063 dc->features = cpu->features; ··· 1078 1077 1079 1078 page_start = pc_start & TARGET_PAGE_MASK; 1080 1079 num_insns = 0; 1081 - max_insns = tb_cflags(tb) & CF_COUNT_MASK; 1082 - if (max_insns == 0) { 1083 - max_insns = CF_COUNT_MASK; 1084 - } 1085 - if (max_insns > TCG_MAX_INSNS) { 1086 - max_insns = TCG_MAX_INSNS; 1087 - } 1088 1080 1089 1081 gen_tb_start(tb); 1090 1082 do {
+2 -2
target/m68k/translate.c
··· 6170 6170 .disas_log = m68k_tr_disas_log, 6171 6171 }; 6172 6172 6173 - void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) 6173 + void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) 6174 6174 { 6175 6175 DisasContext dc; 6176 - translator_loop(&m68k_tr_ops, &dc.base, cpu, tb); 6176 + translator_loop(&m68k_tr_ops, &dc.base, cpu, tb, max_insns); 6177 6177 } 6178 6178 6179 6179 static double floatx80_to_double(CPUM68KState *env, uint16_t high, uint64_t low)
+1 -9
target/microblaze/translate.c
··· 1601 1601 } 1602 1602 1603 1603 /* generate intermediate code for basic block 'tb'. */ 1604 - void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) 1604 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 1605 1605 { 1606 1606 CPUMBState *env = cs->env_ptr; 1607 1607 MicroBlazeCPU *cpu = mb_env_get_cpu(env); ··· 1611 1611 uint32_t page_start, org_flags; 1612 1612 uint32_t npc; 1613 1613 int num_insns; 1614 - int max_insns; 1615 1614 1616 1615 pc_start = tb->pc; 1617 1616 dc->cpu = cpu; ··· 1635 1634 1636 1635 page_start = pc_start & TARGET_PAGE_MASK; 1637 1636 num_insns = 0; 1638 - max_insns = tb_cflags(tb) & CF_COUNT_MASK; 1639 - if (max_insns == 0) { 1640 - max_insns = CF_COUNT_MASK; 1641 - } 1642 - if (max_insns > TCG_MAX_INSNS) { 1643 - max_insns = TCG_MAX_INSNS; 1644 - } 1645 1637 1646 1638 gen_tb_start(tb); 1647 1639 do
+2 -2
target/mips/translate.c
··· 29721 29721 .disas_log = mips_tr_disas_log, 29722 29722 }; 29723 29723 29724 - void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) 29724 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 29725 29725 { 29726 29726 DisasContext ctx; 29727 29727 29728 - translator_loop(&mips_tr_ops, &ctx.base, cs, tb); 29728 + translator_loop(&mips_tr_ops, &ctx.base, cs, tb, max_insns); 29729 29729 } 29730 29730 29731 29731 static void fpu_dump_state(CPUMIPSState *env, FILE *f, int flags)
+2 -9
target/moxie/translate.c
··· 813 813 } 814 814 815 815 /* generate intermediate code for basic block 'tb'. */ 816 - void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) 816 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 817 817 { 818 818 CPUMoxieState *env = cs->env_ptr; 819 819 MoxieCPU *cpu = moxie_env_get_cpu(env); 820 820 DisasContext ctx; 821 821 target_ulong pc_start; 822 - int num_insns, max_insns; 822 + int num_insns; 823 823 824 824 pc_start = tb->pc; 825 825 ctx.pc = pc_start; ··· 829 829 ctx.singlestep_enabled = 0; 830 830 ctx.bstate = BS_NONE; 831 831 num_insns = 0; 832 - max_insns = tb_cflags(tb) & CF_COUNT_MASK; 833 - if (max_insns == 0) { 834 - max_insns = CF_COUNT_MASK; 835 - } 836 - if (max_insns > TCG_MAX_INSNS) { 837 - max_insns = TCG_MAX_INSNS; 838 - } 839 832 840 833 gen_tb_start(tb); 841 834 do {
+2 -12
target/nios2/translate.c
··· 806 806 } 807 807 808 808 /* generate intermediate code for basic block 'tb'. */ 809 - void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) 809 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 810 810 { 811 811 CPUNios2State *env = cs->env_ptr; 812 812 DisasContext dc1, *dc = &dc1; 813 813 int num_insns; 814 - int max_insns; 815 814 816 815 /* Initialize DC */ 817 816 dc->cpu_env = cpu_env; ··· 824 823 825 824 /* Set up instruction counts */ 826 825 num_insns = 0; 827 - if (cs->singlestep_enabled || singlestep) { 828 - max_insns = 1; 829 - } else { 826 + if (max_insns > 1) { 830 827 int page_insns = (TARGET_PAGE_SIZE - (tb->pc & TARGET_PAGE_MASK)) / 4; 831 - max_insns = tb_cflags(tb) & CF_COUNT_MASK; 832 - if (max_insns == 0) { 833 - max_insns = CF_COUNT_MASK; 834 - } 835 828 if (max_insns > page_insns) { 836 829 max_insns = page_insns; 837 - } 838 - if (max_insns > TCG_MAX_INSNS) { 839 - max_insns = TCG_MAX_INSNS; 840 830 } 841 831 } 842 832
+2 -2
target/openrisc/translate.c
··· 1409 1409 .disas_log = openrisc_tr_disas_log, 1410 1410 }; 1411 1411 1412 - void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) 1412 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 1413 1413 { 1414 1414 DisasContext ctx; 1415 1415 1416 - translator_loop(&openrisc_tr_ops, &ctx.base, cs, tb); 1416 + translator_loop(&openrisc_tr_ops, &ctx.base, cs, tb, max_insns); 1417 1417 } 1418 1418 1419 1419 void openrisc_cpu_dump_state(CPUState *cs, FILE *f, int flags)
+2 -2
target/ppc/translate.c
··· 7984 7984 .disas_log = ppc_tr_disas_log, 7985 7985 }; 7986 7986 7987 - void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) 7987 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 7988 7988 { 7989 7989 DisasContext ctx; 7990 7990 7991 - translator_loop(&ppc_tr_ops, &ctx.base, cs, tb); 7991 + translator_loop(&ppc_tr_ops, &ctx.base, cs, tb, max_insns); 7992 7992 } 7993 7993 7994 7994 void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb,
+2 -2
target/riscv/translate.c
··· 783 783 .disas_log = riscv_tr_disas_log, 784 784 }; 785 785 786 - void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) 786 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 787 787 { 788 788 DisasContext ctx; 789 789 790 - translator_loop(&riscv_tr_ops, &ctx.base, cs, tb); 790 + translator_loop(&riscv_tr_ops, &ctx.base, cs, tb, max_insns); 791 791 } 792 792 793 793 void riscv_translate_init(void)
+2 -2
target/s390x/translate.c
··· 6552 6552 .disas_log = s390x_tr_disas_log, 6553 6553 }; 6554 6554 6555 - void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) 6555 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 6556 6556 { 6557 6557 DisasContext dc; 6558 6558 6559 - translator_loop(&s390x_tr_ops, &dc.base, cs, tb); 6559 + translator_loop(&s390x_tr_ops, &dc.base, cs, tb, max_insns); 6560 6560 } 6561 6561 6562 6562 void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb,
+2 -2
target/sh4/translate.c
··· 2383 2383 .disas_log = sh4_tr_disas_log, 2384 2384 }; 2385 2385 2386 - void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) 2386 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 2387 2387 { 2388 2388 DisasContext ctx; 2389 2389 2390 - translator_loop(&sh4_tr_ops, &ctx.base, cs, tb); 2390 + translator_loop(&sh4_tr_ops, &ctx.base, cs, tb, max_insns); 2391 2391 } 2392 2392 2393 2393 void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb,
+2 -2
target/sparc/translate.c
··· 5962 5962 .disas_log = sparc_tr_disas_log, 5963 5963 }; 5964 5964 5965 - void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) 5965 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 5966 5966 { 5967 5967 DisasContext dc = {}; 5968 5968 5969 - translator_loop(&sparc_tr_ops, &dc.base, cs, tb); 5969 + translator_loop(&sparc_tr_ops, &dc.base, cs, tb, max_insns); 5970 5970 } 5971 5971 5972 5972 void sparc_tcg_init(void)
+1 -11
target/tilegx/translate.c
··· 2369 2369 } 2370 2370 } 2371 2371 2372 - void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) 2372 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 2373 2373 { 2374 2374 CPUTLGState *env = cs->env_ptr; 2375 2375 DisasContext ctx; ··· 2377 2377 uint64_t pc_start = tb->pc; 2378 2378 uint64_t page_start = pc_start & TARGET_PAGE_MASK; 2379 2379 int num_insns = 0; 2380 - int max_insns = tb_cflags(tb) & CF_COUNT_MASK; 2381 2380 2382 2381 dc->pc = pc_start; 2383 2382 dc->mmuidx = 0; ··· 2391 2390 if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { 2392 2391 qemu_log_lock(); 2393 2392 qemu_log("IN: %s\n", lookup_symbol(pc_start)); 2394 - } 2395 - if (!max_insns) { 2396 - max_insns = CF_COUNT_MASK; 2397 - } 2398 - if (cs->singlestep_enabled || singlestep) { 2399 - max_insns = 1; 2400 - } 2401 - if (max_insns > TCG_MAX_INSNS) { 2402 - max_insns = TCG_MAX_INSNS; 2403 2393 } 2404 2394 gen_tb_start(tb); 2405 2395
+2 -14
target/tricore/translate.c
··· 8807 8807 } 8808 8808 } 8809 8809 8810 - void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) 8810 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 8811 8811 { 8812 8812 CPUTriCoreState *env = cs->env_ptr; 8813 8813 DisasContext ctx; 8814 8814 target_ulong pc_start; 8815 - int num_insns, max_insns; 8816 - 8817 - num_insns = 0; 8818 - max_insns = tb_cflags(tb) & CF_COUNT_MASK; 8819 - if (max_insns == 0) { 8820 - max_insns = CF_COUNT_MASK; 8821 - } 8822 - if (singlestep) { 8823 - max_insns = 1; 8824 - } 8825 - if (max_insns > TCG_MAX_INSNS) { 8826 - max_insns = TCG_MAX_INSNS; 8827 - } 8815 + int num_insns = 0; 8828 8816 8829 8817 pc_start = tb->pc; 8830 8818 ctx.pc = pc_start;
+1 -9
target/unicore32/translate.c
··· 1871 1871 } 1872 1872 1873 1873 /* generate intermediate code for basic block 'tb'. */ 1874 - void gen_intermediate_code(CPUState *cs, TranslationBlock *tb) 1874 + void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) 1875 1875 { 1876 1876 CPUUniCore32State *env = cs->env_ptr; 1877 1877 DisasContext dc1, *dc = &dc1; 1878 1878 target_ulong pc_start; 1879 1879 uint32_t page_start; 1880 1880 int num_insns; 1881 - int max_insns; 1882 1881 1883 1882 /* generate intermediate code */ 1884 1883 num_temps = 0; ··· 1897 1896 cpu_F1d = tcg_temp_new_i64(); 1898 1897 page_start = pc_start & TARGET_PAGE_MASK; 1899 1898 num_insns = 0; 1900 - max_insns = tb_cflags(tb) & CF_COUNT_MASK; 1901 - if (max_insns == 0) { 1902 - max_insns = CF_COUNT_MASK; 1903 - } 1904 - if (max_insns > TCG_MAX_INSNS) { 1905 - max_insns = TCG_MAX_INSNS; 1906 - } 1907 1899 1908 1900 #ifndef CONFIG_USER_ONLY 1909 1901 if ((env->uncached_asr & ASR_M) == ASR_MODE_USER) {
+2 -2
target/xtensa/translate.c
··· 1635 1635 .disas_log = xtensa_tr_disas_log, 1636 1636 }; 1637 1637 1638 - void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) 1638 + void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) 1639 1639 { 1640 1640 DisasContext dc = {}; 1641 - translator_loop(&xtensa_translator_ops, &dc.base, cpu, tb); 1641 + translator_loop(&xtensa_translator_ops, &dc.base, cpu, tb, max_insns); 1642 1642 } 1643 1643 1644 1644 void xtensa_cpu_dump_state(CPUState *cs, FILE *f, int flags)
+7
tcg/README
··· 343 343 344 344 (using an arithmetic right shift). 345 345 346 + * extract2_i32/i64 dest, t1, t2, pos 347 + 348 + For N = {32,64}, extract an N-bit quantity from the concatenation 349 + of t2:t1, beginning at pos. The tcg_gen_extract2_{i32,i64} expander 350 + accepts 0 <= pos <= N as inputs. The backend code generator will 351 + not see either 0 or N as inputs for these opcodes. 352 + 346 353 * extrl_i64_i32 t0, t1 347 354 348 355 For 64-bit hosts only, extract the low 32-bits of input T1 and place it
+2
tcg/aarch64/tcg-target.h
··· 77 77 #define TCG_TARGET_HAS_deposit_i32 1 78 78 #define TCG_TARGET_HAS_extract_i32 1 79 79 #define TCG_TARGET_HAS_sextract_i32 1 80 + #define TCG_TARGET_HAS_extract2_i32 1 80 81 #define TCG_TARGET_HAS_movcond_i32 1 81 82 #define TCG_TARGET_HAS_add2_i32 1 82 83 #define TCG_TARGET_HAS_sub2_i32 1 ··· 113 114 #define TCG_TARGET_HAS_deposit_i64 1 114 115 #define TCG_TARGET_HAS_extract_i64 1 115 116 #define TCG_TARGET_HAS_sextract_i64 1 117 + #define TCG_TARGET_HAS_extract2_i64 1 116 118 #define TCG_TARGET_HAS_movcond_i64 1 117 119 #define TCG_TARGET_HAS_add2_i64 1 118 120 #define TCG_TARGET_HAS_sub2_i64 1
+21 -6
tcg/aarch64/tcg-target.inc.c
··· 1395 1395 tcg_out_insn(s, 3406, ADR, rd, offset); 1396 1396 } 1397 1397 1398 - static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1398 + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1399 1399 { 1400 1400 TCGMemOpIdx oi = lb->oi; 1401 1401 TCGMemOp opc = get_memop(oi); 1402 1402 TCGMemOp size = opc & MO_SIZE; 1403 1403 1404 - bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr); 1405 - tcg_debug_assert(ok); 1404 + if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { 1405 + return false; 1406 + } 1406 1407 1407 1408 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1408 1409 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); ··· 1416 1417 } 1417 1418 1418 1419 tcg_out_goto(s, lb->raddr); 1420 + return true; 1419 1421 } 1420 1422 1421 - static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1423 + static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1422 1424 { 1423 1425 TCGMemOpIdx oi = lb->oi; 1424 1426 TCGMemOp opc = get_memop(oi); 1425 1427 TCGMemOp size = opc & MO_SIZE; 1426 1428 1427 - bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr); 1428 - tcg_debug_assert(ok); 1429 + if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { 1430 + return false; 1431 + } 1429 1432 1430 1433 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1431 1434 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); ··· 1434 1437 tcg_out_adr(s, TCG_REG_X4, lb->raddr); 1435 1438 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1436 1439 tcg_out_goto(s, lb->raddr); 1440 + return true; 1437 1441 } 1438 1442 1439 1443 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, ··· 2058 2062 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2059 2063 break; 2060 2064 2065 + case INDEX_op_extract2_i64: 2066 + case INDEX_op_extract2_i32: 2067 + tcg_out_extr(s, ext, a0, a1, a2, args[3]); 2068 + break; 2069 + 2061 2070 case INDEX_op_add2_i32: 
2062 2071 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2063 2072 (int32_t)args[4], args[5], const_args[4], ··· 2300 2309 = { .args_ct_str = { "r", "r", "rAL" } }; 2301 2310 static const TCGTargetOpDef dep 2302 2311 = { .args_ct_str = { "r", "0", "rZ" } }; 2312 + static const TCGTargetOpDef ext2 2313 + = { .args_ct_str = { "r", "rZ", "rZ" } }; 2303 2314 static const TCGTargetOpDef movc 2304 2315 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } }; 2305 2316 static const TCGTargetOpDef add2 ··· 2429 2440 case INDEX_op_deposit_i32: 2430 2441 case INDEX_op_deposit_i64: 2431 2442 return &dep; 2443 + 2444 + case INDEX_op_extract2_i32: 2445 + case INDEX_op_extract2_i64: 2446 + return &ext2; 2432 2447 2433 2448 case INDEX_op_add2_i32: 2434 2449 case INDEX_op_add2_i64:
+1
tcg/arm/tcg-target.h
··· 116 116 #define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions 117 117 #define TCG_TARGET_HAS_extract_i32 use_armv7_instructions 118 118 #define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions 119 + #define TCG_TARGET_HAS_extract2_i32 1 119 120 #define TCG_TARGET_HAS_movcond_i32 1 120 121 #define TCG_TARGET_HAS_mulu2_i32 1 121 122 #define TCG_TARGET_HAS_muls2_i32 1
+56 -42
tcg/arm/tcg-target.inc.c
··· 197 197 return false; 198 198 } 199 199 200 + static inline bool reloc_pc13(tcg_insn_unit *code_ptr, tcg_insn_unit *target) 201 + { 202 + ptrdiff_t offset = tcg_ptr_byte_diff(target, code_ptr) - 8; 203 + 204 + if (offset >= -0xfff && offset <= 0xfff) { 205 + tcg_insn_unit insn = *code_ptr; 206 + bool u = (offset >= 0); 207 + if (!u) { 208 + offset = -offset; 209 + } 210 + insn = deposit32(insn, 23, 1, u); 211 + insn = deposit32(insn, 0, 12, offset); 212 + *code_ptr = insn; 213 + return true; 214 + } 215 + return false; 216 + } 217 + 200 218 static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 201 219 intptr_t value, intptr_t addend) 202 220 { ··· 205 223 if (type == R_ARM_PC24) { 206 224 return reloc_pc24(code_ptr, (tcg_insn_unit *)value); 207 225 } else if (type == R_ARM_PC13) { 208 - intptr_t diff = value - (uintptr_t)(code_ptr + 2); 209 - tcg_insn_unit insn = *code_ptr; 210 - bool u; 211 - 212 - if (diff >= -0xfff && diff <= 0xfff) { 213 - u = (diff >= 0); 214 - if (!u) { 215 - diff = -diff; 216 - } 217 - } else { 218 - int rd = extract32(insn, 12, 4); 219 - int rt = rd == TCG_REG_PC ? TCG_REG_TMP : rd; 220 - 221 - if (diff < 0x1000 || diff >= 0x100000) { 222 - return false; 223 - } 224 - 225 - /* add rt, pc, #high */ 226 - *code_ptr++ = ((insn & 0xf0000000) | (1 << 25) | ARITH_ADD 227 - | (TCG_REG_PC << 16) | (rt << 12) 228 - | (20 << 7) | (diff >> 12)); 229 - /* ldr rd, [rt, #low] */ 230 - insn = deposit32(insn, 12, 4, rt); 231 - diff &= 0xfff; 232 - u = 1; 233 - } 234 - insn = deposit32(insn, 23, 1, u); 235 - insn = deposit32(insn, 0, 12, diff); 236 - *code_ptr = insn; 226 + return reloc_pc13(code_ptr, (tcg_insn_unit *)value); 237 227 } else { 238 228 g_assert_not_reached(); 239 229 } 240 - return true; 241 230 } 242 231 243 232 #define TCG_CT_CONST_ARM 0x100 ··· 605 594 606 595 static void tcg_out_movi_pool(TCGContext *s, int cond, int rd, uint32_t arg) 607 596 { 608 - /* The 12-bit range on the ldr insn is sometimes a bit too small. 
609 - In order to get around that we require two insns, one of which 610 - will usually be a nop, but may be replaced in patch_reloc. */ 611 597 new_pool_label(s, arg, R_ARM_PC13, s->code_ptr, 0); 612 598 tcg_out_ld32_12(s, cond, rd, TCG_REG_PC, 0); 613 - tcg_out_nop(s); 614 599 } 615 600 616 601 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) ··· 1069 1054 tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); 1070 1055 tcg_out_blx(s, COND_AL, TCG_REG_TMP); 1071 1056 } else { 1072 - /* ??? Know that movi_pool emits exactly 2 insns. */ 1073 - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); 1057 + /* ??? Know that movi_pool emits exactly 1 insn. */ 1058 + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 0); 1074 1059 tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri); 1075 1060 } 1076 1061 } ··· 1372 1357 label->label_ptr[0] = label_ptr; 1373 1358 } 1374 1359 1375 - static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1360 + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1376 1361 { 1377 1362 TCGReg argreg, datalo, datahi; 1378 1363 TCGMemOpIdx oi = lb->oi; 1379 1364 TCGMemOp opc = get_memop(oi); 1380 1365 void *func; 1381 1366 1382 - bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr); 1383 - tcg_debug_assert(ok); 1367 + if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) { 1368 + return false; 1369 + } 1384 1370 1385 1371 argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); 1386 1372 if (TARGET_LONG_BITS == 64) { ··· 1432 1418 } 1433 1419 1434 1420 tcg_out_goto(s, COND_AL, lb->raddr); 1421 + return true; 1435 1422 } 1436 1423 1437 - static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1424 + static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1438 1425 { 1439 1426 TCGReg argreg, datalo, datahi; 1440 1427 TCGMemOpIdx oi = lb->oi; 1441 1428 TCGMemOp opc = get_memop(oi); 1442 1429 1443 - bool ok = reloc_pc24(lb->label_ptr[0], 
s->code_ptr); 1444 - tcg_debug_assert(ok); 1430 + if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) { 1431 + return false; 1432 + } 1445 1433 1446 1434 argreg = TCG_REG_R0; 1447 1435 argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); ··· 1474 1462 1475 1463 /* Tail-call to the helper, which will return to the fast path. */ 1476 1464 tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1465 + return true; 1477 1466 } 1478 1467 #endif /* SOFTMMU */ 1479 1468 ··· 2064 2053 case INDEX_op_sextract_i32: 2065 2054 tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]); 2066 2055 break; 2056 + case INDEX_op_extract2_i32: 2057 + /* ??? These optimization vs zero should be generic. */ 2058 + /* ??? But we can't substitute 2 for 1 in the opcode stream yet. */ 2059 + if (const_args[1]) { 2060 + if (const_args[2]) { 2061 + tcg_out_movi(s, TCG_TYPE_REG, args[0], 0); 2062 + } else { 2063 + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, 2064 + args[2], SHIFT_IMM_LSL(32 - args[3])); 2065 + } 2066 + } else if (const_args[2]) { 2067 + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, 2068 + args[1], SHIFT_IMM_LSR(args[3])); 2069 + } else { 2070 + /* We can do extract2 in 2 insns, vs the 3 required otherwise. 
*/ 2071 + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, 2072 + args[2], SHIFT_IMM_LSL(32 - args[3])); 2073 + tcg_out_dat_reg(s, COND_AL, ARITH_ORR, args[0], TCG_REG_TMP, 2074 + args[1], SHIFT_IMM_LSR(args[3])); 2075 + } 2076 + break; 2067 2077 2068 2078 case INDEX_op_div_i32: 2069 2079 tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]); ··· 2108 2118 = { .args_ct_str = { "s", "s", "s", "s" } }; 2109 2119 static const TCGTargetOpDef br 2110 2120 = { .args_ct_str = { "r", "rIN" } }; 2121 + static const TCGTargetOpDef ext2 2122 + = { .args_ct_str = { "r", "rZ", "rZ" } }; 2111 2123 static const TCGTargetOpDef dep 2112 2124 = { .args_ct_str = { "r", "0", "rZ" } }; 2113 2125 static const TCGTargetOpDef movc ··· 2174 2186 return &br; 2175 2187 case INDEX_op_deposit_i32: 2176 2188 return &dep; 2189 + case INDEX_op_extract2_i32: 2190 + return &ext2; 2177 2191 case INDEX_op_movcond_i32: 2178 2192 return &movc; 2179 2193 case INDEX_op_add2_i32:
+2
tcg/i386/tcg-target.h
··· 124 124 #define TCG_TARGET_HAS_deposit_i32 1 125 125 #define TCG_TARGET_HAS_extract_i32 1 126 126 #define TCG_TARGET_HAS_sextract_i32 1 127 + #define TCG_TARGET_HAS_extract2_i32 1 127 128 #define TCG_TARGET_HAS_movcond_i32 1 128 129 #define TCG_TARGET_HAS_add2_i32 1 129 130 #define TCG_TARGET_HAS_sub2_i32 1 ··· 162 163 #define TCG_TARGET_HAS_deposit_i64 1 163 164 #define TCG_TARGET_HAS_extract_i64 1 164 165 #define TCG_TARGET_HAS_sextract_i64 0 166 + #define TCG_TARGET_HAS_extract2_i64 1 165 167 #define TCG_TARGET_HAS_movcond_i64 1 166 168 #define TCG_TARGET_HAS_add2_i64 1 167 169 #define TCG_TARGET_HAS_sub2_i64 1
+15 -2
tcg/i386/tcg-target.inc.c
··· 452 452 #define OPC_SHUFPS (0xc6 | P_EXT) 453 453 #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) 454 454 #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) 455 + #define OPC_SHRD_Ib (0xac | P_EXT) 455 456 #define OPC_TESTL (0x85) 456 457 #define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) 457 458 #define OPC_UD2 (0x0b | P_EXT) ··· 1729 1730 /* 1730 1731 * Generate code for the slow path for a load at the end of block 1731 1732 */ 1732 - static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1733 + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1733 1734 { 1734 1735 TCGMemOpIdx oi = l->oi; 1735 1736 TCGMemOp opc = get_memop(oi); ··· 1808 1809 1809 1810 /* Jump to the code corresponding to next IR of qemu_st */ 1810 1811 tcg_out_jmp(s, l->raddr); 1812 + return true; 1811 1813 } 1812 1814 1813 1815 /* 1814 1816 * Generate code for the slow path for a store at the end of block 1815 1817 */ 1816 - static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1818 + static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1817 1819 { 1818 1820 TCGMemOpIdx oi = l->oi; 1819 1821 TCGMemOp opc = get_memop(oi); ··· 1876 1878 /* "Tail call" to the helper, with the return address back inline. */ 1877 1879 tcg_out_push(s, retaddr); 1878 1880 tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1881 + return true; 1879 1882 } 1880 1883 #elif TCG_TARGET_REG_BITS == 32 1881 1884 # define x86_guest_base_seg 0 ··· 2587 2590 } 2588 2591 break; 2589 2592 2593 + OP_32_64(extract2): 2594 + /* Note that SHRD outputs to the r/m operand. 
*/ 2595 + tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); 2596 + tcg_out8(s, args[3]); 2597 + break; 2598 + 2590 2599 case INDEX_op_mb: 2591 2600 tcg_out_mb(s, a0); 2592 2601 break; ··· 2845 2854 static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } }; 2846 2855 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } }; 2847 2856 static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } }; 2857 + static const TCGTargetOpDef r_0_r = { .args_ct_str = { "r", "0", "r" } }; 2848 2858 static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } }; 2849 2859 static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } }; 2850 2860 static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } }; ··· 2970 2980 case INDEX_op_ctpop_i32: 2971 2981 case INDEX_op_ctpop_i64: 2972 2982 return &r_r; 2983 + case INDEX_op_extract2_i32: 2984 + case INDEX_op_extract2_i64: 2985 + return &r_0_r; 2973 2986 2974 2987 case INDEX_op_deposit_i32: 2975 2988 case INDEX_op_deposit_i64:
+2
tcg/mips/tcg-target.h
··· 162 162 #define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions 163 163 #define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions 164 164 #define TCG_TARGET_HAS_sextract_i32 0 165 + #define TCG_TARGET_HAS_extract2_i32 0 165 166 #define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions 166 167 #define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions 167 168 #define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions ··· 177 178 #define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions 178 179 #define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions 179 180 #define TCG_TARGET_HAS_sextract_i64 0 181 + #define TCG_TARGET_HAS_extract2_i64 0 180 182 #define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions 181 183 #define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions 182 184 #define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions
+4 -2
tcg/mips/tcg-target.inc.c
··· 1338 1338 } 1339 1339 } 1340 1340 1341 - static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1341 + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1342 1342 { 1343 1343 TCGMemOpIdx oi = l->oi; 1344 1344 TCGMemOp opc = get_memop(oi); ··· 1385 1385 } else { 1386 1386 tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO); 1387 1387 } 1388 + return true; 1388 1389 } 1389 1390 1390 - static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1391 + static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1391 1392 { 1392 1393 TCGMemOpIdx oi = l->oi; 1393 1394 TCGMemOp opc = get_memop(oi); ··· 1435 1436 tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); 1436 1437 /* delay slot */ 1437 1438 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); 1439 + return true; 1438 1440 } 1439 1441 #endif 1440 1442
+16
tcg/optimize.c
··· 1202 1202 } 1203 1203 goto do_default; 1204 1204 1205 + CASE_OP_32_64(extract2): 1206 + if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1207 + TCGArg v1 = arg_info(op->args[1])->val; 1208 + TCGArg v2 = arg_info(op->args[2])->val; 1209 + 1210 + if (opc == INDEX_op_extract2_i64) { 1211 + tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3])); 1212 + } else { 1213 + tmp = (v1 >> op->args[3]) | (v2 << (32 - op->args[3])); 1214 + tmp = (int32_t)tmp; 1215 + } 1216 + tcg_opt_gen_movi(s, op, op->args[0], tmp); 1217 + break; 1218 + } 1219 + goto do_default; 1220 + 1205 1221 CASE_OP_32_64(setcond): 1206 1222 tmp = do_constant_folding_cond(opc, op->args[1], 1207 1223 op->args[2], op->args[3]);
+2
tcg/ppc/tcg-target.h
··· 77 77 #define TCG_TARGET_HAS_deposit_i32 1 78 78 #define TCG_TARGET_HAS_extract_i32 1 79 79 #define TCG_TARGET_HAS_sextract_i32 0 80 + #define TCG_TARGET_HAS_extract2_i32 0 80 81 #define TCG_TARGET_HAS_movcond_i32 1 81 82 #define TCG_TARGET_HAS_mulu2_i32 0 82 83 #define TCG_TARGET_HAS_muls2_i32 0 ··· 115 116 #define TCG_TARGET_HAS_deposit_i64 1 116 117 #define TCG_TARGET_HAS_extract_i64 1 117 118 #define TCG_TARGET_HAS_sextract_i64 0 119 + #define TCG_TARGET_HAS_extract2_i64 0 118 120 #define TCG_TARGET_HAS_movcond_i64 1 119 121 #define TCG_TARGET_HAS_add2_i64 1 120 122 #define TCG_TARGET_HAS_sub2_i64 1
+20 -22
tcg/ppc/tcg-target.inc.c
··· 529 529 intptr_t value, intptr_t addend) 530 530 { 531 531 tcg_insn_unit *target; 532 - tcg_insn_unit old; 533 532 534 533 value += addend; 535 534 target = (tcg_insn_unit *)value; ··· 540 539 case R_PPC_REL24: 541 540 return reloc_pc24(code_ptr, target); 542 541 case R_PPC_ADDR16: 543 - /* We are abusing this relocation type. This points to a pair 544 - of insns, addis + load. If the displacement is small, we 545 - can nop out the addis. */ 546 - if (value == (int16_t)value) { 547 - code_ptr[0] = NOP; 548 - old = deposit32(code_ptr[1], 0, 16, value); 549 - code_ptr[1] = deposit32(old, 16, 5, TCG_REG_TB); 550 - } else { 551 - int16_t lo = value; 552 - int hi = value - lo; 553 - if (hi + lo != value) { 554 - return false; 555 - } 556 - code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16); 557 - code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo); 542 + /* 543 + * We are (slightly) abusing this relocation type. In particular, 544 + * assert that the low 2 bits are zero, and do not modify them. 545 + * That way we can use this with LD et al that have opcode bits 546 + * in the low 2 bits of the insn. 
547 + */ 548 + if ((value & 3) || value != (int16_t)value) { 549 + return false; 558 550 } 551 + *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc); 559 552 break; 560 553 default: 561 554 g_assert_not_reached(); ··· 701 694 if (!in_prologue && USE_REG_TB) { 702 695 new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr, 703 696 -(intptr_t)s->code_gen_ptr); 704 - tcg_out32(s, ADDIS | TAI(ret, TCG_REG_TB, 0)); 705 - tcg_out32(s, LD | TAI(ret, ret, 0)); 697 + tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0)); 706 698 return; 707 699 } 708 700 ··· 1653 1645 label->label_ptr[0] = lptr; 1654 1646 } 1655 1647 1656 - static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1648 + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1657 1649 { 1658 1650 TCGMemOpIdx oi = lb->oi; 1659 1651 TCGMemOp opc = get_memop(oi); 1660 1652 TCGReg hi, lo, arg = TCG_REG_R3; 1661 1653 1662 - **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr); 1654 + if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) { 1655 + return false; 1656 + } 1663 1657 1664 1658 tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); 1665 1659 ··· 1695 1689 } 1696 1690 1697 1691 tcg_out_b(s, 0, lb->raddr); 1692 + return true; 1698 1693 } 1699 1694 1700 - static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1695 + static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1701 1696 { 1702 1697 TCGMemOpIdx oi = lb->oi; 1703 1698 TCGMemOp opc = get_memop(oi); 1704 1699 TCGMemOp s_bits = opc & MO_SIZE; 1705 1700 TCGReg hi, lo, arg = TCG_REG_R3; 1706 1701 1707 - **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr); 1702 + if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) { 1703 + return false; 1704 + } 1708 1705 1709 1706 tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); 1710 1707 ··· 1753 1750 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1754 1751 1755 1752 tcg_out_b(s, 0, lb->raddr); 1753 + return true; 1756 1754 } 1757 1755 
#endif /* SOFTMMU */ 1758 1756
+2
tcg/riscv/tcg-target.h
··· 93 93 #define TCG_TARGET_HAS_deposit_i32 0 94 94 #define TCG_TARGET_HAS_extract_i32 0 95 95 #define TCG_TARGET_HAS_sextract_i32 0 96 + #define TCG_TARGET_HAS_extract2_i32 0 96 97 #define TCG_TARGET_HAS_add2_i32 1 97 98 #define TCG_TARGET_HAS_sub2_i32 1 98 99 #define TCG_TARGET_HAS_mulu2_i32 0 ··· 128 129 #define TCG_TARGET_HAS_deposit_i64 0 129 130 #define TCG_TARGET_HAS_extract_i64 0 130 131 #define TCG_TARGET_HAS_sextract_i64 0 132 + #define TCG_TARGET_HAS_extract2_i64 0 131 133 #define TCG_TARGET_HAS_extrl_i64_i32 1 132 134 #define TCG_TARGET_HAS_extrh_i64_i32 1 133 135 #define TCG_TARGET_HAS_ext8s_i64 1
+12 -4
tcg/riscv/tcg-target.inc.c
··· 1065 1065 label->label_ptr[0] = label_ptr[0]; 1066 1066 } 1067 1067 1068 - static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1068 + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1069 1069 { 1070 1070 TCGMemOpIdx oi = l->oi; 1071 1071 TCGMemOp opc = get_memop(oi); ··· 1080 1080 } 1081 1081 1082 1082 /* resolve label address */ 1083 - patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0); 1083 + if (!patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, 1084 + (intptr_t) s->code_ptr, 0)) { 1085 + return false; 1086 + } 1084 1087 1085 1088 /* call load helper */ 1086 1089 tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); ··· 1092 1095 tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); 1093 1096 1094 1097 tcg_out_goto(s, l->raddr); 1098 + return true; 1095 1099 } 1096 1100 1097 - static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1101 + static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1098 1102 { 1099 1103 TCGMemOpIdx oi = l->oi; 1100 1104 TCGMemOp opc = get_memop(oi); ··· 1111 1115 } 1112 1116 1113 1117 /* resolve label address */ 1114 - patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0); 1118 + if (!patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, 1119 + (intptr_t) s->code_ptr, 0)) { 1120 + return false; 1121 + } 1115 1122 1116 1123 /* call store helper */ 1117 1124 tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); ··· 1133 1140 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SSIZE)]); 1134 1141 1135 1142 tcg_out_goto(s, l->raddr); 1143 + return true; 1136 1144 } 1137 1145 #endif /* CONFIG_SOFTMMU */ 1138 1146
+2
tcg/s390/tcg-target.h
··· 85 85 #define TCG_TARGET_HAS_deposit_i32 (s390_facilities & FACILITY_GEN_INST_EXT) 86 86 #define TCG_TARGET_HAS_extract_i32 (s390_facilities & FACILITY_GEN_INST_EXT) 87 87 #define TCG_TARGET_HAS_sextract_i32 0 88 + #define TCG_TARGET_HAS_extract2_i32 0 88 89 #define TCG_TARGET_HAS_movcond_i32 1 89 90 #define TCG_TARGET_HAS_add2_i32 1 90 91 #define TCG_TARGET_HAS_sub2_i32 1 ··· 121 122 #define TCG_TARGET_HAS_deposit_i64 (s390_facilities & FACILITY_GEN_INST_EXT) 122 123 #define TCG_TARGET_HAS_extract_i64 (s390_facilities & FACILITY_GEN_INST_EXT) 123 124 #define TCG_TARGET_HAS_sextract_i64 0 125 + #define TCG_TARGET_HAS_extract2_i64 0 124 126 #define TCG_TARGET_HAS_movcond_i64 1 125 127 #define TCG_TARGET_HAS_add2_i64 1 126 128 #define TCG_TARGET_HAS_sub2_i64 1
+12 -8
tcg/s390/tcg-target.inc.c
··· 1609 1609 label->label_ptr[0] = label_ptr; 1610 1610 } 1611 1611 1612 - static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1612 + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1613 1613 { 1614 1614 TCGReg addr_reg = lb->addrlo_reg; 1615 1615 TCGReg data_reg = lb->datalo_reg; 1616 1616 TCGMemOpIdx oi = lb->oi; 1617 1617 TCGMemOp opc = get_memop(oi); 1618 1618 1619 - bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL, 1620 - (intptr_t)s->code_ptr, 2); 1621 - tcg_debug_assert(ok); 1619 + if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, 1620 + (intptr_t)s->code_ptr, 2)) { 1621 + return false; 1622 + } 1622 1623 1623 1624 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); 1624 1625 if (TARGET_LONG_BITS == 64) { ··· 1630 1631 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); 1631 1632 1632 1633 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); 1634 + return true; 1633 1635 } 1634 1636 1635 - static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1637 + static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1636 1638 { 1637 1639 TCGReg addr_reg = lb->addrlo_reg; 1638 1640 TCGReg data_reg = lb->datalo_reg; 1639 1641 TCGMemOpIdx oi = lb->oi; 1640 1642 TCGMemOp opc = get_memop(oi); 1641 1643 1642 - bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL, 1643 - (intptr_t)s->code_ptr, 2); 1644 - tcg_debug_assert(ok); 1644 + if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, 1645 + (intptr_t)s->code_ptr, 2)) { 1646 + return false; 1647 + } 1645 1648 1646 1649 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); 1647 1650 if (TARGET_LONG_BITS == 64) { ··· 1668 1671 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1669 1672 1670 1673 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); 1674 + return true; 1671 1675 } 1672 1676 #else 1673 1677 static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
+2
tcg/sparc/tcg-target.h
··· 116 116 #define TCG_TARGET_HAS_deposit_i32 0 117 117 #define TCG_TARGET_HAS_extract_i32 0 118 118 #define TCG_TARGET_HAS_sextract_i32 0 119 + #define TCG_TARGET_HAS_extract2_i32 0 119 120 #define TCG_TARGET_HAS_movcond_i32 1 120 121 #define TCG_TARGET_HAS_add2_i32 1 121 122 #define TCG_TARGET_HAS_sub2_i32 1 ··· 153 154 #define TCG_TARGET_HAS_deposit_i64 0 154 155 #define TCG_TARGET_HAS_extract_i64 0 155 156 #define TCG_TARGET_HAS_sextract_i64 0 157 + #define TCG_TARGET_HAS_extract2_i64 0 156 158 #define TCG_TARGET_HAS_movcond_i64 1 157 159 #define TCG_TARGET_HAS_add2_i64 1 158 160 #define TCG_TARGET_HAS_sub2_i64 1
+9 -9
tcg/tcg-ldst.inc.c
··· 38 38 * Generate TB finalization at the end of block 39 39 */ 40 40 41 - static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l); 42 - static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l); 41 + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l); 42 + static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l); 43 43 44 - static bool tcg_out_ldst_finalize(TCGContext *s) 44 + static int tcg_out_ldst_finalize(TCGContext *s) 45 45 { 46 46 TCGLabelQemuLdst *lb; 47 47 48 48 /* qemu_ld/st slow paths */ 49 49 QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) { 50 - if (lb->is_ld) { 51 - tcg_out_qemu_ld_slow_path(s, lb); 52 - } else { 53 - tcg_out_qemu_st_slow_path(s, lb); 50 + if (lb->is_ld 51 + ? !tcg_out_qemu_ld_slow_path(s, lb) 52 + : !tcg_out_qemu_st_slow_path(s, lb)) { 53 + return -2; 54 54 } 55 55 56 56 /* Test for (pending) buffer overflow. The assumption is that any ··· 58 58 the buffer completely. Thus we can test for overflow after 59 59 generating code without having to check during generation. */ 60 60 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 61 - return false; 61 + return -1; 62 62 } 63 63 } 64 - return true; 64 + return 0; 65 65 } 66 66 67 67 /*
+100 -25
tcg/tcg-op.c
··· 611 611 return; 612 612 } 613 613 614 - mask = (1u << len) - 1; 615 614 t1 = tcg_temp_new_i32(); 616 615 616 + if (TCG_TARGET_HAS_extract2_i32) { 617 + if (ofs + len == 32) { 618 + tcg_gen_shli_i32(t1, arg1, len); 619 + tcg_gen_extract2_i32(ret, t1, arg2, len); 620 + goto done; 621 + } 622 + if (ofs == 0) { 623 + tcg_gen_extract2_i32(ret, arg1, arg2, len); 624 + tcg_gen_rotli_i32(ret, ret, len); 625 + goto done; 626 + } 627 + } 628 + 629 + mask = (1u << len) - 1; 617 630 if (ofs + len < 32) { 618 631 tcg_gen_andi_i32(t1, arg2, mask); 619 632 tcg_gen_shli_i32(t1, t1, ofs); ··· 622 635 } 623 636 tcg_gen_andi_i32(ret, arg1, ~(mask << ofs)); 624 637 tcg_gen_or_i32(ret, ret, t1); 625 - 638 + done: 626 639 tcg_temp_free_i32(t1); 627 640 } 628 641 ··· 807 820 808 821 tcg_gen_shli_i32(ret, arg, 32 - len - ofs); 809 822 tcg_gen_sari_i32(ret, ret, 32 - len); 823 + } 824 + 825 + /* 826 + * Extract 32-bits from a 64-bit input, ah:al, starting from ofs. 827 + * Unlike tcg_gen_extract_i32 above, len is fixed at 32. 
828 + */ 829 + void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, 830 + unsigned int ofs) 831 + { 832 + tcg_debug_assert(ofs <= 32); 833 + if (ofs == 0) { 834 + tcg_gen_mov_i32(ret, al); 835 + } else if (ofs == 32) { 836 + tcg_gen_mov_i32(ret, ah); 837 + } else if (al == ah) { 838 + tcg_gen_rotri_i32(ret, al, ofs); 839 + } else if (TCG_TARGET_HAS_extract2_i32) { 840 + tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs); 841 + } else { 842 + TCGv_i32 t0 = tcg_temp_new_i32(); 843 + tcg_gen_shri_i32(t0, al, ofs); 844 + tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs); 845 + tcg_temp_free_i32(t0); 846 + } 810 847 } 811 848 812 849 void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, ··· 1331 1368 tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c); 1332 1369 tcg_gen_movi_i32(TCGV_LOW(ret), 0); 1333 1370 } 1371 + } else if (right) { 1372 + if (TCG_TARGET_HAS_extract2_i32) { 1373 + tcg_gen_extract2_i32(TCGV_LOW(ret), 1374 + TCGV_LOW(arg1), TCGV_HIGH(arg1), c); 1375 + } else { 1376 + tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); 1377 + tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret), 1378 + TCGV_HIGH(arg1), 32 - c, c); 1379 + } 1380 + if (arith) { 1381 + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); 1382 + } else { 1383 + tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); 1384 + } 1334 1385 } else { 1335 - TCGv_i32 t0, t1; 1336 - 1337 - t0 = tcg_temp_new_i32(); 1338 - t1 = tcg_temp_new_i32(); 1339 - if (right) { 1340 - tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c); 1341 - if (arith) { 1342 - tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c); 1343 - } else { 1344 - tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c); 1345 - } 1346 - tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); 1347 - tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0); 1348 - tcg_gen_mov_i32(TCGV_HIGH(ret), t1); 1386 + if (TCG_TARGET_HAS_extract2_i32) { 1387 + tcg_gen_extract2_i32(TCGV_HIGH(ret), 1388 + TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c); 1349 1389 } else { 1390 + 
TCGv_i32 t0 = tcg_temp_new_i32(); 1350 1391 tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c); 1351 - /* Note: ret can be the same as arg1, so we use t1 */ 1352 - tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c); 1353 - tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); 1354 - tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0); 1355 - tcg_gen_mov_i32(TCGV_LOW(ret), t1); 1392 + tcg_gen_deposit_i32(TCGV_HIGH(ret), t0, 1393 + TCGV_HIGH(arg1), c, 32 - c); 1394 + tcg_temp_free_i32(t0); 1356 1395 } 1357 - tcg_temp_free_i32(t0); 1358 - tcg_temp_free_i32(t1); 1396 + tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); 1359 1397 } 1360 1398 } 1361 1399 ··· 1999 2037 } 2000 2038 } 2001 2039 2002 - mask = (1ull << len) - 1; 2003 2040 t1 = tcg_temp_new_i64(); 2004 2041 2042 + if (TCG_TARGET_HAS_extract2_i64) { 2043 + if (ofs + len == 64) { 2044 + tcg_gen_shli_i64(t1, arg1, len); 2045 + tcg_gen_extract2_i64(ret, t1, arg2, len); 2046 + goto done; 2047 + } 2048 + if (ofs == 0) { 2049 + tcg_gen_extract2_i64(ret, arg1, arg2, len); 2050 + tcg_gen_rotli_i64(ret, ret, len); 2051 + goto done; 2052 + } 2053 + } 2054 + 2055 + mask = (1ull << len) - 1; 2005 2056 if (ofs + len < 64) { 2006 2057 tcg_gen_andi_i64(t1, arg2, mask); 2007 2058 tcg_gen_shli_i64(t1, t1, ofs); ··· 2010 2061 } 2011 2062 tcg_gen_andi_i64(ret, arg1, ~(mask << ofs)); 2012 2063 tcg_gen_or_i64(ret, ret, t1); 2013 - 2064 + done: 2014 2065 tcg_temp_free_i64(t1); 2015 2066 } 2016 2067 ··· 2295 2346 } 2296 2347 tcg_gen_shli_i64(ret, arg, 64 - len - ofs); 2297 2348 tcg_gen_sari_i64(ret, ret, 64 - len); 2349 + } 2350 + 2351 + /* 2352 + * Extract 64 bits from a 128-bit input, ah:al, starting from ofs. 2353 + * Unlike tcg_gen_extract_i64 above, len is fixed at 64. 
2354 + */ 2355 + void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, 2356 + unsigned int ofs) 2357 + { 2358 + tcg_debug_assert(ofs <= 64); 2359 + if (ofs == 0) { 2360 + tcg_gen_mov_i64(ret, al); 2361 + } else if (ofs == 64) { 2362 + tcg_gen_mov_i64(ret, ah); 2363 + } else if (al == ah) { 2364 + tcg_gen_rotri_i64(ret, al, ofs); 2365 + } else if (TCG_TARGET_HAS_extract2_i64) { 2366 + tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs); 2367 + } else { 2368 + TCGv_i64 t0 = tcg_temp_new_i64(); 2369 + tcg_gen_shri_i64(t0, al, ofs); 2370 + tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs); 2371 + tcg_temp_free_i64(t0); 2372 + } 2298 2373 } 2299 2374 2300 2375 void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
+6
tcg/tcg-op.h
··· 308 308 unsigned int ofs, unsigned int len); 309 309 void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, 310 310 unsigned int ofs, unsigned int len); 311 + void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, 312 + unsigned int ofs); 311 313 void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *); 312 314 void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *); 313 315 void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, ··· 501 503 unsigned int ofs, unsigned int len); 502 504 void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, 503 505 unsigned int ofs, unsigned int len); 506 + void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, 507 + unsigned int ofs); 504 508 void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *); 505 509 void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *); 506 510 void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, ··· 1068 1072 #define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i64 1069 1073 #define tcg_gen_extract_tl tcg_gen_extract_i64 1070 1074 #define tcg_gen_sextract_tl tcg_gen_sextract_i64 1075 + #define tcg_gen_extract2_tl tcg_gen_extract2_i64 1071 1076 #define tcg_const_tl tcg_const_i64 1072 1077 #define tcg_const_local_tl tcg_const_local_i64 1073 1078 #define tcg_gen_movcond_tl tcg_gen_movcond_i64 ··· 1178 1183 #define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i32 1179 1184 #define tcg_gen_extract_tl tcg_gen_extract_i32 1180 1185 #define tcg_gen_sextract_tl tcg_gen_sextract_i32 1186 + #define tcg_gen_extract2_tl tcg_gen_extract2_i32 1181 1187 #define tcg_const_tl tcg_const_i32 1182 1188 #define tcg_const_local_tl tcg_const_local_i32 1183 1189 #define tcg_gen_movcond_tl tcg_gen_movcond_i32
+2
tcg/tcg-opc.h
··· 79 79 DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32)) 80 80 DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32)) 81 81 DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32)) 82 + DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32)) 82 83 83 84 DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END) 84 85 ··· 146 147 DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) 147 148 DEF(extract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_extract_i64)) 148 149 DEF(sextract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_sextract_i64)) 150 + DEF(extract2_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_extract2_i64)) 149 151 150 152 /* size changing ops */ 151 153 DEF(ext_i32_i64, 1, 1, 0, IMPL64)
+7 -5
tcg/tcg-pool.inc.c
··· 121 121 /* To be provided by cpu/tcg-target.inc.c. */ 122 122 static void tcg_out_nop_fill(tcg_insn_unit *p, int count); 123 123 124 - static bool tcg_out_pool_finalize(TCGContext *s) 124 + static int tcg_out_pool_finalize(TCGContext *s) 125 125 { 126 126 TCGLabelPoolData *p = s->pool_labels; 127 127 TCGLabelPoolData *l = NULL; 128 128 void *a; 129 129 130 130 if (p == NULL) { 131 - return true; 131 + return 0; 132 132 } 133 133 134 134 /* ??? Round up to qemu_icache_linesize, but then do not round ··· 142 142 size_t size = sizeof(tcg_target_ulong) * p->nlong; 143 143 if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) { 144 144 if (unlikely(a > s->code_gen_highwater)) { 145 - return false; 145 + return -1; 146 146 } 147 147 memcpy(a, p->data, size); 148 148 a += size; 149 149 l = p; 150 150 } 151 - patch_reloc(p->label, p->rtype, (intptr_t)a - size, p->addend); 151 + if (!patch_reloc(p->label, p->rtype, (intptr_t)a - size, p->addend)) { 152 + return -2; 153 + } 152 154 } 153 155 154 156 s->code_ptr = a; 155 - return true; 157 + return 0; 156 158 }
+46 -39
tcg/tcg.c
··· 128 128 static int tcg_target_const_match(tcg_target_long val, TCGType type, 129 129 const TCGArgConstraint *arg_ct); 130 130 #ifdef TCG_TARGET_NEED_LDST_LABELS 131 - static bool tcg_out_ldst_finalize(TCGContext *s); 131 + static int tcg_out_ldst_finalize(TCGContext *s); 132 132 #endif 133 133 134 134 #define TCG_HIGHWATER 1024 ··· 263 263 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 264 264 TCGLabel *l, intptr_t addend) 265 265 { 266 - TCGRelocation *r; 266 + TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); 267 267 268 - if (l->has_value) { 269 - /* FIXME: This may break relocations on RISC targets that 270 - modify instruction fields in place. The caller may not have 271 - written the initial value. */ 272 - bool ok = patch_reloc(code_ptr, type, l->u.value, addend); 273 - tcg_debug_assert(ok); 274 - } else { 275 - /* add a new relocation entry */ 276 - r = tcg_malloc(sizeof(TCGRelocation)); 277 - r->type = type; 278 - r->ptr = code_ptr; 279 - r->addend = addend; 280 - r->next = l->u.first_reloc; 281 - l->u.first_reloc = r; 282 - } 268 + r->type = type; 269 + r->ptr = code_ptr; 270 + r->addend = addend; 271 + QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); 283 272 } 284 273 285 274 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) 286 275 { 287 - intptr_t value = (intptr_t)ptr; 288 - TCGRelocation *r; 289 - 290 276 tcg_debug_assert(!l->has_value); 291 - 292 - for (r = l->u.first_reloc; r != NULL; r = r->next) { 293 - bool ok = patch_reloc(r->ptr, r->type, value, r->addend); 294 - tcg_debug_assert(ok); 295 - } 296 - 297 277 l->has_value = 1; 298 278 l->u.value_ptr = ptr; 299 279 } ··· 303 283 TCGContext *s = tcg_ctx; 304 284 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 305 285 306 - *l = (TCGLabel){ 307 - .id = s->nb_labels++ 308 - }; 309 - #ifdef CONFIG_DEBUG_TCG 286 + memset(l, 0, sizeof(TCGLabel)); 287 + l->id = s->nb_labels++; 288 + QSIMPLEQ_INIT(&l->relocs); 289 + 310 290 
QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); 311 - #endif 312 291 313 292 return l; 293 + } 294 + 295 + static bool tcg_resolve_relocs(TCGContext *s) 296 + { 297 + TCGLabel *l; 298 + 299 + QSIMPLEQ_FOREACH(l, &s->labels, next) { 300 + TCGRelocation *r; 301 + uintptr_t value = l->u.value; 302 + 303 + QSIMPLEQ_FOREACH(r, &l->relocs, next) { 304 + if (!patch_reloc(r->ptr, r->type, value, r->addend)) { 305 + return false; 306 + } 307 + } 308 + } 309 + return true; 314 310 } 315 311 316 312 static void set_jmp_reset_offset(TCGContext *s, int which) ··· 1023 1019 #ifdef TCG_TARGET_NEED_POOL_LABELS 1024 1020 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1025 1021 { 1026 - bool ok = tcg_out_pool_finalize(s); 1027 - tcg_debug_assert(ok); 1022 + int result = tcg_out_pool_finalize(s); 1023 + tcg_debug_assert(result == 0); 1028 1024 } 1029 1025 #endif 1030 1026 ··· 1096 1092 1097 1093 QTAILQ_INIT(&s->ops); 1098 1094 QTAILQ_INIT(&s->free_ops); 1099 - #ifdef CONFIG_DEBUG_TCG 1100 1095 QSIMPLEQ_INIT(&s->labels); 1101 - #endif 1102 1096 } 1103 1097 1104 1098 static inline TCGTemp *tcg_temp_alloc(TCGContext *s) ··· 1426 1420 return TCG_TARGET_HAS_extract_i32; 1427 1421 case INDEX_op_sextract_i32: 1428 1422 return TCG_TARGET_HAS_sextract_i32; 1423 + case INDEX_op_extract2_i32: 1424 + return TCG_TARGET_HAS_extract2_i32; 1429 1425 case INDEX_op_add2_i32: 1430 1426 return TCG_TARGET_HAS_add2_i32; 1431 1427 case INDEX_op_sub2_i32: ··· 1523 1519 return TCG_TARGET_HAS_extract_i64; 1524 1520 case INDEX_op_sextract_i64: 1525 1521 return TCG_TARGET_HAS_sextract_i64; 1522 + case INDEX_op_extract2_i64: 1523 + return TCG_TARGET_HAS_extract2_i64; 1526 1524 case INDEX_op_extrl_i64_i32: 1527 1525 return TCG_TARGET_HAS_extrl_i64_i32; 1528 1526 case INDEX_op_extrh_i64_i32: ··· 3992 3990 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 3993 3991 return -1; 3994 3992 } 3993 + /* Test for TB overflow, as seen by gen_insn_end_off. 
*/ 3994 + if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 3995 + return -2; 3996 + } 3995 3997 } 3996 3998 tcg_debug_assert(num_insns >= 0); 3997 3999 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 3998 4000 3999 4001 /* Generate TB finalization at the end of block */ 4000 4002 #ifdef TCG_TARGET_NEED_LDST_LABELS 4001 - if (!tcg_out_ldst_finalize(s)) { 4002 - return -1; 4003 + i = tcg_out_ldst_finalize(s); 4004 + if (i < 0) { 4005 + return i; 4003 4006 } 4004 4007 #endif 4005 4008 #ifdef TCG_TARGET_NEED_POOL_LABELS 4006 - if (!tcg_out_pool_finalize(s)) { 4007 - return -1; 4009 + i = tcg_out_pool_finalize(s); 4010 + if (i < 0) { 4011 + return i; 4008 4012 } 4009 4013 #endif 4014 + if (!tcg_resolve_relocs(s)) { 4015 + return -2; 4016 + } 4010 4017 4011 4018 /* flush instruction cache */ 4012 4019 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+8 -8
tcg/tcg.h
··· 125 125 #define TCG_TARGET_HAS_deposit_i64 0 126 126 #define TCG_TARGET_HAS_extract_i64 0 127 127 #define TCG_TARGET_HAS_sextract_i64 0 128 + #define TCG_TARGET_HAS_extract2_i64 0 128 129 #define TCG_TARGET_HAS_movcond_i64 0 129 130 #define TCG_TARGET_HAS_add2_i64 0 130 131 #define TCG_TARGET_HAS_sub2_i64 0 ··· 237 238 do { if (!(X)) { __builtin_unreachable(); } } while (0) 238 239 #endif 239 240 240 - typedef struct TCGRelocation { 241 - struct TCGRelocation *next; 242 - int type; 241 + typedef struct TCGRelocation TCGRelocation; 242 + struct TCGRelocation { 243 + QSIMPLEQ_ENTRY(TCGRelocation) next; 243 244 tcg_insn_unit *ptr; 244 245 intptr_t addend; 245 - } TCGRelocation; 246 + int type; 247 + }; 246 248 247 249 typedef struct TCGLabel TCGLabel; 248 250 struct TCGLabel { ··· 253 255 union { 254 256 uintptr_t value; 255 257 tcg_insn_unit *value_ptr; 256 - TCGRelocation *first_reloc; 257 258 } u; 258 - #ifdef CONFIG_DEBUG_TCG 259 + QSIMPLEQ_HEAD(, TCGRelocation) relocs; 259 260 QSIMPLEQ_ENTRY(TCGLabel) next; 260 - #endif 261 261 }; 262 262 263 263 typedef struct TCGPool { ··· 690 690 #endif 691 691 692 692 #ifdef CONFIG_DEBUG_TCG 693 - QSIMPLEQ_HEAD(, TCGLabel) labels; 694 693 int temps_in_use; 695 694 int goto_tb_issue_mask; 696 695 #endif ··· 728 727 TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ 729 728 730 729 QTAILQ_HEAD(, TCGOp) ops, free_ops; 730 + QSIMPLEQ_HEAD(, TCGLabel) labels; 731 731 732 732 /* Tells which temporary holds a given register. 733 733 It does not take into account fixed registers */
+2
tcg/tci/tcg-target.h
··· 71 71 #define TCG_TARGET_HAS_deposit_i32 1 72 72 #define TCG_TARGET_HAS_extract_i32 0 73 73 #define TCG_TARGET_HAS_sextract_i32 0 74 + #define TCG_TARGET_HAS_extract2_i32 0 74 75 #define TCG_TARGET_HAS_eqv_i32 0 75 76 #define TCG_TARGET_HAS_nand_i32 0 76 77 #define TCG_TARGET_HAS_nor_i32 0 ··· 97 98 #define TCG_TARGET_HAS_deposit_i64 1 98 99 #define TCG_TARGET_HAS_extract_i64 0 99 100 #define TCG_TARGET_HAS_sextract_i64 0 101 + #define TCG_TARGET_HAS_extract2_i64 0 100 102 #define TCG_TARGET_HAS_div_i64 0 101 103 #define TCG_TARGET_HAS_rem_i64 0 102 104 #define TCG_TARGET_HAS_ext8s_i64 1