qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

tcg: Add opcode for ctpop

The number of actual invocations of ctpop itself does not warrant
an opcode, but it is very helpful for POWER7 to use in generating
an expansion for ctz.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>

+79
+10
tcg-runtime.c
··· 131 131 return clrsb64(arg); 132 132 } 133 133 134 + uint32_t HELPER(ctpop_i32)(uint32_t arg) 135 + { 136 + return ctpop32(arg); 137 + } 138 + 139 + uint64_t HELPER(ctpop_i64)(uint64_t arg) 140 + { 141 + return ctpop64(arg); 142 + } 143 + 134 144 void HELPER(exit_atomic)(CPUArchState *env) 135 145 { 136 146 cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
+2
tcg/aarch64/tcg-target.h
··· 64 64 #define TCG_TARGET_HAS_nor_i32 0 65 65 #define TCG_TARGET_HAS_clz_i32 1 66 66 #define TCG_TARGET_HAS_ctz_i32 1 67 + #define TCG_TARGET_HAS_ctpop_i32 0 67 68 #define TCG_TARGET_HAS_deposit_i32 1 68 69 #define TCG_TARGET_HAS_extract_i32 1 69 70 #define TCG_TARGET_HAS_sextract_i32 1 ··· 98 99 #define TCG_TARGET_HAS_nor_i64 0 99 100 #define TCG_TARGET_HAS_clz_i64 1 100 101 #define TCG_TARGET_HAS_ctz_i64 1 102 + #define TCG_TARGET_HAS_ctpop_i64 0 101 103 #define TCG_TARGET_HAS_deposit_i64 1 102 104 #define TCG_TARGET_HAS_extract_i64 1 103 105 #define TCG_TARGET_HAS_sextract_i64 1
+1
tcg/arm/tcg-target.h
··· 112 112 #define TCG_TARGET_HAS_nor_i32 0 113 113 #define TCG_TARGET_HAS_clz_i32 use_armv5t_instructions 114 114 #define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions 115 + #define TCG_TARGET_HAS_ctpop_i32 0 115 116 #define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions 116 117 #define TCG_TARGET_HAS_extract_i32 use_armv7_instructions 117 118 #define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
+2
tcg/i386/tcg-target.h
··· 95 95 #define TCG_TARGET_HAS_nor_i32 0 96 96 #define TCG_TARGET_HAS_clz_i32 1 97 97 #define TCG_TARGET_HAS_ctz_i32 1 98 + #define TCG_TARGET_HAS_ctpop_i32 0 98 99 #define TCG_TARGET_HAS_deposit_i32 1 99 100 #define TCG_TARGET_HAS_extract_i32 1 100 101 #define TCG_TARGET_HAS_sextract_i32 1 ··· 129 130 #define TCG_TARGET_HAS_nor_i64 0 130 131 #define TCG_TARGET_HAS_clz_i64 1 131 132 #define TCG_TARGET_HAS_ctz_i64 1 133 + #define TCG_TARGET_HAS_ctpop_i64 0 132 134 #define TCG_TARGET_HAS_deposit_i64 1 133 135 #define TCG_TARGET_HAS_extract_i64 1 134 136 #define TCG_TARGET_HAS_sextract_i64 0
+2
tcg/ia64/tcg-target.h
··· 144 144 #define TCG_TARGET_HAS_clz_i64 0 145 145 #define TCG_TARGET_HAS_ctz_i32 0 146 146 #define TCG_TARGET_HAS_ctz_i64 0 147 + #define TCG_TARGET_HAS_ctpop_i32 0 148 + #define TCG_TARGET_HAS_ctpop_i64 0 147 149 #define TCG_TARGET_HAS_nor_i64 1 148 150 #define TCG_TARGET_HAS_orc_i32 1 149 151 #define TCG_TARGET_HAS_orc_i64 1
+2
tcg/mips/tcg-target.h
··· 165 165 #define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions 166 166 #define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions 167 167 #define TCG_TARGET_HAS_ctz_i32 0 168 + #define TCG_TARGET_HAS_ctpop_i32 0 168 169 169 170 #if TCG_TARGET_REG_BITS == 64 170 171 #define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions ··· 179 180 #define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions 180 181 #define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions 181 182 #define TCG_TARGET_HAS_ctz_i64 0 183 + #define TCG_TARGET_HAS_ctpop_i64 0 182 184 #endif 183 185 184 186 /* optional instructions automatically implemented */
+14
tcg/optimize.c
··· 308 308 case INDEX_op_ctz_i64: 309 309 return x ? ctz64(x) : y; 310 310 311 + case INDEX_op_ctpop_i32: 312 + return ctpop32(x); 313 + 314 + case INDEX_op_ctpop_i64: 315 + return ctpop64(x); 316 + 311 317 CASE_OP_32_64(ext8s): 312 318 return (int8_t)x; 313 319 ··· 918 924 mask = temps[args[2]].mask | 63; 919 925 break; 920 926 927 + case INDEX_op_ctpop_i32: 928 + mask = 32 | 31; 929 + break; 930 + case INDEX_op_ctpop_i64: 931 + mask = 64 | 63; 932 + break; 933 + 921 934 CASE_OP_32_64(setcond): 922 935 case INDEX_op_setcond2_i32: 923 936 mask = 1; ··· 1031 1044 CASE_OP_32_64(ext8u): 1032 1045 CASE_OP_32_64(ext16s): 1033 1046 CASE_OP_32_64(ext16u): 1047 + CASE_OP_32_64(ctpop): 1034 1048 case INDEX_op_ext32s_i64: 1035 1049 case INDEX_op_ext32u_i64: 1036 1050 case INDEX_op_ext_i32_i64:
+2
tcg/ppc/tcg-target.h
··· 72 72 #define TCG_TARGET_HAS_nor_i32 1 73 73 #define TCG_TARGET_HAS_clz_i32 1 74 74 #define TCG_TARGET_HAS_ctz_i32 have_isa_3_00 75 + #define TCG_TARGET_HAS_ctpop_i32 0 75 76 #define TCG_TARGET_HAS_deposit_i32 1 76 77 #define TCG_TARGET_HAS_extract_i32 1 77 78 #define TCG_TARGET_HAS_sextract_i32 0 ··· 107 108 #define TCG_TARGET_HAS_nor_i64 1 108 109 #define TCG_TARGET_HAS_clz_i64 1 109 110 #define TCG_TARGET_HAS_ctz_i64 have_isa_3_00 111 + #define TCG_TARGET_HAS_ctpop_i64 0 110 112 #define TCG_TARGET_HAS_deposit_i64 1 111 113 #define TCG_TARGET_HAS_extract_i64 1 112 114 #define TCG_TARGET_HAS_sextract_i64 0
+2
tcg/s390/tcg-target.h
··· 79 79 #define TCG_TARGET_HAS_nor_i32 0 80 80 #define TCG_TARGET_HAS_clz_i32 0 81 81 #define TCG_TARGET_HAS_ctz_i32 0 82 + #define TCG_TARGET_HAS_ctpop_i32 0 82 83 #define TCG_TARGET_HAS_deposit_i32 (s390_facilities & FACILITY_GEN_INST_EXT) 83 84 #define TCG_TARGET_HAS_extract_i32 (s390_facilities & FACILITY_GEN_INST_EXT) 84 85 #define TCG_TARGET_HAS_sextract_i32 0 ··· 112 113 #define TCG_TARGET_HAS_nor_i64 0 113 114 #define TCG_TARGET_HAS_clz_i64 (s390_facilities & FACILITY_EXT_IMM) 114 115 #define TCG_TARGET_HAS_ctz_i64 0 116 + #define TCG_TARGET_HAS_ctpop_i64 0 115 117 #define TCG_TARGET_HAS_deposit_i64 (s390_facilities & FACILITY_GEN_INST_EXT) 116 118 #define TCG_TARGET_HAS_extract_i64 (s390_facilities & FACILITY_GEN_INST_EXT) 117 119 #define TCG_TARGET_HAS_sextract_i64 0
+2
tcg/sparc/tcg-target.h
··· 112 112 #define TCG_TARGET_HAS_nor_i32 0 113 113 #define TCG_TARGET_HAS_clz_i32 0 114 114 #define TCG_TARGET_HAS_ctz_i32 0 115 + #define TCG_TARGET_HAS_ctpop_i32 0 115 116 #define TCG_TARGET_HAS_deposit_i32 0 116 117 #define TCG_TARGET_HAS_extract_i32 0 117 118 #define TCG_TARGET_HAS_sextract_i32 0 ··· 146 147 #define TCG_TARGET_HAS_nor_i64 0 147 148 #define TCG_TARGET_HAS_clz_i64 0 148 149 #define TCG_TARGET_HAS_ctz_i64 0 150 + #define TCG_TARGET_HAS_ctpop_i64 0 149 151 #define TCG_TARGET_HAS_deposit_i64 0 150 152 #define TCG_TARGET_HAS_extract_i64 0 151 153 #define TCG_TARGET_HAS_sextract_i64 0
+29
tcg/tcg-op.c
··· 550 550 } 551 551 } 552 552 553 + void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1) 554 + { 555 + if (TCG_TARGET_HAS_ctpop_i32) { 556 + tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1); 557 + } else if (TCG_TARGET_HAS_ctpop_i64) { 558 + TCGv_i64 t = tcg_temp_new_i64(); 559 + tcg_gen_extu_i32_i64(t, arg1); 560 + tcg_gen_ctpop_i64(t, t); 561 + tcg_gen_extrl_i64_i32(ret, t); 562 + tcg_temp_free_i64(t); 563 + } else { 564 + gen_helper_ctpop_i32(ret, arg1); 565 + } 566 + } 567 + 553 568 void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) 554 569 { 555 570 if (TCG_TARGET_HAS_rot_i32) { ··· 1871 1886 tcg_temp_free_i64(t); 1872 1887 } else { 1873 1888 gen_helper_clrsb_i64(ret, arg); 1889 + } 1890 + } 1891 + 1892 + void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1) 1893 + { 1894 + if (TCG_TARGET_HAS_ctpop_i64) { 1895 + tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1); 1896 + } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) { 1897 + tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); 1898 + tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); 1899 + tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret)); 1900 + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); 1901 + } else { 1902 + gen_helper_ctpop_i64(ret, arg1); 1874 1903 } 1875 1904 } 1876 1905
+4
tcg/tcg-op.h
··· 291 291 void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2); 292 292 void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2); 293 293 void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg); 294 + void tcg_gen_ctpop_i32(TCGv_i32 a1, TCGv_i32 a2); 294 295 void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); 295 296 void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); 296 297 void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); ··· 479 480 void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2); 480 481 void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2); 481 482 void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg); 483 + void tcg_gen_ctpop_i64(TCGv_i64 a1, TCGv_i64 a2); 482 484 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); 483 485 void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); 484 486 void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); ··· 973 975 #define tcg_gen_clzi_tl tcg_gen_clzi_i64 974 976 #define tcg_gen_ctzi_tl tcg_gen_ctzi_i64 975 977 #define tcg_gen_clrsb_tl tcg_gen_clrsb_i64 978 + #define tcg_gen_ctpop_tl tcg_gen_ctpop_i64 976 979 #define tcg_gen_rotl_tl tcg_gen_rotl_i64 977 980 #define tcg_gen_rotli_tl tcg_gen_rotli_i64 978 981 #define tcg_gen_rotr_tl tcg_gen_rotr_i64 ··· 1069 1072 #define tcg_gen_clzi_tl tcg_gen_clzi_i32 1070 1073 #define tcg_gen_ctzi_tl tcg_gen_ctzi_i32 1071 1074 #define tcg_gen_clrsb_tl tcg_gen_clrsb_i32 1075 + #define tcg_gen_ctpop_tl tcg_gen_ctpop_i32 1072 1076 #define tcg_gen_rotl_tl tcg_gen_rotl_i32 1073 1077 #define tcg_gen_rotli_tl tcg_gen_rotli_i32 1074 1078 #define tcg_gen_rotr_tl tcg_gen_rotr_i32
+2
tcg/tcg-opc.h
··· 106 106 DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32)) 107 107 DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32)) 108 108 DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32)) 109 + DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32)) 109 110 110 111 DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) 111 112 DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) ··· 175 176 DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64)) 176 177 DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64)) 177 178 DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64)) 179 + DEF(ctpop_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctpop_i64)) 178 180 179 181 DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64)) 180 182 DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
+2
tcg/tcg-runtime.h
··· 21 21 DEF_HELPER_FLAGS_2(ctz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) 22 22 DEF_HELPER_FLAGS_1(clrsb_i32, TCG_CALL_NO_RWG_SE, i32, i32) 23 23 DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64) 24 + DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32) 25 + DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64) 24 26 25 27 DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) 26 28
+1
tcg/tcg.h
··· 113 113 #define TCG_TARGET_HAS_nor_i64 0 114 114 #define TCG_TARGET_HAS_clz_i64 0 115 115 #define TCG_TARGET_HAS_ctz_i64 0 116 + #define TCG_TARGET_HAS_ctpop_i64 0 116 117 #define TCG_TARGET_HAS_deposit_i64 0 117 118 #define TCG_TARGET_HAS_extract_i64 0 118 119 #define TCG_TARGET_HAS_sextract_i64 0
+2
tcg/tci/tcg-target.h
··· 76 76 #define TCG_TARGET_HAS_nor_i32 0 77 77 #define TCG_TARGET_HAS_clz_i32 0 78 78 #define TCG_TARGET_HAS_ctz_i32 0 79 + #define TCG_TARGET_HAS_ctpop_i32 0 79 80 #define TCG_TARGET_HAS_neg_i32 1 80 81 #define TCG_TARGET_HAS_not_i32 1 81 82 #define TCG_TARGET_HAS_orc_i32 0 ··· 108 109 #define TCG_TARGET_HAS_nor_i64 0 109 110 #define TCG_TARGET_HAS_clz_i64 0 110 111 #define TCG_TARGET_HAS_ctz_i64 0 112 + #define TCG_TARGET_HAS_ctpop_i64 0 111 113 #define TCG_TARGET_HAS_neg_i64 1 112 114 #define TCG_TARGET_HAS_not_i64 1 113 115 #define TCG_TARGET_HAS_orc_i64 0