qemu with hax to log dma reads & writes (see jcs.org/2018/11/12/vfio)
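
The DMA-logging hack mentioned in the title is not shown in the merge below. As a rough sketch of the kind of hack meant, assuming this QEMU version's dma_memory_read()/dma_memory_write() from include/sysemu/dma.h, one way is to route a device model's DMA accesses (for example the descriptor fetches in hw/net/imx_fec.c further down) through logging wrappers; the wrapper names here are made up:

/*
 * Hypothetical helpers, not part of this merge: wrap a device model's
 * DMA calls so every guest-memory access it makes is logged. Signatures
 * assume this QEMU version's include/sysemu/dma.h (no MemTxAttrs arg).
 */
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "sysemu/dma.h"

static inline int log_dma_read(AddressSpace *as, dma_addr_t addr,
                               void *buf, dma_addr_t len)
{
    qemu_log("DMA read  0x%" PRIx64 " len %" PRIu64 "\n",
             (uint64_t)addr, (uint64_t)len);
    return dma_memory_read(as, addr, buf, len);
}

static inline int log_dma_write(AddressSpace *as, dma_addr_t addr,
                                const void *buf, dma_addr_t len)
{
    qemu_log("DMA write 0x%" PRIx64 " len %" PRIu64 "\n",
             (uint64_t)addr, (uint64_t)len);
    return dma_memory_write(as, addr, buf, len);
}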

Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200616' into staging

* hw: arm: Set vendor property for IMX SDHCI emulations
* sd: sdhci: Implement basic vendor specific register support
* hw/net/imx_fec: Convert debug fprintf() to trace events
* target/arm/cpu: adjust virtual time for all KVM arm cpus
* Implement configurable descriptor size in ftgmac100
* hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers
* target/arm: More Neon decodetree conversion work

# gpg: Signature made Tue 16 Jun 2020 10:56:10 BST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20200616: (23 commits)
hw: arm: Set vendor property for IMX SDHCI emulations
sd: sdhci: Implement basic vendor specific register support
hw/net/imx_fec: Convert debug fprintf() to trace events
target/arm/cpu: adjust virtual time for all KVM arm cpus
Implement configurable descriptor size in ftgmac100
hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers
target/arm: Convert Neon VDUP (scalar) to decodetree
target/arm: Convert Neon VTBL, VTBX to decodetree
target/arm: Convert Neon VEXT to decodetree
target/arm: Convert Neon 2-reg-scalar long multiplies to decodetree
target/arm: Convert Neon 2-reg-scalar VQRDMLAH, VQRDMLSH to decodetree
target/arm: Convert Neon 2-reg-scalar VQDMULH, VQRDMULH to decodetree
target/arm: Convert Neon 2-reg-scalar float multiplies to decodetree
target/arm: Convert Neon 2-reg-scalar integer multiplies to decodetree
target/arm: Add missing TCG temp free in do_2shift_env_64()
target/arm: Add 'static' and 'const' annotations to VSHLL function arrays
target/arm: Convert Neon 3-reg-diff polynomial VMULL
target/arm: Convert Neon 3-reg-diff saturating doubling multiplies
target/arm: Convert Neon 3-reg-diff long multiplies
target/arm: Convert Neon 3-reg-diff VABAL, VABDL to decodetree
...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

# Conflicts:
# hw/arm/fsl-imx25.c
# hw/arm/fsl-imx6.c
# hw/arm/fsl-imx6ul.c
# hw/arm/fsl-imx7.c

+1495 -766
+6
hw/arm/fsl-imx25.c
···
                                  &err);
         object_property_set_uint(OBJECT(&s->esdhc[i]), IMX25_ESDHC_CAPABILITIES,
                                  "capareg", &err);
+        object_property_set_uint(OBJECT(&s->esdhc[i]), SDHCI_VENDOR_IMX,
+                                 "vendor", &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
         sysbus_realize(SYS_BUS_DEVICE(&s->esdhc[i]), &err);
         if (err) {
             error_propagate(errp, err);
+6
hw/arm/fsl-imx6.c
···
                                  &err);
         object_property_set_uint(OBJECT(&s->esdhc[i]), IMX6_ESDHC_CAPABILITIES,
                                  "capareg", &err);
+        object_property_set_uint(OBJECT(&s->esdhc[i]), SDHCI_VENDOR_IMX,
+                                 "vendor", &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
         sysbus_realize(SYS_BUS_DEVICE(&s->esdhc[i]), &err);
         if (err) {
             error_propagate(errp, err);
+2
hw/arm/fsl-imx6ul.c
···
             FSL_IMX6UL_USDHC2_IRQ,
         };
 
+        object_property_set_uint(OBJECT(&s->usdhc[i]), SDHCI_VENDOR_IMX,
+                                 "vendor", &error_abort);
         sysbus_realize(SYS_BUS_DEVICE(&s->usdhc[i]), &error_abort);
 
         sysbus_mmio_map(SYS_BUS_DEVICE(&s->usdhc[i]), 0,
+2
hw/arm/fsl-imx7.c
···
             FSL_IMX7_USDHC3_IRQ,
         };
 
+        object_property_set_uint(OBJECT(&s->usdhc[i]), SDHCI_VENDOR_IMX,
+                                 "vendor", &error_abort);
         sysbus_realize(SYS_BUS_DEVICE(&s->usdhc[i]), &error_abort);
 
         sysbus_mmio_map(SYS_BUS_DEVICE(&s->usdhc[i]), 0,
+63 -13
hw/misc/imx6ul_ccm.c
··· 19 19 20 20 #include "trace.h" 21 21 22 + static const uint32_t ccm_mask[CCM_MAX] = { 23 + [CCM_CCR] = 0xf01fef80, 24 + [CCM_CCDR] = 0xfffeffff, 25 + [CCM_CSR] = 0xffffffff, 26 + [CCM_CCSR] = 0xfffffef2, 27 + [CCM_CACRR] = 0xfffffff8, 28 + [CCM_CBCDR] = 0xc1f8e000, 29 + [CCM_CBCMR] = 0xfc03cfff, 30 + [CCM_CSCMR1] = 0x80700000, 31 + [CCM_CSCMR2] = 0xe01ff003, 32 + [CCM_CSCDR1] = 0xfe00c780, 33 + [CCM_CS1CDR] = 0xfe00fe00, 34 + [CCM_CS2CDR] = 0xf8007000, 35 + [CCM_CDCDR] = 0xf00fffff, 36 + [CCM_CHSCCDR] = 0xfffc01ff, 37 + [CCM_CSCDR2] = 0xfe0001ff, 38 + [CCM_CSCDR3] = 0xffffc1ff, 39 + [CCM_CDHIPR] = 0xffffffff, 40 + [CCM_CTOR] = 0x00000000, 41 + [CCM_CLPCR] = 0xf39ff01c, 42 + [CCM_CISR] = 0xfb85ffbe, 43 + [CCM_CIMR] = 0xfb85ffbf, 44 + [CCM_CCOSR] = 0xfe00fe00, 45 + [CCM_CGPR] = 0xfffc3fea, 46 + [CCM_CCGR0] = 0x00000000, 47 + [CCM_CCGR1] = 0x00000000, 48 + [CCM_CCGR2] = 0x00000000, 49 + [CCM_CCGR3] = 0x00000000, 50 + [CCM_CCGR4] = 0x00000000, 51 + [CCM_CCGR5] = 0x00000000, 52 + [CCM_CCGR6] = 0x00000000, 53 + [CCM_CMEOR] = 0xafffff1f, 54 + }; 55 + 56 + static const uint32_t analog_mask[CCM_ANALOG_MAX] = { 57 + [CCM_ANALOG_PLL_ARM] = 0xfff60f80, 58 + [CCM_ANALOG_PLL_USB1] = 0xfffe0fbc, 59 + [CCM_ANALOG_PLL_USB2] = 0xfffe0fbc, 60 + [CCM_ANALOG_PLL_SYS] = 0xfffa0ffe, 61 + [CCM_ANALOG_PLL_SYS_SS] = 0x00000000, 62 + [CCM_ANALOG_PLL_SYS_NUM] = 0xc0000000, 63 + [CCM_ANALOG_PLL_SYS_DENOM] = 0xc0000000, 64 + [CCM_ANALOG_PLL_AUDIO] = 0xffe20f80, 65 + [CCM_ANALOG_PLL_AUDIO_NUM] = 0xc0000000, 66 + [CCM_ANALOG_PLL_AUDIO_DENOM] = 0xc0000000, 67 + [CCM_ANALOG_PLL_VIDEO] = 0xffe20f80, 68 + [CCM_ANALOG_PLL_VIDEO_NUM] = 0xc0000000, 69 + [CCM_ANALOG_PLL_VIDEO_DENOM] = 0xc0000000, 70 + [CCM_ANALOG_PLL_ENET] = 0xffc20ff0, 71 + [CCM_ANALOG_PFD_480] = 0x40404040, 72 + [CCM_ANALOG_PFD_528] = 0x40404040, 73 + [PMU_MISC0] = 0x01fe8306, 74 + [PMU_MISC1] = 0x07fcede0, 75 + [PMU_MISC2] = 0x005f5f5f, 76 + }; 77 + 22 78 static const char *imx6ul_ccm_reg_name(uint32_t reg) 23 79 { 24 80 static char unknown[20]; ··· 596 652 597 653 trace_ccm_write_reg(imx6ul_ccm_reg_name(index), (uint32_t)value); 598 654 599 - /* 600 - * We will do a better implementation later. In particular some bits 601 - * cannot be written to. 602 - */ 603 - s->ccm[index] = (uint32_t)value; 655 + s->ccm[index] = (s->ccm[index] & ccm_mask[index]) | 656 + ((uint32_t)value & ~ccm_mask[index]); 604 657 } 605 658 606 659 static uint64_t imx6ul_analog_read(void *opaque, hwaddr offset, unsigned size) ··· 737 790 * the REG_NAME register. So we change the value of the 738 791 * REG_NAME register, setting bits passed in the value. 739 792 */ 740 - s->analog[index - 1] |= value; 793 + s->analog[index - 1] |= (value & ~analog_mask[index - 1]); 741 794 break; 742 795 case CCM_ANALOG_PLL_ARM_CLR: 743 796 case CCM_ANALOG_PLL_USB1_CLR: ··· 762 815 * the REG_NAME register. So we change the value of the 763 816 * REG_NAME register, unsetting bits passed in the value. 764 817 */ 765 - s->analog[index - 2] &= ~value; 818 + s->analog[index - 2] &= ~(value & ~analog_mask[index - 2]); 766 819 break; 767 820 case CCM_ANALOG_PLL_ARM_TOG: 768 821 case CCM_ANALOG_PLL_USB1_TOG: ··· 787 840 * the REG_NAME register. So we change the value of the 788 841 * REG_NAME register, toggling bits passed in the value. 789 842 */ 790 - s->analog[index - 3] ^= value; 843 + s->analog[index - 3] ^= (value & ~analog_mask[index - 3]); 791 844 break; 792 845 default: 793 - /* 794 - * We will do a better implementation later. In particular some bits 795 - * cannot be written to. 
796 - */ 797 - s->analog[index] = value; 846 + s->analog[index] = (s->analog[index] & analog_mask[index]) | 847 + (value & ~analog_mask[index]); 798 848 break; 799 849 } 800 850 }
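
The CCM change above boils down to one masking expression: a write keeps the bits flagged as non-writable in ccm_mask[]/analog_mask[] and takes everything else from the guest value. A small standalone illustration (not QEMU code; the helper name and the sample write are made up, the CCM_CACRR mask 0xfffffff8 is taken from the table above, and the claim that its three writable bits are the ARM clock divider field comes from the i.MX6UL reference manual):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Bits set in ro_mask are read-only and keep their old value; the rest
 * come from the guest write -- the same expression the patch uses with
 * ccm_mask[index] / analog_mask[index] as ro_mask.
 */
static uint32_t masked_write(uint32_t old, uint32_t value, uint32_t ro_mask)
{
    return (old & ro_mask) | (value & ~ro_mask);
}

int main(void)
{
    uint32_t cacrr = 0;

    /* Guest writes all-ones to CCM_CACRR (mask 0xfffffff8): only the
     * low three bits stick. */
    cacrr = masked_write(cacrr, 0xffffffff, 0xfffffff8);
    assert(cacrr == 0x00000007);
    printf("CACRR = 0x%08x\n", cacrr);
    return 0;
}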
+24 -2
hw/net/ftgmac100.c
···
 #define FTGMAC100_APTC_TXPOLL_TIME_SEL   (1 << 12)
 
 /*
+ * DMA burst length and arbitration control register
+ */
+#define FTGMAC100_DBLAC_RXBURST_SIZE(x)  (((x) >> 8) & 0x3)
+#define FTGMAC100_DBLAC_TXBURST_SIZE(x)  (((x) >> 10) & 0x3)
+#define FTGMAC100_DBLAC_RXDES_SIZE(x)    ((((x) >> 12) & 0xf) * 8)
+#define FTGMAC100_DBLAC_TXDES_SIZE(x)    ((((x) >> 16) & 0xf) * 8)
+#define FTGMAC100_DBLAC_IFG_CNT(x)       (((x) >> 20) & 0x7)
+#define FTGMAC100_DBLAC_IFG_INC          (1 << 23)
+
+/*
  * PHY control register
  */
 #define FTGMAC100_PHYCR_MIIRD            (1 << 26)
···
         if (bd.des0 & s->txdes0_edotr) {
             addr = tx_ring;
         } else {
-            addr += sizeof(FTGMAC100Desc);
+            addr += FTGMAC100_DBLAC_TXDES_SIZE(s->dblac);
         }
···
         s->phydata = value & 0xffff;
         break;
     case FTGMAC100_DBLAC: /* DMA Burst Length and Arbitration Control */
+        if (FTGMAC100_DBLAC_TXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: transmit descriptor too small : %d bytes\n",
+                          __func__, FTGMAC100_DBLAC_TXDES_SIZE(s->dblac));
+            break;
+        }
+        if (FTGMAC100_DBLAC_RXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: receive descriptor too small : %d bytes\n",
+                          __func__, FTGMAC100_DBLAC_RXDES_SIZE(s->dblac));
+            break;
+        }
         s->dblac = value;
         break;
     case FTGMAC100_REVR: /* Feature Register */
···
         if (bd.des0 & s->rxdes0_edorr) {
             addr = s->rx_ring;
         } else {
-            addr += sizeof(FTGMAC100Desc);
+            addr += FTGMAC100_DBLAC_RXDES_SIZE(s->dblac);
         }
     }
     s->rx_descriptor = addr;
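
For the ftgmac100 change, the descriptor-size fields in DBLAC are programmed in units of 8 bytes, so the smallest field value that passes the new guest-error check is 2 (16 bytes, the size of FTGMAC100Desc in this model). A quick standalone check of the extraction macros, using a made-up DBLAC value:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Field extraction macros copied from the patch above: descriptor sizes
 * are programmed in units of 8 bytes. */
#define FTGMAC100_DBLAC_RXDES_SIZE(x) ((((x) >> 12) & 0xf) * 8)
#define FTGMAC100_DBLAC_TXDES_SIZE(x) ((((x) >> 16) & 0xf) * 8)

int main(void)
{
    /*
     * 0x00022000 is a hypothetical DBLAC value: TXDES and RXDES size
     * fields are both 2, i.e. 16-byte descriptors, so the guest-error
     * check passes and the ring walkers advance by 16 bytes per entry.
     */
    uint32_t dblac = 0x00022000;

    assert(FTGMAC100_DBLAC_TXDES_SIZE(dblac) == 16);
    assert(FTGMAC100_DBLAC_RXDES_SIZE(dblac) == 16);
    printf("txdes %u bytes, rxdes %u bytes\n",
           FTGMAC100_DBLAC_TXDES_SIZE(dblac),
           FTGMAC100_DBLAC_RXDES_SIZE(dblac));
    return 0;
}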
+45 -61
hw/net/imx_fec.c
··· 31 31 #include "qemu/module.h" 32 32 #include "net/checksum.h" 33 33 #include "net/eth.h" 34 + #include "trace.h" 34 35 35 36 /* For crc32 */ 36 37 #include <zlib.h> 37 - 38 - #ifndef DEBUG_IMX_FEC 39 - #define DEBUG_IMX_FEC 0 40 - #endif 41 - 42 - #define FEC_PRINTF(fmt, args...) \ 43 - do { \ 44 - if (DEBUG_IMX_FEC) { \ 45 - fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_FEC, \ 46 - __func__, ##args); \ 47 - } \ 48 - } while (0) 49 - 50 - #ifndef DEBUG_IMX_PHY 51 - #define DEBUG_IMX_PHY 0 52 - #endif 53 - 54 - #define PHY_PRINTF(fmt, args...) \ 55 - do { \ 56 - if (DEBUG_IMX_PHY) { \ 57 - fprintf(stderr, "[%s.phy]%s: " fmt , TYPE_IMX_FEC, \ 58 - __func__, ##args); \ 59 - } \ 60 - } while (0) 61 38 62 39 #define IMX_MAX_DESC 1024 63 40 ··· 262 239 * For now we don't handle any GPIO/interrupt line, so the OS will 263 240 * have to poll for the PHY status. 264 241 */ 265 - static void phy_update_irq(IMXFECState *s) 242 + static void imx_phy_update_irq(IMXFECState *s) 266 243 { 267 244 imx_eth_update(s); 268 245 } 269 246 270 - static void phy_update_link(IMXFECState *s) 247 + static void imx_phy_update_link(IMXFECState *s) 271 248 { 272 249 /* Autonegotiation status mirrors link status. */ 273 250 if (qemu_get_queue(s->nic)->link_down) { 274 - PHY_PRINTF("link is down\n"); 251 + trace_imx_phy_update_link("down"); 275 252 s->phy_status &= ~0x0024; 276 253 s->phy_int |= PHY_INT_DOWN; 277 254 } else { 278 - PHY_PRINTF("link is up\n"); 255 + trace_imx_phy_update_link("up"); 279 256 s->phy_status |= 0x0024; 280 257 s->phy_int |= PHY_INT_ENERGYON; 281 258 s->phy_int |= PHY_INT_AUTONEG_COMPLETE; 282 259 } 283 - phy_update_irq(s); 260 + imx_phy_update_irq(s); 284 261 } 285 262 286 263 static void imx_eth_set_link(NetClientState *nc) 287 264 { 288 - phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc))); 265 + imx_phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc))); 289 266 } 290 267 291 - static void phy_reset(IMXFECState *s) 268 + static void imx_phy_reset(IMXFECState *s) 292 269 { 270 + trace_imx_phy_reset(); 271 + 293 272 s->phy_status = 0x7809; 294 273 s->phy_control = 0x3000; 295 274 s->phy_advertise = 0x01e1; 296 275 s->phy_int_mask = 0; 297 276 s->phy_int = 0; 298 - phy_update_link(s); 277 + imx_phy_update_link(s); 299 278 } 300 279 301 - static uint32_t do_phy_read(IMXFECState *s, int reg) 280 + static uint32_t imx_phy_read(IMXFECState *s, int reg) 302 281 { 303 282 uint32_t val; 304 283 ··· 332 311 case 29: /* Interrupt source. */ 333 312 val = s->phy_int; 334 313 s->phy_int = 0; 335 - phy_update_irq(s); 314 + imx_phy_update_irq(s); 336 315 break; 337 316 case 30: /* Interrupt mask */ 338 317 val = s->phy_int_mask; ··· 352 331 break; 353 332 } 354 333 355 - PHY_PRINTF("read 0x%04x @ %d\n", val, reg); 334 + trace_imx_phy_read(val, reg); 356 335 357 336 return val; 358 337 } 359 338 360 - static void do_phy_write(IMXFECState *s, int reg, uint32_t val) 339 + static void imx_phy_write(IMXFECState *s, int reg, uint32_t val) 361 340 { 362 - PHY_PRINTF("write 0x%04x @ %d\n", val, reg); 341 + trace_imx_phy_write(val, reg); 363 342 364 343 if (reg > 31) { 365 344 /* we only advertise one phy */ ··· 369 348 switch (reg) { 370 349 case 0: /* Basic Control */ 371 350 if (val & 0x8000) { 372 - phy_reset(s); 351 + imx_phy_reset(s); 373 352 } else { 374 353 s->phy_control = val & 0x7980; 375 354 /* Complete autonegotiation immediately. 
*/ ··· 383 362 break; 384 363 case 30: /* Interrupt mask */ 385 364 s->phy_int_mask = val & 0xff; 386 - phy_update_irq(s); 365 + imx_phy_update_irq(s); 387 366 break; 388 367 case 17: 389 368 case 18: ··· 402 381 static void imx_fec_read_bd(IMXFECBufDesc *bd, dma_addr_t addr) 403 382 { 404 383 dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd)); 384 + 385 + trace_imx_fec_read_bd(addr, bd->flags, bd->length, bd->data); 405 386 } 406 387 407 388 static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr) ··· 412 393 static void imx_enet_read_bd(IMXENETBufDesc *bd, dma_addr_t addr) 413 394 { 414 395 dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd)); 396 + 397 + trace_imx_enet_read_bd(addr, bd->flags, bd->length, bd->data, 398 + bd->option, bd->status); 415 399 } 416 400 417 401 static void imx_enet_write_bd(IMXENETBufDesc *bd, dma_addr_t addr) ··· 471 455 int len; 472 456 473 457 imx_fec_read_bd(&bd, addr); 474 - FEC_PRINTF("tx_bd %x flags %04x len %d data %08x\n", 475 - addr, bd.flags, bd.length, bd.data); 476 458 if ((bd.flags & ENET_BD_R) == 0) { 459 + 477 460 /* Run out of descriptors to transmit. */ 478 - FEC_PRINTF("tx_bd ran out of descriptors to transmit\n"); 461 + trace_imx_eth_tx_bd_busy(); 462 + 479 463 break; 480 464 } 481 465 len = bd.length; ··· 552 536 int len; 553 537 554 538 imx_enet_read_bd(&bd, addr); 555 - FEC_PRINTF("tx_bd %x flags %04x len %d data %08x option %04x " 556 - "status %04x\n", addr, bd.flags, bd.length, bd.data, 557 - bd.option, bd.status); 558 539 if ((bd.flags & ENET_BD_R) == 0) { 559 540 /* Run out of descriptors to transmit. */ 541 + 542 + trace_imx_eth_tx_bd_busy(); 543 + 560 544 break; 561 545 } 562 546 len = bd.length; ··· 633 617 s->regs[ENET_RDAR] = (bd.flags & ENET_BD_E) ? ENET_RDAR_RDAR : 0; 634 618 635 619 if (!s->regs[ENET_RDAR]) { 636 - FEC_PRINTF("RX buffer full\n"); 620 + trace_imx_eth_rx_bd_full(); 637 621 } else if (flush) { 638 622 qemu_flush_queued_packets(qemu_get_queue(s->nic)); 639 623 } ··· 676 660 memset(s->tx_descriptor, 0, sizeof(s->tx_descriptor)); 677 661 678 662 /* We also reset the PHY */ 679 - phy_reset(s); 663 + imx_phy_reset(s); 680 664 } 681 665 682 666 static uint32_t imx_default_read(IMXFECState *s, uint32_t index) ··· 774 758 break; 775 759 } 776 760 777 - FEC_PRINTF("reg[%s] => 0x%" PRIx32 "\n", imx_eth_reg_name(s, index), 778 - value); 761 + trace_imx_eth_read(index, imx_eth_reg_name(s, index), value); 779 762 780 763 return value; 781 764 } ··· 884 867 const bool single_tx_ring = !imx_eth_is_multi_tx_ring(s); 885 868 uint32_t index = offset >> 2; 886 869 887 - FEC_PRINTF("reg[%s] <= 0x%" PRIx32 "\n", imx_eth_reg_name(s, index), 888 - (uint32_t)value); 870 + trace_imx_eth_write(index, imx_eth_reg_name(s, index), value); 889 871 890 872 switch (index) { 891 873 case ENET_EIR: ··· 940 922 if (extract32(value, 29, 1)) { 941 923 /* This is a read operation */ 942 924 s->regs[ENET_MMFR] = deposit32(s->regs[ENET_MMFR], 0, 16, 943 - do_phy_read(s, 925 + imx_phy_read(s, 944 926 extract32(value, 945 927 18, 10))); 946 928 } else { 947 929 /* This a write operation */ 948 - do_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16)); 930 + imx_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16)); 949 931 } 950 932 /* raise the interrupt as the PHY operation is done */ 951 933 s->regs[ENET_EIR] |= ENET_INT_MII; ··· 1053 1035 { 1054 1036 IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); 1055 1037 1056 - FEC_PRINTF("\n"); 1057 - 1058 1038 return !!s->regs[ENET_RDAR]; 1059 1039 } 1060 
1040 ··· 1071 1051 unsigned int buf_len; 1072 1052 size_t size = len; 1073 1053 1074 - FEC_PRINTF("len %d\n", (int)size); 1054 + trace_imx_fec_receive(size); 1075 1055 1076 1056 if (!s->regs[ENET_RDAR]) { 1077 1057 qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n", ··· 1113 1093 bd.length = buf_len; 1114 1094 size -= buf_len; 1115 1095 1116 - FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length); 1096 + trace_imx_fec_receive_len(addr, bd.length); 1117 1097 1118 1098 /* The last 4 bytes are the CRC. */ 1119 1099 if (size < 4) { ··· 1131 1111 if (size == 0) { 1132 1112 /* Last buffer in frame. */ 1133 1113 bd.flags |= flags | ENET_BD_L; 1134 - FEC_PRINTF("rx frame flags %04x\n", bd.flags); 1114 + 1115 + trace_imx_fec_receive_last(bd.flags); 1116 + 1135 1117 s->regs[ENET_EIR] |= ENET_INT_RXF; 1136 1118 } else { 1137 1119 s->regs[ENET_EIR] |= ENET_INT_RXB; ··· 1164 1146 size_t size = len; 1165 1147 bool shift16 = s->regs[ENET_RACC] & ENET_RACC_SHIFT16; 1166 1148 1167 - FEC_PRINTF("len %d\n", (int)size); 1149 + trace_imx_enet_receive(size); 1168 1150 1169 1151 if (!s->regs[ENET_RDAR]) { 1170 1152 qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n", ··· 1210 1192 bd.length = buf_len; 1211 1193 size -= buf_len; 1212 1194 1213 - FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length); 1195 + trace_imx_enet_receive_len(addr, bd.length); 1214 1196 1215 1197 /* The last 4 bytes are the CRC. */ 1216 1198 if (size < 4) { ··· 1246 1228 if (size == 0) { 1247 1229 /* Last buffer in frame. */ 1248 1230 bd.flags |= flags | ENET_BD_L; 1249 - FEC_PRINTF("rx frame flags %04x\n", bd.flags); 1231 + 1232 + trace_imx_enet_receive_last(bd.flags); 1233 + 1250 1234 /* Indicate that we've updated the last buffer descriptor. */ 1251 1235 bd.last_buffer = ENET_BD_BDU; 1252 1236 if (bd.option & ENET_BD_RX_INT) {
+18
hw/net/trace-events
···
 i82596_new_mac(const char *id_with_mac) "New MAC for: %s"
 i82596_set_multicast(uint16_t count) "Added %d multicast entries"
 i82596_channel_attention(void *s) "%p: Received CHANNEL ATTENTION"
+
+# imx_fec.c
+imx_phy_read(uint32_t val, int reg) "0x%04"PRIx32" <= reg[%d]"
+imx_phy_write(uint32_t val, int reg) "0x%04"PRIx32" => reg[%d]"
+imx_phy_update_link(const char *s) "%s"
+imx_phy_reset(void) ""
+imx_fec_read_bd(uint64_t addr, int flags, int len, int data) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x"
+imx_enet_read_bd(uint64_t addr, int flags, int len, int data, int options, int status) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x option 0x%04x status 0x%04x"
+imx_eth_tx_bd_busy(void) "tx_bd ran out of descriptors to transmit"
+imx_eth_rx_bd_full(void) "RX buffer is full"
+imx_eth_read(int reg, const char *reg_name, uint32_t value) "reg[%d:%s] => 0x%08"PRIx32
+imx_eth_write(int reg, const char *reg_name, uint64_t value) "reg[%d:%s] <= 0x%08"PRIx64
+imx_fec_receive(size_t size) "len %zu"
+imx_fec_receive_len(uint64_t addr, int len) "rx_bd 0x%"PRIx64" length %d"
+imx_fec_receive_last(int last) "rx frame flags 0x%04x"
+imx_enet_receive(size_t size) "len %zu"
+imx_enet_receive_len(uint64_t addr, int len) "rx_bd 0x%"PRIx64" length %d"
+imx_enet_receive_last(int last) "rx frame flags 0x%04x"
+5
hw/sd/sdhci-internal.h
···
 #define SDHC_CMD_INHIBIT               0x00000001
 #define SDHC_DATA_INHIBIT              0x00000002
 #define SDHC_DAT_LINE_ACTIVE           0x00000004
+#define SDHC_IMX_CLOCK_GATE_OFF        0x00000080
 #define SDHC_DOING_WRITE               0x00000100
 #define SDHC_DOING_READ                0x00000200
 #define SDHC_SPACE_AVAILABLE           0x00000400
···
 
 
 #define ESDHC_MIX_CTRL                 0x48
+
 #define ESDHC_VENDOR_SPEC              0xc0
+#define ESDHC_IMX_FRC_SDCLK_ON         (1 << 8)
+
 #define ESDHC_DLL_CTRL                 0x60
 
 #define ESDHC_TUNING_CTRL              0xcc
···
 #define DEFINE_SDHCI_COMMON_PROPERTIES(_state) \
     DEFINE_PROP_UINT8("sd-spec-version", _state, sd_spec_version, 2), \
     DEFINE_PROP_UINT8("uhs", _state, uhs_mode, UHS_NOT_SUPPORTED), \
+    DEFINE_PROP_UINT8("vendor", _state, vendor, SDHCI_VENDOR_NONE), \
     \
     /* Capabilities registers provide information on supported
      * features of this specific host controller implementation */ \
+17 -1
hw/sd/sdhci.c
···
         }
         break;
 
+    case ESDHC_VENDOR_SPEC:
+        ret = s->vendor_spec;
+        break;
     case ESDHC_DLL_CTRL:
     case ESDHC_TUNE_CTRL_STATUS:
     case ESDHC_UNDOCUMENTED_REG27:
     case ESDHC_TUNING_CTRL:
-    case ESDHC_VENDOR_SPEC:
     case ESDHC_MIX_CTRL:
     case ESDHC_WTMK_LVL:
         ret = 0;
···
     case ESDHC_UNDOCUMENTED_REG27:
     case ESDHC_TUNING_CTRL:
     case ESDHC_WTMK_LVL:
+        break;
+
     case ESDHC_VENDOR_SPEC:
+        s->vendor_spec = value;
+        switch (s->vendor) {
+        case SDHCI_VENDOR_IMX:
+            if (value & ESDHC_IMX_FRC_SDCLK_ON) {
+                s->prnsts &= ~SDHC_IMX_CLOCK_GATE_OFF;
+            } else {
+                s->prnsts |= SDHC_IMX_CLOCK_GATE_OFF;
+            }
+            break;
+        default:
+            break;
+        }
         break;
 
     case SDHC_HOSTCTL:
+5
include/hw/sd/sdhci.h
···
     uint16_t acmd12errsts; /* Auto CMD12 error status register */
     uint16_t hostctl2;     /* Host Control 2 */
     uint64_t admasysaddr;  /* ADMA System Address Register */
+    uint16_t vendor_spec;  /* Vendor specific register */
 
     /* Read-only registers */
     uint64_t capareg;      /* Capabilities Register */
···
     uint32_t quirks;
     uint8_t sd_spec_version;
     uint8_t uhs_mode;
+    uint8_t vendor;        /* For vendor specific functionality */
 } SDHCIState;
+
+#define SDHCI_VENDOR_NONE 0
+#define SDHCI_VENDOR_IMX  1
 
 /*
  * Controller does not provide transfer-complete interrupt when not
+4 -2
target/arm/cpu.c
···
     if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) {
         qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property);
     }
+
+    if (kvm_enabled()) {
+        kvm_arm_add_vcpu_properties(obj);
+    }
 }
 
 static void arm_cpu_finalizefn(Object *obj)
···
 
     if (kvm_enabled()) {
         kvm_arm_set_cpu_features_from_host(cpu);
-        kvm_arm_add_vcpu_properties(obj);
     } else {
         cortex_a15_initfn(obj);
···
     if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
         aarch64_add_sve_properties(obj);
     }
-    kvm_arm_add_vcpu_properties(obj);
     arm_cpu_post_init(obj);
-1
target/arm/cpu64.c
···
 
     if (kvm_enabled()) {
         kvm_arm_set_cpu_features_from_host(cpu);
-        kvm_arm_add_vcpu_properties(obj);
     } else {
         uint64_t t;
         uint32_t u;
+11 -10
target/arm/kvm.c
···
 /* KVM VCPU properties should be prefixed with "kvm-". */
 void kvm_arm_add_vcpu_properties(Object *obj)
 {
-    if (!kvm_enabled()) {
-        return;
+    ARMCPU *cpu = ARM_CPU(obj);
+    CPUARMState *env = &cpu->env;
+
+    if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
+        cpu->kvm_adjvtime = true;
+        object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
+                                 kvm_no_adjvtime_set);
+        object_property_set_description(obj, "kvm-no-adjvtime",
+                                        "Set on to disable the adjustment of "
+                                        "the virtual counter. VM stopped time "
+                                        "will be counted.");
     }
-
-    ARM_CPU(obj)->kvm_adjvtime = true;
-    object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
-                             kvm_no_adjvtime_set);
-    object_property_set_description(obj, "kvm-no-adjvtime",
-                                    "Set on to disable the adjustment of "
-                                    "the virtual counter. VM stopped time "
-                                    "will be counted.");
 }
 
 bool kvm_arm_pmu_supported(CPUState *cpu)
+130
target/arm/neon-dp.decode
··· 397 397 # So we have a single decode line and check the cmode/op in the 398 398 # trans function. 399 399 Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm 400 + 401 + ###################################################################### 402 + # Within the "two registers, or three registers of different lengths" 403 + # grouping ([23,4]=0b10), bits [21:20] are either part of the opcode 404 + # decode: 0b11 for VEXT, two-reg-misc, VTBL, and duplicate-scalar; 405 + # or they are a size field for the three-reg-different-lengths and 406 + # two-reg-and-scalar insn groups (where size cannot be 0b11). This 407 + # is slightly awkward for decodetree: we handle it with this 408 + # non-exclusive group which contains within it two exclusive groups: 409 + # one for the size=0b11 patterns, and one for the size-not-0b11 410 + # patterns. This allows us to check that none of the insns within 411 + # each subgroup accidentally overlap each other. Note that all the 412 + # trans functions for the size-not-0b11 patterns must check and 413 + # return false for size==3. 414 + ###################################################################### 415 + { 416 + [ 417 + ################################################################## 418 + # Miscellaneous size=0b11 insns 419 + ################################################################## 420 + VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \ 421 + vm=%vm_dp vn=%vn_dp vd=%vd_dp 422 + 423 + VTBL 1111 001 1 1 . 11 .... .... 10 len:2 . op:1 . 0 .... \ 424 + vm=%vm_dp vn=%vn_dp vd=%vd_dp 425 + 426 + VDUP_scalar 1111 001 1 1 . 11 index:3 1 .... 11 000 q:1 . 0 .... \ 427 + vm=%vm_dp vd=%vd_dp size=0 428 + VDUP_scalar 1111 001 1 1 . 11 index:2 10 .... 11 000 q:1 . 0 .... \ 429 + vm=%vm_dp vd=%vd_dp size=1 430 + VDUP_scalar 1111 001 1 1 . 11 index:1 100 .... 11 000 q:1 . 0 .... \ 431 + vm=%vm_dp vd=%vd_dp size=2 432 + ] 433 + 434 + # Subgroup for size != 0b11 435 + [ 436 + ################################################################## 437 + # 3-reg-different-length grouping: 438 + # 1111 001 U 1 D sz!=11 Vn:4 Vd:4 opc:4 N 0 M 0 Vm:4 439 + ################################################################## 440 + 441 + &3diff vm vn vd size 442 + 443 + @3diff .... ... . . . size:2 .... .... .... . . . . .... \ 444 + &3diff vm=%vm_dp vn=%vn_dp vd=%vd_dp 445 + 446 + VADDL_S_3d 1111 001 0 1 . .. .... .... 0000 . 0 . 0 .... @3diff 447 + VADDL_U_3d 1111 001 1 1 . .. .... .... 0000 . 0 . 0 .... @3diff 448 + 449 + VADDW_S_3d 1111 001 0 1 . .. .... .... 0001 . 0 . 0 .... @3diff 450 + VADDW_U_3d 1111 001 1 1 . .. .... .... 0001 . 0 . 0 .... @3diff 451 + 452 + VSUBL_S_3d 1111 001 0 1 . .. .... .... 0010 . 0 . 0 .... @3diff 453 + VSUBL_U_3d 1111 001 1 1 . .. .... .... 0010 . 0 . 0 .... @3diff 454 + 455 + VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff 456 + VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff 457 + 458 + VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff 459 + VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff 460 + 461 + VABAL_S_3d 1111 001 0 1 . .. .... .... 0101 . 0 . 0 .... @3diff 462 + VABAL_U_3d 1111 001 1 1 . .. .... .... 0101 . 0 . 0 .... @3diff 463 + 464 + VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff 465 + VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff 466 + 467 + VABDL_S_3d 1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff 468 + VABDL_U_3d 1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... 
@3diff 469 + 470 + VMLAL_S_3d 1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff 471 + VMLAL_U_3d 1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff 472 + 473 + VQDMLAL_3d 1111 001 0 1 . .. .... .... 1001 . 0 . 0 .... @3diff 474 + 475 + VMLSL_S_3d 1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff 476 + VMLSL_U_3d 1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff 477 + 478 + VQDMLSL_3d 1111 001 0 1 . .. .... .... 1011 . 0 . 0 .... @3diff 479 + 480 + VMULL_S_3d 1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff 481 + VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff 482 + 483 + VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff 484 + 485 + VMULL_P_3d 1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff 486 + 487 + ################################################################## 488 + # 2-regs-plus-scalar grouping: 489 + # 1111 001 Q 1 D sz!=11 Vn:4 Vd:4 opc:4 N 1 M 0 Vm:4 490 + ################################################################## 491 + &2scalar vm vn vd size q 492 + 493 + @2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \ 494 + &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp 495 + # For the 'long' ops the Q bit is part of insn decode 496 + @2scalar_q0 .... ... . . . size:2 .... .... .... . . . . .... \ 497 + &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0 498 + 499 + VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar 500 + VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar 501 + 502 + VMLAL_S_2sc 1111 001 0 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0 503 + VMLAL_U_2sc 1111 001 1 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0 504 + 505 + VQDMLAL_2sc 1111 001 0 1 . .. .... .... 0011 . 1 . 0 .... @2scalar_q0 506 + 507 + VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar 508 + VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar 509 + 510 + VMLSL_S_2sc 1111 001 0 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0 511 + VMLSL_U_2sc 1111 001 1 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0 512 + 513 + VQDMLSL_2sc 1111 001 0 1 . .. .... .... 0111 . 1 . 0 .... @2scalar_q0 514 + 515 + VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar 516 + VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar 517 + 518 + VMULL_S_2sc 1111 001 0 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0 519 + VMULL_U_2sc 1111 001 1 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0 520 + 521 + VQDMULL_2sc 1111 001 0 1 . .. .... .... 1011 . 1 . 0 .... @2scalar_q0 522 + 523 + VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar 524 + VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar 525 + 526 + VQRDMLAH_2sc 1111 001 . 1 . .. .... .... 1110 . 1 . 0 .... @2scalar 527 + VQRDMLSH_2sc 1111 001 . 1 . .. .... .... 1111 . 1 . 0 .... @2scalar 528 + ] 529 + }
+1146 -2
target/arm/translate-neon.inc.c
··· 1329 1329 neon_load_reg64(tmp, a->vm + pass); 1330 1330 fn(tmp, cpu_env, tmp, constimm); 1331 1331 neon_store_reg64(tmp, a->vd + pass); 1332 + tcg_temp_free_i64(tmp); 1332 1333 } 1333 1334 tcg_temp_free_i64(constimm); 1334 1335 return true; ··· 1624 1625 tmp = tcg_temp_new_i64(); 1625 1626 1626 1627 widenfn(tmp, rm0); 1628 + tcg_temp_free_i32(rm0); 1627 1629 if (a->shift != 0) { 1628 1630 tcg_gen_shli_i64(tmp, tmp, a->shift); 1629 1631 tcg_gen_andi_i64(tmp, tmp, ~widen_mask); ··· 1631 1633 neon_store_reg64(tmp, a->vd); 1632 1634 1633 1635 widenfn(tmp, rm1); 1636 + tcg_temp_free_i32(rm1); 1634 1637 if (a->shift != 0) { 1635 1638 tcg_gen_shli_i64(tmp, tmp, a->shift); 1636 1639 tcg_gen_andi_i64(tmp, tmp, ~widen_mask); ··· 1642 1645 1643 1646 static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a) 1644 1647 { 1645 - NeonGenWidenFn *widenfn[] = { 1648 + static NeonGenWidenFn * const widenfn[] = { 1646 1649 gen_helper_neon_widen_s8, 1647 1650 gen_helper_neon_widen_s16, 1648 1651 tcg_gen_ext_i32_i64, ··· 1652 1655 1653 1656 static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) 1654 1657 { 1655 - NeonGenWidenFn *widenfn[] = { 1658 + static NeonGenWidenFn * const widenfn[] = { 1656 1659 gen_helper_neon_widen_u8, 1657 1660 gen_helper_neon_widen_u16, 1658 1661 tcg_gen_extu_i32_i64, ··· 1826 1829 } 1827 1830 return do_1reg_imm(s, a, fn); 1828 1831 } 1832 + 1833 + static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, 1834 + NeonGenWidenFn *widenfn, 1835 + NeonGenTwo64OpFn *opfn, 1836 + bool src1_wide) 1837 + { 1838 + /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */ 1839 + TCGv_i64 rn0_64, rn1_64, rm_64; 1840 + TCGv_i32 rm; 1841 + 1842 + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1843 + return false; 1844 + } 1845 + 1846 + /* UNDEF accesses to D16-D31 if they don't exist. */ 1847 + if (!dc_isar_feature(aa32_simd_r32, s) && 1848 + ((a->vd | a->vn | a->vm) & 0x10)) { 1849 + return false; 1850 + } 1851 + 1852 + if (!widenfn || !opfn) { 1853 + /* size == 3 case, which is an entirely different insn group */ 1854 + return false; 1855 + } 1856 + 1857 + if ((a->vd & 1) || (src1_wide && (a->vn & 1))) { 1858 + return false; 1859 + } 1860 + 1861 + if (!vfp_access_check(s)) { 1862 + return true; 1863 + } 1864 + 1865 + rn0_64 = tcg_temp_new_i64(); 1866 + rn1_64 = tcg_temp_new_i64(); 1867 + rm_64 = tcg_temp_new_i64(); 1868 + 1869 + if (src1_wide) { 1870 + neon_load_reg64(rn0_64, a->vn); 1871 + } else { 1872 + TCGv_i32 tmp = neon_load_reg(a->vn, 0); 1873 + widenfn(rn0_64, tmp); 1874 + tcg_temp_free_i32(tmp); 1875 + } 1876 + rm = neon_load_reg(a->vm, 0); 1877 + 1878 + widenfn(rm_64, rm); 1879 + tcg_temp_free_i32(rm); 1880 + opfn(rn0_64, rn0_64, rm_64); 1881 + 1882 + /* 1883 + * Load second pass inputs before storing the first pass result, to 1884 + * avoid incorrect results if a narrow input overlaps with the result. 
1885 + */ 1886 + if (src1_wide) { 1887 + neon_load_reg64(rn1_64, a->vn + 1); 1888 + } else { 1889 + TCGv_i32 tmp = neon_load_reg(a->vn, 1); 1890 + widenfn(rn1_64, tmp); 1891 + tcg_temp_free_i32(tmp); 1892 + } 1893 + rm = neon_load_reg(a->vm, 1); 1894 + 1895 + neon_store_reg64(rn0_64, a->vd); 1896 + 1897 + widenfn(rm_64, rm); 1898 + tcg_temp_free_i32(rm); 1899 + opfn(rn1_64, rn1_64, rm_64); 1900 + neon_store_reg64(rn1_64, a->vd + 1); 1901 + 1902 + tcg_temp_free_i64(rn0_64); 1903 + tcg_temp_free_i64(rn1_64); 1904 + tcg_temp_free_i64(rm_64); 1905 + 1906 + return true; 1907 + } 1908 + 1909 + #define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \ 1910 + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ 1911 + { \ 1912 + static NeonGenWidenFn * const widenfn[] = { \ 1913 + gen_helper_neon_widen_##S##8, \ 1914 + gen_helper_neon_widen_##S##16, \ 1915 + tcg_gen_##EXT##_i32_i64, \ 1916 + NULL, \ 1917 + }; \ 1918 + static NeonGenTwo64OpFn * const addfn[] = { \ 1919 + gen_helper_neon_##OP##l_u16, \ 1920 + gen_helper_neon_##OP##l_u32, \ 1921 + tcg_gen_##OP##_i64, \ 1922 + NULL, \ 1923 + }; \ 1924 + return do_prewiden_3d(s, a, widenfn[a->size], \ 1925 + addfn[a->size], SRC1WIDE); \ 1926 + } 1927 + 1928 + DO_PREWIDEN(VADDL_S, s, ext, add, false) 1929 + DO_PREWIDEN(VADDL_U, u, extu, add, false) 1930 + DO_PREWIDEN(VSUBL_S, s, ext, sub, false) 1931 + DO_PREWIDEN(VSUBL_U, u, extu, sub, false) 1932 + DO_PREWIDEN(VADDW_S, s, ext, add, true) 1933 + DO_PREWIDEN(VADDW_U, u, extu, add, true) 1934 + DO_PREWIDEN(VSUBW_S, s, ext, sub, true) 1935 + DO_PREWIDEN(VSUBW_U, u, extu, sub, true) 1936 + 1937 + static bool do_narrow_3d(DisasContext *s, arg_3diff *a, 1938 + NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn) 1939 + { 1940 + /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */ 1941 + TCGv_i64 rn_64, rm_64; 1942 + TCGv_i32 rd0, rd1; 1943 + 1944 + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1945 + return false; 1946 + } 1947 + 1948 + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 1949 + if (!dc_isar_feature(aa32_simd_r32, s) && 1950 + ((a->vd | a->vn | a->vm) & 0x10)) { 1951 + return false; 1952 + } 1953 + 1954 + if (!opfn || !narrowfn) { 1955 + /* size == 3 case, which is an entirely different insn group */ 1956 + return false; 1957 + } 1958 + 1959 + if ((a->vn | a->vm) & 1) { 1960 + return false; 1961 + } 1962 + 1963 + if (!vfp_access_check(s)) { 1964 + return true; 1965 + } 1966 + 1967 + rn_64 = tcg_temp_new_i64(); 1968 + rm_64 = tcg_temp_new_i64(); 1969 + rd0 = tcg_temp_new_i32(); 1970 + rd1 = tcg_temp_new_i32(); 1971 + 1972 + neon_load_reg64(rn_64, a->vn); 1973 + neon_load_reg64(rm_64, a->vm); 1974 + 1975 + opfn(rn_64, rn_64, rm_64); 1976 + 1977 + narrowfn(rd0, rn_64); 1978 + 1979 + neon_load_reg64(rn_64, a->vn + 1); 1980 + neon_load_reg64(rm_64, a->vm + 1); 1981 + 1982 + opfn(rn_64, rn_64, rm_64); 1983 + 1984 + narrowfn(rd1, rn_64); 1985 + 1986 + neon_store_reg(a->vd, 0, rd0); 1987 + neon_store_reg(a->vd, 1, rd1); 1988 + 1989 + tcg_temp_free_i64(rn_64); 1990 + tcg_temp_free_i64(rm_64); 1991 + 1992 + return true; 1993 + } 1994 + 1995 + #define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \ 1996 + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ 1997 + { \ 1998 + static NeonGenTwo64OpFn * const addfn[] = { \ 1999 + gen_helper_neon_##OP##l_u16, \ 2000 + gen_helper_neon_##OP##l_u32, \ 2001 + tcg_gen_##OP##_i64, \ 2002 + NULL, \ 2003 + }; \ 2004 + static NeonGenNarrowFn * const narrowfn[] = { \ 2005 + gen_helper_neon_##NARROWTYPE##_high_u8, \ 2006 + gen_helper_neon_##NARROWTYPE##_high_u16, \ 2007 + EXTOP, \ 2008 + NULL, \ 2009 + }; \ 2010 + return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \ 2011 + } 2012 + 2013 + static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn) 2014 + { 2015 + tcg_gen_addi_i64(rn, rn, 1u << 31); 2016 + tcg_gen_extrh_i64_i32(rd, rn); 2017 + } 2018 + 2019 + DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32) 2020 + DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32) 2021 + DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32) 2022 + DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32) 2023 + 2024 + static bool do_long_3d(DisasContext *s, arg_3diff *a, 2025 + NeonGenTwoOpWidenFn *opfn, 2026 + NeonGenTwo64OpFn *accfn) 2027 + { 2028 + /* 2029 + * 3-regs different lengths, long operations. 2030 + * These perform an operation on two inputs that returns a double-width 2031 + * result, and then possibly perform an accumulation operation of 2032 + * that result into the double-width destination. 2033 + */ 2034 + TCGv_i64 rd0, rd1, tmp; 2035 + TCGv_i32 rn, rm; 2036 + 2037 + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2038 + return false; 2039 + } 2040 + 2041 + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2042 + if (!dc_isar_feature(aa32_simd_r32, s) && 2043 + ((a->vd | a->vn | a->vm) & 0x10)) { 2044 + return false; 2045 + } 2046 + 2047 + if (!opfn) { 2048 + /* size == 3 case, which is an entirely different insn group */ 2049 + return false; 2050 + } 2051 + 2052 + if (a->vd & 1) { 2053 + return false; 2054 + } 2055 + 2056 + if (!vfp_access_check(s)) { 2057 + return true; 2058 + } 2059 + 2060 + rd0 = tcg_temp_new_i64(); 2061 + rd1 = tcg_temp_new_i64(); 2062 + 2063 + rn = neon_load_reg(a->vn, 0); 2064 + rm = neon_load_reg(a->vm, 0); 2065 + opfn(rd0, rn, rm); 2066 + tcg_temp_free_i32(rn); 2067 + tcg_temp_free_i32(rm); 2068 + 2069 + rn = neon_load_reg(a->vn, 1); 2070 + rm = neon_load_reg(a->vm, 1); 2071 + opfn(rd1, rn, rm); 2072 + tcg_temp_free_i32(rn); 2073 + tcg_temp_free_i32(rm); 2074 + 2075 + /* Don't store results until after all loads: they might overlap */ 2076 + if (accfn) { 2077 + tmp = tcg_temp_new_i64(); 2078 + neon_load_reg64(tmp, a->vd); 2079 + accfn(tmp, tmp, rd0); 2080 + neon_store_reg64(tmp, a->vd); 2081 + neon_load_reg64(tmp, a->vd + 1); 2082 + accfn(tmp, tmp, rd1); 2083 + neon_store_reg64(tmp, a->vd + 1); 2084 + tcg_temp_free_i64(tmp); 2085 + } else { 2086 + neon_store_reg64(rd0, a->vd); 2087 + neon_store_reg64(rd1, a->vd + 1); 2088 + } 2089 + 2090 + tcg_temp_free_i64(rd0); 2091 + tcg_temp_free_i64(rd1); 2092 + 2093 + return true; 2094 + } 2095 + 2096 + static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a) 2097 + { 2098 + static NeonGenTwoOpWidenFn * const opfn[] = { 2099 + gen_helper_neon_abdl_s16, 2100 + gen_helper_neon_abdl_s32, 2101 + gen_helper_neon_abdl_s64, 2102 + NULL, 2103 + }; 2104 + 2105 + return do_long_3d(s, a, opfn[a->size], NULL); 2106 + } 2107 + 2108 + static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a) 2109 + { 2110 + static NeonGenTwoOpWidenFn * const opfn[] = { 2111 + gen_helper_neon_abdl_u16, 2112 + gen_helper_neon_abdl_u32, 2113 + gen_helper_neon_abdl_u64, 2114 + NULL, 2115 + }; 2116 + 2117 + return do_long_3d(s, a, opfn[a->size], NULL); 2118 + } 2119 + 2120 + static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a) 2121 + { 2122 + static NeonGenTwoOpWidenFn * const opfn[] = { 2123 + gen_helper_neon_abdl_s16, 2124 + gen_helper_neon_abdl_s32, 2125 + gen_helper_neon_abdl_s64, 2126 + NULL, 2127 + }; 2128 + static NeonGenTwo64OpFn * const addfn[] = { 2129 + gen_helper_neon_addl_u16, 2130 + gen_helper_neon_addl_u32, 2131 + tcg_gen_add_i64, 2132 + NULL, 2133 + }; 2134 + 2135 + return do_long_3d(s, a, opfn[a->size], addfn[a->size]); 2136 + } 2137 + 2138 + static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a) 2139 + { 2140 + static NeonGenTwoOpWidenFn * const opfn[] = { 2141 + gen_helper_neon_abdl_u16, 2142 + gen_helper_neon_abdl_u32, 2143 + gen_helper_neon_abdl_u64, 2144 + NULL, 2145 + }; 2146 + static NeonGenTwo64OpFn * const addfn[] = { 2147 + gen_helper_neon_addl_u16, 2148 + gen_helper_neon_addl_u32, 2149 + tcg_gen_add_i64, 2150 + NULL, 2151 + }; 2152 + 2153 + return do_long_3d(s, a, opfn[a->size], addfn[a->size]); 2154 + } 2155 + 2156 + static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2157 + { 2158 + TCGv_i32 lo = tcg_temp_new_i32(); 2159 + TCGv_i32 hi = tcg_temp_new_i32(); 2160 + 2161 + tcg_gen_muls2_i32(lo, hi, rn, rm); 2162 + tcg_gen_concat_i32_i64(rd, lo, hi); 2163 + 2164 + tcg_temp_free_i32(lo); 2165 + tcg_temp_free_i32(hi); 2166 + } 2167 + 2168 + static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2169 + { 2170 + TCGv_i32 lo = tcg_temp_new_i32(); 2171 + TCGv_i32 hi = tcg_temp_new_i32(); 2172 + 2173 + 
tcg_gen_mulu2_i32(lo, hi, rn, rm); 2174 + tcg_gen_concat_i32_i64(rd, lo, hi); 2175 + 2176 + tcg_temp_free_i32(lo); 2177 + tcg_temp_free_i32(hi); 2178 + } 2179 + 2180 + static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a) 2181 + { 2182 + static NeonGenTwoOpWidenFn * const opfn[] = { 2183 + gen_helper_neon_mull_s8, 2184 + gen_helper_neon_mull_s16, 2185 + gen_mull_s32, 2186 + NULL, 2187 + }; 2188 + 2189 + return do_long_3d(s, a, opfn[a->size], NULL); 2190 + } 2191 + 2192 + static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a) 2193 + { 2194 + static NeonGenTwoOpWidenFn * const opfn[] = { 2195 + gen_helper_neon_mull_u8, 2196 + gen_helper_neon_mull_u16, 2197 + gen_mull_u32, 2198 + NULL, 2199 + }; 2200 + 2201 + return do_long_3d(s, a, opfn[a->size], NULL); 2202 + } 2203 + 2204 + #define DO_VMLAL(INSN,MULL,ACC) \ 2205 + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ 2206 + { \ 2207 + static NeonGenTwoOpWidenFn * const opfn[] = { \ 2208 + gen_helper_neon_##MULL##8, \ 2209 + gen_helper_neon_##MULL##16, \ 2210 + gen_##MULL##32, \ 2211 + NULL, \ 2212 + }; \ 2213 + static NeonGenTwo64OpFn * const accfn[] = { \ 2214 + gen_helper_neon_##ACC##l_u16, \ 2215 + gen_helper_neon_##ACC##l_u32, \ 2216 + tcg_gen_##ACC##_i64, \ 2217 + NULL, \ 2218 + }; \ 2219 + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \ 2220 + } 2221 + 2222 + DO_VMLAL(VMLAL_S,mull_s,add) 2223 + DO_VMLAL(VMLAL_U,mull_u,add) 2224 + DO_VMLAL(VMLSL_S,mull_s,sub) 2225 + DO_VMLAL(VMLSL_U,mull_u,sub) 2226 + 2227 + static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2228 + { 2229 + gen_helper_neon_mull_s16(rd, rn, rm); 2230 + gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd); 2231 + } 2232 + 2233 + static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2234 + { 2235 + gen_mull_s32(rd, rn, rm); 2236 + gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd); 2237 + } 2238 + 2239 + static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a) 2240 + { 2241 + static NeonGenTwoOpWidenFn * const opfn[] = { 2242 + NULL, 2243 + gen_VQDMULL_16, 2244 + gen_VQDMULL_32, 2245 + NULL, 2246 + }; 2247 + 2248 + return do_long_3d(s, a, opfn[a->size], NULL); 2249 + } 2250 + 2251 + static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2252 + { 2253 + gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); 2254 + } 2255 + 2256 + static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2257 + { 2258 + gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); 2259 + } 2260 + 2261 + static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a) 2262 + { 2263 + static NeonGenTwoOpWidenFn * const opfn[] = { 2264 + NULL, 2265 + gen_VQDMULL_16, 2266 + gen_VQDMULL_32, 2267 + NULL, 2268 + }; 2269 + static NeonGenTwo64OpFn * const accfn[] = { 2270 + NULL, 2271 + gen_VQDMLAL_acc_16, 2272 + gen_VQDMLAL_acc_32, 2273 + NULL, 2274 + }; 2275 + 2276 + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); 2277 + } 2278 + 2279 + static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2280 + { 2281 + gen_helper_neon_negl_u32(rm, rm); 2282 + gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); 2283 + } 2284 + 2285 + static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2286 + { 2287 + tcg_gen_neg_i64(rm, rm); 2288 + gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); 2289 + } 2290 + 2291 + static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a) 2292 + { 2293 + static NeonGenTwoOpWidenFn * const opfn[] = { 2294 + NULL, 2295 + gen_VQDMULL_16, 2296 + 
gen_VQDMULL_32, 2297 + NULL, 2298 + }; 2299 + static NeonGenTwo64OpFn * const accfn[] = { 2300 + NULL, 2301 + gen_VQDMLSL_acc_16, 2302 + gen_VQDMLSL_acc_32, 2303 + NULL, 2304 + }; 2305 + 2306 + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); 2307 + } 2308 + 2309 + static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a) 2310 + { 2311 + gen_helper_gvec_3 *fn_gvec; 2312 + 2313 + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2314 + return false; 2315 + } 2316 + 2317 + /* UNDEF accesses to D16-D31 if they don't exist. */ 2318 + if (!dc_isar_feature(aa32_simd_r32, s) && 2319 + ((a->vd | a->vn | a->vm) & 0x10)) { 2320 + return false; 2321 + } 2322 + 2323 + if (a->vd & 1) { 2324 + return false; 2325 + } 2326 + 2327 + switch (a->size) { 2328 + case 0: 2329 + fn_gvec = gen_helper_neon_pmull_h; 2330 + break; 2331 + case 2: 2332 + if (!dc_isar_feature(aa32_pmull, s)) { 2333 + return false; 2334 + } 2335 + fn_gvec = gen_helper_gvec_pmull_q; 2336 + break; 2337 + default: 2338 + return false; 2339 + } 2340 + 2341 + if (!vfp_access_check(s)) { 2342 + return true; 2343 + } 2344 + 2345 + tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0), 2346 + neon_reg_offset(a->vn, 0), 2347 + neon_reg_offset(a->vm, 0), 2348 + 16, 16, 0, fn_gvec); 2349 + return true; 2350 + } 2351 + 2352 + static void gen_neon_dup_low16(TCGv_i32 var) 2353 + { 2354 + TCGv_i32 tmp = tcg_temp_new_i32(); 2355 + tcg_gen_ext16u_i32(var, var); 2356 + tcg_gen_shli_i32(tmp, var, 16); 2357 + tcg_gen_or_i32(var, var, tmp); 2358 + tcg_temp_free_i32(tmp); 2359 + } 2360 + 2361 + static void gen_neon_dup_high16(TCGv_i32 var) 2362 + { 2363 + TCGv_i32 tmp = tcg_temp_new_i32(); 2364 + tcg_gen_andi_i32(var, var, 0xffff0000); 2365 + tcg_gen_shri_i32(tmp, var, 16); 2366 + tcg_gen_or_i32(var, var, tmp); 2367 + tcg_temp_free_i32(tmp); 2368 + } 2369 + 2370 + static inline TCGv_i32 neon_get_scalar(int size, int reg) 2371 + { 2372 + TCGv_i32 tmp; 2373 + if (size == 1) { 2374 + tmp = neon_load_reg(reg & 7, reg >> 4); 2375 + if (reg & 8) { 2376 + gen_neon_dup_high16(tmp); 2377 + } else { 2378 + gen_neon_dup_low16(tmp); 2379 + } 2380 + } else { 2381 + tmp = neon_load_reg(reg & 15, reg >> 4); 2382 + } 2383 + return tmp; 2384 + } 2385 + 2386 + static bool do_2scalar(DisasContext *s, arg_2scalar *a, 2387 + NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn) 2388 + { 2389 + /* 2390 + * Two registers and a scalar: perform an operation between 2391 + * the input elements and the scalar, and then possibly 2392 + * perform an accumulation operation of that result into the 2393 + * destination. 2394 + */ 2395 + TCGv_i32 scalar; 2396 + int pass; 2397 + 2398 + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2399 + return false; 2400 + } 2401 + 2402 + /* UNDEF accesses to D16-D31 if they don't exist. */ 2403 + if (!dc_isar_feature(aa32_simd_r32, s) && 2404 + ((a->vd | a->vn | a->vm) & 0x10)) { 2405 + return false; 2406 + } 2407 + 2408 + if (!opfn) { 2409 + /* Bad size (including size == 3, which is a different insn group) */ 2410 + return false; 2411 + } 2412 + 2413 + if (a->q && ((a->vd | a->vn) & 1)) { 2414 + return false; 2415 + } 2416 + 2417 + if (!vfp_access_check(s)) { 2418 + return true; 2419 + } 2420 + 2421 + scalar = neon_get_scalar(a->size, a->vm); 2422 + 2423 + for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { 2424 + TCGv_i32 tmp = neon_load_reg(a->vn, pass); 2425 + opfn(tmp, tmp, scalar); 2426 + if (accfn) { 2427 + TCGv_i32 rd = neon_load_reg(a->vd, pass); 2428 + accfn(tmp, rd, tmp); 2429 + tcg_temp_free_i32(rd); 2430 + } 2431 + neon_store_reg(a->vd, pass, tmp); 2432 + } 2433 + tcg_temp_free_i32(scalar); 2434 + return true; 2435 + } 2436 + 2437 + static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a) 2438 + { 2439 + static NeonGenTwoOpFn * const opfn[] = { 2440 + NULL, 2441 + gen_helper_neon_mul_u16, 2442 + tcg_gen_mul_i32, 2443 + NULL, 2444 + }; 2445 + 2446 + return do_2scalar(s, a, opfn[a->size], NULL); 2447 + } 2448 + 2449 + static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a) 2450 + { 2451 + static NeonGenTwoOpFn * const opfn[] = { 2452 + NULL, 2453 + gen_helper_neon_mul_u16, 2454 + tcg_gen_mul_i32, 2455 + NULL, 2456 + }; 2457 + static NeonGenTwoOpFn * const accfn[] = { 2458 + NULL, 2459 + gen_helper_neon_add_u16, 2460 + tcg_gen_add_i32, 2461 + NULL, 2462 + }; 2463 + 2464 + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2465 + } 2466 + 2467 + static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a) 2468 + { 2469 + static NeonGenTwoOpFn * const opfn[] = { 2470 + NULL, 2471 + gen_helper_neon_mul_u16, 2472 + tcg_gen_mul_i32, 2473 + NULL, 2474 + }; 2475 + static NeonGenTwoOpFn * const accfn[] = { 2476 + NULL, 2477 + gen_helper_neon_sub_u16, 2478 + tcg_gen_sub_i32, 2479 + NULL, 2480 + }; 2481 + 2482 + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2483 + } 2484 + 2485 + /* 2486 + * Rather than have a float-specific version of do_2scalar just for 2487 + * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into 2488 + * a NeonGenTwoOpFn. 2489 + */ 2490 + #define WRAP_FP_FN(WRAPNAME, FUNC) \ 2491 + static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \ 2492 + { \ 2493 + TCGv_ptr fpstatus = get_fpstatus_ptr(1); \ 2494 + FUNC(rd, rn, rm, fpstatus); \ 2495 + tcg_temp_free_ptr(fpstatus); \ 2496 + } 2497 + 2498 + WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls) 2499 + WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds) 2500 + WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs) 2501 + 2502 + static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a) 2503 + { 2504 + static NeonGenTwoOpFn * const opfn[] = { 2505 + NULL, 2506 + NULL, /* TODO: fp16 support */ 2507 + gen_VMUL_F_mul, 2508 + NULL, 2509 + }; 2510 + 2511 + return do_2scalar(s, a, opfn[a->size], NULL); 2512 + } 2513 + 2514 + static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a) 2515 + { 2516 + static NeonGenTwoOpFn * const opfn[] = { 2517 + NULL, 2518 + NULL, /* TODO: fp16 support */ 2519 + gen_VMUL_F_mul, 2520 + NULL, 2521 + }; 2522 + static NeonGenTwoOpFn * const accfn[] = { 2523 + NULL, 2524 + NULL, /* TODO: fp16 support */ 2525 + gen_VMUL_F_add, 2526 + NULL, 2527 + }; 2528 + 2529 + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2530 + } 2531 + 2532 + static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a) 2533 + { 2534 + static NeonGenTwoOpFn * const opfn[] = { 2535 + NULL, 2536 + NULL, /* TODO: fp16 support */ 2537 + gen_VMUL_F_mul, 2538 + NULL, 2539 + }; 2540 + static NeonGenTwoOpFn * const accfn[] = { 2541 + NULL, 2542 + NULL, /* TODO: fp16 support */ 2543 + gen_VMUL_F_sub, 2544 + NULL, 2545 + }; 2546 + 2547 + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2548 + } 2549 + 2550 + WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16) 2551 + WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32) 2552 + WRAP_ENV_FN(gen_VQRDMULH_16, 
gen_helper_neon_qrdmulh_s16) 2553 + WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32) 2554 + 2555 + static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a) 2556 + { 2557 + static NeonGenTwoOpFn * const opfn[] = { 2558 + NULL, 2559 + gen_VQDMULH_16, 2560 + gen_VQDMULH_32, 2561 + NULL, 2562 + }; 2563 + 2564 + return do_2scalar(s, a, opfn[a->size], NULL); 2565 + } 2566 + 2567 + static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a) 2568 + { 2569 + static NeonGenTwoOpFn * const opfn[] = { 2570 + NULL, 2571 + gen_VQRDMULH_16, 2572 + gen_VQRDMULH_32, 2573 + NULL, 2574 + }; 2575 + 2576 + return do_2scalar(s, a, opfn[a->size], NULL); 2577 + } 2578 + 2579 + static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a, 2580 + NeonGenThreeOpEnvFn *opfn) 2581 + { 2582 + /* 2583 + * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn 2584 + * performs a kind of fused op-then-accumulate using a helper 2585 + * function that takes all of rd, rn and the scalar at once. 2586 + */ 2587 + TCGv_i32 scalar; 2588 + int pass; 2589 + 2590 + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2591 + return false; 2592 + } 2593 + 2594 + if (!dc_isar_feature(aa32_rdm, s)) { 2595 + return false; 2596 + } 2597 + 2598 + /* UNDEF accesses to D16-D31 if they don't exist. */ 2599 + if (!dc_isar_feature(aa32_simd_r32, s) && 2600 + ((a->vd | a->vn | a->vm) & 0x10)) { 2601 + return false; 2602 + } 2603 + 2604 + if (!opfn) { 2605 + /* Bad size (including size == 3, which is a different insn group) */ 2606 + return false; 2607 + } 2608 + 2609 + if (a->q && ((a->vd | a->vn) & 1)) { 2610 + return false; 2611 + } 2612 + 2613 + if (!vfp_access_check(s)) { 2614 + return true; 2615 + } 2616 + 2617 + scalar = neon_get_scalar(a->size, a->vm); 2618 + 2619 + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 2620 + TCGv_i32 rn = neon_load_reg(a->vn, pass); 2621 + TCGv_i32 rd = neon_load_reg(a->vd, pass); 2622 + opfn(rd, cpu_env, rn, scalar, rd); 2623 + tcg_temp_free_i32(rn); 2624 + neon_store_reg(a->vd, pass, rd); 2625 + } 2626 + tcg_temp_free_i32(scalar); 2627 + 2628 + return true; 2629 + } 2630 + 2631 + static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a) 2632 + { 2633 + static NeonGenThreeOpEnvFn *opfn[] = { 2634 + NULL, 2635 + gen_helper_neon_qrdmlah_s16, 2636 + gen_helper_neon_qrdmlah_s32, 2637 + NULL, 2638 + }; 2639 + return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2640 + } 2641 + 2642 + static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a) 2643 + { 2644 + static NeonGenThreeOpEnvFn *opfn[] = { 2645 + NULL, 2646 + gen_helper_neon_qrdmlsh_s16, 2647 + gen_helper_neon_qrdmlsh_s32, 2648 + NULL, 2649 + }; 2650 + return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2651 + } 2652 + 2653 + static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, 2654 + NeonGenTwoOpWidenFn *opfn, 2655 + NeonGenTwo64OpFn *accfn) 2656 + { 2657 + /* 2658 + * Two registers and a scalar, long operations: perform an 2659 + * operation on the input elements and the scalar which produces 2660 + * a double-width result, and then possibly perform an accumulation 2661 + * operation of that result into the destination. 2662 + */ 2663 + TCGv_i32 scalar, rn; 2664 + TCGv_i64 rn0_64, rn1_64; 2665 + 2666 + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2667 + return false; 2668 + } 2669 + 2670 + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2671 + if (!dc_isar_feature(aa32_simd_r32, s) && 2672 + ((a->vd | a->vn | a->vm) & 0x10)) { 2673 + return false; 2674 + } 2675 + 2676 + if (!opfn) { 2677 + /* Bad size (including size == 3, which is a different insn group) */ 2678 + return false; 2679 + } 2680 + 2681 + if (a->vd & 1) { 2682 + return false; 2683 + } 2684 + 2685 + if (!vfp_access_check(s)) { 2686 + return true; 2687 + } 2688 + 2689 + scalar = neon_get_scalar(a->size, a->vm); 2690 + 2691 + /* Load all inputs before writing any outputs, in case of overlap */ 2692 + rn = neon_load_reg(a->vn, 0); 2693 + rn0_64 = tcg_temp_new_i64(); 2694 + opfn(rn0_64, rn, scalar); 2695 + tcg_temp_free_i32(rn); 2696 + 2697 + rn = neon_load_reg(a->vn, 1); 2698 + rn1_64 = tcg_temp_new_i64(); 2699 + opfn(rn1_64, rn, scalar); 2700 + tcg_temp_free_i32(rn); 2701 + tcg_temp_free_i32(scalar); 2702 + 2703 + if (accfn) { 2704 + TCGv_i64 t64 = tcg_temp_new_i64(); 2705 + neon_load_reg64(t64, a->vd); 2706 + accfn(t64, t64, rn0_64); 2707 + neon_store_reg64(t64, a->vd); 2708 + neon_load_reg64(t64, a->vd + 1); 2709 + accfn(t64, t64, rn1_64); 2710 + neon_store_reg64(t64, a->vd + 1); 2711 + tcg_temp_free_i64(t64); 2712 + } else { 2713 + neon_store_reg64(rn0_64, a->vd); 2714 + neon_store_reg64(rn1_64, a->vd + 1); 2715 + } 2716 + tcg_temp_free_i64(rn0_64); 2717 + tcg_temp_free_i64(rn1_64); 2718 + return true; 2719 + } 2720 + 2721 + static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a) 2722 + { 2723 + static NeonGenTwoOpWidenFn * const opfn[] = { 2724 + NULL, 2725 + gen_helper_neon_mull_s16, 2726 + gen_mull_s32, 2727 + NULL, 2728 + }; 2729 + 2730 + return do_2scalar_long(s, a, opfn[a->size], NULL); 2731 + } 2732 + 2733 + static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a) 2734 + { 2735 + static NeonGenTwoOpWidenFn * const opfn[] = { 2736 + NULL, 2737 + gen_helper_neon_mull_u16, 2738 + gen_mull_u32, 2739 + NULL, 2740 + }; 2741 + 2742 + return do_2scalar_long(s, a, opfn[a->size], NULL); 2743 + } 2744 + 2745 + #define DO_VMLAL_2SC(INSN, MULL, ACC) \ 2746 + static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \ 2747 + { \ 2748 + static NeonGenTwoOpWidenFn * const opfn[] = { \ 2749 + NULL, \ 2750 + gen_helper_neon_##MULL##16, \ 2751 + gen_##MULL##32, \ 2752 + NULL, \ 2753 + }; \ 2754 + static NeonGenTwo64OpFn * const accfn[] = { \ 2755 + NULL, \ 2756 + gen_helper_neon_##ACC##l_u32, \ 2757 + tcg_gen_##ACC##_i64, \ 2758 + NULL, \ 2759 + }; \ 2760 + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \ 2761 + } 2762 + 2763 + DO_VMLAL_2SC(VMLAL_S, mull_s, add) 2764 + DO_VMLAL_2SC(VMLAL_U, mull_u, add) 2765 + DO_VMLAL_2SC(VMLSL_S, mull_s, sub) 2766 + DO_VMLAL_2SC(VMLSL_U, mull_u, sub) 2767 + 2768 + static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a) 2769 + { 2770 + static NeonGenTwoOpWidenFn * const opfn[] = { 2771 + NULL, 2772 + gen_VQDMULL_16, 2773 + gen_VQDMULL_32, 2774 + NULL, 2775 + }; 2776 + 2777 + return do_2scalar_long(s, a, opfn[a->size], NULL); 2778 + } 2779 + 2780 + static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a) 2781 + { 2782 + static NeonGenTwoOpWidenFn * const opfn[] = { 2783 + NULL, 2784 + gen_VQDMULL_16, 2785 + gen_VQDMULL_32, 2786 + NULL, 2787 + }; 2788 + static NeonGenTwo64OpFn * const accfn[] = { 2789 + NULL, 2790 + gen_VQDMLAL_acc_16, 2791 + gen_VQDMLAL_acc_32, 2792 + NULL, 2793 + }; 2794 + 2795 + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); 2796 + } 2797 + 2798 + static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a) 2799 + { 2800 + static NeonGenTwoOpWidenFn * 
const opfn[] = { 2801 + NULL, 2802 + gen_VQDMULL_16, 2803 + gen_VQDMULL_32, 2804 + NULL, 2805 + }; 2806 + static NeonGenTwo64OpFn * const accfn[] = { 2807 + NULL, 2808 + gen_VQDMLSL_acc_16, 2809 + gen_VQDMLSL_acc_32, 2810 + NULL, 2811 + }; 2812 + 2813 + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); 2814 + } 2815 + 2816 + static bool trans_VEXT(DisasContext *s, arg_VEXT *a) 2817 + { 2818 + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2819 + return false; 2820 + } 2821 + 2822 + /* UNDEF accesses to D16-D31 if they don't exist. */ 2823 + if (!dc_isar_feature(aa32_simd_r32, s) && 2824 + ((a->vd | a->vn | a->vm) & 0x10)) { 2825 + return false; 2826 + } 2827 + 2828 + if ((a->vn | a->vm | a->vd) & a->q) { 2829 + return false; 2830 + } 2831 + 2832 + if (a->imm > 7 && !a->q) { 2833 + return false; 2834 + } 2835 + 2836 + if (!vfp_access_check(s)) { 2837 + return true; 2838 + } 2839 + 2840 + if (!a->q) { 2841 + /* Extract 64 bits from <Vm:Vn> */ 2842 + TCGv_i64 left, right, dest; 2843 + 2844 + left = tcg_temp_new_i64(); 2845 + right = tcg_temp_new_i64(); 2846 + dest = tcg_temp_new_i64(); 2847 + 2848 + neon_load_reg64(right, a->vn); 2849 + neon_load_reg64(left, a->vm); 2850 + tcg_gen_extract2_i64(dest, right, left, a->imm * 8); 2851 + neon_store_reg64(dest, a->vd); 2852 + 2853 + tcg_temp_free_i64(left); 2854 + tcg_temp_free_i64(right); 2855 + tcg_temp_free_i64(dest); 2856 + } else { 2857 + /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */ 2858 + TCGv_i64 left, middle, right, destleft, destright; 2859 + 2860 + left = tcg_temp_new_i64(); 2861 + middle = tcg_temp_new_i64(); 2862 + right = tcg_temp_new_i64(); 2863 + destleft = tcg_temp_new_i64(); 2864 + destright = tcg_temp_new_i64(); 2865 + 2866 + if (a->imm < 8) { 2867 + neon_load_reg64(right, a->vn); 2868 + neon_load_reg64(middle, a->vn + 1); 2869 + tcg_gen_extract2_i64(destright, right, middle, a->imm * 8); 2870 + neon_load_reg64(left, a->vm); 2871 + tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8); 2872 + } else { 2873 + neon_load_reg64(right, a->vn + 1); 2874 + neon_load_reg64(middle, a->vm); 2875 + tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8); 2876 + neon_load_reg64(left, a->vm + 1); 2877 + tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8); 2878 + } 2879 + 2880 + neon_store_reg64(destright, a->vd); 2881 + neon_store_reg64(destleft, a->vd + 1); 2882 + 2883 + tcg_temp_free_i64(destright); 2884 + tcg_temp_free_i64(destleft); 2885 + tcg_temp_free_i64(right); 2886 + tcg_temp_free_i64(middle); 2887 + tcg_temp_free_i64(left); 2888 + } 2889 + return true; 2890 + } 2891 + 2892 + static bool trans_VTBL(DisasContext *s, arg_VTBL *a) 2893 + { 2894 + int n; 2895 + TCGv_i32 tmp, tmp2, tmp3, tmp4; 2896 + TCGv_ptr ptr1; 2897 + 2898 + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2899 + return false; 2900 + } 2901 + 2902 + /* UNDEF accesses to D16-D31 if they don't exist. */ 2903 + if (!dc_isar_feature(aa32_simd_r32, s) && 2904 + ((a->vd | a->vn | a->vm) & 0x10)) { 2905 + return false; 2906 + } 2907 + 2908 + if (!vfp_access_check(s)) { 2909 + return true; 2910 + } 2911 + 2912 + n = a->len + 1; 2913 + if ((a->vn + n) > 32) { 2914 + /* 2915 + * This is UNPREDICTABLE; we choose to UNDEF to avoid the 2916 + * helper function running off the end of the register file. 
2917 + */ 2918 + return false; 2919 + } 2920 + n <<= 3; 2921 + if (a->op) { 2922 + tmp = neon_load_reg(a->vd, 0); 2923 + } else { 2924 + tmp = tcg_temp_new_i32(); 2925 + tcg_gen_movi_i32(tmp, 0); 2926 + } 2927 + tmp2 = neon_load_reg(a->vm, 0); 2928 + ptr1 = vfp_reg_ptr(true, a->vn); 2929 + tmp4 = tcg_const_i32(n); 2930 + gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4); 2931 + tcg_temp_free_i32(tmp); 2932 + if (a->op) { 2933 + tmp = neon_load_reg(a->vd, 1); 2934 + } else { 2935 + tmp = tcg_temp_new_i32(); 2936 + tcg_gen_movi_i32(tmp, 0); 2937 + } 2938 + tmp3 = neon_load_reg(a->vm, 1); 2939 + gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4); 2940 + tcg_temp_free_i32(tmp4); 2941 + tcg_temp_free_ptr(ptr1); 2942 + neon_store_reg(a->vd, 0, tmp2); 2943 + neon_store_reg(a->vd, 1, tmp3); 2944 + tcg_temp_free_i32(tmp); 2945 + return true; 2946 + } 2947 + 2948 + static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) 2949 + { 2950 + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2951 + return false; 2952 + } 2953 + 2954 + /* UNDEF accesses to D16-D31 if they don't exist. */ 2955 + if (!dc_isar_feature(aa32_simd_r32, s) && 2956 + ((a->vd | a->vm) & 0x10)) { 2957 + return false; 2958 + } 2959 + 2960 + if (a->vd & a->q) { 2961 + return false; 2962 + } 2963 + 2964 + if (!vfp_access_check(s)) { 2965 + return true; 2966 + } 2967 + 2968 + tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0), 2969 + neon_element_offset(a->vm, a->index, a->size), 2970 + a->q ? 16 : 8, a->q ? 16 : 8); 2971 + return true; 2972 + }
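Note on trans_VEXT() above: for the non-quad case it uses tcg_gen_extract2_i64() to pull the destination doubleword straight out of the concatenation <Vm:Vn>, replacing the shift-and-OR sequence the old decoder used. A minimal host-side model of that byte extraction (plain C for illustration only, not QEMU/TCG code; the function and variable names are made up):

#include <stdint.h>
#include <stdio.h>

/* Model of the 64-bit VEXT case: take the 64 bits starting at byte
 * 'imm' of the 128-bit value {vm:vn}, where vn holds bytes 0..7. */
static uint64_t vext_d(uint64_t vn, uint64_t vm, unsigned imm)
{
    unsigned shift = imm * 8;               /* imm is 0..7 here */
    return shift ? (vn >> shift) | (vm << (64 - shift)) : vn;
}

int main(void)
{
    uint64_t vn = 0x0706050403020100ull;    /* bytes 0..7  */
    uint64_t vm = 0x0f0e0d0c0b0a0908ull;    /* bytes 8..15 */
    /* VEXT #3 starts at byte 3: expect 0a09080706050403 */
    printf("%016llx\n", (unsigned long long)vext_d(vn, vm, 3));
    return 0;
}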
+10 -674
target/arm/translate.c
··· 377 377 tcg_gen_ext16s_i32(dest, var); 378 378 } 379 379 380 - /* 32x32->64 multiply. Marks inputs as dead. */ 381 - static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b) 382 - { 383 - TCGv_i32 lo = tcg_temp_new_i32(); 384 - TCGv_i32 hi = tcg_temp_new_i32(); 385 - TCGv_i64 ret; 386 - 387 - tcg_gen_mulu2_i32(lo, hi, a, b); 388 - tcg_temp_free_i32(a); 389 - tcg_temp_free_i32(b); 390 - 391 - ret = tcg_temp_new_i64(); 392 - tcg_gen_concat_i32_i64(ret, lo, hi); 393 - tcg_temp_free_i32(lo); 394 - tcg_temp_free_i32(hi); 395 - 396 - return ret; 397 - } 398 - 399 - static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b) 400 - { 401 - TCGv_i32 lo = tcg_temp_new_i32(); 402 - TCGv_i32 hi = tcg_temp_new_i32(); 403 - TCGv_i64 ret; 404 - 405 - tcg_gen_muls2_i32(lo, hi, a, b); 406 - tcg_temp_free_i32(a); 407 - tcg_temp_free_i32(b); 408 - 409 - ret = tcg_temp_new_i64(); 410 - tcg_gen_concat_i32_i64(ret, lo, hi); 411 - tcg_temp_free_i32(lo); 412 - tcg_temp_free_i32(hi); 413 - 414 - return ret; 415 - } 416 - 417 380 /* Swap low and high halfwords. */ 418 381 static void gen_swap_half(TCGv_i32 var) 419 382 { ··· 2624 2587 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7) 2625 2588 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5) 2626 2589 2627 - static void gen_neon_dup_low16(TCGv_i32 var) 2628 - { 2629 - TCGv_i32 tmp = tcg_temp_new_i32(); 2630 - tcg_gen_ext16u_i32(var, var); 2631 - tcg_gen_shli_i32(tmp, var, 16); 2632 - tcg_gen_or_i32(var, var, tmp); 2633 - tcg_temp_free_i32(tmp); 2634 - } 2635 - 2636 - static void gen_neon_dup_high16(TCGv_i32 var) 2637 - { 2638 - TCGv_i32 tmp = tcg_temp_new_i32(); 2639 - tcg_gen_andi_i32(var, var, 0xffff0000); 2640 - tcg_gen_shri_i32(tmp, var, 16); 2641 - tcg_gen_or_i32(var, var, tmp); 2642 - tcg_temp_free_i32(tmp); 2643 - } 2644 - 2645 2590 static inline bool use_goto_tb(DisasContext *s, target_ulong dest) 2646 2591 { 2647 2592 #ifndef CONFIG_USER_ONLY ··· 2991 2936 2992 2937 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1 2993 2938 2994 - static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1) 2995 - { 2996 - switch (size) { 2997 - case 0: gen_helper_neon_add_u8(t0, t0, t1); break; 2998 - case 1: gen_helper_neon_add_u16(t0, t0, t1); break; 2999 - case 2: tcg_gen_add_i32(t0, t0, t1); break; 3000 - default: abort(); 3001 - } 3002 - } 3003 - 3004 - static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1) 3005 - { 3006 - switch (size) { 3007 - case 0: gen_helper_neon_sub_u8(t0, t1, t0); break; 3008 - case 1: gen_helper_neon_sub_u16(t0, t1, t0); break; 3009 - case 2: tcg_gen_sub_i32(t0, t1, t0); break; 3010 - default: return; 3011 - } 3012 - } 3013 - 3014 - static TCGv_i32 neon_load_scratch(int scratch) 3015 - { 3016 - TCGv_i32 tmp = tcg_temp_new_i32(); 3017 - tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); 3018 - return tmp; 3019 - } 3020 - 3021 - static void neon_store_scratch(int scratch, TCGv_i32 var) 3022 - { 3023 - tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); 3024 - tcg_temp_free_i32(var); 3025 - } 3026 - 3027 - static inline TCGv_i32 neon_get_scalar(int size, int reg) 3028 - { 3029 - TCGv_i32 tmp; 3030 - if (size == 1) { 3031 - tmp = neon_load_reg(reg & 7, reg >> 4); 3032 - if (reg & 8) { 3033 - gen_neon_dup_high16(tmp); 3034 - } else { 3035 - gen_neon_dup_low16(tmp); 3036 - } 3037 - } else { 3038 - tmp = neon_load_reg(reg & 15, reg >> 4); 3039 - } 3040 - return tmp; 3041 - } 3042 - 3043 2939 static int gen_neon_unzip(int rd, int rm, int size, int q) 3044 2940 { 3045 2941 
TCGv_ptr pd, pm; ··· 3228 3124 case 1: gen_helper_neon_addl_u32(CPU_V001); break; 3229 3125 case 2: tcg_gen_add_i64(CPU_V001); break; 3230 3126 default: abort(); 3231 - } 3232 - } 3233 - 3234 - static inline void gen_neon_subl(int size) 3235 - { 3236 - switch (size) { 3237 - case 0: gen_helper_neon_subl_u16(CPU_V001); break; 3238 - case 1: gen_helper_neon_subl_u32(CPU_V001); break; 3239 - case 2: tcg_gen_sub_i64(CPU_V001); break; 3240 - default: abort(); 3241 - } 3242 - } 3243 - 3244 - static inline void gen_neon_negl(TCGv_i64 var, int size) 3245 - { 3246 - switch (size) { 3247 - case 0: gen_helper_neon_negl_u16(var, var); break; 3248 - case 1: gen_helper_neon_negl_u32(var, var); break; 3249 - case 2: 3250 - tcg_gen_neg_i64(var, var); 3251 - break; 3252 - default: abort(); 3253 - } 3254 - } 3255 - 3256 - static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size) 3257 - { 3258 - switch (size) { 3259 - case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break; 3260 - case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break; 3261 - default: abort(); 3262 - } 3263 - } 3264 - 3265 - static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b, 3266 - int size, int u) 3267 - { 3268 - TCGv_i64 tmp; 3269 - 3270 - switch ((size << 1) | u) { 3271 - case 0: gen_helper_neon_mull_s8(dest, a, b); break; 3272 - case 1: gen_helper_neon_mull_u8(dest, a, b); break; 3273 - case 2: gen_helper_neon_mull_s16(dest, a, b); break; 3274 - case 3: gen_helper_neon_mull_u16(dest, a, b); break; 3275 - case 4: 3276 - tmp = gen_muls_i64_i32(a, b); 3277 - tcg_gen_mov_i64(dest, tmp); 3278 - tcg_temp_free_i64(tmp); 3279 - break; 3280 - case 5: 3281 - tmp = gen_mulu_i64_i32(a, b); 3282 - tcg_gen_mov_i64(dest, tmp); 3283 - tcg_temp_free_i64(tmp); 3284 - break; 3285 - default: abort(); 3286 - } 3287 - 3288 - /* gen_helper_neon_mull_[su]{8|16} do not free their parameters. 3289 - Don't forget to clean them now. */ 3290 - if (size < 2) { 3291 - tcg_temp_free_i32(a); 3292 - tcg_temp_free_i32(b); 3293 3127 } 3294 3128 } 3295 3129 ··· 5191 5025 { 5192 5026 int op; 5193 5027 int q; 5194 - int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs; 5028 + int rd, rm, rd_ofs, rm_ofs; 5195 5029 int size; 5196 5030 int pass; 5197 5031 int u; 5198 5032 int vec_size; 5199 - uint32_t imm; 5200 - TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; 5201 - TCGv_ptr ptr1; 5202 - TCGv_i64 tmp64; 5033 + TCGv_i32 tmp, tmp2, tmp3; 5203 5034 5204 5035 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 5205 5036 return 1; ··· 5220 5051 q = (insn & (1 << 6)) != 0; 5221 5052 u = (insn >> 24) & 1; 5222 5053 VFP_DREG_D(rd, insn); 5223 - VFP_DREG_N(rn, insn); 5224 5054 VFP_DREG_M(rm, insn); 5225 5055 size = (insn >> 20) & 3; 5226 5056 vec_size = q ? 16 : 8; 5227 5057 rd_ofs = neon_reg_offset(rd, 0); 5228 - rn_ofs = neon_reg_offset(rn, 0); 5229 5058 rm_ofs = neon_reg_offset(rm, 0); 5230 5059 5231 5060 if ((insn & (1 << 23)) == 0) { ··· 5236 5065 return 1; 5237 5066 } else { /* (insn & 0x00800010 == 0x00800000) */ 5238 5067 if (size != 3) { 5239 - op = (insn >> 8) & 0xf; 5240 - if ((insn & (1 << 6)) == 0) { 5241 - /* Three registers of different lengths. 
*/ 5242 - int src1_wide; 5243 - int src2_wide; 5244 - int prewiden; 5245 - /* undefreq: bit 0 : UNDEF if size == 0 5246 - * bit 1 : UNDEF if size == 1 5247 - * bit 2 : UNDEF if size == 2 5248 - * bit 3 : UNDEF if U == 1 5249 - * Note that [2:0] set implies 'always UNDEF' 5250 - */ 5251 - int undefreq; 5252 - /* prewiden, src1_wide, src2_wide, undefreq */ 5253 - static const int neon_3reg_wide[16][4] = { 5254 - {1, 0, 0, 0}, /* VADDL */ 5255 - {1, 1, 0, 0}, /* VADDW */ 5256 - {1, 0, 0, 0}, /* VSUBL */ 5257 - {1, 1, 0, 0}, /* VSUBW */ 5258 - {0, 1, 1, 0}, /* VADDHN */ 5259 - {0, 0, 0, 0}, /* VABAL */ 5260 - {0, 1, 1, 0}, /* VSUBHN */ 5261 - {0, 0, 0, 0}, /* VABDL */ 5262 - {0, 0, 0, 0}, /* VMLAL */ 5263 - {0, 0, 0, 9}, /* VQDMLAL */ 5264 - {0, 0, 0, 0}, /* VMLSL */ 5265 - {0, 0, 0, 9}, /* VQDMLSL */ 5266 - {0, 0, 0, 0}, /* Integer VMULL */ 5267 - {0, 0, 0, 9}, /* VQDMULL */ 5268 - {0, 0, 0, 0xa}, /* Polynomial VMULL */ 5269 - {0, 0, 0, 7}, /* Reserved: always UNDEF */ 5270 - }; 5271 - 5272 - prewiden = neon_3reg_wide[op][0]; 5273 - src1_wide = neon_3reg_wide[op][1]; 5274 - src2_wide = neon_3reg_wide[op][2]; 5275 - undefreq = neon_3reg_wide[op][3]; 5276 - 5277 - if ((undefreq & (1 << size)) || 5278 - ((undefreq & 8) && u)) { 5279 - return 1; 5280 - } 5281 - if ((src1_wide && (rn & 1)) || 5282 - (src2_wide && (rm & 1)) || 5283 - (!src2_wide && (rd & 1))) { 5284 - return 1; 5285 - } 5286 - 5287 - /* Handle polynomial VMULL in a single pass. */ 5288 - if (op == 14) { 5289 - if (size == 0) { 5290 - /* VMULL.P8 */ 5291 - tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16, 5292 - 0, gen_helper_neon_pmull_h); 5293 - } else { 5294 - /* VMULL.P64 */ 5295 - if (!dc_isar_feature(aa32_pmull, s)) { 5296 - return 1; 5297 - } 5298 - tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16, 5299 - 0, gen_helper_gvec_pmull_q); 5300 - } 5301 - return 0; 5302 - } 5303 - 5304 - /* Avoid overlapping operands. Wide source operands are 5305 - always aligned so will never overlap with wide 5306 - destinations in problematic ways. 
*/ 5307 - if (rd == rm && !src2_wide) { 5308 - tmp = neon_load_reg(rm, 1); 5309 - neon_store_scratch(2, tmp); 5310 - } else if (rd == rn && !src1_wide) { 5311 - tmp = neon_load_reg(rn, 1); 5312 - neon_store_scratch(2, tmp); 5313 - } 5314 - tmp3 = NULL; 5315 - for (pass = 0; pass < 2; pass++) { 5316 - if (src1_wide) { 5317 - neon_load_reg64(cpu_V0, rn + pass); 5318 - tmp = NULL; 5319 - } else { 5320 - if (pass == 1 && rd == rn) { 5321 - tmp = neon_load_scratch(2); 5322 - } else { 5323 - tmp = neon_load_reg(rn, pass); 5324 - } 5325 - if (prewiden) { 5326 - gen_neon_widen(cpu_V0, tmp, size, u); 5327 - } 5328 - } 5329 - if (src2_wide) { 5330 - neon_load_reg64(cpu_V1, rm + pass); 5331 - tmp2 = NULL; 5332 - } else { 5333 - if (pass == 1 && rd == rm) { 5334 - tmp2 = neon_load_scratch(2); 5335 - } else { 5336 - tmp2 = neon_load_reg(rm, pass); 5337 - } 5338 - if (prewiden) { 5339 - gen_neon_widen(cpu_V1, tmp2, size, u); 5340 - } 5341 - } 5342 - switch (op) { 5343 - case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */ 5344 - gen_neon_addl(size); 5345 - break; 5346 - case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */ 5347 - gen_neon_subl(size); 5348 - break; 5349 - case 5: case 7: /* VABAL, VABDL */ 5350 - switch ((size << 1) | u) { 5351 - case 0: 5352 - gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2); 5353 - break; 5354 - case 1: 5355 - gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2); 5356 - break; 5357 - case 2: 5358 - gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2); 5359 - break; 5360 - case 3: 5361 - gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2); 5362 - break; 5363 - case 4: 5364 - gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2); 5365 - break; 5366 - case 5: 5367 - gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2); 5368 - break; 5369 - default: abort(); 5370 - } 5371 - tcg_temp_free_i32(tmp2); 5372 - tcg_temp_free_i32(tmp); 5373 - break; 5374 - case 8: case 9: case 10: case 11: case 12: case 13: 5375 - /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ 5376 - gen_neon_mull(cpu_V0, tmp, tmp2, size, u); 5377 - break; 5378 - default: /* 15 is RESERVED: caught earlier */ 5379 - abort(); 5380 - } 5381 - if (op == 13) { 5382 - /* VQDMULL */ 5383 - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); 5384 - neon_store_reg64(cpu_V0, rd + pass); 5385 - } else if (op == 5 || (op >= 8 && op <= 11)) { 5386 - /* Accumulate. */ 5387 - neon_load_reg64(cpu_V1, rd + pass); 5388 - switch (op) { 5389 - case 10: /* VMLSL */ 5390 - gen_neon_negl(cpu_V0, size); 5391 - /* Fall through */ 5392 - case 5: case 8: /* VABAL, VMLAL */ 5393 - gen_neon_addl(size); 5394 - break; 5395 - case 9: case 11: /* VQDMLAL, VQDMLSL */ 5396 - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); 5397 - if (op == 11) { 5398 - gen_neon_negl(cpu_V0, size); 5399 - } 5400 - gen_neon_addl_saturate(cpu_V0, cpu_V1, size); 5401 - break; 5402 - default: 5403 - abort(); 5404 - } 5405 - neon_store_reg64(cpu_V0, rd + pass); 5406 - } else if (op == 4 || op == 6) { 5407 - /* Narrowing operation. 
*/ 5408 - tmp = tcg_temp_new_i32(); 5409 - if (!u) { 5410 - switch (size) { 5411 - case 0: 5412 - gen_helper_neon_narrow_high_u8(tmp, cpu_V0); 5413 - break; 5414 - case 1: 5415 - gen_helper_neon_narrow_high_u16(tmp, cpu_V0); 5416 - break; 5417 - case 2: 5418 - tcg_gen_extrh_i64_i32(tmp, cpu_V0); 5419 - break; 5420 - default: abort(); 5421 - } 5422 - } else { 5423 - switch (size) { 5424 - case 0: 5425 - gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0); 5426 - break; 5427 - case 1: 5428 - gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0); 5429 - break; 5430 - case 2: 5431 - tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31); 5432 - tcg_gen_extrh_i64_i32(tmp, cpu_V0); 5433 - break; 5434 - default: abort(); 5435 - } 5436 - } 5437 - if (pass == 0) { 5438 - tmp3 = tmp; 5439 - } else { 5440 - neon_store_reg(rd, 0, tmp3); 5441 - neon_store_reg(rd, 1, tmp); 5442 - } 5443 - } else { 5444 - /* Write back the result. */ 5445 - neon_store_reg64(cpu_V0, rd + pass); 5446 - } 5447 - } 5448 - } else { 5449 - /* Two registers and a scalar. NB that for ops of this form 5450 - * the ARM ARM labels bit 24 as Q, but it is in our variable 5451 - * 'u', not 'q'. 5452 - */ 5453 - if (size == 0) { 5454 - return 1; 5455 - } 5456 - switch (op) { 5457 - case 1: /* Float VMLA scalar */ 5458 - case 5: /* Floating point VMLS scalar */ 5459 - case 9: /* Floating point VMUL scalar */ 5460 - if (size == 1) { 5461 - return 1; 5462 - } 5463 - /* fall through */ 5464 - case 0: /* Integer VMLA scalar */ 5465 - case 4: /* Integer VMLS scalar */ 5466 - case 8: /* Integer VMUL scalar */ 5467 - case 12: /* VQDMULH scalar */ 5468 - case 13: /* VQRDMULH scalar */ 5469 - if (u && ((rd | rn) & 1)) { 5470 - return 1; 5471 - } 5472 - tmp = neon_get_scalar(size, rm); 5473 - neon_store_scratch(0, tmp); 5474 - for (pass = 0; pass < (u ? 4 : 2); pass++) { 5475 - tmp = neon_load_scratch(0); 5476 - tmp2 = neon_load_reg(rn, pass); 5477 - if (op == 12) { 5478 - if (size == 1) { 5479 - gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); 5480 - } else { 5481 - gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); 5482 - } 5483 - } else if (op == 13) { 5484 - if (size == 1) { 5485 - gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); 5486 - } else { 5487 - gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); 5488 - } 5489 - } else if (op & 1) { 5490 - TCGv_ptr fpstatus = get_fpstatus_ptr(1); 5491 - gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus); 5492 - tcg_temp_free_ptr(fpstatus); 5493 - } else { 5494 - switch (size) { 5495 - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; 5496 - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; 5497 - case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; 5498 - default: abort(); 5499 - } 5500 - } 5501 - tcg_temp_free_i32(tmp2); 5502 - if (op < 8) { 5503 - /* Accumulate. 
*/ 5504 - tmp2 = neon_load_reg(rd, pass); 5505 - switch (op) { 5506 - case 0: 5507 - gen_neon_add(size, tmp, tmp2); 5508 - break; 5509 - case 1: 5510 - { 5511 - TCGv_ptr fpstatus = get_fpstatus_ptr(1); 5512 - gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); 5513 - tcg_temp_free_ptr(fpstatus); 5514 - break; 5515 - } 5516 - case 4: 5517 - gen_neon_rsb(size, tmp, tmp2); 5518 - break; 5519 - case 5: 5520 - { 5521 - TCGv_ptr fpstatus = get_fpstatus_ptr(1); 5522 - gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus); 5523 - tcg_temp_free_ptr(fpstatus); 5524 - break; 5525 - } 5526 - default: 5527 - abort(); 5528 - } 5529 - tcg_temp_free_i32(tmp2); 5530 - } 5531 - neon_store_reg(rd, pass, tmp); 5532 - } 5533 - break; 5534 - case 3: /* VQDMLAL scalar */ 5535 - case 7: /* VQDMLSL scalar */ 5536 - case 11: /* VQDMULL scalar */ 5537 - if (u == 1) { 5538 - return 1; 5539 - } 5540 - /* fall through */ 5541 - case 2: /* VMLAL sclar */ 5542 - case 6: /* VMLSL scalar */ 5543 - case 10: /* VMULL scalar */ 5544 - if (rd & 1) { 5545 - return 1; 5546 - } 5547 - tmp2 = neon_get_scalar(size, rm); 5548 - /* We need a copy of tmp2 because gen_neon_mull 5549 - * deletes it during pass 0. */ 5550 - tmp4 = tcg_temp_new_i32(); 5551 - tcg_gen_mov_i32(tmp4, tmp2); 5552 - tmp3 = neon_load_reg(rn, 1); 5553 - 5554 - for (pass = 0; pass < 2; pass++) { 5555 - if (pass == 0) { 5556 - tmp = neon_load_reg(rn, 0); 5557 - } else { 5558 - tmp = tmp3; 5559 - tmp2 = tmp4; 5560 - } 5561 - gen_neon_mull(cpu_V0, tmp, tmp2, size, u); 5562 - if (op != 11) { 5563 - neon_load_reg64(cpu_V1, rd + pass); 5564 - } 5565 - switch (op) { 5566 - case 6: 5567 - gen_neon_negl(cpu_V0, size); 5568 - /* Fall through */ 5569 - case 2: 5570 - gen_neon_addl(size); 5571 - break; 5572 - case 3: case 7: 5573 - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); 5574 - if (op == 7) { 5575 - gen_neon_negl(cpu_V0, size); 5576 - } 5577 - gen_neon_addl_saturate(cpu_V0, cpu_V1, size); 5578 - break; 5579 - case 10: 5580 - /* no-op */ 5581 - break; 5582 - case 11: 5583 - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); 5584 - break; 5585 - default: 5586 - abort(); 5587 - } 5588 - neon_store_reg64(cpu_V0, rd + pass); 5589 - } 5590 - break; 5591 - case 14: /* VQRDMLAH scalar */ 5592 - case 15: /* VQRDMLSH scalar */ 5593 - { 5594 - NeonGenThreeOpEnvFn *fn; 5595 - 5596 - if (!dc_isar_feature(aa32_rdm, s)) { 5597 - return 1; 5598 - } 5599 - if (u && ((rd | rn) & 1)) { 5600 - return 1; 5601 - } 5602 - if (op == 14) { 5603 - if (size == 1) { 5604 - fn = gen_helper_neon_qrdmlah_s16; 5605 - } else { 5606 - fn = gen_helper_neon_qrdmlah_s32; 5607 - } 5608 - } else { 5609 - if (size == 1) { 5610 - fn = gen_helper_neon_qrdmlsh_s16; 5611 - } else { 5612 - fn = gen_helper_neon_qrdmlsh_s32; 5613 - } 5614 - } 5615 - 5616 - tmp2 = neon_get_scalar(size, rm); 5617 - for (pass = 0; pass < (u ? 4 : 2); pass++) { 5618 - tmp = neon_load_reg(rn, pass); 5619 - tmp3 = neon_load_reg(rd, pass); 5620 - fn(tmp, cpu_env, tmp, tmp2, tmp3); 5621 - tcg_temp_free_i32(tmp3); 5622 - neon_store_reg(rd, pass, tmp); 5623 - } 5624 - tcg_temp_free_i32(tmp2); 5625 - } 5626 - break; 5627 - default: 5628 - g_assert_not_reached(); 5629 - } 5630 - } 5068 + /* 5069 + * Three registers of different lengths, or two registers and 5070 + * a scalar: handled by decodetree 5071 + */ 5072 + return 1; 5631 5073 } else { /* size == 3 */ 5632 5074 if (!u) { 5633 - /* Extract. 
*/ 5634 - imm = (insn >> 8) & 0xf; 5635 - 5636 - if (imm > 7 && !q) 5637 - return 1; 5638 - 5639 - if (q && ((rd | rn | rm) & 1)) { 5640 - return 1; 5641 - } 5642 - 5643 - if (imm == 0) { 5644 - neon_load_reg64(cpu_V0, rn); 5645 - if (q) { 5646 - neon_load_reg64(cpu_V1, rn + 1); 5647 - } 5648 - } else if (imm == 8) { 5649 - neon_load_reg64(cpu_V0, rn + 1); 5650 - if (q) { 5651 - neon_load_reg64(cpu_V1, rm); 5652 - } 5653 - } else if (q) { 5654 - tmp64 = tcg_temp_new_i64(); 5655 - if (imm < 8) { 5656 - neon_load_reg64(cpu_V0, rn); 5657 - neon_load_reg64(tmp64, rn + 1); 5658 - } else { 5659 - neon_load_reg64(cpu_V0, rn + 1); 5660 - neon_load_reg64(tmp64, rm); 5661 - } 5662 - tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8); 5663 - tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8)); 5664 - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); 5665 - if (imm < 8) { 5666 - neon_load_reg64(cpu_V1, rm); 5667 - } else { 5668 - neon_load_reg64(cpu_V1, rm + 1); 5669 - imm -= 8; 5670 - } 5671 - tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8)); 5672 - tcg_gen_shri_i64(tmp64, tmp64, imm * 8); 5673 - tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64); 5674 - tcg_temp_free_i64(tmp64); 5675 - } else { 5676 - /* BUGFIX */ 5677 - neon_load_reg64(cpu_V0, rn); 5678 - tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8); 5679 - neon_load_reg64(cpu_V1, rm); 5680 - tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8)); 5681 - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); 5682 - } 5683 - neon_store_reg64(cpu_V0, rd); 5684 - if (q) { 5685 - neon_store_reg64(cpu_V1, rd + 1); 5686 - } 5075 + /* Extract: handled by decodetree */ 5076 + return 1; 5687 5077 } else if ((insn & (1 << 11)) == 0) { 5688 5078 /* Two register misc. */ 5689 5079 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); ··· 6184 5574 } 6185 5575 break; 6186 5576 } 6187 - } else if ((insn & (1 << 10)) == 0) { 6188 - /* VTBL, VTBX. */ 6189 - int n = ((insn >> 8) & 3) + 1; 6190 - if ((rn + n) > 32) { 6191 - /* This is UNPREDICTABLE; we choose to UNDEF to avoid the 6192 - * helper function running off the end of the register file. 6193 - */ 6194 - return 1; 6195 - } 6196 - n <<= 3; 6197 - if (insn & (1 << 6)) { 6198 - tmp = neon_load_reg(rd, 0); 6199 - } else { 6200 - tmp = tcg_temp_new_i32(); 6201 - tcg_gen_movi_i32(tmp, 0); 6202 - } 6203 - tmp2 = neon_load_reg(rm, 0); 6204 - ptr1 = vfp_reg_ptr(true, rn); 6205 - tmp5 = tcg_const_i32(n); 6206 - gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5); 6207 - tcg_temp_free_i32(tmp); 6208 - if (insn & (1 << 6)) { 6209 - tmp = neon_load_reg(rd, 1); 6210 - } else { 6211 - tmp = tcg_temp_new_i32(); 6212 - tcg_gen_movi_i32(tmp, 0); 6213 - } 6214 - tmp3 = neon_load_reg(rm, 1); 6215 - gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5); 6216 - tcg_temp_free_i32(tmp5); 6217 - tcg_temp_free_ptr(ptr1); 6218 - neon_store_reg(rd, 0, tmp2); 6219 - neon_store_reg(rd, 1, tmp3); 6220 - tcg_temp_free_i32(tmp); 6221 - } else if ((insn & 0x380) == 0) { 6222 - /* VDUP */ 6223 - int element; 6224 - MemOp size; 6225 - 6226 - if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { 6227 - return 1; 6228 - } 6229 - if (insn & (1 << 16)) { 6230 - size = MO_8; 6231 - element = (insn >> 17) & 7; 6232 - } else if (insn & (1 << 17)) { 6233 - size = MO_16; 6234 - element = (insn >> 18) & 3; 6235 - } else { 6236 - size = MO_32; 6237 - element = (insn >> 19) & 1; 6238 - } 6239 - tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0), 6240 - neon_element_offset(rm, element, size), 6241 - q ? 16 : 8, q ? 
16 : 8); 6242 5577 } else { 5578 + /* VTBL, VTBX, VDUP: handled by decodetree */ 6243 5579 return 1; 6244 5580 } 6245 5581 }
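The large deletions in translate.c above follow the pattern used throughout this pull: disas_neon_data_insn() returns nonzero to mean UNDEF, and the encodings whose hand-written translation was removed are now expected to be claimed by the decodetree-generated Neon decoder, which earlier patches in the series arrange to run before the legacy function. A toy sketch of that "generated decoder first, legacy decoder as fallback" dispatch, with entirely hypothetical names and a made-up match condition, not QEMU's actual call site:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the decodetree-generated decoder: returns true if it
 * recognised and translated the instruction. */
static bool new_decoder(uint32_t insn)
{
    return (insn & 0x0f000000) == 0x02000000;   /* arbitrary pattern */
}

/* Stand-in for the trimmed legacy decoder: nonzero means UNDEF. */
static int legacy_decoder(uint32_t insn)
{
    (void)insn;
    return 1;   /* handled by decodetree, so reaching here is UNDEF */
}

static void decode(uint32_t insn)
{
    if (new_decoder(insn)) {
        puts("handled by decodetree");
    } else if (legacy_decoder(insn)) {
        puts("UNDEF");
    }
}

int main(void)
{
    decode(0x02000010);     /* taken by the new decoder */
    decode(0x0e000010);     /* falls through and UNDEFs */
    return 0;
}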
+1
target/arm/translate.h
··· 371 371 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
372 372 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
373 373 typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
374 + typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32);
374 375 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
375 376 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
376 377 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
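The one-line translate.h change above adds the NeonGenTwoOpWidenFn typedef so do_2scalar_long() can take any "two 32-bit inputs, one 64-bit result" op through a single function-pointer type. A stand-alone host-side illustration of that shape (hypothetical names, not QEMU code; the signed case mirrors what gen_mull_s32 is assumed to compute):

#include <stdint.h>
#include <stdio.h>

/* Host analogue of NeonGenTwoOpWidenFn: widen two 32-bit inputs into
 * a 64-bit result. */
typedef void two_op_widen_fn(int64_t *dest, int32_t a, int32_t b);

static void mull_s32(int64_t *dest, int32_t a, int32_t b)
{
    *dest = (int64_t)a * (int64_t)b;    /* widen before multiplying */
}

int main(void)
{
    int64_t d;
    two_op_widen_fn *fn = mull_s32;     /* selected by size, like opfn[a->size] */
    fn(&d, -3, 100000);
    printf("%lld\n", (long long)d);     /* prints -300000 */
    return 0;
}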