QEMU with hacks to log DMA reads & writes — jcs.org/2018/11/12/vfio

target/arm: Add helper_mte_check_zva

Use a special helper for DC_ZVA, rather than the more
general mte_checkN.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200626033144.790098-28-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

Authored by Richard Henderson; committed by Peter Maydell.
46dc1bc0 5add8248

+122 -1
+1
target/arm/helper-a64.h
/* MTE helpers: each takes (env, 32-bit MTEDESC descriptor, 64-bit pointer)
 * and returns the (possibly cleaned) 64-bit pointer.  TCG_CALL_NO_WG because
 * a failed tag check may raise an exception (reads globals / may longjmp).
 */
DEF_HELPER_FLAGS_3(mte_check1, TCG_CALL_NO_WG, i64, env, i32, i64)
DEF_HELPER_FLAGS_3(mte_checkN, TCG_CALL_NO_WG, i64, env, i32, i64)
DEF_HELPER_FLAGS_3(mte_check_zva, TCG_CALL_NO_WG, i64, env, i32, i64)
DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64)
DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32)
DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64)
+106
target/arm/mte_helper.c
{
    return mte_checkN(env, desc, ptr, GETPC());
}

/*
 * Perform an MTE checked access for DC_ZVA.
 *
 * @env: CPU state
 * @desc: MTEDESC descriptor; MIDX (mmu index), TBI and TCMA fields are read
 * @ptr: the (possibly tagged) virtual address the DC_ZVA targets
 *
 * Returns the pointer with the tag cleaned (via useronly_clean_ptr) for use
 * by the actual zeroing helper.  On a tag miscompare, reports the failure at
 * the granule of the first differing tag nibble via mte_check_fail().
 */
uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
    uintptr_t ra = GETPC();
    int log2_dcz_bytes, log2_tag_bytes;
    int mmu_idx, bit55;
    intptr_t dcz_bytes, tag_bytes, i;
    void *mem;
    uint64_t ptr_tag, mem_tag, align_ptr;

    bit55 = extract64(ptr, 55, 1);

    /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
    if (unlikely(!tbi_check(desc, bit55))) {
        return ptr;
    }

    ptr_tag = allocation_tag_from_addr(ptr);

    /* TCMA: tag-check-miss is architecturally ignored for this tag value. */
    if (tcma_check(desc, bit55, ptr_tag)) {
        goto done;
    }

    /*
     * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1,
     * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make
     * sure that we can access one complete tag byte here.
     */
    log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
    log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
    dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
    tag_bytes = (intptr_t)1 << log2_tag_bytes;
    align_ptr = ptr & -dcz_bytes;

    /*
     * Trap if accessing an invalid page.  DC_ZVA requires that we supply
     * the original pointer for an invalid page.  But watchpoints require
     * that we probe the actual space.  So do both.
     */
    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
    (void) probe_write(env, ptr, 1, mmu_idx, ra);
    mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE,
                             dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra);
    if (!mem) {
        /* No tag storage for this page: access is unchecked. */
        goto done;
    }

    /*
     * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus
     * it is quite easy to perform all of the comparisons at once without
     * any extra masking.
     *
     * The most common zva block size is 64; some of the thunderx cpus use
     * a block size of 128.  For user-only, aarch64_max_initfn will set the
     * block size to 512.  Fill out the other cases for future-proofing.
     *
     * In order to be able to find the first miscompare later, we want the
     * tag bytes to be in little-endian order.
     */
    switch (log2_tag_bytes) {
    case 0: /* zva_blocksize 32 */
        mem_tag = *(uint8_t *)mem;
        ptr_tag *= 0x11u;               /* replicate nibble into both halves */
        break;
    case 1: /* zva_blocksize 64 */
        mem_tag = cpu_to_le16(*(uint16_t *)mem);
        ptr_tag *= 0x1111u;
        break;
    case 2: /* zva_blocksize 128 */
        mem_tag = cpu_to_le32(*(uint32_t *)mem);
        ptr_tag *= 0x11111111u;
        break;
    case 3: /* zva_blocksize 256 */
        mem_tag = cpu_to_le64(*(uint64_t *)mem);
        ptr_tag *= 0x1111111111111111ull;
        break;

    default: /* zva_blocksize 512, 1024, 2048 */
        /* Compare 8 tag bytes (= 16 granules) at a time. */
        ptr_tag *= 0x1111111111111111ull;
        i = 0;
        do {
            mem_tag = cpu_to_le64(*(uint64_t *)(mem + i));
            if (unlikely(mem_tag != ptr_tag)) {
                goto fail;
            }
            i += 8;
            align_ptr += 16 * TAG_GRANULE;
        } while (i < tag_bytes);
        goto done;
    }

    if (likely(mem_tag == ptr_tag)) {
        goto done;
    }

 fail:
    /* Locate the first nibble that differs.  Each nibble is one granule's
     * tag, in little-endian order, so ctz/4 indexes the failing granule. */
    i = ctz64(mem_tag ^ ptr_tag) >> 4;
    mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra);

 done:
    return useronly_clean_ptr(ptr);
}
+15 -1
target/arm/translate-a64.c
··· 1857 1857 return; 1858 1858 case ARM_CP_DC_ZVA: 1859 1859 /* Writes clear the aligned block of memory which rt points into. */ 1860 - tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 1860 + if (s->mte_active[0]) { 1861 + TCGv_i32 t_desc; 1862 + int desc = 0; 1863 + 1864 + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 1865 + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 1866 + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 1867 + t_desc = tcg_const_i32(desc); 1868 + 1869 + tcg_rt = new_tmp_a64(s); 1870 + gen_helper_mte_check_zva(tcg_rt, cpu_env, t_desc, cpu_reg(s, rt)); 1871 + tcg_temp_free_i32(t_desc); 1872 + } else { 1873 + tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 1874 + } 1861 1875 gen_helper_dc_zva(cpu_env, tcg_rt); 1862 1876 return; 1863 1877 default: