qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

linux-user: deal with address wrap for ARM_COMMPAGE on 32 bit

We rely on the pointer to wrap when accessing the high address of the
COMMPAGE so it lands somewhere reasonable. However, on 32 bit hosts we
cannot afford to map the entire 4 GiB address range. The old mmap
trial-and-error code handled this by simply checking that we could map
both the guest_base and the computed COMMPAGE address.

We can't just manipulate loadaddr to get what we want, so we introduce
an offset that pgb_find_hole can apply when looking for a gap for
guest_base, ensuring there is space left to map the COMMPAGE
afterwards.

This is arguably a little inefficient for the one 32 bit
value (kuser_helper_version) we need to keep there, given that all the
actual code entries are picked up during the translation phase.

Fixes: ee94743034b
Bug: https://bugs.launchpad.net/qemu/+bug/1880225
Cc: Bug 1880225 <1880225@bugs.launchpad.net>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Aleksandar Markovic <aleksandar.qemu.devel@gmail.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20200605154929.26910-13-alex.bennee@linaro.org>

+17 -14
+17 -14
linux-user/elfload.c
··· 389 389 { 390 390 void *want = g2h(ARM_COMMPAGE & -qemu_host_page_size); 391 391 void *addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE, 392 - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 392 + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); 393 393 394 394 if (addr == MAP_FAILED) { 395 395 perror("Allocating guest commpage"); ··· 2113 2113 * only dumbly iterate up the host address space seeing if the 2114 2114 * allocation would work. 2115 2115 */ 2116 - static uintptr_t pgd_find_hole_fallback(uintptr_t guest_size, uintptr_t brk, long align) 2116 + static uintptr_t pgd_find_hole_fallback(uintptr_t guest_size, uintptr_t brk, 2117 + long align, uintptr_t offset) 2117 2118 { 2118 2119 uintptr_t base; 2119 2120 ··· 2123 2124 while (true) { 2124 2125 uintptr_t align_start, end; 2125 2126 align_start = ROUND_UP(base, align); 2126 - end = align_start + guest_size; 2127 + end = align_start + guest_size + offset; 2127 2128 2128 2129 /* if brk is anywhere in the range give ourselves some room to grow. */ 2129 2130 if (align_start <= brk && brk < end) { ··· 2138 2139 PROT_NONE, flags, -1, 0); 2139 2140 if (mmap_start != MAP_FAILED) { 2140 2141 munmap((void *) align_start, guest_size); 2141 - return (uintptr_t) mmap_start; 2142 + return (uintptr_t) mmap_start + offset; 2142 2143 } 2143 2144 base += qemu_host_page_size; 2144 2145 } ··· 2147 2148 2148 2149 /* Return value for guest_base, or -1 if no hole found. */ 2149 2150 static uintptr_t pgb_find_hole(uintptr_t guest_loaddr, uintptr_t guest_size, 2150 - long align) 2151 + long align, uintptr_t offset) 2151 2152 { 2152 2153 GSList *maps, *iter; 2153 2154 uintptr_t this_start, this_end, next_start, brk; ··· 2161 2162 brk = (uintptr_t)sbrk(0); 2162 2163 2163 2164 if (!maps) { 2164 - return pgd_find_hole_fallback(guest_size, brk, align); 2165 + return pgd_find_hole_fallback(guest_size, brk, align, offset); 2165 2166 } 2166 2167 2167 2168 /* The first hole is before the first map entry. 
*/ ··· 2173 2174 2174 2175 this_end = ((MapInfo *)iter->data)->start; 2175 2176 next_start = ((MapInfo *)iter->data)->end; 2176 - align_start = ROUND_UP(this_start, align); 2177 + align_start = ROUND_UP(this_start + offset, align); 2177 2178 2178 2179 /* Skip holes that are too small. */ 2179 2180 if (align_start >= this_end) { ··· 2223 2224 { 2224 2225 uintptr_t loaddr = orig_loaddr; 2225 2226 uintptr_t hiaddr = orig_hiaddr; 2227 + uintptr_t offset = 0; 2226 2228 uintptr_t addr; 2227 2229 2228 2230 if (hiaddr != orig_hiaddr) { ··· 2236 2238 if (ARM_COMMPAGE) { 2237 2239 /* 2238 2240 * Extend the allocation to include the commpage. 2239 - * For a 64-bit host, this is just 4GiB; for a 32-bit host, 2240 - * the address arithmetic will wrap around, but the difference 2241 - * will produce the correct allocation size. 2241 + * For a 64-bit host, this is just 4GiB; for a 32-bit host we 2242 + * need to ensure there is space bellow the guest_base so we 2243 + * can map the commpage in the place needed when the address 2244 + * arithmetic wraps around. 2242 2245 */ 2243 2246 if (sizeof(uintptr_t) == 8 || loaddr >= 0x80000000u) { 2244 - hiaddr = (uintptr_t)4 << 30; 2247 + hiaddr = (uintptr_t) 4 << 30; 2245 2248 } else { 2246 - loaddr = ARM_COMMPAGE & -align; 2249 + offset = -(ARM_COMMPAGE & -align); 2247 2250 } 2248 2251 } 2249 2252 2250 - addr = pgb_find_hole(loaddr, hiaddr - loaddr, align); 2253 + addr = pgb_find_hole(loaddr, hiaddr - loaddr, align, offset); 2251 2254 if (addr == -1) { 2252 2255 /* 2253 2256 * If ARM_COMMPAGE, there *might* be a non-consecutive allocation ··· 2282 2285 * just above that, and maximises the positive guest addresses. 2283 2286 */ 2284 2287 commpage = ARM_COMMPAGE & -align; 2285 - addr = pgb_find_hole(commpage, -commpage, align); 2288 + addr = pgb_find_hole(commpage, -commpage, align, 0); 2286 2289 assert(addr != -1); 2287 2290 guest_base = addr; 2288 2291 }