qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

qemu/host-utils.h: Reduce the operation count in the fallback ctpop

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>

+11 -14
+11 -14
include/qemu/host-utils.h
@@ -327,7 +327,7 @@
 #else
     val = (val & 0x55) + ((val >> 1) & 0x55);
     val = (val & 0x33) + ((val >> 2) & 0x33);
-    val = (val & 0x0f) + ((val >> 4) & 0x0f);
+    val = (val + (val >> 4)) & 0x0f;
 
     return val;
 #endif
@@ -344,8 +344,8 @@
 #else
     val = (val & 0x5555) + ((val >> 1) & 0x5555);
     val = (val & 0x3333) + ((val >> 2) & 0x3333);
-    val = (val & 0x0f0f) + ((val >> 4) & 0x0f0f);
-    val = (val & 0x00ff) + ((val >> 8) & 0x00ff);
+    val = (val + (val >> 4)) & 0x0f0f;
+    val = (val + (val >> 8)) & 0x00ff;
 
     return val;
 #endif
@@ -360,11 +360,10 @@
 #if QEMU_GNUC_PREREQ(3, 4)
     return __builtin_popcount(val);
 #else
-    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
-    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
-    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
-    val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
-    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
+    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
+    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
+    val = (val + (val >> 4)) & 0x0f0f0f0f;
+    val = (val * 0x01010101) >> 24;
 
     return val;
 #endif
@@ -379,12 +378,10 @@
 #if QEMU_GNUC_PREREQ(3, 4)
     return __builtin_popcountll(val);
 #else
-    val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL);
-    val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
-    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 0x0f0f0f0f0f0f0f0fULL);
-    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 0x00ff00ff00ff00ffULL);
-    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 0x0000ffff0000ffffULL);
-    val = (val & 0x00000000ffffffffULL) + ((val >> 32) & 0x00000000ffffffffULL);
+    val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL);
+    val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
+    val = (val + (val >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
+    val = (val * 0x0101010101010101ULL) >> 56;
 
     return val;
 #endif