qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

target/i386: Cleanup and use the EPYC mode topology functions

Use the new functions from topology.h and delete the unused code. Given the
sockets, nodes, cores and threads, the new functions generate apic id for EPYC
mode. Removes all the hardcoded values.

Signed-off-by: Babu Moger <babu.moger@amd.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <158396722151.58170.8031705769621392927.stgit@naples-babu.amd.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>

Authored by Babu Moger and committed by Eduardo Habkost.
dd08ef03 7568b205

+34 -127
target/i386/cpu.c
··· 338 338 } 339 339 } 340 340 341 - /* 342 - * Definitions used for building CPUID Leaf 0x8000001D and 0x8000001E 343 - * Please refer to the AMD64 Architecture Programmer’s Manual Volume 3. 344 - * Define the constants to build the cpu topology. Right now, TOPOEXT 345 - * feature is enabled only on EPYC. So, these constants are based on 346 - * EPYC supported configurations. We may need to handle the cases if 347 - * these values change in future. 348 - */ 349 - /* Maximum core complexes in a node */ 350 - #define MAX_CCX 2 351 - /* Maximum cores in a core complex */ 352 - #define MAX_CORES_IN_CCX 4 353 - /* Maximum cores in a node */ 354 - #define MAX_CORES_IN_NODE 8 355 - /* Maximum nodes in a socket */ 356 - #define MAX_NODES_PER_SOCKET 4 357 - 358 - /* 359 - * Figure out the number of nodes required to build this config. 360 - * Max cores in a node is 8 361 - */ 362 - static int nodes_in_socket(int nr_cores) 363 - { 364 - int nodes; 365 - 366 - nodes = DIV_ROUND_UP(nr_cores, MAX_CORES_IN_NODE); 367 - 368 - /* Hardware does not support config with 3 nodes, return 4 in that case */ 369 - return (nodes == 3) ? 4 : nodes; 370 - } 371 - 372 - /* 373 - * Decide the number of cores in a core complex with the given nr_cores using 374 - * following set constants MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE and 375 - * MAX_NODES_PER_SOCKET. Maintain symmetry as much as possible 376 - * L3 cache is shared across all cores in a core complex. So, this will also 377 - * tell us how many cores are sharing the L3 cache. 
378 - */ 379 - static int cores_in_core_complex(int nr_cores) 380 - { 381 - int nodes; 382 - 383 - /* Check if we can fit all the cores in one core complex */ 384 - if (nr_cores <= MAX_CORES_IN_CCX) { 385 - return nr_cores; 386 - } 387 - /* Get the number of nodes required to build this config */ 388 - nodes = nodes_in_socket(nr_cores); 389 - 390 - /* 391 - * Divide the cores accros all the core complexes 392 - * Return rounded up value 393 - */ 394 - return DIV_ROUND_UP(nr_cores, nodes * MAX_CCX); 395 - } 396 - 397 341 /* Encode cache info for CPUID[8000001D] */ 398 - static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, CPUState *cs, 399 - uint32_t *eax, uint32_t *ebx, 400 - uint32_t *ecx, uint32_t *edx) 342 + static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, 343 + X86CPUTopoInfo *topo_info, 344 + uint32_t *eax, uint32_t *ebx, 345 + uint32_t *ecx, uint32_t *edx) 401 346 { 402 347 uint32_t l3_cores; 348 + unsigned nodes = MAX(topo_info->nodes_per_pkg, 1); 349 + 403 350 assert(cache->size == cache->line_size * cache->associativity * 404 351 cache->partitions * cache->sets); 405 352 ··· 408 355 409 356 /* L3 is shared among multiple cores */ 410 357 if (cache->level == 3) { 411 - l3_cores = cores_in_core_complex(cs->nr_cores); 412 - *eax |= ((l3_cores * cs->nr_threads) - 1) << 14; 358 + l3_cores = DIV_ROUND_UP((topo_info->dies_per_pkg * 359 + topo_info->cores_per_die * 360 + topo_info->threads_per_core), 361 + nodes); 362 + *eax |= (l3_cores - 1) << 14; 413 363 } else { 414 - *eax |= ((cs->nr_threads - 1) << 14); 364 + *eax |= ((topo_info->threads_per_core - 1) << 14); 415 365 } 416 366 417 367 assert(cache->line_size > 0); ··· 431 381 (cache->complex_indexing ? 
CACHE_COMPLEX_IDX : 0); 432 382 } 433 383 434 - /* Data structure to hold the configuration info for a given core index */ 435 - struct core_topology { 436 - /* core complex id of the current core index */ 437 - int ccx_id; 438 - /* 439 - * Adjusted core index for this core in the topology 440 - * This can be 0,1,2,3 with max 4 cores in a core complex 441 - */ 442 - int core_id; 443 - /* Node id for this core index */ 444 - int node_id; 445 - /* Number of nodes in this config */ 446 - int num_nodes; 447 - }; 448 - 449 - /* 450 - * Build the configuration closely match the EPYC hardware. Using the EPYC 451 - * hardware configuration values (MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE) 452 - * right now. This could change in future. 453 - * nr_cores : Total number of cores in the config 454 - * core_id : Core index of the current CPU 455 - * topo : Data structure to hold all the config info for this core index 456 - */ 457 - static void build_core_topology(int nr_cores, int core_id, 458 - struct core_topology *topo) 459 - { 460 - int nodes, cores_in_ccx; 461 - 462 - /* First get the number of nodes required */ 463 - nodes = nodes_in_socket(nr_cores); 464 - 465 - cores_in_ccx = cores_in_core_complex(nr_cores); 466 - 467 - topo->node_id = core_id / (cores_in_ccx * MAX_CCX); 468 - topo->ccx_id = (core_id % (cores_in_ccx * MAX_CCX)) / cores_in_ccx; 469 - topo->core_id = core_id % cores_in_ccx; 470 - topo->num_nodes = nodes; 471 - } 472 - 473 384 /* Encode cache info for CPUID[8000001E] */ 474 - static void encode_topo_cpuid8000001e(CPUState *cs, X86CPU *cpu, 385 + static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, 475 386 uint32_t *eax, uint32_t *ebx, 476 387 uint32_t *ecx, uint32_t *edx) 477 388 { 478 - struct core_topology topo = {0}; 479 - unsigned long nodes; 389 + X86CPUTopoIDs topo_ids = {0}; 390 + unsigned long nodes = MAX(topo_info->nodes_per_pkg, 1); 480 391 int shift; 481 392 482 - build_core_topology(cs->nr_cores, cpu->core_id, 
&topo); 393 + x86_topo_ids_from_apicid_epyc(cpu->apic_id, topo_info, &topo_ids); 394 + 483 395 *eax = cpu->apic_id; 484 396 /* 485 397 * CPUID_Fn8000001E_EBX ··· 496 408 * 3 Core complex id 497 409 * 1:0 Core id 498 410 */ 499 - if (cs->nr_threads - 1) { 500 - *ebx = ((cs->nr_threads - 1) << 8) | (topo.node_id << 3) | 501 - (topo.ccx_id << 2) | topo.core_id; 502 - } else { 503 - *ebx = (topo.node_id << 4) | (topo.ccx_id << 3) | topo.core_id; 504 - } 411 + *ebx = ((topo_info->threads_per_core - 1) << 8) | (topo_ids.node_id << 3) | 412 + (topo_ids.core_id); 505 413 /* 506 414 * CPUID_Fn8000001E_ECX 507 415 * 31:11 Reserved ··· 510 418 * 2 Socket id 511 419 * 1:0 Node id 512 420 */ 513 - if (topo.num_nodes <= 4) { 514 - *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << 2) | 515 - topo.node_id; 421 + if (nodes <= 4) { 422 + *ecx = ((nodes - 1) << 8) | (topo_ids.pkg_id << 2) | topo_ids.node_id; 516 423 } else { 517 424 /* 518 425 * Node id fix up. Actual hardware supports up to 4 nodes. But with ··· 527 434 * number of nodes. find_last_bit returns last set bit(0 based). Left 528 435 * shift(+1) the socket id to represent all the nodes. 
529 436 */ 530 - nodes = topo.num_nodes - 1; 437 + nodes -= 1; 531 438 shift = find_last_bit(&nodes, 8); 532 - *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << (shift + 1)) | 533 - topo.node_id; 439 + *ecx = (nodes << 8) | (topo_ids.pkg_id << (shift + 1)) | 440 + topo_ids.node_id; 534 441 } 535 442 *edx = 0; 536 443 } ··· 5499 5406 uint32_t signature[3]; 5500 5407 X86CPUTopoInfo topo_info; 5501 5408 5409 + topo_info.nodes_per_pkg = env->nr_nodes; 5502 5410 topo_info.dies_per_pkg = env->nr_dies; 5503 5411 topo_info.cores_per_die = cs->nr_cores; 5504 5412 topo_info.threads_per_core = cs->nr_threads; ··· 5918 5826 } 5919 5827 switch (count) { 5920 5828 case 0: /* L1 dcache info */ 5921 - encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, cs, 5922 - eax, ebx, ecx, edx); 5829 + encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, 5830 + &topo_info, eax, ebx, ecx, edx); 5923 5831 break; 5924 5832 case 1: /* L1 icache info */ 5925 - encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, cs, 5926 - eax, ebx, ecx, edx); 5833 + encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, 5834 + &topo_info, eax, ebx, ecx, edx); 5927 5835 break; 5928 5836 case 2: /* L2 cache info */ 5929 - encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, cs, 5930 - eax, ebx, ecx, edx); 5837 + encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, 5838 + &topo_info, eax, ebx, ecx, edx); 5931 5839 break; 5932 5840 case 3: /* L3 cache info */ 5933 - encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, cs, 5934 - eax, ebx, ecx, edx); 5841 + encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, 5842 + &topo_info, eax, ebx, ecx, edx); 5935 5843 break; 5936 5844 default: /* end of info */ 5937 5845 *eax = *ebx = *ecx = *edx = 0; ··· 5940 5848 break; 5941 5849 case 0x8000001E: 5942 5850 assert(cpu->core_id <= 255); 5943 - encode_topo_cpuid8000001e(cs, cpu, 5944 - eax, ebx, ecx, edx); 5851 + encode_topo_cpuid8000001e(&topo_info, cpu, eax, ebx, ecx, edx); 5945 5852 
break; 5946 5853 case 0xC0000000: 5947 5854 *eax = env->cpuid_xlevel2;