qemu with hax to log dma reads & writes (jcs.org/2018/11/12/vfio)
/*
 * Virtual page mapping
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"

#include "qemu/cutils.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/target_page.h"
#include "tcg.h"
#include "hw/qdev-core.h"
#include "hw/qdev-properties.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#include "hw/xen/xen.h"
#endif
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#else /* !CONFIG_USER_ONLY */
#include "hw/hw.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
#include "sysemu/numa.h"
#include "sysemu/hw_accel.h"
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "trace-root.h"

#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
#include <linux/falloc.h>
#endif

#endif
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "translate-all.h"
#include "sysemu/replay.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "exec/log.h"

#include "migration/vmstate.h"

#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif

#include "monitor/monitor.h"

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically
 * zero the page and wake waiting processes.
 * (Set during postcopy)
 */
#define RAM_UF_ZEROPAGE (1 << 3)
#endif

#ifdef TARGET_PAGE_BITS_VARY
int target_page_bits;
bool target_page_bits_decided;
#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

uintptr_t qemu_host_page_size;
intptr_t qemu_host_page_mask;

bool set_preferred_target_page_bits(int bits)
{
    /* The target page size is the lowest common denominator for all
     * the CPUs in the system, so we can only make it smaller, never
     * larger.  And we can't make it smaller once we've committed to
     * a particular size.
     */
#ifdef TARGET_PAGE_BITS_VARY
    assert(bits >= TARGET_PAGE_BITS_MIN);
    if (target_page_bits == 0 || target_page_bits > bits) {
        if (target_page_bits_decided) {
            return false;
        }
        target_page_bits = bits;
    }
#endif
    return true;
}

#if !defined(CONFIG_USER_ONLY)

static void finalize_target_page_bits(void)
{
#ifdef TARGET_PAGE_BITS_VARY
    if (target_page_bits == 0) {
        target_page_bits = TARGET_PAGE_BITS_MIN;
    }
    target_page_bits_decided = true;
#endif
}

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits to skip to the next level (in units of L2_SIZE).
     * 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables. */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

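/* A worked example of the map geometry (illustrative sketch, not compiled;
 * the helper name is hypothetical).  With ADDR_SPACE_BITS 64, P_L2_BITS 9
 * and a typical 12-bit target page, P_L2_LEVELS is ((64 - 12 - 1) / 9) + 1
 * = 6, and a page index is consumed 9 bits per level, exactly as
 * phys_page_set_level() and phys_page_find() below do.
 */
#if 0
static void phys_map_example_decompose(hwaddr addr)
{
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int level;

    for (level = P_L2_LEVELS - 1; level >= 0; level--) {
        unsigned slot = (index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1);
        printf("level %d -> slot %u\n", level, slot);
    }
}
#endif
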
struct AddressSpaceDispatch {
    MemoryRegionSection *mru_section;
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    FlatView *fv;
    hwaddr base;
    uint16_t sub_section[];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;

/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};

struct DirtyBitmapSnapshot {
    ram_addr_t start;
    ram_addr_t end;
    unsigned long dirty[];
};

#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    static unsigned alloc_hint = 16;
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
        alloc_hint = map->nodes_nb_alloc;
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}

static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non-leaf page entry.  Simply detect that the entry has a
 * single child, and update our entry so we can skip it and go directly
 * to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

void address_space_dispatch_compact(AddressSpaceDispatch *d)
{
    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes);
    }
}

static inline bool section_covers_addr(const MemoryRegionSection *section,
                                       hwaddr addr)
{
    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
     * the section must cover the entire address space.
     */
    return int128_gethi(section->size) ||
           range_covers_byte(section->offset_within_address_space,
                             int128_getlo(section->size), addr);
}

static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr)
{
    PhysPageEntry lp = d->phys_map, *p;
    Node *nodes = d->map.nodes;
    MemoryRegionSection *sections = d->map.sections;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (section_covers_addr(&sections[lp.ptr], addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

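/* Note how phys_page_find() above consumes the skip field: the level
 * counter starts at P_L2_LEVELS and each entry subtracts its own skip, so
 * a compacted entry with skip == 3 jumps over two intermediate
 * single-child nodes in one step.  A leaf has skip == 0, which terminates
 * the loop and leaves lp.ptr indexing phys_sections rather than
 * phys_map_nodes.
 */
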
bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section = atomic_read(&d->mru_section);
    subpage_t *subpage;

    if (!section || section == &d->map.sections[PHYS_SECTION_UNASSIGNED] ||
        !section_covers_addr(section, addr)) {
        section = phys_page_find(d, addr);
        atomic_set(&d->mru_section, section);
    }
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}

/**
 * flatview_do_translate - translate an address in FlatView
 *
 * @fv: the flat view that we want to translate on
 * @addr: the address to be translated in above address space
 * @xlat: the translated address offset within memory region. It
 *        cannot be @NULL.
 * @plen_out: valid read/write length of the translated address. It
 *            can be @NULL when we don't care about it.
 * @page_mask_out: page mask for the translated address. This is only
 *                 meaningful for IOMMU-translated addresses, since the
 *                 IOMMU may map huge pages. It can be @NULL if we don't
 *                 care about it.
 * @is_write: whether the translation operation is for write
 * @is_mmio: whether this can be MMIO, set true if it can
 *
 * This function is called from RCU critical section
 */
static MemoryRegionSection flatview_do_translate(FlatView *fv,
                                                 hwaddr addr,
                                                 hwaddr *xlat,
                                                 hwaddr *plen_out,
                                                 hwaddr *page_mask_out,
                                                 bool is_write,
                                                 bool is_mmio,
                                                 AddressSpace **target_as)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    IOMMUMemoryRegion *iommu_mr;
    IOMMUMemoryRegionClass *imrc;
    hwaddr page_mask = (hwaddr)(-1);
    hwaddr plen = (hwaddr)(-1);

    if (plen_out) {
        plen = *plen_out;
    }

    for (;;) {
        section = address_space_translate_internal(
            flatview_to_dispatch(fv), addr, &addr,
            &plen, is_mmio);

        iommu_mr = memory_region_get_iommu(section->mr);
        if (!iommu_mr) {
            break;
        }
        imrc = memory_region_get_iommu_class_nocheck(iommu_mr);

        iotlb = imrc->translate(iommu_mr, addr, is_write ?
                                IOMMU_WO : IOMMU_RO);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        page_mask &= iotlb.addr_mask;
        plen = MIN(plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            goto translate_fail;
        }

        fv = address_space_to_flatview(iotlb.target_as);
        *target_as = iotlb.target_as;
    }

    *xlat = addr;

    if (page_mask == (hwaddr)(-1)) {
        /* Not behind an IOMMU, use default page size. */
        page_mask = ~TARGET_PAGE_MASK;
    }

    if (page_mask_out) {
        *page_mask_out = page_mask;
    }

    if (plen_out) {
        *plen_out = plen;
    }

    return *section;

translate_fail:
    return (MemoryRegionSection) { .mr = &io_mem_unassigned };
}

/* Called from RCU critical section */
IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
                                            bool is_write)
{
    MemoryRegionSection section;
    hwaddr xlat, page_mask;

    /*
     * This can never be MMIO, and we don't really care about plen,
     * only about the page mask.
     */
    section = flatview_do_translate(address_space_to_flatview(as), addr, &xlat,
                                    NULL, &page_mask, is_write, false, &as);

    /* Illegal translation */
    if (section.mr == &io_mem_unassigned) {
        goto iotlb_fail;
    }

    /* Convert memory region offset into address space offset */
    xlat += section.offset_within_address_space -
            section.offset_within_region;

    return (IOMMUTLBEntry) {
        .target_as = as,
        .iova = addr & ~page_mask,
        .translated_addr = xlat & ~page_mask,
        .addr_mask = page_mask,
        /* IOTLBs are for DMA, and DMA is only allowed on RAM. */
        .perm = IOMMU_RW,
    };

iotlb_fail:
    return (IOMMUTLBEntry) {0};
}

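/* A minimal usage sketch (illustrative only, not compiled; the helper name
 * is hypothetical): querying the IOTLB entry for a guest physical address,
 * e.g. to log where a device DMA would land.  address_space_memory is the
 * global defined above; the RCU read lock is required by the "Called from
 * RCU critical section" contract.
 */
#if 0
static void log_dma_translation_example(hwaddr addr, bool is_write)
{
    IOMMUTLBEntry entry;

    rcu_read_lock();
    entry = address_space_get_iotlb_entry(&address_space_memory, addr,
                                          is_write);
    if (entry.perm != IOMMU_NONE) {
        qemu_log("dma %s iova 0x%" HWADDR_PRIx " -> 0x%" HWADDR_PRIx "\n",
                 is_write ? "write" : "read",
                 entry.iova, entry.translated_addr);
    }
    rcu_read_unlock();
}
#endif
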
/* Called from RCU critical section */
MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool is_write)
{
    MemoryRegion *mr;
    MemoryRegionSection section;
    AddressSpace *as = NULL;

    /* This can be MMIO, so setup MMIO bit. */
    section = flatview_do_translate(fv, addr, xlat, plen, NULL,
                                    is_write, true, &as);
    mr = section.mr;

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    return mr;
}

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);

    section = address_space_translate_internal(d, addr, xlat, plen, false);

    assert(!memory_region_is_iommu(section->mr));
    return section;
}
#endif

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu);

    /* loadvm has just updated the content of RAM, bypassing the
     * usual mechanisms that ensure we flush TBs for writes to
     * memory we've translated code from.  So we must flush all TBs,
     * which will now be stale.
     */
    tb_flush(cpu);

    return 0;
}

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

    return 0;
}

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}

static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

static bool cpu_common_crash_occurred_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return cpu->crash_occurred;
}

static const VMStateDescription vmstate_cpu_common_crash_occurred = {
    .name = "cpu_common/crash_occurred",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_crash_occurred_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(crash_occurred, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        &vmstate_cpu_common_crash_occurred,
        NULL
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void cpu_address_space_init(CPUState *cpu, int asidx,
                            const char *prefix, MemoryRegion *mr)
{
    CPUAddressSpace *newas;
    AddressSpace *as = g_new0(AddressSpace, 1);
    char *as_name;

    assert(mr);
    as_name = g_strdup_printf("%s-%d", prefix, cpu->cpu_index);
    address_space_init(as, mr, as_name);
    g_free(as_name);

    /* Target code should have set num_ases before calling us */
    assert(asidx < cpu->num_ases);

    if (asidx == 0) {
        /* address space 0 gets the convenience alias */
        cpu->as = as;
    }

    /* KVM cannot currently support multiple address spaces. */
    assert(asidx == 0 || !kvm_enabled());

    if (!cpu->cpu_ases) {
        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
    }

    newas = &cpu->cpu_ases[asidx];
    newas->cpu = cpu;
    newas->as = as;
    if (tcg_enabled()) {
        newas->tcg_as_listener.commit = tcg_commit;
        memory_listener_register(&newas->tcg_as_listener, as);
    }
}

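/* Illustrative sketch (not compiled; helper name is hypothetical): how the
 * default address space gets wired up, much as qemu_init_vcpu() does when
 * the target hasn't set up any address spaces itself.
 */
#if 0
static void cpu_address_space_example(CPUState *cpu)
{
    cpu->num_ases = 1;    /* must be set before the init call */
    cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
}
#endif
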
AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
{
    /* Return the AddressSpace corresponding to the specified index */
    return cpu->cpu_ases[asidx].as;
}
#endif

void cpu_exec_unrealizefn(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    cpu_list_remove(cpu);

    if (cc->vmsd != NULL) {
        vmstate_unregister(NULL, cc->vmsd, cpu);
    }
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
    }
}

Property cpu_common_props[] = {
#ifndef CONFIG_USER_ONLY
    /* Create a memory property for softmmu CPU object,
     * so users can wire up its memory. (This can't go in qom/cpu.c
     * because that file is compiled only once for both user-mode
     * and system builds.) The default if no link is set up is to use
     * the system address space.
     */
    DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION,
                     MemoryRegion *),
#endif
    DEFINE_PROP_END_OF_LIST(),
};

void cpu_exec_initfn(CPUState *cpu)
{
    cpu->as = NULL;
    cpu->num_ases = 0;

#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();
    cpu->memory = system_memory;
    object_ref(OBJECT(cpu->memory));
#endif
}

void cpu_exec_realizefn(CPUState *cpu, Error **errp)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    static bool tcg_target_initialized;

    cpu_list_add(cpu);

    if (tcg_enabled() && !tcg_target_initialized) {
        tcg_target_initialized = true;
        cc->tcg_initialize();
    }

#ifndef CONFIG_USER_ONLY
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
    }
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
    }
#endif
}

const char *parse_cpu_model(const char *cpu_model)
{
    ObjectClass *oc;
    CPUClass *cc;
    gchar **model_pieces;
    const char *cpu_type;

    model_pieces = g_strsplit(cpu_model, ",", 2);

    oc = cpu_class_by_name(CPU_RESOLVING_TYPE, model_pieces[0]);
    if (oc == NULL) {
        error_report("unable to find CPU model '%s'", model_pieces[0]);
        g_strfreev(model_pieces);
        exit(EXIT_FAILURE);
    }

    cpu_type = object_class_get_name(oc);
    cc = CPU_CLASS(oc);
    cc->parse_features(cpu_type, model_pieces[1], &error_fatal);
    g_strfreev(model_pieces);
    return cpu_type;
}

#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    mmap_lock();
    tb_lock();
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
    tb_unlock();
    mmap_unlock();
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    MemTxAttrs attrs;
    hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    if (phys != -1) {
        /* Locks grabbed by tb_invalidate_phys_addr */
        tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
                                phys | (pc & ~TARGET_PAGE_MASK));
    }
}
#endif

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
}

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint. */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    CPUWatchpoint *wp;

    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len = len;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);

    if (watchpoint) {
        *watchpoint = wp;
    }
    return 0;
}

/* Remove a specific watchpoint. */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference. */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints. */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
}

/* Return true if this watchpoint address matches the specified
 * access (i.e. the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}

#endif

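/* Illustrative sketch (not compiled; helper name is hypothetical): setting
 * a 4-byte write watchpoint the way the gdbstub does, then tearing it
 * down.  The guest virtual address 0x1000 is arbitrary.
 */
#if 0
static void watchpoint_example(CPUState *cpu)
{
    CPUWatchpoint *wp;

    if (cpu_watchpoint_insert(cpu, 0x1000, 4,
                              BP_GDB | BP_MEM_WRITE, &wp) == 0) {
        /* ... run the guest; a hit sets BP_WATCHPOINT_HIT in wp->flags ... */
        cpu_watchpoint_remove_by_ref(cpu, wp);
    }
}
#endif
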
/* Add a breakpoint. */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
}

/* Remove a specific breakpoint. */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific breakpoint by reference. */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(cpu);
        }
    }
}

void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_separate()) {
        qemu_log_lock();
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_unlock();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
    replay_finish();
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
    RAMBLOCK_FOREACH(block) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *     rcu_read_lock()
     *     read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *     rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    rcu_read_lock();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    CPU_FOREACH(cpu) {
        tlb_reset_dirty(cpu, start1, length);
    }
    rcu_read_unlock();
}

/* Note: start and end must be within the same ram block. */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    bool dirty = false;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    while (page < end) {
        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);

        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
                                              offset, num);
        page += num;
    }

    rcu_read_unlock();

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return dirty;
}

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (ram_addr_t start, ram_addr_t length, unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
    ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
    ram_addr_t last = QEMU_ALIGN_UP(start + length, align);
    DirtyBitmapSnapshot *snap;
    unsigned long page, end, dest;

    snap = g_malloc0(sizeof(*snap) +
                     ((last - first) >> (TARGET_PAGE_BITS + 3)));
    snap->start = first;
    snap->end = last;

    page = first >> TARGET_PAGE_BITS;
    end = last >> TARGET_PAGE_BITS;
    dest = 0;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    while (page < end) {
        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);

        assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
        assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL)));
        offset >>= BITS_PER_LEVEL;

        bitmap_copy_and_clear_atomic(snap->dirty + dest,
                                     blocks->blocks[idx] + offset,
                                     num);
        page += num;
        dest += num >> BITS_PER_LEVEL;
    }

    rcu_read_unlock();

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return snap;
}

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length)
{
    unsigned long page, end;

    assert(start >= snap->start);
    assert(start + length <= snap->end);

    end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
    page = (start - snap->start) >> TARGET_PAGE_BITS;

    while (page < end) {
        if (test_bit(page, snap->dirty)) {
            return true;
        }
        page++;
    }
    return false;
}

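/* Illustrative sketch (not compiled; helper name and parameters are
 * hypothetical): roughly how display code consumes the snapshot API above,
 * taking an atomic snapshot-and-clear and then querying sub-ranges of it.
 */
#if 0
static void dirty_snapshot_example(ram_addr_t base, ram_addr_t stride,
                                   int rows)
{
    DirtyBitmapSnapshot *snap;
    int i;

    snap = cpu_physical_memory_snapshot_and_clear_dirty(base, stride * rows,
                                                        DIRTY_MEMORY_VGA);
    for (i = 0; i < rows; i++) {
        if (cpu_physical_memory_snapshot_get_dirty(snap, base + i * stride,
                                                   stride)) {
            /* ... redraw row i ... */
        }
    }
    g_free(snap);
}
#endif
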
/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM. */
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        AddressSpaceDispatch *d;

        d = flatview_to_dispatch(section->fv);
        iotlb = section - d->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines. */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section);
static subpage_t *subpage_init(FlatView *fv, hwaddr base);

static void *(*phys_mem_alloc)(size_t size, uint64_t *align, bool shared) =
    qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align, bool shared))
{
    phys_mem_alloc = alloc;
}

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    bool have_sub_page = mr->subpage;

    memory_region_unref(mr);

    if (have_sub_page) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        object_unref(OBJECT(&subpage->iomem));
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

static void register_subpage(FlatView *fv, MemoryRegionSection *section)
{
    AddressSpaceDispatch *d = flatview_to_dispatch(fv);
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d, base);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(fv, base);
        subsection.fv = fv;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}


static void register_multipage(FlatView *fv,
                               MemoryRegionSection *section)
{
    AddressSpaceDispatch *d = flatview_to_dispatch(fv);
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section)
{
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                        - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(fv, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(fv, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(fv, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(fv, &now);
        }
    }
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled()) {
        kvm_flush_coalesced_mmio_buffer();
    }
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

void ram_block_dump(Monitor *mon)
{
    RAMBlock *block;
    char *psize;

    rcu_read_lock();
    monitor_printf(mon, "%24s %8s %18s %18s %18s\n",
                   "Block Name", "PSize", "Offset", "Used", "Total");
    RAMBLOCK_FOREACH(block) {
        psize = size_to_str(block->page_size);
        monitor_printf(mon, "%24s %8s 0x%016" PRIx64 " 0x%016" PRIx64
                       " 0x%016" PRIx64 "\n", block->idstr, psize,
                       (uint64_t)block->offset,
                       (uint64_t)block->used_length,
                       (uint64_t)block->max_length);
        g_free(psize);
    }
    rcu_read_unlock();
}

#ifdef __linux__
/*
 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 * may or may not name the same files / on the same filesystem now as
 * when we actually open and map them.  Iterate over the file
 * descriptors instead, and use qemu_fd_getpagesize().
 */
static int find_max_supported_pagesize(Object *obj, void *opaque)
{
    char *mem_path;
    long *hpsize_min = opaque;

    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
        mem_path = object_property_get_str(obj, "mem-path", NULL);
        if (mem_path) {
            long hpsize = qemu_mempath_getpagesize(mem_path);
            g_free(mem_path);
            if (hpsize < *hpsize_min) {
                *hpsize_min = hpsize;
            }
        } else {
            *hpsize_min = getpagesize();
        }
    }

    return 0;
}

long qemu_getrampagesize(void)
{
    long hpsize = LONG_MAX;
    long mainrampagesize;
    Object *memdev_root;

    if (mem_path) {
        mainrampagesize = qemu_mempath_getpagesize(mem_path);
    } else {
        mainrampagesize = getpagesize();
    }

    /* it's possible we have memory-backend objects with
     * hugepage-backed RAM. these may get mapped into system
     * address space via -numa parameters or memory hotplug
     * hooks. we want to take these into account, but we
     * also want to make sure these supported hugepage
     * sizes are applicable across the entire range of memory
     * we may boot from, so we take the min across all
     * backends, and assume normal pages in cases where a
     * backend isn't backed by hugepages.
     */
    memdev_root = object_resolve_path("/objects", NULL);
    if (memdev_root) {
        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
    }
    if (hpsize == LONG_MAX) {
        /* No additional memory regions found ==> Report main RAM page size */
        return mainrampagesize;
    }

    /* If NUMA is disabled or the NUMA nodes are not backed with a
     * memory-backend, then there is at least one node using "normal" RAM,
     * so if its page size is smaller we have got to report that size instead.
     */
    if (hpsize > mainrampagesize &&
        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
        static bool warned;
        if (!warned) {
            error_report("Huge page support disabled (n/a for main memory).");
            warned = true;
        }
        return mainrampagesize;
    }

    return hpsize;
}
#else
long qemu_getrampagesize(void)
{
    return getpagesize();
}
#endif

#ifdef __linux__
static int64_t get_file_size(int fd)
{
    int64_t size = lseek(fd, 0, SEEK_END);
    if (size < 0) {
        return -errno;
    }
    return size;
}

static int file_ram_open(const char *path,
                         const char *region_name,
                         bool *created,
                         Error **errp)
{
    char *filename;
    char *sanitized_name;
    char *c;
    int fd = -1;

    *created = false;
    for (;;) {
        fd = open(path, O_RDWR);
        if (fd >= 0) {
            /* @path names an existing file, use it */
            break;
        }
        if (errno == ENOENT) {
            /* @path names a file that doesn't exist, create it */
            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
            if (fd >= 0) {
                *created = true;
                break;
            }
        } else if (errno == EISDIR) {
            /* @path names a directory, create a file there */
            /* Make name safe to use with mkstemp by replacing '/' with '_'. */
            sanitized_name = g_strdup(region_name);
            for (c = sanitized_name; *c != '\0'; c++) {
                if (*c == '/') {
                    *c = '_';
                }
            }

            filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                                       sanitized_name);
            g_free(sanitized_name);

            fd = mkstemp(filename);
            if (fd >= 0) {
                unlink(filename);
                g_free(filename);
                break;
            }
            g_free(filename);
        }
        if (errno != EEXIST && errno != EINTR) {
            error_setg_errno(errp, errno,
                             "can't open backing store %s for guest RAM",
                             path);
            return -1;
        }
        /*
         * Try again on EINTR and EEXIST.  The latter happens when
         * something else creates the file between our two open().
         */
    }

    return fd;
}

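/* Illustrative sketch (not compiled; helper name and path are
 * hypothetical): opening a hugetlbfs-backed store the way
 * qemu_ram_alloc_from_file() below does.  Passing a directory exercises
 * the EISDIR branch above, which creates an unlinked temp file in it.
 */
#if 0
static int file_ram_open_example(Error **errp)
{
    bool created;
    int fd = file_ram_open("/dev/hugepages", "pc.ram", &created, errp);

    if (fd < 0) {
        return -1;    /* *errp describes why the open failed */
    }
    return fd;
}
#endif
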
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            int fd,
                            bool truncate,
                            Error **errp)
{
    void *area;

    block->page_size = qemu_fd_getpagesize(fd);
    if (block->mr->align % block->page_size) {
        error_setg(errp, "alignment 0x%" PRIx64
                   " must be a multiple of page size 0x%zx",
                   block->mr->align, block->page_size);
        return NULL;
    }
    block->mr->align = MAX(block->page_size, block->mr->align);
#if defined(__s390x__)
    if (kvm_enabled()) {
        block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
    }
#endif

    if (memory < block->page_size) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than page size 0x%zx",
                   memory, block->page_size);
        return NULL;
    }

    memory = ROUND_UP(memory, block->page_size);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     *
     * Do not truncate the non-empty backend file to avoid corrupting
     * the existing data in the file.  Disabling shrinking is not
     * enough.  For example, the current vNVDIMM implementation stores
     * the guest NVDIMM labels at the end of the backend file.  If the
     * backend file is later extended, QEMU will not be able to find
     * those labels.  Therefore, extending the non-empty backend file
     * is disabled as well.
     */
    if (truncate && ftruncate(fd, memory)) {
        perror("ftruncate");
    }

    area = qemu_ram_mmap(fd, memory, block->mr->align,
                         block->flags & RAM_SHARED);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for guest RAM");
        return NULL;
    }

    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory, smp_cpus, errp);
        if (errp && *errp) {
            qemu_ram_munmap(area, memory);
            return NULL;
        }
    }

    block->fd = fd;
    return area;
}
#endif

/* Allocate space within the ram_addr_t space that governs the
 * dirty bitmaps.
 * Called with the ramlist lock held.
 */
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
        return 0;
    }

    RAMBLOCK_FOREACH(block) {
        ram_addr_t candidate, next = RAM_ADDR_MAX;

        /* Align blocks to start on a 'long' in the bitmap
         * which makes the bitmap sync'ing take the fast path.
         */
        candidate = block->offset + block->max_length;
        candidate = ROUND_UP(candidate, BITS_PER_LONG << TARGET_PAGE_BITS);

        /* Search for the closest following block
         * and find the gap.
         */
        RAMBLOCK_FOREACH(next_block) {
            if (next_block->offset >= candidate) {
                next = MIN(next, next_block->offset);
            }
        }

        /* If it fits, remember our place and remember the size
         * of the gap, but keep going so that we might find a smaller
         * gap to fill, thus avoiding fragmentation.
         */
        if (next - candidate >= size && next - candidate < mingap) {
            offset = candidate;
            mingap = next - candidate;
        }

        trace_find_ram_offset_loop(size, candidate, offset, next, mingap);
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    trace_find_ram_offset(size, offset);

    return offset;
}

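/* A worked example of the best-fit scan in find_ram_offset() above
 * (illustrative): with blocks at [0x0, 0x8000000) and
 * [0x10000000, 0x18000000), a request for size 0x4000000 sees two
 * candidate gaps, 0x8000000..0x10000000 and the unbounded space after
 * 0x18000000.  Both fit, but the bounded 0x8000000-byte gap is smaller,
 * so offset 0x8000000 wins, keeping the ram_addr_t space compact.
 */
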
unsigned long last_ram_page(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    rcu_read_lock();
    RAMBLOCK_FOREACH(block) {
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
    return last >> TARGET_PAGE_BITS;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!machine_dump_guest_core(current_machine)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                    "but dump_guest_core=off specified\n");
        }
    }
}

const char *qemu_ram_get_idstr(RAMBlock *rb)
{
    return rb->idstr;
}

bool qemu_ram_is_shared(RAMBlock *rb)
{
    return rb->flags & RAM_SHARED;
}

/* Note: Only set at the start of postcopy */
bool qemu_ram_is_uf_zeroable(RAMBlock *rb)
{
    return rb->flags & RAM_UF_ZEROPAGE;
}

void qemu_ram_set_uf_zeroable(RAMBlock *rb)
{
    rb->flags |= RAM_UF_ZEROPAGE;
}

/* Called with iothread lock held. */
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
{
    RAMBlock *block;

    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    rcu_read_lock();
    RAMBLOCK_FOREACH(block) {
        if (block != new_block &&
            !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    rcu_read_unlock();
}

/* Called with iothread lock held. */
void qemu_ram_unset_idstr(RAMBlock *block)
{
    /* FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */
    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
}

size_t qemu_ram_pagesize(RAMBlock *rb)
{
    return rb->page_size;
}

/* Returns the largest size of page in use */
size_t qemu_ram_pagesize_largest(void)
{
    RAMBlock *block;
    size_t largest = 0;

    RAMBLOCK_FOREACH(block) {
        largest = MAX(largest, qemu_ram_pagesize(block));
    }

    return largest;
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!machine_mem_merge(current_machine)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

/* Only legal before the guest might have detected the memory size: e.g. on
 * incoming migration, or right after reset.
 *
 * As the memory core doesn't know how memory is accessed, it is up to the
 * resize callback to update device state and/or add assertions to detect
 * misuse, if necessary.
 */
int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
{
    assert(block);

    newsize = HOST_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        return 0;
    }

    if (!(block->flags & RAM_RESIZEABLE)) {
        error_setg_errno(errp, EINVAL,
                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
                         " in != 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->used_length);
        return -EINVAL;
    }

    if (block->max_length < newsize) {
        error_setg_errno(errp, EINVAL,
                         "Length too large: %s: 0x" RAM_ADDR_FMT
                         " > 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->max_length);
        return -EINVAL;
    }

    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
    block->used_length = newsize;
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                        DIRTY_CLIENTS_ALL);
    memory_region_set_size(block->mr, newsize);
    if (block->resized) {
        block->resized(block->idstr, newsize, block->host);
    }
    return 0;
}

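/* Illustrative sketch (not compiled; helper name and sizes are
 * hypothetical): a resizeable block is allocated with a max_length
 * ceiling via qemu_ram_alloc_resizeable(), defined later in this file,
 * and then grown with qemu_ram_resize() above.
 */
#if 0
static RAMBlock *resizeable_ram_example(MemoryRegion *mr)
{
    RAMBlock *rb = qemu_ram_alloc_resizeable(0x10000, 0x40000, NULL,
                                             mr, &error_fatal);

    /* ... later, before the guest has detected the memory size ... */
    qemu_ram_resize(rb, 0x20000, &error_fatal);
    return rb;
}
#endif
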
/* Called with ram_list.mutex held */
static void dirty_memory_extend(ram_addr_t old_ram_size,
                                ram_addr_t new_ram_size)
{
    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    int i;

    /* Only need to extend if block count increased */
    if (new_num_blocks <= old_num_blocks) {
        return;
    }

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        DirtyMemoryBlocks *old_blocks;
        DirtyMemoryBlocks *new_blocks;
        int j;

        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
        new_blocks = g_malloc(sizeof(*new_blocks) +
                              sizeof(new_blocks->blocks[0]) * new_num_blocks);

        if (old_num_blocks) {
            memcpy(new_blocks->blocks, old_blocks->blocks,
                   old_num_blocks * sizeof(old_blocks->blocks[0]));
        }

        for (j = old_num_blocks; j < new_num_blocks; j++) {
            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
        }

        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);

        if (old_blocks) {
            g_free_rcu(old_blocks, rcu);
        }
    }
}

static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
{
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    ram_addr_t old_ram_size, new_ram_size;
    Error *err = NULL;

    old_ram_size = last_ram_page();

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr, &err);
            if (err) {
                error_propagate(errp, err);
                qemu_mutex_unlock_ramlist();
                return;
            }
        } else {
            new_block->host = phys_mem_alloc(new_block->max_length,
                                             &new_block->mr->align, shared);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return;
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    new_ram_size = MAX(old_ram_size,
                       (new_block->offset + new_block->max_length) >>
                       TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        dirty_memory_extend(old_ram_size, new_ram_size);
    }
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    RAMBLOCK_FOREACH(block) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);

    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
        ram_block_notify_add(new_block->host, new_block->max_length);
    }
}

#ifdef __linux__
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 bool share, int fd,
                                 Error **errp)
{
    RAMBlock *new_block;
    Error *local_err = NULL;
    int64_t file_size;

    if (xen_enabled()) {
        error_setg(errp, "-mem-path not supported with Xen");
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        return NULL;
    }

    if (phys_mem_alloc != qemu_anon_ram_alloc) {
        /*
         * file_ram_alloc() needs to allocate just like
         * phys_mem_alloc, but we haven't bothered to provide
         * a hook there.
         */
        error_setg(errp,
                   "-mem-path not supported with this accelerator");
        return NULL;
    }

    size = HOST_PAGE_ALIGN(size);
    file_size = get_file_size(fd);
    if (file_size > 0 && file_size < size) {
        error_setg(errp, "backing store %s size 0x%" PRIx64
                   " does not match 'size' option 0x" RAM_ADDR_FMT,
                   mem_path, file_size, size);
        return NULL;
    }

    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->used_length = size;
    new_block->max_length = size;
    new_block->flags = share ? RAM_SHARED : 0;
                       RAM_SHARED : 0;
    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp);
    if (!new_block->host) {
        g_free(new_block);
        return NULL;
    }

    ram_block_add(new_block, &local_err, share);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return NULL;
    }
    return new_block;
}


RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   bool share, const char *mem_path,
                                   Error **errp)
{
    int fd;
    bool created;
    RAMBlock *block;

    fd = file_ram_open(mem_path, memory_region_name(mr), &created, errp);
    if (fd < 0) {
        return NULL;
    }

    block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp);
    if (!block) {
        if (created) {
            unlink(mem_path);
        }
        close(fd);
        return NULL;
    }

    return block;
}
#endif

static
RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                  void (*resized)(const char*,
                                                  uint64_t length,
                                                  void *host),
                                  void *host, bool resizeable, bool share,
                                  MemoryRegion *mr, Error **errp)
{
    RAMBlock *new_block;
    Error *local_err = NULL;

    size = HOST_PAGE_ALIGN(size);
    max_size = HOST_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->resized = resized;
    new_block->used_length = size;
    new_block->max_length = max_size;
    assert(max_size >= size);
    new_block->fd = -1;
    new_block->page_size = getpagesize();
    new_block->host = host;
    if (host) {
        new_block->flags |= RAM_PREALLOC;
    }
    if (resizeable) {
        new_block->flags |= RAM_RESIZEABLE;
    }
    ram_block_add(new_block, &local_err, share);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return NULL;
    }
    return new_block;
}

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false,
                                   false, mr, errp);
}

RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share,
                         MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false,
                                   share, mr, errp);
}

RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true,
                                   false, mr, errp);
}

static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        qemu_ram_munmap(block->host, block->max_length);
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}

void qemu_ram_free(RAMBlock *block)
{
    if (!block) {
        return;
    }

    if (block->host) {
        ram_block_notify_remove(block->host, block->max_length);
    }

    qemu_mutex_lock_ramlist();
    QLIST_REMOVE_RCU(block, next);
    ram_list.mru_block = NULL;
    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    call_rcu(block, reclaim_ramblock, rcu);
    qemu_mutex_unlock_ramlist();
}

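/*
 * (Added sketch, not from the original source: the allocation helpers above
 * pair with qemu_ram_free() roughly like this, assuming the caller owns a
 * MemoryRegion "mr"; error handling elided:
 *
 *     Error *err = NULL;
 *     RAMBlock *rb = qemu_ram_alloc(64 * 1024, false, mr, &err);
 *     ...
 *     qemu_ram_free(rb);
 *
 * Note that qemu_ram_free() only unlinks the block from ram_list; the
 * backing memory is reclaimed by reclaim_ramblock() via call_rcu() once
 * all RCU readers have finished.)
 */
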
#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    RAMBLOCK_FOREACH(block) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                if (block->fd >= 0) {
                    flags |= (block->flags & RAM_SHARED ?
                              MAP_SHARED : MAP_PRIVATE);
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    error_report("Could not remap addr: "
                                 RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
                                 length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
        }
    }
}
#endif /* !_WIN32 */

/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * Called within RCU critical section.
 */
void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
{
    RAMBlock *block = ram_block;

    if (block == NULL) {
        block = qemu_get_ram_block(addr);
        addr -= block->offset;
    }

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0, false);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1, false);
    }
    return ramblock_ptr(block, addr);
}

/* Return a host pointer to guest's ram.  Similar to qemu_map_ram_ptr
 * but takes a size argument.
 *
 * Called within RCU critical section.
 */
static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
                                 hwaddr *size, bool lock)
{
    RAMBlock *block = ram_block;
    if (*size == 0) {
        return NULL;
    }

    if (block == NULL) {
        block = qemu_get_ram_block(addr);
        addr -= block->offset;
    }
    *size = MIN(*size, block->max_length - addr);

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map the requested area.
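         *
         * (Added note: in the common non-Xen case this function simply
         * clamps *size to what remains in the block and returns
         * host + addr; flatview_read_continue() and
         * flatview_write_continue() below depend on that clamping to
         * bound their memcpy to and from guest RAM.)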
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, *size, lock, lock);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1, lock);
    }

    return ramblock_ptr(block, addr);
}

/* Return the offset of a host pointer within a ramblock */
ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host)
{
    ram_addr_t res = (uint8_t *)host - (uint8_t *)rb->host;
    assert((uintptr_t)host >= (uintptr_t)rb->host);
    assert(res < rb->max_length);

    return res;
}

/*
 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
 * in that RAMBlock.
 *
 * ptr: Host pointer to look up
 * round_offset: If true round the result offset down to a page boundary
 * *ram_addr: set to result ram_addr
 * *offset: set to result offset within the RAMBlock
 *
 * Returns: RAMBlock (or NULL if not found)
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                   ram_addr_t *offset)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        ram_addr_t ram_addr;
        rcu_read_lock();
        ram_addr = xen_ram_addr_from_mapcache(ptr);
        block = qemu_get_ram_block(ram_addr);
        if (block) {
            *offset = ram_addr - block->offset;
        }
        rcu_read_unlock();
        return block;
    }

    rcu_read_lock();
    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && block->host && host - block->host < block->max_length) {
        goto found;
    }

    RAMBLOCK_FOREACH(block) {
        /* This case appears when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->max_length) {
            goto found;
        }
    }

    rcu_read_unlock();
    return NULL;

found:
    *offset = (host - block->host);
    if (round_offset) {
        *offset &= TARGET_PAGE_MASK;
    }
    rcu_read_unlock();
    return block;
}

/*
 * Finds the named RAMBlock
 *
 * name: The name of RAMBlock to find
 *
 * Returns: RAMBlock (or NULL if not found)
 */
RAMBlock *qemu_ram_block_by_name(const char *name)
{
    RAMBlock *block;

    RAMBLOCK_FOREACH(block) {
        if (!strcmp(name, block->idstr)) {
            return block;
        }
    }

    return NULL;
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset. */
ram_addr_t qemu_ram_addr_from_host(void *ptr)
{
    RAMBlock *block;
    ram_addr_t offset;

    block = qemu_ram_block_from_host(ptr, false, &offset);
    if (!block) {
        return RAM_ADDR_INVALID;
    }

    return block->offset + offset;
}

/* Called within RCU critical section.
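 *
 * (Added sketch: memory_notdirty_write_prepare() and
 * memory_notdirty_write_complete() bracket the actual guest store, as
 * notdirty_mem_write() below does.  A minimal caller looks like:
 *
 *     NotDirtyInfo ndi;
 *     memory_notdirty_write_prepare(&ndi, cpu, vaddr, ram_addr, 4);
 *     stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
 *     memory_notdirty_write_complete(&ndi);
 *
 * where "cpu", "vaddr", "ram_addr" and "val" are placeholders.)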
 */
void memory_notdirty_write_prepare(NotDirtyInfo *ndi,
                                   CPUState *cpu,
                                   vaddr mem_vaddr,
                                   ram_addr_t ram_addr,
                                   unsigned size)
{
    ndi->cpu = cpu;
    ndi->ram_addr = ram_addr;
    ndi->mem_vaddr = mem_vaddr;
    ndi->size = size;
    ndi->locked = false;

    assert(tcg_enabled());
    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        ndi->locked = true;
        tb_lock();
        tb_invalidate_phys_page_fast(ram_addr, size);
    }
}

/* Called within RCU critical section. */
void memory_notdirty_write_complete(NotDirtyInfo *ndi)
{
    if (ndi->locked) {
        tb_unlock();
    }

    /* Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ndi->ram_addr, ndi->size,
                                        DIRTY_CLIENTS_NOCODE);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ndi->ram_addr)) {
        tlb_set_dirty(ndi->cpu, ndi->mem_vaddr);
    }
}

/* Called within RCU critical section. */
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    NotDirtyInfo ndi;

    memory_notdirty_write_prepare(&ndi, current_cpu, current_cpu->mem_io_vaddr,
                                  ram_addr, size);

    switch (size) {
    case 1:
        stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
        break;
    case 2:
        stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
        break;
    case 4:
        stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
        break;
    case 8:
        stq_p(qemu_map_ram_ptr(NULL, ram_addr), val);
        break;
    default:
        abort();
    }
    memory_notdirty_write_complete(&ndi);
}

static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
        .unaligned = false,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
        .unaligned = false,
    },
};

/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
{
    CPUState *cpu = current_cpu;
    CPUClass *cc = CPU_GET_CLASS(cpu);
    target_ulong vaddr;
    CPUWatchpoint *wp;

    assert(tcg_enabled());
    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB.  Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction.
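         *
         * (Added note: cpu->watchpoint_hit is still set from the first
         * pass, so on re-entry we only post CPU_INTERRUPT_DEBUG and
         * return without walking the watchpoint list again.)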
         */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    vaddr = cc->adjust_watchpoint_address(cpu, vaddr, len);
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            wp->hitattrs = attrs;
            if (!cpu->watchpoint_hit) {
                if (wp->flags & BP_CPU &&
                    !cc->debug_check_watchpoint(cpu, wp)) {
                    wp->flags &= ~BP_WATCHPOINT_HIT;
                    continue;
                }
                cpu->watchpoint_hit = wp;

                /* Both tb_lock and iothread_mutex will be reset when
                 * cpu_loop_exit or cpu_loop_exit_noexc longjmp
                 * back into the cpu_exec main loop.
                 */
                tb_lock();
                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    /* Force execution of one insn next time.  */
                    cpu->cflags_next_tb = 1 | curr_cflags();
                    cpu_loop_exit_noexc(cpu);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}

/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
                                  unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    uint64_t data;
    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
    AddressSpace *as = current_cpu->cpu_ases[asidx].as;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
    switch (size) {
    case 1:
        data = address_space_ldub(as, addr, attrs, &res);
        break;
    case 2:
        data = address_space_lduw(as, addr, attrs, &res);
        break;
    case 4:
        data = address_space_ldl(as, addr, attrs, &res);
        break;
    case 8:
        data = address_space_ldq(as, addr, attrs, &res);
        break;
    default: abort();
    }
    *pdata = data;
    return res;
}

static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
                                   uint64_t val, unsigned size,
                                   MemTxAttrs attrs)
{
    MemTxResult res;
    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
    AddressSpace *as = current_cpu->cpu_ases[asidx].as;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
    switch (size) {
    case 1:
        address_space_stb(as, addr, val, attrs, &res);
        break;
    case 2:
        address_space_stw(as, addr, val, attrs, &res);
        break;
    case 4:
        address_space_stl(as, addr, val, attrs, &res);
        break;
    case 8:
        address_space_stq(as, addr, val, attrs, &res);
        break;
    default: abort();
    }
    return res;
}

static const MemoryRegionOps watch_mem_ops = {
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
        .unaligned = false,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
        .unaligned = false,
    },
};

static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
                                 MemTxAttrs attrs, uint8_t *buf, int len);
static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
                                  const uint8_t *buf, int len);
static bool flatview_access_valid(FlatView *fv, hwaddr addr, int len,
                                  bool is_write);

static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
                                unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];
    MemTxResult res;

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    res = flatview_read(subpage->fv, addr + subpage->base, attrs, buf, len);
    if (res) {
        return res;
    }
    switch (len) {
    case 1:
        *data = ldub_p(buf);
        return MEMTX_OK;
    case 2:
        *data = lduw_p(buf);
        return MEMTX_OK;
    case 4:
        *data = ldl_p(buf);
        return MEMTX_OK;
    case 8:
        *data = ldq_p(buf);
        return MEMTX_OK;
    default:
        abort();
    }
}

static MemTxResult subpage_write(void *opaque, hwaddr addr,
                                 uint64_t value, unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
    case 8:
        stq_p(buf, value);
        break;
    default:
        abort();
    }
    return flatview_write(subpage->fv, addr + subpage->base, attrs, buf, len);
}

static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned len, bool is_write)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
           __func__, subpage, is_write ?
           'w' : 'r', len, addr);
#endif

    return flatview_access_valid(subpage->fv, addr + subpage->base,
                                 len, is_write);
}

static const MemoryRegionOps subpage_ops = {
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) {
        return -1;
    }
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(FlatView *fv, hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
    mmio->fv = fv;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          NULL, TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE - 1, PHYS_SECTION_UNASSIGNED);

    return mmio;
}

static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr)
{
    assert(fv);
    MemoryRegionSection section = {
        .fv = fv,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(map, &section);
}

static void readonly_mem_write(void *opaque, hwaddr addr,
                               uint64_t val, unsigned size)
{
    /* Ignore any write to ROM. */
}

static bool readonly_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

/* This will only be used for writes, because reads are special cased
 * to directly access the underlying host ram.
 */
static const MemoryRegionOps readonly_mem_ops = {
    .write = readonly_mem_write,
    .valid.accepts = readonly_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
        .unaligned = false,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
        .unaligned = false,
    },
};

MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
{
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
    MemoryRegionSection *sections = d->map.sections;

    return sections[index & ~TARGET_PAGE_MASK].mr;
}

static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &readonly_mem_ops,
                          NULL, NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          NULL, UINT64_MAX);

    /* io_mem_notdirty calls tb_invalidate_phys_page_fast,
     * which can be called without the iothread mutex.
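     *
     * (Added note: memory_region_clear_global_locking() below is what
     * declares that; prepare_mmio_access() further down checks
     * mr->global_locking and only takes the iothread mutex for regions
     * that still require it.)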
2866 */ 2867 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL, 2868 NULL, UINT64_MAX); 2869 memory_region_clear_global_locking(&io_mem_notdirty); 2870 2871 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL, 2872 NULL, UINT64_MAX); 2873} 2874 2875AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) 2876{ 2877 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1); 2878 uint16_t n; 2879 2880 n = dummy_section(&d->map, fv, &io_mem_unassigned); 2881 assert(n == PHYS_SECTION_UNASSIGNED); 2882 n = dummy_section(&d->map, fv, &io_mem_notdirty); 2883 assert(n == PHYS_SECTION_NOTDIRTY); 2884 n = dummy_section(&d->map, fv, &io_mem_rom); 2885 assert(n == PHYS_SECTION_ROM); 2886 n = dummy_section(&d->map, fv, &io_mem_watch); 2887 assert(n == PHYS_SECTION_WATCH); 2888 2889 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; 2890 2891 return d; 2892} 2893 2894void address_space_dispatch_free(AddressSpaceDispatch *d) 2895{ 2896 phys_sections_free(&d->map); 2897 g_free(d); 2898} 2899 2900static void tcg_commit(MemoryListener *listener) 2901{ 2902 CPUAddressSpace *cpuas; 2903 AddressSpaceDispatch *d; 2904 2905 /* since each CPU stores ram addresses in its TLB cache, we must 2906 reset the modified entries */ 2907 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener); 2908 cpu_reloading_memory_map(); 2909 /* The CPU and TLB are protected by the iothread lock. 2910 * We reload the dispatch pointer now because cpu_reloading_memory_map() 2911 * may have split the RCU critical section. 2912 */ 2913 d = address_space_to_dispatch(cpuas->as); 2914 atomic_rcu_set(&cpuas->memory_dispatch, d); 2915 tlb_flush(cpuas->cpu); 2916} 2917 2918static void memory_map_init(void) 2919{ 2920 system_memory = g_malloc(sizeof(*system_memory)); 2921 2922 memory_region_init(system_memory, NULL, "system", UINT64_MAX); 2923 address_space_init(&address_space_memory, system_memory, "memory"); 2924 2925 system_io = g_malloc(sizeof(*system_io)); 2926 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io", 2927 65536); 2928 address_space_init(&address_space_io, system_io, "I/O"); 2929} 2930 2931MemoryRegion *get_system_memory(void) 2932{ 2933 return system_memory; 2934} 2935 2936MemoryRegion *get_system_io(void) 2937{ 2938 return system_io; 2939} 2940 2941#endif /* !defined(CONFIG_USER_ONLY) */ 2942 2943/* physical memory access (slow version, mainly for debug) */ 2944#if defined(CONFIG_USER_ONLY) 2945int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, 2946 uint8_t *buf, int len, int is_write) 2947{ 2948 int l, flags; 2949 target_ulong page; 2950 void * p; 2951 2952 while (len > 0) { 2953 page = addr & TARGET_PAGE_MASK; 2954 l = (page + TARGET_PAGE_SIZE) - addr; 2955 if (l > len) 2956 l = len; 2957 flags = page_get_flags(page); 2958 if (!(flags & PAGE_VALID)) 2959 return -1; 2960 if (is_write) { 2961 if (!(flags & PAGE_WRITE)) 2962 return -1; 2963 /* XXX: this code should not depend on lock_user */ 2964 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0))) 2965 return -1; 2966 memcpy(p, buf, l); 2967 unlock_user(p, addr, l); 2968 } else { 2969 if (!(flags & PAGE_READ)) 2970 return -1; 2971 /* XXX: this code should not depend on lock_user */ 2972 if (!(p = lock_user(VERIFY_READ, addr, l, 1))) 2973 return -1; 2974 memcpy(buf, p, l); 2975 unlock_user(p, addr, 0); 2976 } 2977 len -= l; 2978 buf += l; 2979 addr += l; 2980 } 2981 return 0; 2982} 2983 2984#else 2985 2986static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, 2987 hwaddr length) 
{
    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
    addr += memory_region_get_ram_addr(mr);

    /* No early return if dirty_log_mask is or becomes 0, because
     * cpu_physical_memory_set_dirty_range will still call
     * xen_modified_memory.
     */
    if (dirty_log_mask) {
        dirty_log_mask =
            cpu_physical_memory_range_includes_clean(addr, length,
                                                     dirty_log_mask);
    }
    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
        assert(tcg_enabled());
        tb_lock();
        tb_invalidate_phys_range(addr, addr + length);
        tb_unlock();
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
    }
    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
}

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    l = pow2floor(l);

    return l;
}

static bool prepare_mmio_access(MemoryRegion *mr)
{
    bool unlocked = !qemu_mutex_iothread_locked();
    bool release_lock = false;

    if (unlocked && mr->global_locking) {
        qemu_mutex_lock_iothread();
        unlocked = false;
        release_lock = true;
    }
    if (mr->flush_coalesced_mmio) {
        if (unlocked) {
            qemu_mutex_lock_iothread();
        }
        qemu_flush_coalesced_mmio_buffer();
        if (unlocked) {
            qemu_mutex_unlock_iothread();
        }
    }

    return release_lock;
}

/* Called within RCU critical section.
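 *
 * (Added sketch: guest-physical writes funnel into the loop below.  A
 * hedged example of a caller, with "gpa" as a placeholder address:
 *
 *     uint8_t data[4] = { 0 };
 *     address_space_write(&address_space_memory, gpa,
 *                         MEMTXATTRS_UNSPECIFIED, data, sizeof(data));
 *
 * address_space_write() takes the RCU read lock, translates once via
 * flatview_translate(), then lets flatview_write_continue() either
 * dispatch to MMIO or memcpy into RAM, chunk by chunk.)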
 */
static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
                                           MemTxAttrs attrs,
                                           const uint8_t *buf,
                                           int len, hwaddr addr1,
                                           hwaddr l, MemoryRegion *mr)
{
    uint8_t *ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    for (;;) {
        if (!memory_access_is_direct(mr, true)) {
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            /* XXX: could force current_cpu to NULL to avoid
               potential bugs */
            switch (l) {
            case 8:
                /* 64 bit write access */
                val = ldq_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 8,
                                                       attrs);
                break;
            case 4:
                /* 32 bit write access */
                val = (uint32_t)ldl_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 4,
                                                       attrs);
                break;
            case 2:
                /* 16 bit write access */
                val = lduw_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 2,
                                                       attrs);
                break;
            case 1:
                /* 8 bit write access */
                val = ldub_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 1,
                                                       attrs);
                break;
            default:
                abort();
            }
        } else {
            /* RAM case */
            ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(mr, addr1, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = flatview_translate(fv, addr, &addr1, &l, true);
    }

    return result;
}

/* Called from RCU critical section.  */
static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
                                  const uint8_t *buf, int len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;

    l = len;
    mr = flatview_translate(fv, addr, &addr1, &l, true);
    result = flatview_write_continue(fv, addr, attrs, buf, len,
                                     addr1, l, mr);

    return result;
}

/* Called within RCU critical section.
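 *
 * (Added note: the read side below mirrors flatview_write_continue() and
 * is reached via address_space_read_full()/address_space_rw().  For a
 * tree that logs DMA, these two *_continue loops are the choke point
 * that every softmmu read and write passes through.)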
 */
MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
                                   MemTxAttrs attrs, uint8_t *buf,
                                   int len, hwaddr addr1, hwaddr l,
                                   MemoryRegion *mr)
{
    uint8_t *ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    for (;;) {
        if (!memory_access_is_direct(mr, false)) {
            /* I/O case */
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            switch (l) {
            case 8:
                /* 64 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
                                                      attrs);
                stq_p(buf, val);
                break;
            case 4:
                /* 32 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
                                                      attrs);
                stl_p(buf, val);
                break;
            case 2:
                /* 16 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
                                                      attrs);
                stw_p(buf, val);
                break;
            case 1:
                /* 8 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
                                                      attrs);
                stb_p(buf, val);
                break;
            default:
                abort();
            }
        } else {
            /* RAM case */
            ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
            memcpy(buf, ptr, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = flatview_translate(fv, addr, &addr1, &l, false);
    }

    return result;
}

/* Called from RCU critical section.  */
static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
                                 MemTxAttrs attrs, uint8_t *buf, int len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;

    l = len;
    mr = flatview_translate(fv, addr, &addr1, &l, false);
    return flatview_read_continue(fv, addr, attrs, buf, len,
                                  addr1, l, mr);
}

MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
                                    MemTxAttrs attrs, uint8_t *buf, int len)
{
    MemTxResult result = MEMTX_OK;
    FlatView *fv;

    if (len > 0) {
        rcu_read_lock();
        fv = address_space_to_flatview(as);
        result = flatview_read(fv, addr, attrs, buf, len);
        rcu_read_unlock();
    }

    return result;
}

MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
                                MemTxAttrs attrs,
                                const uint8_t *buf, int len)
{
    MemTxResult result = MEMTX_OK;
    FlatView *fv;

    if (len > 0) {
        rcu_read_lock();
        fv = address_space_to_flatview(as);
        result = flatview_write(fv, addr, attrs, buf, len);
        rcu_read_unlock();
    }

    return result;
}

MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             uint8_t *buf, int len, bool is_write)
{
    if (is_write) {
        return address_space_write(as, addr, attrs, buf, len);
    } else {
        return address_space_read_full(as, addr, attrs, buf, len);
    }
}

void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}

enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion
        *mr;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            l = memory_access_size(mr, l, addr1);
        } else {
            /* ROM/RAM case */
            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(mr, addr1, l);
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
    rcu_read_unlock();
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
    bool in_use;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    QEMUBH *bh;
    QLIST_ENTRY(MapClient) link;
} MapClient;

QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
    }
}

void cpu_register_map_client(QEMUBH *bh)
{
    MapClient *client = g_malloc(sizeof(*client));

    qemu_mutex_lock(&map_client_list_lock);
    client->bh = bh;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    /* The data structures we set up here depend on knowing the page size,
     * so no more changes can be made after this point.
     * In an ideal world, nothing we did before we had finished the
     * machine setup would care about the target page size, and we could
     * do this much later, rather than requiring board models to state
     * up front what their requirements are.
     */
    finalize_target_page_bits();
    io_mem_init();
    memory_map_init();
    qemu_mutex_init(&map_client_list_lock);
}

void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}

static bool flatview_access_valid(FlatView *fv, hwaddr addr, int len,
                                  bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = flatview_translate(fv, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr,
                                int len, bool is_write)
{
    FlatView *fv;
    bool result;

    rcu_read_lock();
    fv = address_space_to_flatview(as);
    result = flatview_access_valid(fv, addr, len, is_write);
    rcu_read_unlock();
    return result;
}

static hwaddr
flatview_extend_translation(FlatView *fv, hwaddr addr,
                            hwaddr target_len,
                            MemoryRegion *mr, hwaddr base, hwaddr len,
                            bool is_write)
{
    hwaddr done = 0;
    hwaddr xlat;
    MemoryRegion *this_mr;

    for (;;) {
        target_len -= len;
        addr += len;
        done += len;
        if (target_len == 0) {
            return done;
        }

        len = target_len;
        this_mr = flatview_translate(fv, addr, &xlat,
                                     &len, is_write);
        if (this_mr != mr || xlat != base + done) {
            return done;
        }
    }
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
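 *
 * (Added sketch of a map/copy/unmap round trip; "gpa" and "src" are
 * placeholders and error handling is elided:
 *
 *     hwaddr maplen = size;
 *     void *p = address_space_map(as, gpa, &maplen, true);
 *     if (p) {
 *         memcpy(p, src, maplen);
 *         address_space_unmap(as, p, maplen, true, maplen);
 *     }
 *
 * *plen can come back smaller than requested, and the is_write flavor of
 * address_space_unmap() is what marks the range dirty.)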
3502 */ 3503void *address_space_map(AddressSpace *as, 3504 hwaddr addr, 3505 hwaddr *plen, 3506 bool is_write) 3507{ 3508 hwaddr len = *plen; 3509 hwaddr l, xlat; 3510 MemoryRegion *mr; 3511 void *ptr; 3512 FlatView *fv; 3513 3514 if (len == 0) { 3515 return NULL; 3516 } 3517 3518 l = len; 3519 rcu_read_lock(); 3520 fv = address_space_to_flatview(as); 3521 mr = flatview_translate(fv, addr, &xlat, &l, is_write); 3522 3523 if (!memory_access_is_direct(mr, is_write)) { 3524 if (atomic_xchg(&bounce.in_use, true)) { 3525 rcu_read_unlock(); 3526 return NULL; 3527 } 3528 /* Avoid unbounded allocations */ 3529 l = MIN(l, TARGET_PAGE_SIZE); 3530 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l); 3531 bounce.addr = addr; 3532 bounce.len = l; 3533 3534 memory_region_ref(mr); 3535 bounce.mr = mr; 3536 if (!is_write) { 3537 flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED, 3538 bounce.buffer, l); 3539 } 3540 3541 rcu_read_unlock(); 3542 *plen = l; 3543 return bounce.buffer; 3544 } 3545 3546 3547 memory_region_ref(mr); 3548 *plen = flatview_extend_translation(fv, addr, len, mr, xlat, 3549 l, is_write); 3550 ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen, true); 3551 rcu_read_unlock(); 3552 3553 return ptr; 3554} 3555 3556/* Unmaps a memory region previously mapped by address_space_map(). 3557 * Will also mark the memory as dirty if is_write == 1. access_len gives 3558 * the amount of memory that was actually read or written by the caller. 3559 */ 3560void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, 3561 int is_write, hwaddr access_len) 3562{ 3563 if (buffer != bounce.buffer) { 3564 MemoryRegion *mr; 3565 ram_addr_t addr1; 3566 3567 mr = memory_region_from_host(buffer, &addr1); 3568 assert(mr != NULL); 3569 if (is_write) { 3570 invalidate_and_set_dirty(mr, addr1, access_len); 3571 } 3572 if (xen_enabled()) { 3573 xen_invalidate_map_cache_entry(buffer); 3574 } 3575 memory_region_unref(mr); 3576 return; 3577 } 3578 if (is_write) { 3579 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED, 3580 bounce.buffer, access_len); 3581 } 3582 qemu_vfree(bounce.buffer); 3583 bounce.buffer = NULL; 3584 memory_region_unref(bounce.mr); 3585 atomic_mb_set(&bounce.in_use, false); 3586 cpu_notify_map_clients(); 3587} 3588 3589void *cpu_physical_memory_map(hwaddr addr, 3590 hwaddr *plen, 3591 int is_write) 3592{ 3593 return address_space_map(&address_space_memory, addr, plen, is_write); 3594} 3595 3596void cpu_physical_memory_unmap(void *buffer, hwaddr len, 3597 int is_write, hwaddr access_len) 3598{ 3599 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len); 3600} 3601 3602#define ARG1_DECL AddressSpace *as 3603#define ARG1 as 3604#define SUFFIX 3605#define TRANSLATE(...) address_space_translate(as, __VA_ARGS__) 3606#define IS_DIRECT(mr, is_write) memory_access_is_direct(mr, is_write) 3607#define MAP_RAM(mr, ofs) qemu_map_ram_ptr((mr)->ram_block, ofs) 3608#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len) 3609#define RCU_READ_LOCK(...) rcu_read_lock() 3610#define RCU_READ_UNLOCK(...) 
    rcu_read_unlock()
#include "memory_ldst.inc.c"

int64_t address_space_cache_init(MemoryRegionCache *cache,
                                 AddressSpace *as,
                                 hwaddr addr,
                                 hwaddr len,
                                 bool is_write)
{
    cache->len = len;
    cache->as = as;
    cache->xlat = addr;
    return len;
}

void address_space_cache_invalidate(MemoryRegionCache *cache,
                                    hwaddr addr,
                                    hwaddr access_len)
{
}

void address_space_cache_destroy(MemoryRegionCache *cache)
{
    cache->as = NULL;
}

#define ARG1_DECL                MemoryRegionCache *cache
#define ARG1                     cache
#define SUFFIX                   _cached
#define TRANSLATE(addr, ...)     \
    address_space_translate(cache->as, cache->xlat + (addr), __VA_ARGS__)
#define IS_DIRECT(mr, is_write)  true
#define MAP_RAM(mr, ofs)         qemu_map_ram_ptr((mr)->ram_block, ofs)
#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len)
#define RCU_READ_LOCK()          rcu_read_lock()
#define RCU_READ_UNLOCK()        rcu_read_unlock()
#include "memory_ldst.inc.c"

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    cpu_synchronize_state(cpu);
    while (len > 0) {
        int asidx;
        MemTxAttrs attrs;

        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
        asidx = cpu_asidx_from_attrs(cpu, attrs);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
                                          phys_addr, buf, l);
        } else {
            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
                             MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

/*
 * Allows code that needs to deal with migration bitmaps etc to still be
 * built target independent.
 */
size_t qemu_target_page_size(void)
{
    return TARGET_PAGE_SIZE;
}

int qemu_target_page_bits(void)
{
    return TARGET_PAGE_BITS;
}

int qemu_target_page_bits_min(void)
{
    return TARGET_PAGE_BITS_MIN;
}
#endif

/*
 * A helper function for the _utterly broken_ virtio device model to find
 * out if it's running on a big endian machine. Don't do this at home kids!
3709 */ 3710bool target_words_bigendian(void); 3711bool target_words_bigendian(void) 3712{ 3713#if defined(TARGET_WORDS_BIGENDIAN) 3714 return true; 3715#else 3716 return false; 3717#endif 3718} 3719 3720#ifndef CONFIG_USER_ONLY 3721bool cpu_physical_memory_is_io(hwaddr phys_addr) 3722{ 3723 MemoryRegion*mr; 3724 hwaddr l = 1; 3725 bool res; 3726 3727 rcu_read_lock(); 3728 mr = address_space_translate(&address_space_memory, 3729 phys_addr, &phys_addr, &l, false); 3730 3731 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr)); 3732 rcu_read_unlock(); 3733 return res; 3734} 3735 3736int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) 3737{ 3738 RAMBlock *block; 3739 int ret = 0; 3740 3741 rcu_read_lock(); 3742 RAMBLOCK_FOREACH(block) { 3743 ret = func(block->idstr, block->host, block->offset, 3744 block->used_length, opaque); 3745 if (ret) { 3746 break; 3747 } 3748 } 3749 rcu_read_unlock(); 3750 return ret; 3751} 3752 3753/* 3754 * Unmap pages of memory from start to start+length such that 3755 * they a) read as 0, b) Trigger whatever fault mechanism 3756 * the OS provides for postcopy. 3757 * The pages must be unmapped by the end of the function. 3758 * Returns: 0 on success, none-0 on failure 3759 * 3760 */ 3761int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) 3762{ 3763 int ret = -1; 3764 3765 uint8_t *host_startaddr = rb->host + start; 3766 3767 if ((uintptr_t)host_startaddr & (rb->page_size - 1)) { 3768 error_report("ram_block_discard_range: Unaligned start address: %p", 3769 host_startaddr); 3770 goto err; 3771 } 3772 3773 if ((start + length) <= rb->used_length) { 3774 bool need_madvise, need_fallocate; 3775 uint8_t *host_endaddr = host_startaddr + length; 3776 if ((uintptr_t)host_endaddr & (rb->page_size - 1)) { 3777 error_report("ram_block_discard_range: Unaligned end address: %p", 3778 host_endaddr); 3779 goto err; 3780 } 3781 3782 errno = ENOTSUP; /* If we are missing MADVISE etc */ 3783 3784 /* The logic here is messy; 3785 * madvise DONTNEED fails for hugepages 3786 * fallocate works on hugepages and shmem 3787 */ 3788 need_madvise = (rb->page_size == qemu_host_page_size); 3789 need_fallocate = rb->fd != -1; 3790 if (need_fallocate) { 3791 /* For a file, this causes the area of the file to be zero'd 3792 * if read, and for hugetlbfs also causes it to be unmapped 3793 * so a userfault will trigger. 3794 */ 3795#ifdef CONFIG_FALLOCATE_PUNCH_HOLE 3796 ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 3797 start, length); 3798 if (ret) { 3799 ret = -errno; 3800 error_report("ram_block_discard_range: Failed to fallocate " 3801 "%s:%" PRIx64 " +%zx (%d)", 3802 rb->idstr, start, length, ret); 3803 goto err; 3804 } 3805#else 3806 ret = -ENOSYS; 3807 error_report("ram_block_discard_range: fallocate not available/file" 3808 "%s:%" PRIx64 " +%zx (%d)", 3809 rb->idstr, start, length, ret); 3810 goto err; 3811#endif 3812 } 3813 if (need_madvise) { 3814 /* For normal RAM this causes it to be unmapped, 3815 * for shared memory it causes the local mapping to disappear 3816 * and to fall back on the file contents (which we just 3817 * fallocate'd away). 
3818 */ 3819#if defined(CONFIG_MADVISE) 3820 ret = madvise(host_startaddr, length, MADV_DONTNEED); 3821 if (ret) { 3822 ret = -errno; 3823 error_report("ram_block_discard_range: Failed to discard range " 3824 "%s:%" PRIx64 " +%zx (%d)", 3825 rb->idstr, start, length, ret); 3826 goto err; 3827 } 3828#else 3829 ret = -ENOSYS; 3830 error_report("ram_block_discard_range: MADVISE not available" 3831 "%s:%" PRIx64 " +%zx (%d)", 3832 rb->idstr, start, length, ret); 3833 goto err; 3834#endif 3835 } 3836 trace_ram_block_discard_range(rb->idstr, host_startaddr, length, 3837 need_madvise, need_fallocate, ret); 3838 } else { 3839 error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64 3840 "/%zx/" RAM_ADDR_FMT")", 3841 rb->idstr, start, length, rb->used_length); 3842 } 3843 3844err: 3845 return ret; 3846} 3847 3848#endif 3849 3850void page_size_init(void) 3851{ 3852 /* NOTE: we can always suppose that qemu_host_page_size >= 3853 TARGET_PAGE_SIZE */ 3854 if (qemu_host_page_size == 0) { 3855 qemu_host_page_size = qemu_real_host_page_size; 3856 } 3857 if (qemu_host_page_size < TARGET_PAGE_SIZE) { 3858 qemu_host_page_size = TARGET_PAGE_SIZE; 3859 } 3860 qemu_host_page_mask = -(intptr_t)qemu_host_page_size; 3861} 3862 3863#if !defined(CONFIG_USER_ONLY) 3864 3865static void mtree_print_phys_entries(fprintf_function mon, void *f, 3866 int start, int end, int skip, int ptr) 3867{ 3868 if (start == end - 1) { 3869 mon(f, "\t%3d ", start); 3870 } else { 3871 mon(f, "\t%3d..%-3d ", start, end - 1); 3872 } 3873 mon(f, " skip=%d ", skip); 3874 if (ptr == PHYS_MAP_NODE_NIL) { 3875 mon(f, " ptr=NIL"); 3876 } else if (!skip) { 3877 mon(f, " ptr=#%d", ptr); 3878 } else { 3879 mon(f, " ptr=[%d]", ptr); 3880 } 3881 mon(f, "\n"); 3882} 3883 3884#define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \ 3885 int128_sub((size), int128_one())) : 0) 3886 3887void mtree_print_dispatch(fprintf_function mon, void *f, 3888 AddressSpaceDispatch *d, MemoryRegion *root) 3889{ 3890 int i; 3891 3892 mon(f, " Dispatch\n"); 3893 mon(f, " Physical sections\n"); 3894 3895 for (i = 0; i < d->map.sections_nb; ++i) { 3896 MemoryRegionSection *s = d->map.sections + i; 3897 const char *names[] = { " [unassigned]", " [not dirty]", 3898 " [ROM]", " [watch]" }; 3899 3900 mon(f, " #%d @" TARGET_FMT_plx ".." TARGET_FMT_plx " %s%s%s%s%s", 3901 i, 3902 s->offset_within_address_space, 3903 s->offset_within_address_space + MR_SIZE(s->mr->size), 3904 s->mr->name ? s->mr->name : "(noname)", 3905 i < ARRAY_SIZE(names) ? names[i] : "", 3906 s->mr == root ? " [ROOT]" : "", 3907 s == d->mru_section ? " [MRU]" : "", 3908 s->mr->is_iommu ? " [iommu]" : ""); 3909 3910 if (s->mr->alias) { 3911 mon(f, " alias=%s", s->mr->alias->name ? 3912 s->mr->alias->name : "noname"); 3913 } 3914 mon(f, "\n"); 3915 } 3916 3917 mon(f, " Nodes (%d bits per level, %d levels) ptr=[%d] skip=%d\n", 3918 P_L2_BITS, P_L2_LEVELS, d->phys_map.ptr, d->phys_map.skip); 3919 for (i = 0; i < d->map.nodes_nb; ++i) { 3920 int j, jprev; 3921 PhysPageEntry prev; 3922 Node *n = d->map.nodes + i; 3923 3924 mon(f, " [%d]\n", i); 3925 3926 for (j = 0, jprev = 0, prev = *n[0]; j < ARRAY_SIZE(*n); ++j) { 3927 PhysPageEntry *pe = *n + j; 3928 3929 if (pe->ptr == prev.ptr && pe->skip == prev.skip) { 3930 continue; 3931 } 3932 3933 mtree_print_phys_entries(mon, f, jprev, j, prev.skip, prev.ptr); 3934 3935 jprev = j; 3936 prev = *pe; 3937 } 3938 3939 if (jprev != ARRAY_SIZE(*n)) { 3940 mtree_print_phys_entries(mon, f, jprev, j, prev.skip, prev.ptr); 3941 } 3942 } 3943} 3944 3945#endif