Commit 432d268c authored by Jun Nakajima's avatar Jun Nakajima Committed by Alexander Graf

xen: Introduce the Xen mapcache

On IA32 host or IA32 PAE host, at present, generally, we can't create
an HVM guest with more than 2G memory, because generally it's almost
impossible for Qemu to find a large enough and consecutive virtual
address space to map an HVM guest's whole physical address space.
The attached patch fixes this issue using dynamic mapping based on
little blocks of memory.

Each call to qemu_get_ram_ptr makes a call to qemu_map_cache with the
lock option, so mapcache will not unmap these ram_ptr.

Blocks that do not belong to the RAM, but usually to a device ROM or to
a framebuffer, are handled in a separate function, so that the whole
RAMBlock can be mapped.
Signed-off-by: default avatarJun Nakajima <jun.nakajima@intel.com>
Signed-off-by: default avatarAnthony PERARD <anthony.perard@citrix.com>
Signed-off-by: default avatarStefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: default avatarAlexander Graf <agraf@suse.de>
parent 9c11a8ac
......@@ -214,8 +214,11 @@ else
CONFIG_NO_XEN = y
endif
# xen support
CONFIG_NO_XEN_MAPCACHE = $(if $(subst n,,$(CONFIG_XEN_MAPCACHE)),n,y)
obj-i386-$(CONFIG_XEN) += xen-all.o
obj-$(CONFIG_NO_XEN) += xen-stub.o
obj-i386-$(CONFIG_XEN_MAPCACHE) += xen-mapcache.o
obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o
# Inter-VM PCI shared memory
CONFIG_IVSHMEM =
......
......@@ -3299,6 +3299,9 @@ case "$target_arch2" in
i386|x86_64)
if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
echo "CONFIG_XEN=y" >> $config_target_mak
if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then
echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak
fi
fi
esac
case "$target_arch2" in
......
......@@ -32,6 +32,7 @@
#include "hw/qdev.h"
#include "osdep.h"
#include "kvm.h"
#include "hw/xen.h"
#include "qemu-timer.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
......@@ -51,6 +52,8 @@
#include <libutil.h>
#endif
#endif
#else /* !CONFIG_USER_ONLY */
#include "xen-mapcache.h"
#endif
//#define DEBUG_TB_INVALIDATE
......@@ -2889,6 +2892,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
}
}
new_block->offset = find_ram_offset(size);
if (host) {
new_block->host = host;
new_block->flags |= RAM_PREALLOC_MASK;
......@@ -2911,13 +2915,15 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
PROT_EXEC|PROT_READ|PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
#else
new_block->host = qemu_vmalloc(size);
if (xen_mapcache_enabled()) {
xen_ram_alloc(new_block->offset, size);
} else {
new_block->host = qemu_vmalloc(size);
}
#endif
qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
}
}
new_block->offset = find_ram_offset(size);
new_block->length = size;
QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
......@@ -2962,7 +2968,11 @@ void qemu_ram_free(ram_addr_t addr)
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
munmap(block->host, block->length);
#else
qemu_vfree(block->host);
if (xen_mapcache_enabled()) {
qemu_invalidate_entry(block->host);
} else {
qemu_vfree(block->host);
}
#endif
}
qemu_free(block);
......@@ -3051,6 +3061,16 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
QLIST_REMOVE(block, next);
QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
}
if (xen_mapcache_enabled()) {
/* We need to check if the requested address is in the RAM
* because we don't want to map the entire memory in QEMU.
*/
if (block->offset == 0) {
return qemu_map_cache(addr, 0, 1);
} else if (block->host == NULL) {
block->host = xen_map_block(block->offset, block->length);
}
}
return block->host + (addr - block->offset);
}
}
......@@ -3070,6 +3090,16 @@ void *qemu_safe_ram_ptr(ram_addr_t addr)
QLIST_FOREACH(block, &ram_list.blocks, next) {
if (addr - block->offset < block->length) {
if (xen_mapcache_enabled()) {
/* We need to check if the requested address is in the RAM
* because we don't want to map the entire memory in QEMU.
*/
if (block->offset == 0) {
return qemu_map_cache(addr, 0, 1);
} else if (block->host == NULL) {
block->host = xen_map_block(block->offset, block->length);
}
}
return block->host + (addr - block->offset);
}
}
......@@ -3086,11 +3116,21 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
uint8_t *host = ptr;
QLIST_FOREACH(block, &ram_list.blocks, next) {
/* This case happens when the block is not mapped. */
if (block->host == NULL) {
continue;
}
if (host - block->host < block->length) {
*ram_addr = block->offset + (host - block->host);
return 0;
}
}
if (xen_mapcache_enabled()) {
*ram_addr = qemu_ram_addr_from_mapcache(ptr);
return 0;
}
return -1;
}
......
......@@ -31,6 +31,15 @@ static inline int xen_enabled(void)
#endif
}
/*
 * Report whether the Xen map cache is in use.
 *
 * Returns 0 when the build does not define CONFIG_XEN_MAPCACHE; otherwise
 * returns whatever xen_enabled() reports.
 */
static inline int xen_mapcache_enabled(void)
{
#ifndef CONFIG_XEN_MAPCACHE
    return 0;
#else
    return xen_enabled();
#endif
}
int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
void xen_piix3_set_irq(void *opaque, int irq_num, int level);
void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len);
......@@ -41,6 +50,10 @@ int xen_init(void);
int xen_hvm_init(void);
void xen_vcpu_init(void);
#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY)
void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size);
#endif
#if defined(CONFIG_XEN) && CONFIG_XEN_CTRL_INTERFACE_VERSION < 400
# define HVM_MAX_VCPUS 32
#endif
......
......@@ -63,6 +63,15 @@ static inline int xc_fd(int xen_xc)
}
/*
 * Compatibility wrapper providing xc_domain_populate_physmap_exact().
 *
 * Forwards every argument unchanged to xc_domain_memory_populate_physmap()
 * and returns that call's result.
 */
static inline int xc_domain_populate_physmap_exact(XenXC xc_handle,
                                                   uint32_t domid,
                                                   unsigned long nr_extents,
                                                   unsigned int extent_order,
                                                   unsigned int mem_flags,
                                                   xen_pfn_t *extent_start)
{
    int rc = xc_domain_memory_populate_physmap(xc_handle, domid, nr_extents,
                                               extent_order, mem_flags,
                                               extent_start);

    return rc;
}
/* Xen 4.1 */
#else
......
......@@ -361,3 +361,13 @@ disable milkymist_uart_pulse_irq_tx(void) "Pulse IRQ TX"
# hw/milkymist-vgafb.c
disable milkymist_vgafb_memory_read(uint32_t addr, uint32_t value) "addr %08x value %08x"
disable milkymist_vgafb_memory_write(uint32_t addr, uint32_t value) "addr %08x value %08x"
# xen-all.c
disable xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: %#lx, size %#lx"
# xen-mapcache.c
disable qemu_map_cache(uint64_t phys_addr) "want %#"PRIx64""
disable qemu_remap_bucket(uint64_t index) "index %#"PRIx64""
disable qemu_map_cache_return(void* ptr) "%p"
disable xen_map_block(uint64_t phys_addr, uint64_t size) "%#"PRIx64", size %#"PRIx64""
disable xen_unmap_block(void* addr, unsigned long size) "%p, size %#lx"
......@@ -10,6 +10,9 @@
#include "hw/xen_common.h"
#include "hw/xen_backend.h"
#include "xen-mapcache.h"
#include "trace.h"
/* Xen specific function for piix pci */
int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
......@@ -52,6 +55,65 @@ qemu_irq *xen_interrupt_controller_init(void)
return qemu_allocate_irqs(xen_set_irq, NULL, 16);
}
/* Memory Ops */
/*
 * Create the RAMBlock describing guest RAM ("xen.ram") and register it in
 * the physical memory map.
 *
 * The block starts at ram offset 0, is ram_size bytes long and has no host
 * mapping (host == NULL).  Its entries in ram_list.phys_dirty are filled
 * with 0xff.  At most the first 0xe0000000 bytes are registered at
 * guest-physical address 0; when TARGET_PHYS_ADDR_BITS > 32 any remainder
 * is registered starting at 0x100000000.
 */
static void xen_ram_init(ram_addr_t ram_size)
{
    /* Upper bound of the RAM registered at guest-physical address 0. */
    const ram_addr_t low_ram_limit = 0xe0000000;
    ram_addr_t below_4g_mem_size = ram_size;
    ram_addr_t above_4g_mem_size = 0;
    RAMBlock *block = qemu_mallocz(sizeof (*block));

    pstrcpy(block->idstr, sizeof (block->idstr), "xen.ram");
    block->host = NULL;
    block->offset = 0;
    block->length = ram_size;

    QLIST_INSERT_HEAD(&ram_list.blocks, block, next);

    /* One dirty-tracking byte per target page of the new block. */
    ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
                                       block->length >> TARGET_PAGE_BITS);
    memset(ram_list.phys_dirty + (block->offset >> TARGET_PAGE_BITS),
           0xff, block->length >> TARGET_PAGE_BITS);

    /* Split the RAM when it does not fit below the low limit. */
    if (ram_size >= low_ram_limit) {
        below_4g_mem_size = low_ram_limit;
        above_4g_mem_size = ram_size - low_ram_limit;
    }

    cpu_register_physical_memory(0, below_4g_mem_size, block->offset);
#if TARGET_PHYS_ADDR_BITS > 32
    if (above_4g_mem_size > 0) {
        cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
                                     block->offset + below_4g_mem_size);
    }
#endif
}
/*
 * Ask Xen to populate the guest pages backing a RAM region.
 *
 * ram_addr: start of the region (ram address).
 * size:     length of the region in bytes.
 *
 * Builds a list with one page frame number per target page, starting at
 * ram_addr >> TARGET_PAGE_BITS, and passes it to
 * xc_domain_populate_physmap_exact() with extent order 0 and no memory
 * flags.  Calls hw_error() if that call reports failure.
 */
void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
{
    unsigned long nr_pfn;
    xen_pfn_t *pfn_list;
    /* Same type as nr_pfn: avoids a signed/unsigned comparison and signed
     * overflow of the index when the page count exceeds INT_MAX. */
    unsigned long i;

    trace_xen_ram_alloc(ram_addr, size);

    nr_pfn = size >> TARGET_PAGE_BITS;
    pfn_list = qemu_malloc(sizeof (*pfn_list) * nr_pfn);

    for (i = 0; i < nr_pfn; i++) {
        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
    }

    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
        hw_error("xen: failed to populate ram at %lx", ram_addr);
    }

    qemu_free(pfn_list);
}
/* VCPU Operations, MMIO, IO ring ... */
static void xen_reset_vcpu(void *opaque)
......@@ -86,5 +148,9 @@ int xen_init(void)
/*
 * Xen HVM initialisation: initialises the map cache, then sets up guest RAM
 * of ram_size bytes through xen_ram_init().  Always returns 0.
 */
int xen_hvm_init(void)
{
/* Init RAM management */
qemu_map_cache_init();
xen_ram_init(ram_size);
return 0;
}
/*
* Copyright (C) 2011 Citrix Ltd.
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
#include "config.h"
#include "exec-all.h"
#include "qemu-common.h"
#include "cpu-common.h"
#include "xen-mapcache.h"
/* Stub: map cache initialisation does nothing in this implementation. */
void qemu_map_cache_init(void)
{
}
/*
 * Stub: ignores size and lock and returns the pointer that
 * qemu_get_ram_ptr() yields for phys_addr.
 */
uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock)
{
return qemu_get_ram_ptr(phys_addr);
}
/* Stub: nothing to unlock in this implementation; buffer is ignored. */
void qemu_map_cache_unlock(void *buffer)
{
}
/* Stub: performs no lookup; ptr is ignored and -1 (converted to ram_addr_t)
 * is always returned. */
ram_addr_t qemu_ram_addr_from_mapcache(void *ptr)
{
return -1;
}
/* Stub: there is nothing to invalidate in this implementation. */
void qemu_invalidate_map_cache(void)
{
}
/* Stub: does nothing; buffer is ignored. */
void qemu_invalidate_entry(uint8_t *buffer)
{
}
/* Stub: maps nothing; both arguments are ignored and NULL is always
 * returned. */
uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size)
{
return NULL;
}
This diff is collapsed.
/*
* Copyright (C) 2011 Citrix Ltd.
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
#ifndef XEN_MAPCACHE_H
#define XEN_MAPCACHE_H
#include <sys/mman.h>
#include "trace.h"
void qemu_map_cache_init(void);
uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock);
void qemu_map_cache_unlock(void *phys_addr);
ram_addr_t qemu_ram_addr_from_mapcache(void *ptr);
void qemu_invalidate_entry(uint8_t *buffer);
void qemu_invalidate_map_cache(void);
uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size);
/*
 * Unmap a block of size bytes starting at addr.
 *
 * Emits the xen_unmap_block trace event, then calls munmap().  If munmap()
 * reports failure, hw_error() is called with the strerror() text for errno.
 */
static inline void xen_unmap_block(void *addr, ram_addr_t size)
{
    int rc;

    trace_xen_unmap_block(addr, size);

    rc = munmap(addr, size);
    if (rc == 0) {
        return;
    }

    hw_error("xen_unmap_block: %s", strerror(errno));
}
#define mapcache_lock() ((void)0)
#define mapcache_unlock() ((void)0)
#endif /* !XEN_MAPCACHE_H */
......@@ -22,6 +22,10 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
{
}
/* Stub: does nothing; ram_addr and size are ignored. */
void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
{
}
qemu_irq *xen_interrupt_controller_init(void)
{
return NULL;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment