Commit d4c6fa73 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'stable/for-linus-3.4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

Pull xen updates from Konrad Rzeszutek Wilk:
 "which has three neat features:

   - PV multiconsole support, so that there can be hvc1, hvc2, etc; This
     can be used in HVM and in PV mode.

   - P-state and C-state power management driver that uploads said power
     management data to the hypervisor.  It also inhibits cpufreq
     scaling drivers to load so that only the hypervisor can make power
     management decisions - fixing a weird perf bug.

     There is one thing in the Kconfig that you won't like: "default y
     if (X86_ACPI_CPUFREQ = y || X86_POWERNOW_K8 = y)" (note, that it
     all depends on CONFIG_XEN which depends on CONFIG_PARAVIRT which by
     default is off).  I've a fix to convert that boolean expression
     into "default m" which I am going to post after the cpufreq git
     pull - as the two patches to make this work depend on a fix in Dave
     Jones's tree.

   - Function Level Reset (FLR) support in the Xen PCI backend.

  Fixes:

   - Kconfig dependencies for Xen PV keyboard and video
   - Compile warnings and constify fixes
   - Change over to use percpu_xxx instead of this_cpu_xxx"

Fix up trivial conflicts in drivers/tty/hvc/hvc_xen.c due to changes to
a removed commit.

* tag 'stable/for-linus-3.4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen kconfig: relax INPUT_XEN_KBDDEV_FRONTEND deps
  xen/acpi-processor: C and P-state driver that uploads said data to hypervisor.
  xen: constify all instances of "struct attribute_group"
  xen/xenbus: ignore console/0
  hvc_xen: introduce HVC_XEN_FRONTEND
  hvc_xen: implement multiconsole support
  hvc_xen: support PV on HVM consoles
  xenbus: don't free other end details too early
  xen/enlighten: Expose MWAIT and MWAIT_LEAF if hypervisor OKs it.
  xen/setup/pm/acpi: Remove the call to boot_option_idle_override.
  xenbus: address compiler warnings
  xen: use this_cpu_xxx replace percpu_xxx funcs
  xen/pciback: Support pci_reset_function, aka FLR or D3 support.
  pci: Introduce __pci_reset_function_locked to be used when holding device_lock.
  xen: Utilize the restore_msi_irqs hook.
parents aab008db 4bc25af7
......@@ -77,6 +77,7 @@ DEFINE_GUEST_HANDLE(int);
DEFINE_GUEST_HANDLE(long);
DEFINE_GUEST_HANDLE(void);
DEFINE_GUEST_HANDLE(uint64_t);
DEFINE_GUEST_HANDLE(uint32_t);
typedef unsigned long xen_pfn_t;
DEFINE_GUEST_HANDLE(xen_pfn_t);
......
......@@ -56,6 +56,7 @@ DEFINE_GUEST_HANDLE(int);
DEFINE_GUEST_HANDLE(long);
DEFINE_GUEST_HANDLE(void);
DEFINE_GUEST_HANDLE(uint64_t);
DEFINE_GUEST_HANDLE(uint32_t);
#endif
#ifndef HYPERVISOR_VIRT_START
......
......@@ -324,6 +324,32 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
out:
return ret;
}
static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq)
{
int ret = 0;
if (pci_seg_supported) {
struct physdev_pci_device restore_ext;
restore_ext.seg = pci_domain_nr(dev->bus);
restore_ext.bus = dev->bus->number;
restore_ext.devfn = dev->devfn;
ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
&restore_ext);
if (ret == -ENOSYS)
pci_seg_supported = false;
WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
}
if (!pci_seg_supported) {
struct physdev_restore_msi restore;
restore.bus = dev->bus->number;
restore.devfn = dev->devfn;
ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
}
}
#endif
static void xen_teardown_msi_irqs(struct pci_dev *dev)
......@@ -446,6 +472,7 @@ int __init pci_xen_initial_domain(void)
#ifdef CONFIG_PCI_MSI
x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
#endif
xen_setup_acpi_sci();
__acpi_register_gsi = acpi_register_gsi_xen;
......
......@@ -62,6 +62,15 @@
#include <asm/reboot.h>
#include <asm/stackprotector.h>
#include <asm/hypervisor.h>
#include <asm/mwait.h>
#ifdef CONFIG_ACPI
#include <linux/acpi.h>
#include <asm/acpi.h>
#include <acpi/pdc_intel.h>
#include <acpi/processor.h>
#include <xen/interface/platform.h>
#endif
#include "xen-ops.h"
#include "mmu.h"
......@@ -200,13 +209,17 @@ static void __init xen_banner(void)
static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
static __read_mostly unsigned int cpuid_leaf5_ecx_val;
static __read_mostly unsigned int cpuid_leaf5_edx_val;
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
unsigned int *cx, unsigned int *dx)
{
unsigned maskebx = ~0;
unsigned maskecx = ~0;
unsigned maskedx = ~0;
unsigned setecx = 0;
/*
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
......@@ -214,9 +227,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
switch (*ax) {
case 1:
maskecx = cpuid_leaf1_ecx_mask;
setecx = cpuid_leaf1_ecx_set_mask;
maskedx = cpuid_leaf1_edx_mask;
break;
case CPUID_MWAIT_LEAF:
/* Synthesize the values.. */
*ax = 0;
*bx = 0;
*cx = cpuid_leaf5_ecx_val;
*dx = cpuid_leaf5_edx_val;
return;
case 0xb:
/* Suppress extended topology stuff */
maskebx = 0;
......@@ -232,9 +254,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
*bx &= maskebx;
*cx &= maskecx;
*cx |= setecx;
*dx &= maskedx;
}
static bool __init xen_check_mwait(void)
{
#ifdef CONFIG_ACPI
struct xen_platform_op op = {
.cmd = XENPF_set_processor_pminfo,
.u.set_pminfo.id = -1,
.u.set_pminfo.type = XEN_PM_PDC,
};
uint32_t buf[3];
unsigned int ax, bx, cx, dx;
unsigned int mwait_mask;
/* We need to determine whether it is OK to expose the MWAIT
* capability to the kernel to harvest deeper than C3 states from ACPI
* _CST using the processor_harvest_xen.c module. For this to work, we
* need to gather the MWAIT_LEAF values (which the cstate.c code
* checks against). The hypervisor won't expose the MWAIT flag because
* it would break backwards compatibility; so we will find out directly
* from the hardware and hypercall.
*/
if (!xen_initial_domain())
return false;
ax = 1;
cx = 0;
native_cpuid(&ax, &bx, &cx, &dx);
mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
(1 << (X86_FEATURE_MWAIT % 32));
if ((cx & mwait_mask) != mwait_mask)
return false;
/* We need to emulate the MWAIT_LEAF and for that we need both
* ecx and edx. The hypercall provides only partial information.
*/
ax = CPUID_MWAIT_LEAF;
bx = 0;
cx = 0;
dx = 0;
native_cpuid(&ax, &bx, &cx, &dx);
/* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
* don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
*/
buf[0] = ACPI_PDC_REVISION_ID;
buf[1] = 1;
buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
if ((HYPERVISOR_dom0_op(&op) == 0) &&
(buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
cpuid_leaf5_ecx_val = cx;
cpuid_leaf5_edx_val = dx;
}
return true;
#else
return false;
#endif
}
static void __init xen_init_cpuid_mask(void)
{
unsigned int ax, bx, cx, dx;
......@@ -261,6 +349,9 @@ static void __init xen_init_cpuid_mask(void)
/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
if ((cx & xsave_mask) != xsave_mask)
cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
if (xen_check_mwait())
cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
}
static void xen_set_debugreg(int reg, unsigned long val)
......@@ -777,11 +868,11 @@ static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
static unsigned long xen_read_cr0(void)
{
unsigned long cr0 = percpu_read(xen_cr0_value);
unsigned long cr0 = this_cpu_read(xen_cr0_value);
if (unlikely(cr0 == 0)) {
cr0 = native_read_cr0();
percpu_write(xen_cr0_value, cr0);
this_cpu_write(xen_cr0_value, cr0);
}
return cr0;
......@@ -791,7 +882,7 @@ static void xen_write_cr0(unsigned long cr0)
{
struct multicall_space mcs;
percpu_write(xen_cr0_value, cr0);
this_cpu_write(xen_cr0_value, cr0);
/* Only pay attention to cr0.TS; everything else is
ignored. */
......
......@@ -26,7 +26,7 @@ static unsigned long xen_save_fl(void)
struct vcpu_info *vcpu;
unsigned long flags;
vcpu = percpu_read(xen_vcpu);
vcpu = this_cpu_read(xen_vcpu);
/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
......@@ -50,7 +50,7 @@ static void xen_restore_fl(unsigned long flags)
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
vcpu = percpu_read(xen_vcpu);
vcpu = this_cpu_read(xen_vcpu);
vcpu->evtchn_upcall_mask = flags;
preempt_enable_no_resched();
......@@ -72,7 +72,7 @@ static void xen_irq_disable(void)
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
......@@ -86,7 +86,7 @@ static void xen_irq_enable(void)
the caller is confused and is trying to re-enable interrupts
on an indeterminate processor. */
vcpu = percpu_read(xen_vcpu);
vcpu = this_cpu_read(xen_vcpu);
vcpu->evtchn_upcall_mask = 0;
/* Doesn't matter if we get preempted here, because any
......
......@@ -1071,14 +1071,14 @@ static void drop_other_mm_ref(void *info)
struct mm_struct *mm = info;
struct mm_struct *active_mm;
active_mm = percpu_read(cpu_tlbstate.active_mm);
active_mm = this_cpu_read(cpu_tlbstate.active_mm);
if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
leave_mm(smp_processor_id());
/* If this cpu still has a stale cr3 reference, then make sure
it has been flushed. */
if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
load_cr3(swapper_pg_dir);
}
......@@ -1185,17 +1185,17 @@ static void __init xen_pagetable_setup_done(pgd_t *base)
static void xen_write_cr2(unsigned long cr2)
{
percpu_read(xen_vcpu)->arch.cr2 = cr2;
this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
}
static unsigned long xen_read_cr2(void)
{
return percpu_read(xen_vcpu)->arch.cr2;
return this_cpu_read(xen_vcpu)->arch.cr2;
}
unsigned long xen_read_cr2_direct(void)
{
return percpu_read(xen_vcpu_info.arch.cr2);
return this_cpu_read(xen_vcpu_info.arch.cr2);
}
static void xen_flush_tlb(void)
......@@ -1278,12 +1278,12 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
static unsigned long xen_read_cr3(void)
{
return percpu_read(xen_cr3);
return this_cpu_read(xen_cr3);
}
static void set_current_cr3(void *v)
{
percpu_write(xen_current_cr3, (unsigned long)v);
this_cpu_write(xen_current_cr3, (unsigned long)v);
}
static void __xen_write_cr3(bool kernel, unsigned long cr3)
......@@ -1306,7 +1306,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
xen_extend_mmuext_op(&op);
if (kernel) {
percpu_write(xen_cr3, cr3);
this_cpu_write(xen_cr3, cr3);
/* Update xen_current_cr3 once the batch has actually
been submitted. */
......@@ -1322,7 +1322,7 @@ static void xen_write_cr3(unsigned long cr3)
/* Update while interrupts are disabled, so its atomic with
respect to ipis */
percpu_write(xen_cr3, cr3);
this_cpu_write(xen_cr3, cr3);
__xen_write_cr3(true, cr3);
......
......@@ -47,7 +47,7 @@ static inline void xen_mc_issue(unsigned mode)
xen_mc_flush();
/* restore flags saved in xen_mc_batch */
local_irq_restore(percpu_read(xen_mc_irq_flags));
local_irq_restore(this_cpu_read(xen_mc_irq_flags));
}
/* Set up a callback to be called when the current batch is flushed */
......
......@@ -420,7 +420,6 @@ void __init xen_arch_setup(void)
boot_cpu_data.hlt_works_ok = 1;
#endif
disable_cpuidle();
boot_option_idle_override = IDLE_HALT;
WARN_ON(set_pm_idle_to_default());
fiddle_vdso();
}
......@@ -76,7 +76,7 @@ static void __cpuinit cpu_bringup(void)
xen_setup_cpu_clockevents();
set_cpu_online(cpu, true);
percpu_write(cpu_state, CPU_ONLINE);
this_cpu_write(cpu_state, CPU_ONLINE);
wmb();
/* We can take interrupts now: we're officially "up". */
......
......@@ -558,7 +558,7 @@ config INPUT_CMA3000_I2C
config INPUT_XEN_KBDDEV_FRONTEND
tristate "Xen virtual keyboard and mouse support"
depends on XEN_FBDEV_FRONTEND
depends on XEN
default y
select XEN_XENBUS_FRONTEND
help
......
......@@ -3162,6 +3162,31 @@ int __pci_reset_function(struct pci_dev *dev)
}
EXPORT_SYMBOL_GPL(__pci_reset_function);
/**
* __pci_reset_function_locked - reset a PCI device function while holding
* the @dev mutex lock.
* @dev: PCI device to reset
*
* Some devices allow an individual function to be reset without affecting
* other functions in the same device. The PCI device must be responsive
* to PCI config space in order to use this function.
*
* The device function is presumed to be unused and the caller is holding
* the device mutex lock when this function is called.
* Resetting the device will make the contents of PCI configuration space
* random, so any caller of this must be prepared to reinitialise the
* device including MSI, bus mastering, BARs, decoding IO and memory spaces,
* etc.
*
* Returns 0 if the device function was successfully reset or negative if the
* device doesn't support resetting a single function.
*/
int __pci_reset_function_locked(struct pci_dev *dev)
{
return pci_dev_reset(dev, 1);
}
EXPORT_SYMBOL_GPL(__pci_reset_function_locked);
/**
* pci_probe_reset_function - check whether the device can be safely reset
* @dev: PCI device to reset
......
......@@ -66,6 +66,14 @@ config HVC_XEN
help
Xen virtual console device driver
config HVC_XEN_FRONTEND
bool "Xen Hypervisor Multiple Consoles support"
depends on HVC_XEN
select XEN_XENBUS_FRONTEND
default y
help
Xen driver for secondary virtual consoles
config HVC_UDBG
bool "udbg based fake hypervisor console"
depends on PPC && EXPERIMENTAL
......
......@@ -23,44 +23,74 @@
#include <linux/err.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/list.h>
#include <asm/io.h>
#include <asm/xen/hypervisor.h>
#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/hvm.h>
#include <xen/grant_table.h>
#include <xen/page.h>
#include <xen/events.h>
#include <xen/interface/io/console.h>
#include <xen/hvc-console.h>
#include <xen/xenbus.h>
#include "hvc_console.h"
#define HVC_COOKIE 0x58656e /* "Xen" in hex */
static struct hvc_struct *hvc;
static int xencons_irq;
struct xencons_info {
struct list_head list;
struct xenbus_device *xbdev;
struct xencons_interface *intf;
unsigned int evtchn;
struct hvc_struct *hvc;
int irq;
int vtermno;
grant_ref_t gntref;
};
static LIST_HEAD(xenconsoles);
static DEFINE_SPINLOCK(xencons_lock);
/* ------------------------------------------------------------------ */
static unsigned long console_pfn = ~0ul;
static struct xencons_info *vtermno_to_xencons(int vtermno)
{
struct xencons_info *entry, *n, *ret = NULL;
if (list_empty(&xenconsoles))
return NULL;
list_for_each_entry_safe(entry, n, &xenconsoles, list) {
if (entry->vtermno == vtermno) {
ret = entry;
break;
}
}
return ret;
}
static inline struct xencons_interface *xencons_interface(void)
static inline int xenbus_devid_to_vtermno(int devid)
{
if (console_pfn == ~0ul)
return mfn_to_virt(xen_start_info->console.domU.mfn);
else
return __va(console_pfn << PAGE_SHIFT);
return devid + HVC_COOKIE;
}
static inline void notify_daemon(void)
static inline void notify_daemon(struct xencons_info *cons)
{
/* Use evtchn: this is called early, before irq is set up. */
notify_remote_via_evtchn(xen_start_info->console.domU.evtchn);
notify_remote_via_evtchn(cons->evtchn);
}
static int __write_console(const char *data, int len)
static int __write_console(struct xencons_info *xencons,
const char *data, int len)
{
struct xencons_interface *intf = xencons_interface();
XENCONS_RING_IDX cons, prod;
struct xencons_interface *intf = xencons->intf;
int sent = 0;
cons = intf->out_cons;
......@@ -75,13 +105,16 @@ static int __write_console(const char *data, int len)
intf->out_prod = prod;
if (sent)
notify_daemon();
notify_daemon(xencons);
return sent;
}
static int domU_write_console(uint32_t vtermno, const char *data, int len)
{
int ret = len;
struct xencons_info *cons = vtermno_to_xencons(vtermno);
if (cons == NULL)
return -EINVAL;
/*
* Make sure the whole buffer is emitted, polling if
......@@ -90,7 +123,7 @@ static int domU_write_console(uint32_t vtermno, const char *data, int len)
* kernel is crippled.
*/
while (len) {
int sent = __write_console(data, len);
int sent = __write_console(cons, data, len);
data += sent;
len -= sent;
......@@ -104,9 +137,13 @@ static int domU_write_console(uint32_t vtermno, const char *data, int len)
static int domU_read_console(uint32_t vtermno, char *buf, int len)
{
struct xencons_interface *intf = xencons_interface();
struct xencons_interface *intf;
XENCONS_RING_IDX cons, prod;
int recv = 0;
struct xencons_info *xencons = vtermno_to_xencons(vtermno);
if (xencons == NULL)
return -EINVAL;
intf = xencons->intf;
cons = intf->in_cons;
prod = intf->in_prod;
......@@ -119,7 +156,7 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)
mb(); /* read ring before consuming */
intf->in_cons = cons;
notify_daemon();
notify_daemon(xencons);
return recv;
}
......@@ -157,68 +194,407 @@ static struct hv_ops dom0_hvc_ops = {
.notifier_hangup = notifier_hangup_irq,
};
static int __init xen_hvc_init(void)
static int xen_hvm_console_init(void)
{
int r;
uint64_t v = 0;
unsigned long mfn;
struct xencons_info *info;
if (!xen_hvm_domain())
return -ENODEV;
info = vtermno_to_xencons(HVC_COOKIE);
if (!info) {
info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO);
if (!info)
return -ENOMEM;
}
/* already configured */
if (info->intf != NULL)
return 0;
r = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v);
if (r < 0) {
kfree(info);
return -ENODEV;
}
info->evtchn = v;
hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v);
if (r < 0) {
kfree(info);
return -ENODEV;
}
mfn = v;
info->intf = ioremap(mfn << PAGE_SHIFT, PAGE_SIZE);
if (info->intf == NULL) {
kfree(info);
return -ENODEV;
}
info->vtermno = HVC_COOKIE;
spin_lock(&xencons_lock);
list_add_tail(&info->list, &xenconsoles);
spin_unlock(&xencons_lock);
return 0;
}
static int xen_pv_console_init(void)
{
struct hvc_struct *hp;
struct hv_ops *ops;
struct xencons_info *info;
if (!xen_pv_domain())
return -ENODEV;
if (xen_initial_domain()) {
ops = &dom0_hvc_ops;
xencons_irq = bind_virq_to_irq(VIRQ_CONSOLE, 0);
} else {
if (!xen_start_info->console.domU.evtchn)
return -ENODEV;
if (!xen_start_info->console.domU.evtchn)
return -ENODEV;
ops = &domU_hvc_ops;
xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
info = vtermno_to_xencons(HVC_COOKIE);