spapr.c 30.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
 *
 * Copyright (c) 2004-2007 Fabrice Bellard
 * Copyright (c) 2007 Jocelyn Mayer
 * Copyright (c) 2010 David Gibson, IBM Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */
27
#include "sysemu/sysemu.h"
28
29
#include "hw.h"
#include "elf.h"
Paolo Bonzini's avatar
Paolo Bonzini committed
30
#include "net/net.h"
31
32
33
#include "sysemu/blockdev.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
34
#include "kvm_ppc.h"
35
36
37
38
39
40

#include "hw/boards.h"
#include "hw/ppc.h"
#include "hw/loader.h"

#include "hw/spapr.h"
41
#include "hw/spapr_vio.h"
42
#include "hw/spapr_pci.h"
43
#include "hw/xics.h"
44
#include "hw/pci/msi.h"
45

46
#include "sysemu/kvm.h"
47
#include "kvm_ppc.h"
48
#include "pci/pci.h"
49

50
#include "exec/address-spaces.h"
51
#include "hw/usb.h"
52
#include "qemu/config-file.h"
Avi Kivity's avatar
Avi Kivity committed
53

54
55
#include <libfdt.h>

56
57
58
59
60
61
62
63
64
65
/* SLOF memory layout:
 *
 * The SLOF raw image is loaded at 0; it copies its romfs right below the
 * flat device-tree, then positions SLOF itself 31M below that.
 *
 * So we set FW_OVERHEAD to 40MB, which should account for all of that
 * and more.
 *
 * We load our kernel at 4M, leaving space for the initial SLOF image.
 */
66
#define FDT_MAX_SIZE            0x10000
67
#define RTAS_MAX_SIZE           0x10000
68
69
#define FW_MAX_SIZE             0x400000
#define FW_FILE_NAME            "slof.bin"
70
71
#define FW_OVERHEAD             0x2800000
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE
72

73
#define MIN_RMA_SLOF            128UL
74
75
76

#define TIMEBASE_FREQ           512000000ULL

77
#define MAX_CPUS                256
78
#define XICS_IRQS               1024
79

80
81
82
83
#define SPAPR_PCI_BUID          0x800000020000001ULL
#define SPAPR_PCI_MEM_WIN_ADDR  (0x10000000000ULL + 0xA0000000)
#define SPAPR_PCI_MEM_WIN_SIZE  0x20000000
#define SPAPR_PCI_IO_WIN_ADDR   (0x10000000000ULL + 0x80000000)
84
#define SPAPR_PCI_MSI_WIN_ADDR  (0x10000000000ULL + 0x90000000)
85

86
87
#define PHANDLE_XICP            0x00001111

88
89
#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))

90
91
/* Machine-wide sPAPR state; allocated and filled in by ppc_spapr_init(). */
sPAPREnvironment *spapr;

92
int spapr_allocate_irq(int hint, bool lsi)
93
{
94
    int irq;
95
96
97
98
99
100
101
102

    if (hint) {
        irq = hint;
        /* FIXME: we should probably check for collisions somehow */
    } else {
        irq = spapr->next_irq++;
    }

103
104
105
    /* Configure irq type */
    if (!xics_get_qirq(spapr->icp, irq)) {
        return 0;
106
107
    }

108
    xics_set_irq_type(spapr->icp, irq, lsi);
109

110
    return irq;
111
112
}

113
/*
 * Allocate a block of @num consecutive IRQs of the given type.
 *
 * Returns the number of the first IRQ in the block, or -1 if any single
 * allocation fails (or if @num is not positive, since nothing is allocated).
 */
int spapr_allocate_irq_block(int num, bool lsi)
{
    int first = -1;
    int n;

    for (n = 0; n < num; n++) {
        int irq = spapr_allocate_irq(0, lsi);

        if (irq == 0) {
            return -1;
        }

        if (n == 0) {
            first = irq;
        }

        /* A non-consecutive block here would be an internal bug */
        assert(irq == (first + n));
    }

    return first;
}

139
/*
 * Patch the per-CPU nodes of an already opened device tree.
 *
 * For every CPU whose index is a multiple of kvmppc_smt_threads(), the node
 * "/cpus/<model>@<index>" is looked up and given an "ibm,pft-size" property;
 * "ibm,associativity" is added as well when more than one NUMA node is
 * configured.
 *
 * Returns a negative libfdt error code as soon as a lookup or property
 * update fails, otherwise the (non-negative) result of the last update.
 */
static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
{
    int ret = 0;
    CPUPPCState *env;
    char node_path[32];
    int smt = kvmppc_smt_threads();
    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};

    assert(spapr->cpu_model);

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        CPUState *cpu = CPU(ppc_env_get_cpu(env));
        int offset;

        if ((cpu->cpu_index % smt) != 0) {
            continue;
        }

        snprintf(node_path, sizeof(node_path), "/cpus/%s@%x",
                 spapr->cpu_model, cpu->cpu_index);

        offset = fdt_path_offset(fdt, node_path);
        if (offset < 0) {
            return offset;
        }

        if (nb_numa_nodes > 1) {
            uint32_t associativity[] = {cpu_to_be32(0x5),
                                        cpu_to_be32(0x0),
                                        cpu_to_be32(0x0),
                                        cpu_to_be32(0x0),
                                        cpu_to_be32(cpu->numa_node),
                                        cpu_to_be32(cpu->cpu_index)};

            ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
                              sizeof(associativity));
            if (ret < 0) {
                return ret;
            }
        }

        ret = fdt_setprop(fdt, offset, "ibm,pft-size",
                          pft_size_prop, sizeof(pft_size_prop));
        if (ret < 0) {
            return ret;
        }
    }

    return ret;
}

188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221

/*
 * Serialise the CPU's supported segment/page size table into @prop as
 * big-endian 32-bit cells.
 *
 * For each populated entry of env->sps.sps[] the output holds the segment
 * page shift, its SLB encoding, the number of (page shift, PTE encoding)
 * pairs, and then the pairs themselves.
 *
 * @prop:    output buffer
 * @maxsize: size of @prop in bytes
 *
 * Returns the number of bytes written.  Emission stops early, leaving the
 * remaining entries out, once the next entry would no longer fit.
 */
static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
                                     size_t maxsize)
{
    size_t maxcells = maxsize / sizeof(uint32_t);
    size_t used = 0;
    int i, j, count;

    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
        size_t needed;

        if (!sps->page_shift) {
            break;
        }
        for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
            if (sps->enc[count].page_shift == 0) {
                break;
            }
        }

        /* Three header cells plus two cells per encoding.  Comparing sums
         * instead of subtracting from maxcells keeps the check valid even
         * when the buffer is smaller than a single entry (an unsigned
         * subtraction would wrap around and disable the check). */
        needed = 3 + (size_t)count * 2;
        if (used + needed >= maxcells) {
            break;
        }

        prop[used++] = cpu_to_be32(sps->page_shift);
        prop[used++] = cpu_to_be32(sps->slb_enc);
        prop[used++] = cpu_to_be32(count);
        for (j = 0; j < count; j++) {
            prop[used++] = cpu_to_be32(sps->enc[j].page_shift);
            prop[used++] = cpu_to_be32(sps->enc[j].pte_enc);
        }
    }

    return used * sizeof(uint32_t);
}

222
223
224
225
226
227
228
229
230
231
232
/*
 * Evaluate a libfdt expression once and terminate QEMU with a diagnostic
 * (the expression text plus fdt_strerror()) if it yields a negative value.
 *
 * The temporary uses a name that callers are unlikely to have in scope, so
 * an argument that itself mentions a variable called "ret" is evaluated
 * correctly instead of referring to the macro's own local.
 */
#define _FDT(exp) \
    do { \
        int fdt_ret_ = (exp);                                      \
        if (fdt_ret_ < 0) {                                        \
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                    #exp, fdt_strerror(fdt_ret_));                 \
            exit(1);                                               \
        }                                                          \
    } while (0)


233
/*
 * Build the skeleton flattened device tree with libfdt's sequential-write
 * API and return the buffer (FDT_MAX_SIZE bytes, allocated with g_malloc0).
 *
 * The skeleton contains the root node, /chosen, /cpus (one node per CPU
 * whose index is a multiple of kvmppc_smt_threads()), /rtas, the interrupt
 * controller, /vdevice and the event sources.  Any libfdt failure is fatal
 * via _FDT().
 *
 * Side effect: spapr->cpu_model is set to an upper-cased copy of @cpu_model.
 *
 * @cpu_model:      CPU model name used to name the CPU nodes
 * @initrd_base:    start of the initrd; written to linux,initrd-start
 * @initrd_size:    size of the initrd; a reserve-map entry is added if non-zero
 * @kernel_size:    size of the kernel; if non-zero a reserve-map entry and
 *                  the qemu,boot-kernel property are added
 * @boot_device:    optional value for qemu,boot-device (may be NULL)
 * @kernel_cmdline: value for the bootargs property
 * @epow_irq:       passed to spapr_events_fdt_skel()
 */
static void *spapr_create_fdt_skel(const char *cpu_model,
                                   hwaddr initrd_base,
                                   hwaddr initrd_size,
                                   hwaddr kernel_size,
                                   const char *boot_device,
                                   const char *kernel_cmdline,
                                   uint32_t epow_irq)
{
    void *fdt;
    CPUPPCState *env;
    uint32_t start_prop = cpu_to_be32(initrd_base);
    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
    char qemu_hypertas_prop[] = "hcall-memop1";
    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
    char *modelname;
    int i, smt = kvmppc_smt_threads();
    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};

    fdt = g_malloc0(FDT_MAX_SIZE);
    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));

    if (kernel_size) {
        _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
    }
    if (initrd_size) {
        _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
    }
    _FDT((fdt_finish_reservemap(fdt)));

    /* Root node */
    _FDT((fdt_begin_node(fdt, "")));
    _FDT((fdt_property_string(fdt, "device_type", "chrp")));
    _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));

    /* /chosen */
    _FDT((fdt_begin_node(fdt, "chosen")));

    /* Set Form1_affinity */
    _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));

    _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
    _FDT((fdt_property(fdt, "linux,initrd-start",
                       &start_prop, sizeof(start_prop))));
    _FDT((fdt_property(fdt, "linux,initrd-end",
                       &end_prop, sizeof(end_prop))));
    if (kernel_size) {
        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
                              cpu_to_be64(kernel_size) };

        _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
    }
    if (boot_device) {
        _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
    }
    _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
    _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
    _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));

    _FDT((fdt_end_node(fdt)));

    /* cpus */
    _FDT((fdt_begin_node(fdt, "cpus")));

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));

    modelname = g_strdup(cpu_model);

    /* Upper-case the model name in place.  The argument to toupper() must
     * be representable as unsigned char, hence the cast; walking to the
     * terminator avoids calling strlen() on every iteration. */
    for (i = 0; modelname[i] != '\0'; i++) {
        modelname[i] = toupper((unsigned char)modelname[i]);
    }

    /* This is needed during FDT finalization */
    spapr->cpu_model = g_strdup(modelname);

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        CPUState *cpu = CPU(ppc_env_get_cpu(env));
        int index = cpu->cpu_index;
        uint32_t servers_prop[smp_threads];
        uint32_t gservers_prop[smp_threads * 2];
        char *nodename;
        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                           0xffffffff, 0xffffffff};
        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
        uint32_t page_sizes_prop[64];
        size_t page_sizes_prop_size;

        /* Only CPUs whose index is a multiple of smt get their own node */
        if ((index % smt) != 0) {
            continue;
        }

        if (asprintf(&nodename, "%s@%x", modelname, index) < 0) {
            fprintf(stderr, "Allocation failure\n");
            exit(1);
        }

        _FDT((fdt_begin_node(fdt, nodename)));

        free(nodename);

        _FDT((fdt_property_cell(fdt, "reg", index)));
        _FDT((fdt_property_string(fdt, "device_type", "cpu")));

        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
        _FDT((fdt_property_cell(fdt, "dcache-block-size",
                                env->dcache_line_size)));
        _FDT((fdt_property_cell(fdt, "icache-block-size",
                                env->icache_line_size)));
        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
        _FDT((fdt_property_string(fdt, "status", "okay")));
        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));

        /* Build interrupt servers and gservers properties */
        for (i = 0; i < smp_threads; i++) {
            servers_prop[i] = cpu_to_be32(index + i);
            /* Hack, direct the group queues back to cpu 0 */
            gservers_prop[i*2] = cpu_to_be32(index + i);
            gservers_prop[i*2 + 1] = 0;
        }
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
                           servers_prop, sizeof(servers_prop))));
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
                           gservers_prop, sizeof(gservers_prop))));

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
                               segs, sizeof(segs))));
        }

        /* Advertise VMX/VSX (vector extensions) if available
         *   0 / no property == no vector extensions
         *   1               == VMX / Altivec available
         *   2               == VSX available */
        if (env->insns_flags & PPC_ALTIVEC) {
            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;

            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
        }

        /* Advertise DFP (Decimal Floating Point) if available
         *   0 / no property == no DFP
         *   1               == DFP available */
        if (env->insns_flags2 & PPC2_DFP) {
            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
        }

        page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
                                                      sizeof(page_sizes_prop));
        if (page_sizes_prop_size) {
            _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
                               page_sizes_prop, page_sizes_prop_size)));
        }

        _FDT((fdt_end_node(fdt)));
    }

    g_free(modelname);

    _FDT((fdt_end_node(fdt)));

    /* RTAS */
    _FDT((fdt_begin_node(fdt, "rtas")));

    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
                       sizeof(hypertas_prop))));
    _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
                       sizeof(qemu_hypertas_prop))));

    _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
        refpoints, sizeof(refpoints))));

    _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));

    _FDT((fdt_end_node(fdt)));

    /* interrupt controller */
    _FDT((fdt_begin_node(fdt, "interrupt-controller")));

    _FDT((fdt_property_string(fdt, "device_type",
                              "PowerPC-External-Interrupt-Presentation")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
                       interrupt_server_ranges_prop,
                       sizeof(interrupt_server_ranges_prop))));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));

    _FDT((fdt_end_node(fdt)));

    /* vdevice */
    _FDT((fdt_begin_node(fdt, "vdevice")));

    _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));

    _FDT((fdt_end_node(fdt)));

    /* event-sources */
    spapr_events_fdt_skel(fdt, epow_irq);

    _FDT((fdt_end_node(fdt))); /* close root node */
    _FDT((fdt_finish(fdt)));

    return fdt;
}

454
455
456
457
458
459
/*
 * Add the "memory@..." nodes to the device tree.
 *
 * One node covers the RMA starting at address 0; a second one covers the
 * rest of NUMA node 0 when that node is larger than the RMA; every further
 * NUMA node gets a node of its own.  spapr->rma_size is clamped to the size
 * of node 0 first.
 *
 * libfdt failures are fatal through _FDT(); the function itself always
 * returns 0.
 */
static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
{
    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
                                cpu_to_be32(0x0), cpu_to_be32(0x0),
                                cpu_to_be32(0x0)};
    uint64_t mem_reg_property[2];
    char mem_name[32];
    hwaddr node0_size;
    hwaddr mem_start;
    int node;
    int off;

    /* memory node(s) */
    if (nb_numa_nodes > 1) {
        node0_size = node_mem[0];
    } else {
        node0_size = ram_size;
    }
    if (spapr->rma_size > node0_size) {
        spapr->rma_size = node0_size;
    }

    /* RMA */
    mem_reg_property[0] = 0;
    mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
    off = fdt_add_subnode(fdt, 0, "memory@0");
    _FDT(off);
    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
                      sizeof(mem_reg_property))));
    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
                      sizeof(associativity))));

    /* RAM: remainder of node 0 above the RMA, if any */
    if (node0_size > spapr->rma_size) {
        mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
        mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);

        snprintf(mem_name, sizeof(mem_name), "memory@" TARGET_FMT_lx,
                 spapr->rma_size);
        off = fdt_add_subnode(fdt, 0, mem_name);
        _FDT(off);
        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
                          sizeof(mem_reg_property))));
        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
                          sizeof(associativity))));
    }

    /* RAM: node 1 and beyond, laid out back to back after node 0 */
    mem_start = node0_size;
    for (node = 1; node < nb_numa_nodes; node++) {
        mem_reg_property[0] = cpu_to_be64(mem_start);
        mem_reg_property[1] = cpu_to_be64(node_mem[node]);
        associativity[3] = associativity[4] = cpu_to_be32(node);

        snprintf(mem_name, sizeof(mem_name), "memory@" TARGET_FMT_lx,
                 mem_start);
        off = fdt_add_subnode(fdt, 0, mem_name);
        _FDT(off);
        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
                          sizeof(mem_reg_property))));
        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
                          sizeof(associativity))));

        mem_start += node_mem[node];
    }

    return 0;
}

516
/*
 * Produce the final device tree from spapr->fdt_skel and copy it into guest
 * memory at @fdt_addr.
 *
 * The skeleton is opened into a temporary FDT_MAX_SIZE buffer, then the
 * memory, VIO, PCI, RTAS and CPU information is added, the tree is packed
 * and written with cpu_physical_memory_write().  The temporary buffer is
 * freed before returning.
 *
 * @rtas_addr, @rtas_size: forwarded to spapr_rtas_device_tree_setup()
 */
static void spapr_finalize_fdt(sPAPREnvironment *spapr,
                               hwaddr fdt_addr,
                               hwaddr rtas_addr,
                               hwaddr rtas_size)
{
    int ret;
    void *fdt;
    sPAPRPHBState *phb;

    fdt = g_malloc(FDT_MAX_SIZE);

    /* open out the base tree into a temp buffer for the final tweaks */
    _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));

    ret = spapr_populate_memory(spapr, fdt);
    if (ret < 0) {
        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
        exit(1);
    }

    ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
    if (ret < 0) {
        fprintf(stderr, "couldn't setup vio devices in fdt\n");
        exit(1);
    }

    QLIST_FOREACH(phb, &spapr->phbs, list) {
        ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
        /* Check every host bridge, not just the last one in the list */
        if (ret < 0) {
            fprintf(stderr, "couldn't setup PCI devices in fdt\n");
            exit(1);
        }
    }

    /* RTAS; a failure is reported but not treated as fatal here */
    ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
    if (ret < 0) {
        fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
    }

    /* Advertise NUMA via ibm,associativity; likewise only reported */
    ret = spapr_fixup_cpu_dt(fdt, spapr);
    if (ret < 0) {
        fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
    }

    if (!spapr->has_graphics) {
        spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
    }

    _FDT((fdt_pack(fdt)));

    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
        hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
                 fdt_totalsize(fdt), FDT_MAX_SIZE);
        exit(1);
    }

    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));

    g_free(fdt);
}

/*
 * Address translation callback handed to load_elf(): keep the low 28 bits
 * of @addr and rebase them onto KERNEL_LOAD_ADDR.  @opaque is unused.
 */
static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
{
    uint64_t offset = addr & 0x0fffffff;

    return offset + KERNEL_LOAD_ADDR;
}

585
/*
 * Hypercall entry point installed as cpu_ppc_hypercall.
 *
 * With MSR[PR] set the call is refused and H_PRIVILEGE is placed in r3;
 * otherwise the call is dispatched through spapr_hypercall() with the
 * opcode taken from r3 and the arguments starting at r4, and its result
 * is stored back into r3.
 */
static void emulate_spapr_hypercall(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;

    if (!msr_pr) {
        env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
        return;
    }

    hcall_dprintf("Hypercall made with MSR[PR]=1\n");
    env->gpr[3] = H_PRIVILEGE;
}

597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
/*
 * Reset the guest hash page table.
 *
 * kvmppc_reset_htab() is asked first; a positive result is taken as the
 * shift of a kernel-managed table and stored in spapr->htab_shift.
 * Otherwise a table of HTAB_SIZE(spapr) bytes is allocated on first use
 * and cleared.  Finally the RMA size is recomputed when vrma_adjust is set.
 */
static void spapr_reset_htab(sPAPREnvironment *spapr)
{
    long shift = kvmppc_reset_htab(spapr->htab_shift);

    if (shift <= 0) {
        /* We own the table: allocate it if we don't yet have one */
        if (spapr->htab == NULL) {
            spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
        }

        /* And clear it */
        memset(spapr->htab, 0, HTAB_SIZE(spapr));
    } else {
        /* Kernel handles htab, we don't need to allocate one */
        spapr->htab_shift = shift;
    }

    /* Update the RMA size if necessary */
    if (spapr->vrma_adjust) {
        spapr->rma_size = kvmppc_rma_size(ram_size, spapr->htab_shift);
    }
}

626
static void ppc_spapr_reset(void)
627
{
628
629
    /* Reset the hash table & recalc the RMA */
    spapr_reset_htab(spapr);
630

631
    qemu_devices_reset();
632
633
634
635
636
637
638
639
640
641
642
643
644

    /* Load the fdt */
    spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
                       spapr->rtas_size);

    /* Set up the entry state */
    first_cpu->gpr[3] = spapr->fdt_addr;
    first_cpu->gpr[5] = 0;
    first_cpu->halted = 0;
    first_cpu->nip = spapr->entry_point;

}

645
646
static void spapr_cpu_reset(void *opaque)
{
647
    PowerPCCPU *cpu = opaque;
648
    CPUPPCState *env = &cpu->env;
649

650
    cpu_reset(CPU(cpu));
651
652
653
654
655
656
657

    /* All CPUs start halted.  CPU0 is unhalted from the machine level
     * reset code and the rest are explicitly started up by the guest
     * using an RTAS call */
    env->halted = 1;

    env->spr[SPR_HIOR] = 0;
658
659
660
661
662
663

    env->external_htab = spapr->htab;
    env->htab_base = -1;
    env->htab_mask = HTAB_SIZE(spapr) - 1;
    env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
        (spapr->htab_shift - 18);
664
665
}

David Gibson's avatar
David Gibson committed
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
/*
 * Create the "spapr-nvram" device on the VIO bus and record it in
 * spapr->nvram.
 *
 * If the "machine" option group has an "nvram" option, the named block
 * device becomes the device's "drive" property; an unknown name is fatal.
 */
static void spapr_create_nvram(sPAPREnvironment *spapr)
{
    DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
    QemuOpts *machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
    const char *drivename = NULL;

    if (machine_opts) {
        drivename = qemu_opt_get(machine_opts, "nvram");
    }

    if (drivename) {
        BlockDriverState *bs = bdrv_find(drivename);

        if (!bs) {
            fprintf(stderr, "No such block device \"%s\" for nvram\n",
                    drivename);
            exit(1);
        }
        qdev_prop_set_drive_nofail(dev, "drive", bs);
    }

    qdev_init_nofail(dev);

    spapr->nvram = (struct sPAPRNVRAM *)dev;
}

696
/* Returns whether we want to use VGA or not */
697
698
static int spapr_vga_init(PCIBus *pci_bus)
{
699
700
    switch (vga_interface_type) {
    case VGA_NONE:
701
702
    case VGA_STD:
        return pci_vga_init(pci_bus) != NULL;
703
    default:
704
705
        fprintf(stderr, "This vga model is not supported,"
                "currently it only supports -vga std\n");
706
707
        exit(0);
        break;
708
709
710
    }
}

711
/* pSeries LPAR / sPAPR hardware init */
712
static void ppc_spapr_init(QEMUMachineInitArgs *args)
713
{
714
715
716
717
718
719
    ram_addr_t ram_size = args->ram_size;
    const char *cpu_model = args->cpu_model;
    const char *kernel_filename = args->kernel_filename;
    const char *kernel_cmdline = args->kernel_cmdline;
    const char *initrd_filename = args->initrd_filename;
    const char *boot_device = args->boot_device;
720
    PowerPCCPU *cpu;
Andreas Färber's avatar
Andreas Färber committed
721
    CPUPPCState *env;
722
    PCIHostState *phb;
723
    int i;
Avi Kivity's avatar
Avi Kivity committed
724
725
    MemoryRegion *sysmem = get_system_memory();
    MemoryRegion *ram = g_new(MemoryRegion, 1);
726
    hwaddr rma_alloc_size;
727
728
729
    uint32_t initrd_base = 0;
    long kernel_size = 0, initrd_size = 0;
    long load_limit, rtas_limit, fw_size;
730
    char *filename;
731

732
733
    msi_supported = true;

734
735
736
    spapr = g_malloc0(sizeof(*spapr));
    QLIST_INIT(&spapr->phbs);

737
738
    cpu_ppc_hypercall = emulate_spapr_hypercall;

739
740
741
742
743
744
745
    /* Allocate RMA if necessary */
    rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);

    if (rma_alloc_size == -1) {
        hw_error("qemu: Unable to create RMA\n");
        exit(1);
    }
746

747
    if (rma_alloc_size && (rma_alloc_size < ram_size)) {
748
        spapr->rma_size = rma_alloc_size;
749
    } else {
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
        spapr->rma_size = ram_size;

        /* With KVM, we don't actually know whether KVM supports an
         * unbounded RMA (PR KVM) or is limited by the hash table size
         * (HV KVM using VRMA), so we always assume the latter
         *
         * In that case, we also limit the initial allocations for RTAS
         * etc... to 256M since we have no way to know what the VRMA size
         * is going to be as it depends on the size of the hash table
         * isn't determined yet.
         */
        if (kvm_enabled()) {
            spapr->vrma_adjust = 1;
            spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
        }
765
766
    }

767
    /* We place the device tree and RTAS just below either the top of the RMA,
768
769
     * or just below 2GB, whichever is lowere, so that it can be
     * processed with 32-bit real mode code if necessary */
770
    rtas_limit = MIN(spapr->rma_size, 0x80000000);
771
772
773
    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
    load_limit = spapr->fdt_addr - FW_OVERHEAD;
774

775
776
777
778
779
780
781
782
783
784
    /* We aim for a hash table of size 1/128 the size of RAM.  The
     * normal rule of thumb is 1/64 the size of RAM, but that's much
     * more than needed for the Linux guests we support. */
    spapr->htab_shift = 18; /* Minimum architected size */
    while (spapr->htab_shift <= 46) {
        if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
            break;
        }
        spapr->htab_shift++;
    }
785

786
787
    /* init CPUs */
    if (cpu_model == NULL) {
788
        cpu_model = kvm_enabled() ? "host" : "POWER7";
789
790
    }
    for (i = 0; i < smp_cpus; i++) {
791
792
        cpu = cpu_ppc_init(cpu_model);
        if (cpu == NULL) {
793
794
795
            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
            exit(1);
        }
796
797
        env = &cpu->env;

798
799
800
        /* Set time-base frequency to 512 MHz */
        cpu_ppc_tb_init(env, TIMEBASE_FREQ);

801
        /* PAPR always has exception vectors in RAM not ROM */
802
        env->hreset_excp_prefix = 0;
803
804
805

        /* Tell KVM that we're in PAPR mode */
        if (kvm_enabled()) {
806
            kvmppc_set_papr(cpu);
807
808
809
        }

        qemu_register_reset(spapr_cpu_reset, cpu);
810
811
812
    }

    /* allocate RAM */
813
    spapr->ram_limit = ram_size;
814
815
816
817
    if (spapr->ram_limit > rma_alloc_size) {
        ram_addr_t nonrma_base = rma_alloc_size;
        ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;

818
819
        memory_region_init_ram(ram, "ppc_spapr.ram", nonrma_size);
        vmstate_register_ram_global(ram);
820
821
        memory_region_add_subregion(sysmem, nonrma_base, ram);
    }
822

823
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
824
    spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
825
                                           rtas_limit - spapr->rtas_addr);
826
    if (spapr->rtas_size < 0) {
827
828
829
        hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
        exit(1);
    }
830
831
832
833
834
    if (spapr->rtas_size > RTAS_MAX_SIZE) {
        hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
                 spapr->rtas_size, RTAS_MAX_SIZE);
        exit(1);
    }
835
    g_free(filename);
836

837

838
    /* Set up Interrupt Controller */
David Gibson's avatar
David Gibson committed
839
    spapr->icp = xics_system_init(XICS_IRQS);
840
    spapr->next_irq = XICS_IRQ_BASE;
841

842
843
844
    /* Set up EPOW events infrastructure */
    spapr_events_init(spapr);

845
846
847
    /* Set up IOMMU */
    spapr_iommu_init();

848
    /* Set up VIO bus */
849
850
    spapr->vio_bus = spapr_vio_bus_init();

Paolo Bonzini's avatar
Paolo Bonzini committed
851
    for (i = 0; i < MAX_SERIAL_PORTS; i++) {
852
        if (serial_hds[i]) {
853
            spapr_vty_create(spapr->vio_bus, serial_hds[i]);
854
855
        }
    }
856

David Gibson's avatar
David Gibson committed
857
858
859
    /* We always have at least the nvram device on VIO */
    spapr_create_nvram(spapr);

860
    /* Set up PCI */
861
862
    spapr_pci_rtas_init();

863
864
865
    spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
                     SPAPR_PCI_MEM_WIN_ADDR,
                     SPAPR_PCI_MEM_WIN_SIZE,
866
867
                     SPAPR_PCI_IO_WIN_ADDR,
                     SPAPR_PCI_MSI_WIN_ADDR);
868
    phb = PCI_HOST_BRIDGE(QLIST_FIRST(&spapr->phbs));
869

Paolo Bonzini's avatar
Paolo Bonzini committed
870
    for (i = 0; i < nb_nics; i++) {
871
872
873
        NICInfo *nd = &nd_table[i];

        if (!nd->model) {
874
            nd->model = g_strdup("ibmveth");
875
876
877
        }

        if (strcmp(nd->model, "ibmveth") == 0) {
878
            spapr_vlan_create(spapr->vio_bus, nd);
879
        } else {
880
            pci_nic_init_nofail(&nd_table[i], nd->model, NULL);
881
882
883
        }
    }

884
    for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
885
        spapr_vscsi_create(spapr->vio_bus);
886
887
    }

888
    /* Graphics */
889
    if (spapr_vga_init(phb->bus)) {
890
        spapr->has_graphics = true;
891
892
    }

893
    if (usb_enabled(spapr->has_graphics)) {
894
        pci_create_simple(phb->bus, -1, "pci-ohci");
895
896
897
898
899
900
        if (spapr->has_graphics) {
            usbdevice_create("keyboard");
            usbdevice_create("mouse");
        }
    }

901
    if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
902
903
904
905
906
        fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
                "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
        exit(1);
    }

907
908
909
910
911
912
    if (kernel_filename) {
        uint64_t lowaddr = 0;

        kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
                               NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
        if (kernel_size < 0) {
913
914
            kernel_size = load_image_targphys(kernel_filename,
                                              KERNEL_LOAD_ADDR,
915
                                              load_limit - KERNEL_LOAD_ADDR);
916
917
918
919
920
921
922
923
924
        }
        if (kernel_size < 0) {
            fprintf(stderr, "qemu: could not load kernel '%s'\n",
                    kernel_filename);
            exit(1);
        }

        /* load initrd */
        if (initrd_filename) {
925
926
927
928
            /* Try to locate the initrd in the gap between the kernel
             * and the firmware. Add a bit of space just in case
             */
            initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
929
            initrd_size = load_image_targphys(initrd_filename, initrd_base,
930
                                              load_limit - initrd_base);
931
932
933
934
935
936
937
938
939
            if (initrd_size < 0) {
                fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
                        initrd_filename);
                exit(1);
            }
        } else {
            initrd_base = 0;
            initrd_size = 0;
        }
940
    }
941

942
943
944
945
946
947
948
949
950
951
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME);
    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
    if (fw_size < 0) {
        hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
        exit(1);
    }
    g_free(filename);

    spapr->entry_point = 0x100;

952
    /* Prepare the device tree */
953
    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
954
                                            initrd_base, initrd_size,
955
                                            kernel_size,
956
957
                                            boot_device, kernel_cmdline,
                                            spapr->epow_irq);
958
    assert(spapr->fdt_skel != NULL);
959
960
961
962
963
964
}

static QEMUMachine spapr_machine = {
    .name = "pseries",
    .desc = "pSeries Logical Partition (PAPR compliant)",
    .init = ppc_spapr_init,
965
    .reset = ppc_spapr_reset,
966
    .block_default_type = IF_SCSI,
967
968
    .max_cpus = MAX_CPUS,
    .no_parallel = 1,
Avik Sil's avatar
Avik Sil committed
969
    .boot_order = NULL,
970
971
972
973
974
975
976
977
};

/* Register the "pseries" machine description with QEMU. */
static void spapr_machine_init(void)
{
    qemu_register_machine(&spapr_machine);
}

/* NOTE(review): machine_init() presumably arranges for the function above
 * to run during startup - confirm against its definition */
machine_init(spapr_machine_init);