/*	$NetBSD: virtio_pci.c,v 1.55 2024/09/25 17:12:47 christos Exp $	*/

/*
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * Copyright (c) 2012 Stefan Fritsch.
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: virtio_pci.c,v 1.55 2024/09/25 17:12:47 christos Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/cpu.h>
#include <sys/syslog.h>

#include <sys/device.h>

#include <dev/pci/pcidevs.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <dev/pci/virtioreg.h> /* XXX: move to non-pci */
#include <dev/pci/virtio_pcireg.h>

#define VIRTIO_PRIVATE
#include <dev/pci/virtiovar.h> /* XXX: move to non-pci */

#if defined(__alpha__) || defined(__sparc64__)
/*
 * XXX VIRTIO_F_ACCESS_PLATFORM is required for standard PCI DMA
 * XXX to work on these platforms, at least by Qemu.
 * XXX
 * XXX Generalize this later.
 */
#define	__NEED_VIRTIO_F_ACCESS_PLATFORM
#endif /* __alpha__ || __sparc64__ */

#define VIRTIO_PCI_LOG(_sc, _use_log, _fmt, _args...)	\
do {							\
	if ((_use_log)) {				\
		log(LOG_DEBUG, "%s: " _fmt,		\
		    device_xname((_sc)->sc_dev),	\
		    ##_args);				\
	} else {					\
		aprint_error_dev((_sc)->sc_dev,		\
		    _fmt, ##_args);			\
	}						\
} while(0)
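/*
 * Note on VIRTIO_PCI_LOG(): during the initial attach (_use_log == 0)
 * diagnostics go to the autoconfiguration output via aprint_error_dev(9);
 * on re-initialization (_use_log != 0, the "reinit" argument of the
 * setup_interrupts routines below) they are sent to the kernel log
 * instead, since the aprint(9) routines are intended for autoconfiguration
 * time only.
 */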
static int	virtio_pci_match(device_t, cfdata_t, void *);
static void	virtio_pci_attach(device_t, device_t, void *);
static int	virtio_pci_rescan(device_t, const char *, const int *);
static int	virtio_pci_detach(device_t, int);

#define NMAPREG		((PCI_MAPREG_END - PCI_MAPREG_START) / \
				sizeof(pcireg_t))

struct virtio_pci_softc {
	struct virtio_softc	sc_sc;
	bool			sc_intr_pervq;

	/* IO space */
	bus_space_tag_t		sc_iot;
	bus_space_handle_t	sc_ioh;
	bus_size_t		sc_iosize;

	/* BARs */
	bus_space_tag_t		sc_bars_iot[NMAPREG];
	bus_space_handle_t	sc_bars_ioh[NMAPREG];
	bus_size_t		sc_bars_iosize[NMAPREG];

	/* notify space */
	bus_space_tag_t		sc_notify_iot;
	bus_space_handle_t	sc_notify_ioh;
	bus_size_t		sc_notify_iosize;
	uint32_t		sc_notify_off_multiplier;

	/* isr space */
	bus_space_tag_t		sc_isr_iot;
	bus_space_handle_t	sc_isr_ioh;
	bus_size_t		sc_isr_iosize;

	/* generic */
	struct pci_attach_args	sc_pa;
	pci_intr_handle_t	*sc_ihp;
	void			**sc_ihs;
	int			sc_ihs_num;
	int			sc_devcfg_offset;	/* for 0.9 */
};

static int	virtio_pci_attach_09(device_t, void *);
static void	virtio_pci_kick_09(struct virtio_softc *, uint16_t);
static uint16_t	virtio_pci_read_queue_size_09(struct virtio_softc *, uint16_t);
static void	virtio_pci_setup_queue_09(struct virtio_softc *, uint16_t,
		    uint64_t);
static void	virtio_pci_set_status_09(struct virtio_softc *, int);
static void	virtio_pci_negotiate_features_09(struct virtio_softc *,
		    uint64_t);

static int	virtio_pci_attach_10(device_t, void *);
static void	virtio_pci_kick_10(struct virtio_softc *, uint16_t);
static uint16_t	virtio_pci_read_queue_size_10(struct virtio_softc *, uint16_t);
static void	virtio_pci_setup_queue_10(struct virtio_softc *, uint16_t,
		    uint64_t);
static void	virtio_pci_set_status_10(struct virtio_softc *, int);
static void	virtio_pci_negotiate_features_10(struct virtio_softc *,
		    uint64_t);
static int	virtio_pci_find_cap(struct virtio_pci_softc *, int, void *,
		    int);

static int	virtio_pci_alloc_interrupts(struct virtio_softc *);
static void	virtio_pci_free_interrupts(struct virtio_softc *);
static int	virtio_pci_adjust_config_region(struct virtio_pci_softc *);
static int	virtio_pci_intr(void *);
static int	virtio_pci_msix_queue_intr(void *);
static int	virtio_pci_msix_config_intr(void *);
static int	virtio_pci_setup_interrupts_09(struct virtio_softc *, int);
static int	virtio_pci_setup_interrupts_10(struct virtio_softc *, int);
static int	virtio_pci_establish_msix_interrupts(struct virtio_softc *,
		    const struct pci_attach_args *);
static int	virtio_pci_establish_intx_interrupt(struct virtio_softc *,
		    const struct pci_attach_args *);
static bool	virtio_pci_msix_enabled(struct virtio_pci_softc *);

#define VIRTIO_MSIX_CONFIG_VECTOR_INDEX	0
#define VIRTIO_MSIX_QUEUE_VECTOR_INDEX	1

/*
 * For big-endian aarch64/armv7 on QEMU (and most real HW), only the CPU
 * cores run in big-endian mode; all peripherals are configured for
 * little-endian operation.  Their default bus_space(9) functions forcibly
 * swap byte order.  This guarantees that PIO'ed data from pci(4), for
 * example, are handled correctly by bus_space(9), while DMA'ed data must
 * be swapped by hand, in violation of the virtio(4) specifications.
 */
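/*
 * Summary of the selections below, as derived from the three cases:
 *
 *                        READ_ENDIAN     READ_ENDIAN     STRUCT_ENDIAN
 *                        (0.9)           (1.0)           (0.9 / 1.0)
 * big-endian arm/arm64:  BIG             BIG             BIG / LITTLE
 * other big-endian:      LITTLE          BIG             BIG / LITTLE
 * little-endian:         LITTLE          LITTLE          LITTLE / LITTLE
 */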
#if (defined(__aarch64__) || defined(__arm__)) && BYTE_ORDER == BIG_ENDIAN
#	define READ_ENDIAN_09	BIG_ENDIAN
#	define READ_ENDIAN_10	BIG_ENDIAN
#	define STRUCT_ENDIAN_09	BIG_ENDIAN
#	define STRUCT_ENDIAN_10	LITTLE_ENDIAN
#elif BYTE_ORDER == BIG_ENDIAN
#	define READ_ENDIAN_09	LITTLE_ENDIAN
#	define READ_ENDIAN_10	BIG_ENDIAN
#	define STRUCT_ENDIAN_09	BIG_ENDIAN
#	define STRUCT_ENDIAN_10	LITTLE_ENDIAN
#else /* little endian */
#	define READ_ENDIAN_09	LITTLE_ENDIAN
#	define READ_ENDIAN_10	LITTLE_ENDIAN
#	define STRUCT_ENDIAN_09	LITTLE_ENDIAN
#	define STRUCT_ENDIAN_10	LITTLE_ENDIAN
#endif

CFATTACH_DECL3_NEW(virtio_pci, sizeof(struct virtio_pci_softc),
    virtio_pci_match, virtio_pci_attach, virtio_pci_detach, NULL,
    virtio_pci_rescan, NULL, 0);

static const struct virtio_ops virtio_pci_ops_09 = {
	.kick		= virtio_pci_kick_09,
	.read_queue_size = virtio_pci_read_queue_size_09,
	.setup_queue	= virtio_pci_setup_queue_09,
	.set_status	= virtio_pci_set_status_09,
	.neg_features	= virtio_pci_negotiate_features_09,
	.alloc_interrupts = virtio_pci_alloc_interrupts,
	.free_interrupts = virtio_pci_free_interrupts,
	.setup_interrupts = virtio_pci_setup_interrupts_09,
};

static const struct virtio_ops virtio_pci_ops_10 = {
	.kick		= virtio_pci_kick_10,
	.read_queue_size = virtio_pci_read_queue_size_10,
	.setup_queue	= virtio_pci_setup_queue_10,
	.set_status	= virtio_pci_set_status_10,
	.neg_features	= virtio_pci_negotiate_features_10,
	.alloc_interrupts = virtio_pci_alloc_interrupts,
	.free_interrupts = virtio_pci_free_interrupts,
	.setup_interrupts = virtio_pci_setup_interrupts_10,
};

static int
virtio_pci_match(device_t parent, cfdata_t match, void *aux)
{
	const struct pci_attach_args * const pa = aux;

	switch (PCI_VENDOR(pa->pa_id)) {
	case PCI_VENDOR_QUMRANET:
		/* Transitional devices MUST have a PCI Revision ID of 0. */
		if (((PCI_PRODUCT_QUMRANET_VIRTIO_1000 <=
		    PCI_PRODUCT(pa->pa_id)) &&
		    (PCI_PRODUCT(pa->pa_id) <=
		    PCI_PRODUCT_QUMRANET_VIRTIO_103F)) &&
		    PCI_REVISION(pa->pa_class) == 0)
			return 1;
		/*
		 * Non-transitional devices SHOULD have a PCI Revision
		 * ID of 1 or higher.  Drivers MUST match any PCI
		 * Revision ID value.
		 */
		if (((PCI_PRODUCT_QUMRANET_VIRTIO_1040 <=
		    PCI_PRODUCT(pa->pa_id)) &&
		    (PCI_PRODUCT(pa->pa_id) <=
		    PCI_PRODUCT_QUMRANET_VIRTIO_107F)) &&
		    /* XXX: TODO */
		    PCI_REVISION(pa->pa_class) == 1)
			return 1;
		break;
	}

	return 0;
}

static void
virtio_pci_attach(device_t parent, device_t self, void *aux)
{
	struct virtio_pci_softc * const psc = device_private(self);
	struct virtio_softc * const sc = &psc->sc_sc;
	const struct pci_attach_args * const pa = aux;
	pci_chipset_tag_t pc = pa->pa_pc;
	pcitag_t tag = pa->pa_tag;
	int revision;
	int ret;
	pcireg_t id;
	pcireg_t csr;

	revision = PCI_REVISION(pa->pa_class);
	switch (revision) {
	case 0:
		/* subsystem ID shows what I am */
		id = PCI_SUBSYS_ID(pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG));
		break;
	case 1:
		/* pci product number shows what I am */
		id = PCI_PRODUCT(pa->pa_id) - PCI_PRODUCT_QUMRANET_VIRTIO_1040;
		break;
	default:
		aprint_normal(": unknown revision 0x%02x; giving up\n",
		    revision);
		return;
	}

	aprint_normal("\n");
	aprint_naive("\n");
	virtio_print_device_type(self, id, revision);

	csr = pci_conf_read(pc, tag, PCI_COMMAND_STATUS_REG);
	csr |= PCI_COMMAND_MASTER_ENABLE | PCI_COMMAND_IO_ENABLE;
	pci_conf_write(pc, tag, PCI_COMMAND_STATUS_REG, csr);

	sc->sc_dev = self;
	psc->sc_pa = *pa;
	psc->sc_iot = pa->pa_iot;

	sc->sc_dmat = pa->pa_dmat;
	if (pci_dma64_available(pa))
		sc->sc_dmat = pa->pa_dmat64;

	/* attach is dependent on revision */
	ret = 0;
	if (revision == 1) {
		/* try to attach 1.0 */
		ret = virtio_pci_attach_10(self, aux);
	}
	if (ret == 0 && revision == 0) {
		/*
		 * revision 0 means 0.9 only or both 0.9 and 1.0.  The
		 * latter are so-called "Transitional Devices".  For
		 * those devices, we want to use the 1.0 interface if
		 * possible.
		 *
		 * XXX Currently only on platforms that require 1.0
		 * XXX features, such as VIRTIO_F_ACCESS_PLATFORM.
		 */
#ifdef __NEED_VIRTIO_F_ACCESS_PLATFORM
		/* First, try to attach 1.0 */
		ret = virtio_pci_attach_10(self, aux);
		if (ret != 0) {
			aprint_error_dev(self,
			    "VirtIO 1.0 error = %d, falling back to 0.9\n",
			    ret);
			/* Fall back to 0.9. */
			ret = virtio_pci_attach_09(self, aux);
		}
#else
		ret = virtio_pci_attach_09(self, aux);
#endif /* __NEED_VIRTIO_F_ACCESS_PLATFORM */
	}
	if (ret) {
		aprint_error_dev(self, "cannot attach (%d)\n", ret);
		return;
	}
	KASSERT(sc->sc_ops);

	/* preset config region */
	psc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
	if (virtio_pci_adjust_config_region(psc))
		return;

	/* generic */
	virtio_device_reset(sc);
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);

	sc->sc_childdevid = id;
	sc->sc_child = NULL;
	virtio_pci_rescan(self, NULL, NULL);
	return;
}

/* ARGSUSED */
static int
virtio_pci_rescan(device_t self, const char *ifattr, const int *locs)
{
	struct virtio_pci_softc * const psc = device_private(self);
	struct virtio_softc * const sc = &psc->sc_sc;
	struct virtio_attach_args va;

	if (sc->sc_child)	/* Child already attached? */
		return 0;
	memset(&va, 0, sizeof(va));
	va.sc_childdevid = sc->sc_childdevid;

	config_found(self, &va, NULL, CFARGS_NONE);

	if (virtio_attach_failed(sc))
		return 0;

	return 0;
}

static int
virtio_pci_detach(device_t self, int flags)
{
	struct virtio_pci_softc * const psc = device_private(self);
	struct virtio_softc * const sc = &psc->sc_sc;
	unsigned i;
	int r;

	r = config_detach_children(self, flags);
	if (r != 0)
		return r;

	/* Check that child never attached, or detached properly */
	KASSERT(sc->sc_child == NULL);
	KASSERT(sc->sc_vqs == NULL);
	KASSERT(psc->sc_ihs_num == 0);

	if (sc->sc_version_1) {
		for (i = 0; i < __arraycount(psc->sc_bars_iot); i++) {
			if (psc->sc_bars_iosize[i] == 0)
				continue;
			bus_space_unmap(psc->sc_bars_iot[i],
			    psc->sc_bars_ioh[i], psc->sc_bars_iosize[i]);
			psc->sc_bars_iosize[i] = 0;
		}
	} else {
		if (psc->sc_iosize) {
			bus_space_unmap(psc->sc_iot, psc->sc_ioh,
			    psc->sc_iosize);
			psc->sc_iosize = 0;
		}
	}

	return 0;
}

static int
virtio_pci_attach_09(device_t self, void *aux)
{
	struct virtio_pci_softc * const psc = device_private(self);
	const struct pci_attach_args * const pa = aux;
	struct virtio_softc * const sc = &psc->sc_sc;

	/* complete IO region */
	if (pci_mapreg_map(pa, PCI_MAPREG_START, PCI_MAPREG_TYPE_IO, 0,
	    &psc->sc_iot, &psc->sc_ioh, NULL, &psc->sc_iosize)) {
		aprint_error_dev(self, "can't map i/o space\n");
		return EIO;
	}

	/* queue space */
	if (bus_space_subregion(psc->sc_iot, psc->sc_ioh,
	    VIRTIO_CONFIG_QUEUE_NOTIFY, 2, &psc->sc_notify_ioh)) {
		aprint_error_dev(self, "can't map notify i/o space\n");
		return EIO;
	}
	psc->sc_notify_iosize = 2;
	psc->sc_notify_iot = psc->sc_iot;

	/* ISR space */
	if (bus_space_subregion(psc->sc_iot, psc->sc_ioh,
	    VIRTIO_CONFIG_ISR_STATUS, 1, &psc->sc_isr_ioh)) {
		aprint_error_dev(self, "can't map isr i/o space\n");
		return EIO;
	}
	psc->sc_isr_iosize = 1;
	psc->sc_isr_iot = psc->sc_iot;

	/* set our version 0.9 ops */
	sc->sc_ops = &virtio_pci_ops_09;
	sc->sc_bus_endian = READ_ENDIAN_09;
	sc->sc_struct_endian = STRUCT_ENDIAN_09;
	return 0;
}

static int
virtio_pci_attach_10(device_t self, void *aux)
{
	struct virtio_pci_softc * const psc = device_private(self);
	const struct pci_attach_args * const pa = aux;
	struct virtio_softc * const sc = &psc->sc_sc;
	const pci_chipset_tag_t pc = pa->pa_pc;
	const pcitag_t tag = pa->pa_tag;

	struct virtio_pci_cap common, isr, device;
	struct virtio_pci_notify_cap notify;
	int have_device_cfg = 0;
	bus_size_t bars[NMAPREG] = { 0 };
	int bars_idx[NMAPREG] = { 0 };
	struct virtio_pci_cap * const caps[] =
	    { &common, &isr, &device, &notify.cap };
	int i, j, ret = 0;

	if (virtio_pci_find_cap(psc, VIRTIO_PCI_CAP_COMMON_CFG,
	    &common, sizeof(common)))
		return ENODEV;
	if (virtio_pci_find_cap(psc, VIRTIO_PCI_CAP_NOTIFY_CFG,
	    &notify, sizeof(notify)))
		return ENODEV;
	if (virtio_pci_find_cap(psc, VIRTIO_PCI_CAP_ISR_CFG,
	    &isr, sizeof(isr)))
		return ENODEV;
	if (virtio_pci_find_cap(psc, VIRTIO_PCI_CAP_DEVICE_CFG,
	    &device, sizeof(device)))
		memset(&device, 0, sizeof(device));
	else
		have_device_cfg = 1;

	/* Figure out which bars we need to map */
	for (i = 0; i < __arraycount(caps); i++) {
		int bar = caps[i]->bar;
		bus_size_t len = caps[i]->offset + caps[i]->length;
		if (caps[i]->length == 0)
			continue;
		if (bars[bar] < len)
			bars[bar] = len;
	}

	for (i = j = 0; i < __arraycount(bars); i++) {
		int reg;
		pcireg_t type;
		if (bars[i] == 0)
			continue;
		reg = PCI_BAR(i);
		type = pci_mapreg_type(pc, tag, reg);
		if (pci_mapreg_map(pa, reg, type, 0,
		    &psc->sc_bars_iot[j], &psc->sc_bars_ioh[j],
		    NULL, &psc->sc_bars_iosize[j])) {
			aprint_error_dev(self, "can't map bar %u\n", i);
			ret = EIO;
			goto err;
		}
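		/*
		 * Record which mapping slot (j) backs PCI BAR number (i):
		 * the virtio capabilities processed below refer to regions
		 * by BAR number, while only the BARs actually referenced
		 * get mapped.
		 */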
		aprint_debug_dev(self, "bar[%d]: iot %p, size 0x%" PRIxBUSSIZE
		    "\n", j, psc->sc_bars_iot[j], psc->sc_bars_iosize[j]);
		bars_idx[i] = j;
		j++;
	}

	i = bars_idx[notify.cap.bar];
	if (bus_space_subregion(psc->sc_bars_iot[i], psc->sc_bars_ioh[i],
	    notify.cap.offset, notify.cap.length, &psc->sc_notify_ioh)) {
		aprint_error_dev(self, "can't map notify i/o space\n");
		ret = EIO;
		goto err;
	}
	psc->sc_notify_iosize = notify.cap.length;
	psc->sc_notify_iot = psc->sc_bars_iot[i];
	psc->sc_notify_off_multiplier = le32toh(notify.notify_off_multiplier);

	if (have_device_cfg) {
		i = bars_idx[device.bar];
		if (bus_space_subregion(psc->sc_bars_iot[i],
		    psc->sc_bars_ioh[i], device.offset, device.length,
		    &sc->sc_devcfg_ioh)) {
			aprint_error_dev(self, "can't map devcfg i/o space\n");
			ret = EIO;
			goto err;
		}
		aprint_debug_dev(self,
		    "device.offset = 0x%x, device.length = 0x%x\n",
		    device.offset, device.length);
		sc->sc_devcfg_iosize = device.length;
		sc->sc_devcfg_iot = psc->sc_bars_iot[i];
	}

	i = bars_idx[isr.bar];
	if (bus_space_subregion(psc->sc_bars_iot[i], psc->sc_bars_ioh[i],
	    isr.offset, isr.length, &psc->sc_isr_ioh)) {
		aprint_error_dev(self, "can't map isr i/o space\n");
		ret = EIO;
		goto err;
	}
	psc->sc_isr_iosize = isr.length;
	psc->sc_isr_iot = psc->sc_bars_iot[i];

	i = bars_idx[common.bar];
	if (bus_space_subregion(psc->sc_bars_iot[i], psc->sc_bars_ioh[i],
	    common.offset, common.length, &psc->sc_ioh)) {
		aprint_error_dev(self, "can't map common i/o space\n");
		ret = EIO;
		goto err;
	}
	psc->sc_iosize = common.length;
	psc->sc_iot = psc->sc_bars_iot[i];

	psc->sc_sc.sc_version_1 = 1;

	/* set our version 1.0 ops */
	sc->sc_ops = &virtio_pci_ops_10;
	sc->sc_bus_endian = READ_ENDIAN_10;
	sc->sc_struct_endian = STRUCT_ENDIAN_10;
	return 0;

err:
	/* undo our pci_mapreg_map()s */
	for (i = 0; i < __arraycount(bars); i++) {
		if (psc->sc_bars_iosize[i] == 0)
			continue;
		bus_space_unmap(psc->sc_bars_iot[i], psc->sc_bars_ioh[i],
		    psc->sc_bars_iosize[i]);
		psc->sc_bars_iosize[i] = 0;
	}
	return ret;
}

/* v1.0 attach helper */
static int
virtio_pci_find_cap(struct virtio_pci_softc *psc, int cfg_type, void *buf,
    int buflen)
{
	device_t self = psc->sc_sc.sc_dev;
	pci_chipset_tag_t pc = psc->sc_pa.pa_pc;
	pcitag_t tag = psc->sc_pa.pa_tag;
	unsigned int offset, i, len;
	union {
		pcireg_t reg[8];
		struct virtio_pci_cap vcap;
	} *v = buf;

	if (buflen < sizeof(struct virtio_pci_cap))
		return ERANGE;

	if (!pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &offset,
	    &v->reg[0]))
		return ENOENT;

	do {
		for (i = 0; i < 4; i++)
			v->reg[i] =
			    le32toh(pci_conf_read(pc, tag, offset + i * 4));
		if (v->vcap.cfg_type == cfg_type)
			break;
		offset = v->vcap.cap_next;
	} while (offset != 0);

	if (offset == 0)
		return ENOENT;

	if (v->vcap.cap_len > sizeof(struct virtio_pci_cap)) {
		len = roundup(v->vcap.cap_len, sizeof(pcireg_t));
		if (len > buflen) {
			aprint_error_dev(self, "%s cap too large\n", __func__);
			return ERANGE;
		}
		for (i = 4; i < len / sizeof(pcireg_t); i++)
			v->reg[i] =
			    le32toh(pci_conf_read(pc, tag, offset + i * 4));
	}

	/* endian fixup */
	v->vcap.offset = le32toh(v->vcap.offset);
	v->vcap.length = le32toh(v->vcap.length);
	return 0;
}

/* -------------------------------------
 * Version 0.9 support
 * -------------------------------------*/

static void
virtio_pci_kick_09(struct virtio_softc *sc, uint16_t idx)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);

	bus_space_write_2(psc->sc_notify_iot, psc->sc_notify_ioh, 0, idx);
}

/* only applicable for v 0.9 but also called for 1.0 */
static int
virtio_pci_adjust_config_region(struct virtio_pci_softc *psc)
{
	struct virtio_softc * const sc = &psc->sc_sc;
	device_t self = sc->sc_dev;

	if (psc->sc_sc.sc_version_1)
		return 0;

	sc->sc_devcfg_iosize = psc->sc_iosize - psc->sc_devcfg_offset;
	sc->sc_devcfg_iot = psc->sc_iot;
	if (bus_space_subregion(psc->sc_iot, psc->sc_ioh,
	    psc->sc_devcfg_offset, sc->sc_devcfg_iosize,
	    &sc->sc_devcfg_ioh)) {
		aprint_error_dev(self, "can't map config i/o space\n");
		return EIO;
	}

	return 0;
}

static uint16_t
virtio_pci_read_queue_size_09(struct virtio_softc *sc, uint16_t idx)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);

	bus_space_write_2(psc->sc_iot, psc->sc_ioh,
	    VIRTIO_CONFIG_QUEUE_SELECT, idx);
	return bus_space_read_2(psc->sc_iot, psc->sc_ioh,
	    VIRTIO_CONFIG_QUEUE_SIZE);
}

static void
virtio_pci_setup_queue_09(struct virtio_softc *sc, uint16_t idx, uint64_t addr)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);

	bus_space_write_2(psc->sc_iot, psc->sc_ioh,
	    VIRTIO_CONFIG_QUEUE_SELECT, idx);
	bus_space_write_4(psc->sc_iot, psc->sc_ioh,
	    VIRTIO_CONFIG_QUEUE_ADDRESS, addr / VIRTIO_PAGE_SIZE);

	if (psc->sc_ihs_num > 1) {
		int vec = VIRTIO_MSIX_QUEUE_VECTOR_INDEX;
		if (psc->sc_intr_pervq)
			vec += idx;
		bus_space_write_2(psc->sc_iot, psc->sc_ioh,
		    VIRTIO_CONFIG_MSI_QUEUE_VECTOR, vec);
	}
}

static void
virtio_pci_set_status_09(struct virtio_softc *sc, int status)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	int old = 0;

	if (status != 0) {
		old = bus_space_read_1(psc->sc_iot, psc->sc_ioh,
		    VIRTIO_CONFIG_DEVICE_STATUS);
	}
	bus_space_write_1(psc->sc_iot, psc->sc_ioh,
	    VIRTIO_CONFIG_DEVICE_STATUS, status|old);
}

static void
virtio_pci_negotiate_features_09(struct virtio_softc *sc,
    uint64_t guest_features)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	uint32_t r;

	r = bus_space_read_4(psc->sc_iot, psc->sc_ioh,
	    VIRTIO_CONFIG_DEVICE_FEATURES);
	r &= guest_features;
	bus_space_write_4(psc->sc_iot, psc->sc_ioh,
	    VIRTIO_CONFIG_GUEST_FEATURES, r);

	sc->sc_active_features = r;
}

/* -------------------------------------
 * Version 1.0 support
 * -------------------------------------*/

static void
virtio_pci_kick_10(struct virtio_softc *sc, uint16_t idx)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	unsigned offset = sc->sc_vqs[idx].vq_notify_off *
	    psc->sc_notify_off_multiplier;

	bus_space_write_2(psc->sc_notify_iot, psc->sc_notify_ioh, offset, idx);
}

static uint16_t
virtio_pci_read_queue_size_10(struct virtio_softc *sc, uint16_t idx)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	bus_space_tag_t	   iot = psc->sc_iot;
	bus_space_handle_t ioh = psc->sc_ioh;

	bus_space_write_2(iot, ioh, VIRTIO_CONFIG1_QUEUE_SELECT, idx);
	return bus_space_read_2(iot, ioh, VIRTIO_CONFIG1_QUEUE_SIZE);
}

/*
 * By definition little endian only in v1.0.  NB: "MAY" in the text
 * below refers to "independently" (i.e. the order of accesses) not
 * "32-bit" (which is restricted by the earlier "MUST").
 *
 * 4.1.3.1 Driver Requirements: PCI Device Layout
 *
 * For device configuration access, the driver MUST use ... 32-bit
 * wide and aligned accesses for ... 64-bit wide fields.  For 64-bit
 * fields, the driver MAY access each of the high and low 32-bit parts
 * of the field independently.
 */
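/*
 * The helper below follows that rule: each 64-bit register is written
 * as two aligned 32-bit accesses, low half first at "offset", then the
 * high half at "offset + 4".
 */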
static __inline void
virtio_pci_bus_space_write_8(bus_space_tag_t iot, bus_space_handle_t ioh,
    bus_size_t offset, uint64_t value)
{
	bus_space_write_4(iot, ioh, offset, BUS_ADDR_LO32(value));
	bus_space_write_4(iot, ioh, offset + 4, BUS_ADDR_HI32(value));
}

static void
virtio_pci_setup_queue_10(struct virtio_softc *sc, uint16_t idx, uint64_t addr)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	struct virtqueue *vq = &sc->sc_vqs[idx];
	bus_space_tag_t	   iot = psc->sc_iot;
	bus_space_handle_t ioh = psc->sc_ioh;
	KASSERT(vq->vq_index == idx);

	bus_space_write_2(iot, ioh, VIRTIO_CONFIG1_QUEUE_SELECT, vq->vq_index);
	if (addr == 0) {
		bus_space_write_2(iot, ioh, VIRTIO_CONFIG1_QUEUE_ENABLE, 0);
		virtio_pci_bus_space_write_8(iot, ioh,
		    VIRTIO_CONFIG1_QUEUE_DESC, 0);
		virtio_pci_bus_space_write_8(iot, ioh,
		    VIRTIO_CONFIG1_QUEUE_AVAIL, 0);
		virtio_pci_bus_space_write_8(iot, ioh,
		    VIRTIO_CONFIG1_QUEUE_USED, 0);
	} else {
		virtio_pci_bus_space_write_8(iot, ioh,
		    VIRTIO_CONFIG1_QUEUE_DESC, addr);
		virtio_pci_bus_space_write_8(iot, ioh,
		    VIRTIO_CONFIG1_QUEUE_AVAIL, addr + vq->vq_availoffset);
		virtio_pci_bus_space_write_8(iot, ioh,
		    VIRTIO_CONFIG1_QUEUE_USED, addr + vq->vq_usedoffset);
		bus_space_write_2(iot, ioh, VIRTIO_CONFIG1_QUEUE_ENABLE, 1);
		vq->vq_notify_off = bus_space_read_2(iot, ioh,
		    VIRTIO_CONFIG1_QUEUE_NOTIFY_OFF);
	}

	if (psc->sc_ihs_num > 1) {
		int vec = VIRTIO_MSIX_QUEUE_VECTOR_INDEX;
		if (psc->sc_intr_pervq)
			vec += idx;
		bus_space_write_2(iot, ioh,
		    VIRTIO_CONFIG1_QUEUE_MSIX_VECTOR, vec);
	}
}

static void
virtio_pci_set_status_10(struct virtio_softc *sc, int status)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	bus_space_tag_t	   iot = psc->sc_iot;
	bus_space_handle_t ioh = psc->sc_ioh;
	int old = 0;

	if (status)
		old = bus_space_read_1(iot, ioh, VIRTIO_CONFIG1_DEVICE_STATUS);
	bus_space_write_1(iot, ioh, VIRTIO_CONFIG1_DEVICE_STATUS,
	    status | old);
}

static void
virtio_pci_negotiate_features_10(struct virtio_softc *sc,
    uint64_t guest_features)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	device_t self = sc->sc_dev;
	bus_space_tag_t	   iot = psc->sc_iot;
	bus_space_handle_t ioh = psc->sc_ioh;
	uint64_t host, negotiated, device_status;

	guest_features |= VIRTIO_F_VERSION_1;
#ifdef __NEED_VIRTIO_F_ACCESS_PLATFORM
	/* XXX This could use some work. */
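	/*
	 * See __NEED_VIRTIO_F_ACCESS_PLATFORM at the top of this file:
	 * on those platforms standard PCI DMA only works (at least under
	 * QEMU) when the device honors the platform's DMA constraints,
	 * so the feature is made mandatory here.
	 */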
	guest_features |= VIRTIO_F_ACCESS_PLATFORM;
#endif /* __NEED_VIRTIO_F_ACCESS_PLATFORM */

	/* notify on empty is 0.9 only */
	guest_features &= ~VIRTIO_F_NOTIFY_ON_EMPTY;
	sc->sc_active_features = 0;

	bus_space_write_4(iot, ioh, VIRTIO_CONFIG1_DEVICE_FEATURE_SELECT, 0);
	host = bus_space_read_4(iot, ioh, VIRTIO_CONFIG1_DEVICE_FEATURE);
	bus_space_write_4(iot, ioh, VIRTIO_CONFIG1_DEVICE_FEATURE_SELECT, 1);
	host |= (uint64_t)bus_space_read_4(iot, ioh,
	    VIRTIO_CONFIG1_DEVICE_FEATURE) << 32;

	negotiated = host & guest_features;

	bus_space_write_4(iot, ioh, VIRTIO_CONFIG1_DRIVER_FEATURE_SELECT, 0);
	bus_space_write_4(iot, ioh, VIRTIO_CONFIG1_DRIVER_FEATURE,
	    negotiated & 0xffffffff);
	bus_space_write_4(iot, ioh, VIRTIO_CONFIG1_DRIVER_FEATURE_SELECT, 1);
	bus_space_write_4(iot, ioh, VIRTIO_CONFIG1_DRIVER_FEATURE,
	    negotiated >> 32);
	virtio_pci_set_status_10(sc, VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK);

	device_status = bus_space_read_1(iot, ioh,
	    VIRTIO_CONFIG1_DEVICE_STATUS);
	if ((device_status & VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) == 0) {
		aprint_error_dev(self, "feature negotiation failed\n");
		bus_space_write_1(iot, ioh, VIRTIO_CONFIG1_DEVICE_STATUS,
		    VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
		return;
	}

	if ((negotiated & VIRTIO_F_VERSION_1) == 0) {
		aprint_error_dev(self, "host rejected version 1\n");
		bus_space_write_1(iot, ioh, VIRTIO_CONFIG1_DEVICE_STATUS,
		    VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
		return;
	}

	sc->sc_active_features = negotiated;
	return;
}

/* -------------------------------------
 * Generic PCI interrupt code
 * -------------------------------------*/

static int
virtio_pci_setup_interrupts_10(struct virtio_softc *sc, int reinit)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	bus_space_tag_t	   iot = psc->sc_iot;
	bus_space_handle_t ioh = psc->sc_ioh;
	int vector, ret, qid;

	if (!virtio_pci_msix_enabled(psc))
		return 0;

	vector = VIRTIO_MSIX_CONFIG_VECTOR_INDEX;
	bus_space_write_2(iot, ioh, VIRTIO_CONFIG1_CONFIG_MSIX_VECTOR, vector);
	ret = bus_space_read_2(iot, ioh, VIRTIO_CONFIG1_CONFIG_MSIX_VECTOR);
	if (ret != vector) {
		VIRTIO_PCI_LOG(sc, reinit,
		    "can't set config msix vector\n");
		return -1;
	}

	for (qid = 0; qid < sc->sc_nvqs; qid++) {
		vector = VIRTIO_MSIX_QUEUE_VECTOR_INDEX;

		if (psc->sc_intr_pervq)
			vector += qid;
		bus_space_write_2(iot, ioh, VIRTIO_CONFIG1_QUEUE_SELECT, qid);
		bus_space_write_2(iot, ioh, VIRTIO_CONFIG1_QUEUE_MSIX_VECTOR,
		    vector);
		ret = bus_space_read_2(iot, ioh,
		    VIRTIO_CONFIG1_QUEUE_MSIX_VECTOR);
		if (ret != vector) {
			VIRTIO_PCI_LOG(sc, reinit, "can't set queue %d "
			    "msix vector\n", qid);
			return -1;
		}
	}

	return 0;
}

static int
virtio_pci_setup_interrupts_09(struct virtio_softc *sc, int reinit)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	int offset, vector, ret, qid;

	if (!virtio_pci_msix_enabled(psc))
		return 0;

	offset = VIRTIO_CONFIG_MSI_CONFIG_VECTOR;
	vector = VIRTIO_MSIX_CONFIG_VECTOR_INDEX;

	bus_space_write_2(psc->sc_iot, psc->sc_ioh, offset, vector);
	ret = bus_space_read_2(psc->sc_iot, psc->sc_ioh, offset);
	if (ret != vector) {
		aprint_debug_dev(sc->sc_dev, "%s: expected=%d, actual=%d\n",
		    __func__, vector, ret);
		VIRTIO_PCI_LOG(sc, reinit,
		    "can't set config msix vector\n");
		return -1;
	}

	for (qid = 0; qid < sc->sc_nvqs; qid++) {
		offset = VIRTIO_CONFIG_QUEUE_SELECT;
		bus_space_write_2(psc->sc_iot, psc->sc_ioh, offset, qid);

		offset = VIRTIO_CONFIG_MSI_QUEUE_VECTOR;
		vector = VIRTIO_MSIX_QUEUE_VECTOR_INDEX;

		if (psc->sc_intr_pervq)
			vector += qid;

		bus_space_write_2(psc->sc_iot, psc->sc_ioh, offset, vector);
		ret = bus_space_read_2(psc->sc_iot, psc->sc_ioh, offset);
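		/*
		 * A device that cannot use the requested vector leaves a
		 * different value in the register (per the virtio spec,
		 * NO_VECTOR, 0xffff), so a readback mismatch means the
		 * mapping was refused.
		 */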
		if (ret != vector) {
			aprint_debug_dev(sc->sc_dev, "%s[qid=%d]:"
			    " expected=%d, actual=%d\n",
			    __func__, qid, vector, ret);
			VIRTIO_PCI_LOG(sc, reinit, "can't set queue %d "
			    "msix vector\n", qid);
			return -1;
		}
	}

	return 0;
}

static int
virtio_pci_establish_msix_interrupts(struct virtio_softc *sc,
    const struct pci_attach_args *pa)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	device_t self = sc->sc_dev;
	pci_chipset_tag_t pc = pa->pa_pc;
	struct virtqueue *vq;
	char intrbuf[PCI_INTRSTR_LEN];
	char intr_xname[INTRDEVNAMEBUF];
	char const *intrstr;
	int idx, qid, n;

	idx = VIRTIO_MSIX_CONFIG_VECTOR_INDEX;
	if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE)
		pci_intr_setattr(pc, &psc->sc_ihp[idx], PCI_INTR_MPSAFE, true);

	snprintf(intr_xname, sizeof(intr_xname), "%s config",
	    device_xname(sc->sc_dev));

	psc->sc_ihs[idx] = pci_intr_establish_xname(pc, psc->sc_ihp[idx],
	    sc->sc_ipl, virtio_pci_msix_config_intr, sc, intr_xname);
	if (psc->sc_ihs[idx] == NULL) {
		aprint_error_dev(self,
		    "couldn't establish MSI-X for config\n");
		goto error;
	}

	idx = VIRTIO_MSIX_QUEUE_VECTOR_INDEX;
	if (psc->sc_intr_pervq) {
		for (qid = 0; qid < sc->sc_nvqs; qid++) {
			n = idx + qid;
			vq = &sc->sc_vqs[qid];

			snprintf(intr_xname, sizeof(intr_xname), "%s vq#%d",
			    device_xname(sc->sc_dev), qid);

			if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE) {
				pci_intr_setattr(pc, &psc->sc_ihp[n],
				    PCI_INTR_MPSAFE, true);
			}

			psc->sc_ihs[n] = pci_intr_establish_xname(pc,
			    psc->sc_ihp[n], sc->sc_ipl,
			    vq->vq_intrhand, vq->vq_intrhand_arg, intr_xname);
			if (psc->sc_ihs[n] == NULL) {
				aprint_error_dev(self,
				    "couldn't establish MSI-X for a vq\n");
				goto error;
			}
		}
	} else {
		if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE) {
			pci_intr_setattr(pc, &psc->sc_ihp[idx],
			    PCI_INTR_MPSAFE, true);
		}

		snprintf(intr_xname, sizeof(intr_xname), "%s queues",
		    device_xname(sc->sc_dev));
		psc->sc_ihs[idx] = pci_intr_establish_xname(pc,
		    psc->sc_ihp[idx], sc->sc_ipl,
		    virtio_pci_msix_queue_intr, sc, intr_xname);
		if (psc->sc_ihs[idx] == NULL) {
			aprint_error_dev(self,
			    "couldn't establish MSI-X for queues\n");
			goto error;
		}
	}

	idx = VIRTIO_MSIX_CONFIG_VECTOR_INDEX;
	intrstr = pci_intr_string(pc, psc->sc_ihp[idx], intrbuf,
	    sizeof(intrbuf));
	aprint_normal_dev(self, "config interrupting at %s\n", intrstr);

	idx = VIRTIO_MSIX_QUEUE_VECTOR_INDEX;
	if (psc->sc_intr_pervq) {
		kcpuset_t *affinity;
		int affinity_to, r;

		kcpuset_create(&affinity, false);

		for (qid = 0; qid < sc->sc_nvqs; qid++) {
			n = idx + qid;
			affinity_to = (qid / 2) % ncpu;

			intrstr = pci_intr_string(pc, psc->sc_ihp[n],
			    intrbuf, sizeof(intrbuf));

			kcpuset_zero(affinity);
			kcpuset_set(affinity, affinity_to);

			r = interrupt_distribute(psc->sc_ihs[n], affinity,
			    NULL);
			if (r == 0) {
				aprint_normal_dev(self,
				    "for vq #%d interrupting at %s"
				    " affinity to %u\n",
				    qid, intrstr, affinity_to);
			} else {
				aprint_normal_dev(self,
				    "for vq #%d interrupting at %s\n",
				    qid, intrstr);
			}
		}

		kcpuset_destroy(affinity);
	} else {
		intrstr = pci_intr_string(pc, psc->sc_ihp[idx], intrbuf,
		    sizeof(intrbuf));
		aprint_normal_dev(self, "queues interrupting at %s\n",
		    intrstr);
	}

	return 0;

error:
	idx = VIRTIO_MSIX_CONFIG_VECTOR_INDEX;
	if (psc->sc_ihs[idx] != NULL)
		pci_intr_disestablish(psc->sc_pa.pa_pc, psc->sc_ihs[idx]);
	idx = VIRTIO_MSIX_QUEUE_VECTOR_INDEX;
	if (psc->sc_intr_pervq) {
		for (qid = 0; qid < sc->sc_nvqs; qid++) {
			n = idx + qid;
			if (psc->sc_ihs[n] == NULL)
				continue;
			pci_intr_disestablish(psc->sc_pa.pa_pc,
			    psc->sc_ihs[n]);
		}
	} else {
		if (psc->sc_ihs[idx] != NULL) {
			pci_intr_disestablish(psc->sc_pa.pa_pc,
			    psc->sc_ihs[idx]);
		}
	}

	return -1;
}
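/*
 * INTx fallback: a single shared interrupt line, demultiplexed by
 * reading the ISR status byte in virtio_pci_intr() below.
 */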
static int
virtio_pci_establish_intx_interrupt(struct virtio_softc *sc,
    const struct pci_attach_args *pa)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	device_t self = sc->sc_dev;
	pci_chipset_tag_t pc = pa->pa_pc;
	char intrbuf[PCI_INTRSTR_LEN];
	char const *intrstr;

	if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE)
		pci_intr_setattr(pc, &psc->sc_ihp[0], PCI_INTR_MPSAFE, true);

	psc->sc_ihs[0] = pci_intr_establish_xname(pc, psc->sc_ihp[0],
	    sc->sc_ipl, virtio_pci_intr, sc, device_xname(sc->sc_dev));
	if (psc->sc_ihs[0] == NULL) {
		aprint_error_dev(self, "couldn't establish INTx\n");
		return -1;
	}

	intrstr = pci_intr_string(pc, psc->sc_ihp[0], intrbuf,
	    sizeof(intrbuf));
	aprint_normal_dev(self, "interrupting at %s\n", intrstr);

	return 0;
}

static int
virtio_pci_alloc_interrupts(struct virtio_softc *sc)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	device_t self = sc->sc_dev;
	pci_chipset_tag_t pc = psc->sc_pa.pa_pc;
	pcitag_t tag = psc->sc_pa.pa_tag;
	int error;
	int nmsix;
	int off;
	int counts[PCI_INTR_TYPE_SIZE];
	pci_intr_type_t max_type;
	pcireg_t ctl;

	nmsix = pci_msix_count(psc->sc_pa.pa_pc, psc->sc_pa.pa_tag);
	aprint_debug_dev(self, "pci_msix_count=%d\n", nmsix);

	/* We need at least two: one for config and the other for queues */
	if ((sc->sc_flags & VIRTIO_F_INTR_MSIX) == 0 || nmsix < 2) {
		/* Try INTx only */
		max_type = PCI_INTR_TYPE_INTX;
		counts[PCI_INTR_TYPE_INTX] = 1;
	} else {
		/* Try MSI-X first and INTx second */
		if (ISSET(sc->sc_flags, VIRTIO_F_INTR_PERVQ) &&
		    sc->sc_nvqs + VIRTIO_MSIX_QUEUE_VECTOR_INDEX <= nmsix) {
			nmsix = sc->sc_nvqs + VIRTIO_MSIX_QUEUE_VECTOR_INDEX;
		} else {
			nmsix = 2;
		}

		max_type = PCI_INTR_TYPE_MSIX;
		counts[PCI_INTR_TYPE_MSIX] = nmsix;
		counts[PCI_INTR_TYPE_MSI] = 0;
		counts[PCI_INTR_TYPE_INTX] = 1;
	}

retry:
	error = pci_intr_alloc(&psc->sc_pa, &psc->sc_ihp, counts, max_type);
	if (error != 0) {
		aprint_error_dev(self, "couldn't map interrupt\n");
		return -1;
	}

	if (pci_intr_type(pc, psc->sc_ihp[0]) == PCI_INTR_TYPE_MSIX) {
		psc->sc_intr_pervq = nmsix > 2 ? true : false;
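		/*
		 * nmsix was clamped above to either 2 (config + a single
		 * shared queue vector) or sc_nvqs + 1 (config + one vector
		 * per virtqueue); only the latter enables per-VQ interrupts.
		 */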
		psc->sc_ihs = kmem_zalloc(sizeof(*psc->sc_ihs) * nmsix,
		    KM_SLEEP);
		error = virtio_pci_establish_msix_interrupts(sc, &psc->sc_pa);
		if (error != 0) {
			kmem_free(psc->sc_ihs, sizeof(*psc->sc_ihs) * nmsix);
			pci_intr_release(pc, psc->sc_ihp, nmsix);

			/* Retry INTx */
			max_type = PCI_INTR_TYPE_INTX;
			counts[PCI_INTR_TYPE_INTX] = 1;
			goto retry;
		}

		psc->sc_ihs_num = nmsix;
		psc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI;
		virtio_pci_adjust_config_region(psc);
	} else if (pci_intr_type(pc, psc->sc_ihp[0]) == PCI_INTR_TYPE_INTX) {
		psc->sc_intr_pervq = false;
		psc->sc_ihs = kmem_zalloc(sizeof(*psc->sc_ihs) * 1,
		    KM_SLEEP);
		error = virtio_pci_establish_intx_interrupt(sc, &psc->sc_pa);
		if (error != 0) {
			kmem_free(psc->sc_ihs, sizeof(*psc->sc_ihs) * 1);
			pci_intr_release(pc, psc->sc_ihp, 1);
			return -1;
		}

		psc->sc_ihs_num = 1;
		psc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
		virtio_pci_adjust_config_region(psc);

		error = pci_get_capability(pc, tag, PCI_CAP_MSIX, &off, NULL);
		if (error != 0) {
			ctl = pci_conf_read(pc, tag, off + PCI_MSIX_CTL);
			ctl &= ~PCI_MSIX_CTL_ENABLE;
			pci_conf_write(pc, tag, off + PCI_MSIX_CTL, ctl);
		}
	}

	if (!psc->sc_intr_pervq)
		CLR(sc->sc_flags, VIRTIO_F_INTR_PERVQ);

	return 0;
}

static void
virtio_pci_free_interrupts(struct virtio_softc *sc)
{
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);

	for (int i = 0; i < psc->sc_ihs_num; i++) {
		if (psc->sc_ihs[i] == NULL)
			continue;
		pci_intr_disestablish(psc->sc_pa.pa_pc, psc->sc_ihs[i]);
		psc->sc_ihs[i] = NULL;
	}

	if (psc->sc_ihs_num > 0) {
		pci_intr_release(psc->sc_pa.pa_pc, psc->sc_ihp,
		    psc->sc_ihs_num);
	}

	if (psc->sc_ihs != NULL) {
		kmem_free(psc->sc_ihs, sizeof(*psc->sc_ihs) * psc->sc_ihs_num);
		psc->sc_ihs = NULL;
	}
	psc->sc_ihs_num = 0;
}

static bool
virtio_pci_msix_enabled(struct virtio_pci_softc *psc)
{
	pci_chipset_tag_t pc = psc->sc_pa.pa_pc;

	if (pci_intr_type(pc, psc->sc_ihp[0]) == PCI_INTR_TYPE_MSIX)
		return true;

	return false;
}

/*
 * Interrupt handler.
 */
static int
virtio_pci_intr(void *arg)
{
	struct virtio_softc *sc = arg;
	struct virtio_pci_softc * const psc =
	    container_of(sc, struct virtio_pci_softc, sc_sc);
	int isr, r = 0;

	/* check and ack the interrupt */
	isr = bus_space_read_1(psc->sc_isr_iot, psc->sc_isr_ioh, 0);
	if (isr == 0)
		return 0;
	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
	    (sc->sc_config_change != NULL))
		r = (sc->sc_config_change)(sc);
	if (sc->sc_intrhand != NULL) {
		if (sc->sc_soft_ih != NULL)
			softint_schedule(sc->sc_soft_ih);
		else
			r |= (sc->sc_intrhand)(sc);
	}

	return r;
}

static int
virtio_pci_msix_queue_intr(void *arg)
{
	struct virtio_softc *sc = arg;
	int r = 0;

	if (sc->sc_intrhand != NULL) {
		if (sc->sc_soft_ih != NULL)
			softint_schedule(sc->sc_soft_ih);
		else
			r |= (sc->sc_intrhand)(sc);
	}

	return r;
}

static int
virtio_pci_msix_config_intr(void *arg)
{
	struct virtio_softc *sc = arg;
	int r = 0;

	if (sc->sc_config_change != NULL)
		r = (sc->sc_config_change)(sc);

	return r;
}

MODULE(MODULE_CLASS_DRIVER, virtio_pci, "pci,virtio");

#ifdef _MODULE
#include "ioconf.c"
#endif

static int
virtio_pci_modcmd(modcmd_t cmd, void *opaque)
{
	int error = 0;

#ifdef _MODULE
	switch (cmd) {
	case MODULE_CMD_INIT:
		error = config_init_component(cfdriver_ioconf_virtio_pci,
		    cfattach_ioconf_virtio_pci, cfdata_ioconf_virtio_pci);
		break;
	case MODULE_CMD_FINI:
		error = config_fini_component(cfdriver_ioconf_virtio_pci,
		    cfattach_ioconf_virtio_pci, cfdata_ioconf_virtio_pci);
		break;
	default:
		error = ENOTTY;
		break;
	}
#endif

	return error;
}