/*	$OpenBSD: if.c,v 1.721 2024/10/17 05:02:12 jsg Exp $	*/
/*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1980, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)if.c	8.3 (Berkeley) 1/4/94
 */

#include "bpfilter.h"
#include "bridge.h"
#include "carp.h"
#include "ether.h"
#include "pf.h"
#include "ppp.h"
#include "pppoe.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/timeout.h>
#include <sys/protosw.h>
#include <sys/kernel.h>
#include <sys/ioctl.h>
#include <sys/domain.h>
#include <sys/task.h>
#include <sys/atomic.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/stdint.h>	/* uintptr_t */
#include <sys/rwlock.h>
#include <sys/smr.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/netisr.h>

#include <dev/rndvar.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/igmp.h>
#ifdef MROUTING
#include <netinet/ip_mroute.h>
#endif
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#include <netinet6/nd6.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif

#ifdef MPLS
#include <netmpls/mpls.h>
#endif

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#if NBRIDGE > 0
#include <net/if_bridge.h>
#endif

#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#if NPF > 0
#include <net/pfvar.h>
#endif

#include <sys/device.h>

void	if_attachsetup(struct ifnet *);
void	if_attach_common(struct ifnet *);
void	if_remove(struct ifnet *);
int	if_createrdomain(int, struct ifnet *);
int	if_setrdomain(struct ifnet *, int);
void	if_slowtimo(void *);

void	if_detached_qstart(struct ifqueue *);
int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);

int	ifioctl_get(u_long, caddr_t);
int	ifconf(caddr_t);
static int
	if_sffpage_check(const caddr_t);

int	if_getgroup(caddr_t, struct ifnet *);
int	if_getgroupmembers(caddr_t);
int	if_getgroupattribs(caddr_t);
int	if_setgroupattribs(caddr_t);
int	if_getgrouplist(caddr_t);

void	if_linkstate(struct ifnet *);
void	if_linkstate_task(void *);

int	if_clone_list(struct if_clonereq *);
struct if_clone	*if_clone_lookup(const char *, int *);

int	if_group_egress_build(void);

void	if_watchdog_task(void *);

void	if_netisr(void *);

#ifdef DDB
void	ifa_print_all(void);
#endif

void	if_qstart_compat(struct ifqueue *);

/*
 * interface index map
 *
 * the kernel maintains a mapping of interface indexes to struct ifnet
 * pointers.
 *
 * the map is an array of struct ifnet pointers prefixed by an if_map
 * structure. the if_map structure stores the length of its array.
 *
 * as interfaces are attached to the system, the map is grown on demand
 * up to USHRT_MAX entries.
 *
 * interface index 0 is reserved and represents no interface. this
 * supports the use of the interface index as the scope for IPv6 link
 * local addresses, where scope 0 means no scope has been specified.
 * it also supports the use of interface index as the unique identifier
 * for network interfaces in SNMP applications as per RFC2863. therefore
 * if_get(0) returns NULL.
 */

struct ifnet *if_ref(struct ifnet *);
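/*
 * Example: a consumer that stores an interface index instead of a
 * pointer can recover the interface later.  if_get() returns NULL for
 * index 0 and for any index no longer in use, and every successful
 * if_get() must be balanced by an if_put():
 *
 *	struct ifnet *ifp;
 *
 *	if ((ifp = if_get(idx)) != NULL) {
 *		...use ifp...
 *		if_put(ifp);
 *	}
 */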
/*
 * struct if_idxmap
 *
 * infrastructure to manage updates and accesses to the current if_map.
 *
 * interface index 0 is special and represents "no interface", so we
 * use the 0th slot in map to store the length of the array.
 */

struct if_idxmap {
	unsigned int		  serial;
	unsigned int		  count;
	struct ifnet		**map;		/* SMR protected */
	struct rwlock		  lock;
	unsigned char		 *usedidx;	/* bitmap of indices in use */
};

struct if_idxmap_dtor {
	struct smr_entry	  smr;
	struct ifnet		**map;
};

void	if_idxmap_init(unsigned int);
void	if_idxmap_free(void *);
void	if_idxmap_alloc(struct ifnet *);
void	if_idxmap_insert(struct ifnet *);
void	if_idxmap_remove(struct ifnet *);

TAILQ_HEAD(, ifg_group) ifg_head =
    TAILQ_HEAD_INITIALIZER(ifg_head);	/* [N] list of interface groups */

LIST_HEAD(, if_clone) if_cloners =
    LIST_HEAD_INITIALIZER(if_cloners);	/* [I] list of clonable interfaces */
int if_cloners_count;			/* [I] number of clonable interfaces */

struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonelk");

/* hooks should only be added, deleted, and run from a process context */
struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
void	if_hooks_run(struct task_list *);

int	ifq_congestion;

int	netisr;

struct softnet {
	char		 sn_name[16];
	struct taskq	*sn_taskq;
};

#define	NET_TASKQ	4
struct softnet	softnets[NET_TASKQ];

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");

/*
 * Network interface utility routines.
 */
void
ifinit(void)
{
	unsigned int i;

	/*
	 * most machines boot with 4 or 5 interfaces, so size the initial map
	 * to accommodate this
	 */
	if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */

	for (i = 0; i < NET_TASKQ; i++) {
		struct softnet *sn = &softnets[i];
		snprintf(sn->sn_name, sizeof(sn->sn_name), "softnet%u", i);
		sn->sn_taskq = taskq_create(sn->sn_name, 1, IPL_NET,
		    TASKQ_MPSAFE);
		if (sn->sn_taskq == NULL)
			panic("unable to create network taskq %d", i);
	}
}

static struct if_idxmap if_idxmap;
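/*
 * Example (sketch, with a hypothetical foo(4) driver): per-interface
 * work is deferred to one of the softnet taskqs created above.
 * net_tq() maps an interface index to one of the NET_TASKQ queues, so
 * work for the same interface is serialized on the same thread:
 *
 *	task_set(&sc->sc_task, foo_task, sc);
 *	task_add(net_tq(ifp->if_index), &sc->sc_task);
 */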
/*
 * XXXSMP: For `ifnetlist' modification both kernel and net locks
 * should be taken.  For read-only access, holding either one of the
 * two locks is sufficient.
 */
struct ifnet_head ifnetlist = TAILQ_HEAD_INITIALIZER(ifnetlist);

static inline unsigned int
if_idxmap_limit(struct ifnet **if_map)
{
	return ((uintptr_t)if_map[0]);
}

static inline size_t
if_idxmap_usedidx_size(unsigned int limit)
{
	return (max(howmany(limit, NBBY), sizeof(struct if_idxmap_dtor)));
}

void
if_idxmap_init(unsigned int limit)
{
	struct ifnet **if_map;

	rw_init(&if_idxmap.lock, "idxmaplk");
	if_idxmap.serial = 1; /* skip ifidx 0 */

	if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
	    M_WAITOK | M_ZERO);

	if_map[0] = (struct ifnet *)(uintptr_t)limit;

	if_idxmap.usedidx = malloc(if_idxmap_usedidx_size(limit),
	    M_IFADDR, M_WAITOK | M_ZERO);
	setbit(if_idxmap.usedidx, 0); /* blacklist ifidx 0 */

	/* this is called early so there's nothing to race with */
	SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);
}

void
if_idxmap_alloc(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int limit;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	rw_enter_write(&if_idxmap.lock);

	if (++if_idxmap.count >= USHRT_MAX)
		panic("too many interfaces");

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);
	limit = if_idxmap_limit(if_map);

	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= limit) {
		struct if_idxmap_dtor *dtor;
		struct ifnet **oif_map;
		unsigned int olimit;
		unsigned char *nusedidx;

		oif_map = if_map;
		olimit = limit;

		limit = olimit * 2;
		if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
		    M_WAITOK | M_ZERO);
		if_map[0] = (struct ifnet *)(uintptr_t)limit;

		for (i = 1; i < olimit; i++) {
			struct ifnet *oifp = SMR_PTR_GET_LOCKED(&oif_map[i]);
			if (oifp == NULL)
				continue;

			/*
			 * nif_map isn't visible yet, so don't need
			 * SMR_PTR_SET_LOCKED and its membar.
			 */
			if_map[i] = if_ref(oifp);
		}

		nusedidx = malloc(if_idxmap_usedidx_size(limit),
		    M_IFADDR, M_WAITOK | M_ZERO);
		memcpy(nusedidx, if_idxmap.usedidx, howmany(olimit, NBBY));

		/* use the old usedidx bitmap as an smr_entry for the if_map */
		dtor = (struct if_idxmap_dtor *)if_idxmap.usedidx;
		if_idxmap.usedidx = nusedidx;

		SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);

		dtor->map = oif_map;
		smr_init(&dtor->smr);
		smr_call(&dtor->smr, if_idxmap_free, dtor);
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && isclr(if_idxmap.usedidx, index))
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}
	KASSERT(index != 0 && index < limit);
	KASSERT(isclr(if_idxmap.usedidx, index));

	setbit(if_idxmap.usedidx, index);
	ifp->if_index = index;

	rw_exit_write(&if_idxmap.lock);
}

void
if_idxmap_free(void *arg)
{
	struct if_idxmap_dtor *dtor = arg;
	struct ifnet **oif_map = dtor->map;
	unsigned int olimit = if_idxmap_limit(oif_map);
	unsigned int i;

	for (i = 1; i < olimit; i++)
		if_put(oif_map[i]);

	free(oif_map, M_IFADDR, olimit * sizeof(*oif_map));
	free(dtor, M_IFADDR, if_idxmap_usedidx_size(olimit));
}

void
if_idxmap_insert(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int index = ifp->if_index;

	rw_enter_write(&if_idxmap.lock);

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);

	KASSERTMSG(index != 0 && index < if_idxmap_limit(if_map),
	    "%s(%p) index %u vs limit %u", ifp->if_xname, ifp, index,
	    if_idxmap_limit(if_map));
	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == NULL);
	KASSERT(isset(if_idxmap.usedidx, index));

	/* commit */
	SMR_PTR_SET_LOCKED(&if_map[index], if_ref(ifp));

	rw_exit_write(&if_idxmap.lock);
}
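/*
 * Example: the reader side these update paths pair with (see if_get()
 * below).  Readers dereference the map inside an SMR read section,
 * which is why a replaced map may only be freed via smr_call() above:
 *
 *	smr_read_enter();
 *	if_map = SMR_PTR_GET(&if_idxmap.map);
 *	ifp = SMR_PTR_GET(&if_map[index]);
 *	if (ifp != NULL)
 *		if_ref(ifp);
 *	smr_read_leave();
 */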
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int index = ifp->if_index;

	rw_enter_write(&if_idxmap.lock);

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);

	KASSERT(index != 0 && index < if_idxmap_limit(if_map));
	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == ifp);
	KASSERT(isset(if_idxmap.usedidx, index));

	SMR_PTR_SET_LOCKED(&if_map[index], NULL);

	if_idxmap.count--;
	clrbit(if_idxmap.usedidx, index);
	/* end of if_idxmap modifications */

	rw_exit_write(&if_idxmap.lock);

	smr_barrier();

	if_put(ifp);
}

/*
 * Attach an interface to the
 * list of "active" interfaces.
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	if_addgroup(ifp, IFG_ALL);

#ifdef INET6
	nd6_ifattach(ifp);
#endif

#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	ifidx = ifp->if_index;

	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}

/*
 * Allocate the link level name for the specified interface.  This
 * is an attachment helper.  It must be called after ifp->if_addrlen
 * is initialized, which may not be the case when if_attach() is
 * called.
 */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if_free_sadl(ifp);

	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
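/*
 * Example: ROUNDUP() above pads to the next multiple of sizeof(long);
 * with 8-byte longs, ROUNDUP(17) == 1 + ((17 - 1) | 7) == 1 + 23 == 24,
 * so the sockaddr_dl allocation is always long-aligned in size.
 */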
/*
 * Free the link level name for the specified interface.  This is
 * a detach helper.  This is called from if_detach() or from
 * link layer type specific detach functions.
 */
void
if_free_sadl(struct ifnet *ifp)
{
	if (ifp->if_sadl == NULL)
		return;

	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
	ifp->if_sadl = NULL;
}

void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}

void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}

void
if_attach_queues(struct ifnet *ifp, unsigned int nqs)
{
	struct ifqueue **map;
	struct ifqueue *ifq;
	int i;

	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
	KASSERT(nqs != 0);

	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);

	ifp->if_snd.ifq_softc = NULL;
	map[0] = &ifp->if_snd;

	for (i = 1; i < nqs; i++) {
		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
		ifq_init_maxlen(ifq, ifp->if_snd.ifq_maxlen);
		ifq_init(ifq, ifp, i);
		map[i] = ifq;
	}

	ifp->if_ifqs = map;
	ifp->if_nifqs = nqs;
}

void
if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
{
	struct ifiqueue **map;
	struct ifiqueue *ifiq;
	unsigned int i;

	KASSERT(niqs != 0);

	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);

	ifp->if_rcv.ifiq_softc = NULL;
	map[0] = &ifp->if_rcv;

	for (i = 1; i < niqs; i++) {
		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
		ifiq_init(ifiq, ifp, i);
		map[i] = ifiq;
	}

	ifp->if_iqs = map;
	ifp->if_niqs = niqs;
}

void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);
	TAILQ_INIT(&ifp->if_groups);

	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	if_idxmap_alloc(ifp);

	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
#if NBPFILTER > 0
	if (ifp->if_bpf_mtap == NULL)
		ifp->if_bpf_mtap = bpf_mtap_ether;
#endif
	ifp->if_llprio = IFQ_DEFPRIO;
}

void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq.  because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}

void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
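/*
 * Example (sketch): the two attachment contracts checked by the
 * KASSERTs in if_attach_common().  A hypothetical MP-safe driver
 * foo(4) provides a per-queue start routine:
 *
 *	ifp->if_xflags = IFXF_MPSAFE;
 *	ifp->if_qstart = foo_qstart;
 *	if_attach(ifp);
 *
 * while a legacy driver leaves IFXF_MPSAFE unset and sets
 * ifp->if_start; the stack then installs if_qstart_compat for it.
 */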
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}

int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);

#if NPF > 0
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}

int
if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
{
	struct ifqueue *ifq = &ifp->if_snd;
	int error;

	if (ifp->if_nifqs > 1) {
		unsigned int idx;

		/*
		 * use the operations on the first ifq to pick which of
		 * the array gets this mbuf.
		 */

		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
		ifq = ifp->if_ifqs[idx];
	}

	error = ifq_enqueue(ifq, m);
	if (error)
		return (error);

	ifq_start(ifq);

	return (0);
}

void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
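/*
 * Example (sketch, with a hypothetical foo(4) rx handler): drivers
 * batch received packets on an mbuf_list and hand the whole list to
 * the stack in one if_input() call:
 *
 *	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
 *	struct mbuf *m;
 *
 *	while ((m = foo_rx_dequeue(sc)) != NULL)
 *		ml_enqueue(&ml, m);
 *	if_input(ifp, &ml);
 */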
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	int keepflags, keepcksum;
	uint16_t keepmss;

#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	keepflags = m->m_flags & (M_BCAST|M_MCAST);
	/*
	 * Preserve outgoing checksum flags, in case the packet is
	 * forwarded to another interface.  Then the checksum, which
	 * is now incorrect, will be calculated before sending.
	 */
	keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT |
	    M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT |
	    M_TCP_TSO);
	keepmss = m->m_pkthdr.ph_mss;
	m_resethdr(m);
	m->m_flags |= M_LOOP | keepflags;
	m->m_pkthdr.csum_flags = keepcksum;
	m->m_pkthdr.ph_mss = keepmss;

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) {
		if (ifp->if_mtu > 0 &&
		    ((af == AF_INET &&
		    ISSET(ifp->if_capabilities, IFCAP_TSOv4)) ||
		    (af == AF_INET6 &&
		    ISSET(ifp->if_capabilities, IFCAP_TSOv6)))) {
			tcpstat_inc(tcps_inswlro);
			tcpstat_add(tcps_inpktlro,
			    (m->m_pkthdr.len + ifp->if_mtu - 1) /
			    ifp->if_mtu);
		} else {
			tcpstat_inc(tcps_inbadlro);
			m_freem(m);
			return (EPROTONOSUPPORT);
		}
	}

	if (ISSET(keepcksum, M_TCP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
	if (ISSET(keepcksum, M_UDP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_UDP_CSUM_IN_OK;
	if (ISSET(keepcksum, M_ICMP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_IN_OK;

	/* do not count multicast loopback and simplex interfaces */
	if (ISSET(ifp->if_flags, IFF_LOOPBACK)) {
		counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
		    m->m_pkthdr.len);
	}

	switch (af) {
	case AF_INET:
		if (ISSET(keepcksum, M_IPV4_CSUM_OUT))
			m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}

int
if_output_ml(struct ifnet *ifp, struct mbuf_list *ml,
    struct sockaddr *dst, struct rtentry *rt)
{
	struct mbuf *m;
	int error = 0;

	while ((m = ml_dequeue(ml)) != NULL) {
		error = ifp->if_output(ifp, m, dst, rt);
		if (error)
			break;
	}
	if (error)
		ml_purge(ml);

	return error;
}

int
if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
    struct rtentry *rt, u_int mtu)
{
	uint32_t ifcap;
	int error;

	switch (dst->sa_family) {
	case AF_INET:
		ifcap = IFCAP_TSOv4;
		break;
#ifdef INET6
	case AF_INET6:
		ifcap = IFCAP_TSOv6;
		break;
#endif
	default:
		unhandled_af(dst->sa_family);
	}

	/*
	 * Try to send with TSO first.  When forwarding, LRO may have
	 * set the maximum segment size in the mbuf header.  Chop the
	 * TCP segment even if it would fit the interface MTU, to
	 * preserve the maximum path MTU.
	 */
	error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu);
	if (error || *mp == NULL)
		return error;

	if ((*mp)->m_pkthdr.len <= mtu) {
		switch (dst->sa_family) {
		case AF_INET:
			in_hdr_cksum_out(*mp, ifp);
			in_proto_cksum_out(*mp, ifp);
			break;
#ifdef INET6
		case AF_INET6:
			in6_proto_cksum_out(*mp, ifp);
			break;
#endif
		}
		error = ifp->if_output(ifp, *mp, dst, rt);
		*mp = NULL;
		return error;
	}

	/* mp still contains mbuf that has to be fragmented or dropped. */
	return 0;
}
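/*
 * Example: the LRO accounting in if_input_local() above charges one
 * input packet per MTU-sized chunk, i.e. a 45000-byte TSO chain looped
 * back over a 1500-byte MTU interface counts as
 * (45000 + 1500 - 1) / 1500 == 30 packets.
 */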
int
if_output_mq(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total,
    struct sockaddr *dst, struct rtentry *rt)
{
	struct mbuf_list ml;
	unsigned int len;
	int error;

	mq_delist(mq, &ml);
	len = ml_len(&ml);
	error = if_output_ml(ifp, &ml, dst, rt);

	/* XXXSMP we also discard if other CPU enqueues */
	if (mq_len(mq) > 0) {
		/* mbuf is back in queue. Discard. */
		atomic_sub_int(total, len + mq_purge(mq));
	} else
		atomic_sub_int(total, len);

	return error;
}

int
if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	struct ifiqueue *ifiq;
	unsigned int flow = 0;

	m->m_pkthdr.ph_family = af;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		flow = m->m_pkthdr.ph_flowid;

	ifiq = ifp->if_iqs[flow % ifp->if_niqs];

	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
}

void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the shared netlock for packet processing in the softnet
	 * threads.  Packets can regrab the exclusive lock via queues.
	 * ioctl, sysctl, and socket syscall may use shared lock if access
	 * is read only or MP safe.  Usually they hold the exclusive net
	 * lock.
	 */

	NET_LOCK_SHARED();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK_SHARED();
}

void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR)))
		(*ifp->if_input)(ifp, m);
	else
		m_freem(m);
}

void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags &
		    SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP))
			arpintr();
#endif
		if (n & (1 << NETISR_IP))
			ipintr();
#ifdef INET6
		if (n & (1 << NETISR_IPV6))
			ip6intr();
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX))
			pipexintr();
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
		t |= n;
	}

	NET_UNLOCK();
}

void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry);
		/* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}

void
if_remove(struct ifnet *ifp)
{
	/* Remove the interface from the list of all interfaces. */
	NET_LOCK();
	TAILQ_REMOVE(&ifnetlist, ifp, if_list);
	NET_UNLOCK();

	/* Remove the interface from the interface index map. */
	if_idxmap_remove(ifp);

	/* Sleep until the last reference is released. */
	refcnt_finalize(&ifp->if_refcnt, "ifrm");
}
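/*
 * Example (sketch, hypothetical foo(4)): a pseudo-driver stacked on a
 * parent interface registers a detach hook so it can clean up when the
 * parent goes away; if_hooks_run() above walks these task lists with
 * the cursor trick so hooks may sleep or remove themselves:
 *
 *	task_set(&sc->sc_dtask, foo_parent_detach, sc);
 *	if_detachhook_add(ifp0, &sc->sc_dtask);
 */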
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}

void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}

void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}

/*
 * Detach an interface from everything in the kernel.  Also deallocate
 * private resources.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	/* Other CPUs must not have a reference before we start destroying. */
	if_remove(ifp);

	ifp->if_qstart = if_detached_qstart;

	/* Wait until the start routines finished. */
	ifq_barrier(&ifp->if_snd);
	ifq_clr_oactive(&ifp->if_snd);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}
	splx(s);
	NET_UNLOCK();

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

#ifdef INET6
	nd6_ifdetach(ifp);
#endif

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}

/*
 * Returns true if ``ifp0'' is connected to the interface with index
 * ``ifidx''.
 */
int
if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
{
	struct ifnet *ifp;
	int connected = 0;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return (0);

	if (ifp0->if_index == ifp->if_index)
		connected = 1;

#if NBRIDGE > 0
	if (ifp0->if_bridgeidx != 0 &&
	    ifp0->if_bridgeidx == ifp->if_bridgeidx)
		connected = 1;
#endif
#if NCARP > 0
	if ((ifp0->if_type == IFT_CARP &&
	    ifp0->if_carpdevidx == ifp->if_index) ||
	    (ifp->if_type == IFT_CARP &&
	    ifp->if_carpdevidx == ifp0->if_index))
		connected = 1;
#endif

	if_put(ifp);
	return (connected);
}
/*
 * Create a clone network interface.
 */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	rw_enter_write(&if_cloners_lock);

	if ((ifp = if_unit(name)) != NULL) {
		ret = EEXIST;
		goto unlock;
	}

	ret = (*ifc->ifc_create)(ifc, unit);
	if (ret != 0 || (ifp = if_unit(name)) == NULL)
		goto unlock;

	NET_LOCK();
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();
unlock:
	rw_exit_write(&if_cloners_lock);
	if_put(ifp);

	return (ret);
}

/*
 * Destroy a clone network interface.
 */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	rw_enter_write(&if_cloners_lock);

	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (strcmp(ifp->if_xname, name) == 0)
			break;
	}
	if (ifp == NULL) {
		rw_exit_write(&if_cloners_lock);
		return (ENXIO);
	}

	NET_LOCK();
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	rw_exit_write(&if_cloners_lock);

	return (ret);
}

/*
 * Look up a network interface cloner.
 */
struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	int unit;

	/* separate interface name from unit */
	for (cp = name;
	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
	    cp++)
		continue;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return (NULL);	/* No name or unit number */
	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
		return (NULL);	/* unit number 0 padded */

	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strlen(ifc->ifc_name) == cp - name &&
		    !strncmp(name, ifc->ifc_name, cp - name))
			break;
	}
	if (ifc == NULL)
		return (NULL);

	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		if (*cp < '0' || *cp > '9' ||
		    unit > (INT_MAX - (*cp - '0')) / 10) {
			/* Bogus unit number. */
			return (NULL);
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return (ifc);
}

/*
 * Register a network interface cloner.
 */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached.  main() is the only place that may alter the
	 * if_cloners list.  Once the system is running and main() is done
	 * with initialization, if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	if_cloners_count++;
}
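/*
 * Examples of the parsing done by if_clone_lookup(): "vlan5" matches
 * the "vlan" cloner with unit 5 and "lo0" matches "lo" with unit 0,
 * but "vlan" (no unit), "vlan05" (zero padded) and "4vlan" (no name
 * before the digits) are all rejected.
 */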
/*
 * Provide list of interface cloners to userspace.
 */
int
if_clone_list(struct if_clonereq *ifcr)
{
	char outbuf[IFNAMSIZ], *dst;
	struct if_clone *ifc;
	int count, error = 0;

	if ((dst = ifcr->ifcr_buffer) == NULL) {
		/* Just asking how many there are. */
		ifcr->ifcr_total = if_cloners_count;
		return (0);
	}

	if (ifcr->ifcr_count < 0)
		return (EINVAL);

	ifcr->ifcr_total = if_cloners_count;
	count = MIN(if_cloners_count, ifcr->ifcr_count);

	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (count == 0)
			break;
		bzero(outbuf, sizeof outbuf);
		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
		error = copyout(outbuf, dst, IFNAMSIZ);
		if (error)
			break;
		count--;
		dst += IFNAMSIZ;
	}

	return (error);
}

/*
 * set queue congestion marker
 */
void
if_congestion(void)
{
	extern int ticks;

	ifq_congestion = ticks;
}

int
if_congested(void)
{
	extern int ticks;
	int diff;

	diff = ticks - ifq_congestion;
	if (diff < 0) {
		ifq_congestion = ticks - hz;
		return (0);
	}

	return (diff <= (hz / 100));
}

#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)

/*
 * Locate an interface based on a complete address.
 */
struct ifaddr *
ifa_ifwithaddr(const struct sockaddr *addr, u_int rtableid)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	u_int rdomain;

	NET_ASSERT_LOCKED();

	rdomain = rtable_l2(rtableid);
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;

		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;

			if (equal(addr, ifa->ifa_addr)) {
				return (ifa);
			}
		}
	}
	return (NULL);
}

/*
 * Locate the point to point interface with a given destination address.
 */
struct ifaddr *
ifa_ifwithdstaddr(const struct sockaddr *addr, u_int rdomain)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	NET_ASSERT_LOCKED();

	rdomain = rtable_l2(rdomain);
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;
		if (ifp->if_flags & IFF_POINTOPOINT) {
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				if (ifa->ifa_addr->sa_family !=
				    addr->sa_family ||
				    ifa->ifa_dstaddr == NULL)
					continue;
				if (equal(addr, ifa->ifa_dstaddr)) {
					return (ifa);
				}
			}
		}
	}
	return (NULL);
}

/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	const char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr &&
			    equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		if (cp3 == cplim)
			return (ifa);
	}
	return (ifa_maybe);
}

void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
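/*
 * Example: the masked compare in ifaof_ifpforaddr() walks only the
 * bytes covered by the netmask sockaddr, e.g. looking up 192.168.7.42
 * on an interface with 192.168.7.1/255.255.255.0 XORs each address
 * byte and masks it; the final octet (42 vs 1) is either masked by a
 * zero byte or lies beyond the mask's sa_len, so the ifa matches.
 */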
/*
 * Default action when installing a local route on a point-to-point
 * interface.
 */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}

int
p2p_bpf_mtap(caddr_t if_bpf, const struct mbuf *m, u_int dir)
{
#if NBPFILTER > 0
	return (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, dir));
#else
	return (0);
#endif
}

void
p2p_input(struct ifnet *ifp, struct mbuf *m)
{
	void (*input)(struct ifnet *, struct mbuf *);

	switch (m->m_pkthdr.ph_family) {
	case AF_INET:
		input = ipv4_input;
		break;
#ifdef INET6
	case AF_INET6:
		input = ipv6_input;
		break;
#endif
#ifdef MPLS
	case AF_MPLS:
		input = mpls_input;
		break;
#endif
	default:
		m_freem(m);
		return;
	}

	(*input)(ifp, m);
}

/*
 * Bring down all interfaces
 */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	NET_UNLOCK();
}

/*
 * Mark an interface down and notify protocols of
 * the transition.
 */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	ifq_purge(&ifp->if_snd);

	if_linkstate(ifp);
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	if_linkstate(ifp);
}

/*
 * Notify userland, the routing table and hooks owner of
 * a link-state transition.
 */
void
if_linkstate_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	NET_LOCK();
	KERNEL_LOCK();

	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	KERNEL_UNLOCK();
	NET_UNLOCK();
}

void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	if (panicstr == NULL) {
		rtm_ifchg(ifp);
		rt_if_track(ifp);
	}

	if_hooks_run(&ifp->if_linkstatehooks);
}

void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}

void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}

/*
 * Schedule a link state change task.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
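/*
 * Example (sketch, hypothetical foo(4)): consumers track link changes
 * on a parent interface by hooking the list that if_linkstate() runs:
 *
 *	task_set(&sc->sc_ltask, foo_link_hook, sc);
 *	if_linkstatehook_add(ifp0, &sc->sc_ltask);
 *
 * Drivers report a transition from their status handlers with
 * if_link_state_change(ifp), which defers the notification to the
 * softnet taskq.
 */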
/*
 * Handle interface watchdog timer routine.  Called
 * from softclock, we decrement timer (if set) and
 * call the appropriate interface routine on expiration.
 */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index),
			    &ifp->if_watchdogtask);
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}

void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}

/*
 * Map interface name to interface structure pointer.
 */
struct ifnet *
if_unit(const char *name)
{
	struct ifnet *ifp;

	KERNEL_ASSERT_LOCKED();

	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (strcmp(ifp->if_xname, name) == 0) {
			if_ref(ifp);
			return (ifp);
		}
	}

	return (NULL);
}

/*
 * Map interface index to interface structure pointer.
 */
struct ifnet *
if_get(unsigned int index)
{
	struct ifnet **if_map;
	struct ifnet *ifp = NULL;

	if (index == 0)
		return (NULL);

	smr_read_enter();
	if_map = SMR_PTR_GET(&if_idxmap.map);
	if (index < if_idxmap_limit(if_map)) {
		ifp = SMR_PTR_GET(&if_map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			if_ref(ifp);
		}
	}
	smr_read_leave();

	return (ifp);
}

struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}

void
if_put(struct ifnet *ifp)
{
	if (ifp == NULL)
		return;

	refcnt_rele_wake(&ifp->if_refcnt);
}

int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}

int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if ((error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = if_unit(loifname)) == NULL)
		return (ENXIO);
	if (error && (ifp != loifp || error != EEXIST)) {
		if_put(loifp);
		return (error);
	}

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;
	if_put(loifp);

	return (0);
}

int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
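/*
 * Example: moving an interface to a fresh routing domain with
 * SIOCSIFRDOMAIN first runs if_createrdomain(), which creates the
 * rdomain's loopback with the same unit number, e.g. "ifconfig em0
 * rdomain 2" creates lo2 (if needed) and binds rtable 2 to it before
 * em0 is detached from its old domain and reattached.
 */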
/*
 * Interface ioctls.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct if_afreq *ifar = (struct if_afreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int s, error = 0, oif_xflags;
	size_t bytesdone;
	unsigned short oif_flags;

	switch (cmd) {
	case SIOCIFCREATE:
		if ((error = suser(p)) != 0)
			return (error);
		KERNEL_LOCK();
		error = if_clone_create(ifr->ifr_name, 0);
		KERNEL_UNLOCK();
		return (error);
	case SIOCIFDESTROY:
		if ((error = suser(p)) != 0)
			return (error);
		KERNEL_LOCK();
		error = if_clone_destroy(ifr->ifr_name);
		KERNEL_UNLOCK();
		return (error);
	case SIOCSIFGATTR:
		if ((error = suser(p)) != 0)
			return (error);
		KERNEL_LOCK();
		NET_LOCK();
		error = if_setgroupattribs(data);
		NET_UNLOCK();
		KERNEL_UNLOCK();
		return (error);
	case SIOCGIFCONF:
	case SIOCIFGCLONERS:
	case SIOCGIFGMEMB:
	case SIOCGIFGATTR:
	case SIOCGIFGLIST:
	case SIOCGIFFLAGS:
	case SIOCGIFXFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHARDMTU:
	case SIOCGIFDATA:
	case SIOCGIFDESCR:
	case SIOCGIFRTLABEL:
	case SIOCGIFPRIORITY:
	case SIOCGIFRDOMAIN:
	case SIOCGIFGROUP:
	case SIOCGIFLLPRIO:
		error = ifioctl_get(cmd, data);
		return (error);
	}

	KERNEL_LOCK();

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL) {
		KERNEL_UNLOCK();
		return (ENXIO);
	}
	oif_flags = ifp->if_flags;
	oif_xflags = ifp->if_xflags;

	switch (cmd) {
	case SIOCIFAFATTACH:
	case SIOCIFAFDETACH:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		switch (ifar->ifar_af) {
		case AF_INET:
			/* attach is a noop for AF_INET */
			if (cmd == SIOCIFAFDETACH)
				in_ifdetach(ifp);
			break;
#ifdef INET6
		case AF_INET6:
			if (cmd == SIOCIFAFATTACH)
				error = in6_ifattach(ifp);
			else
				in6_ifdetach(ifp);
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
		}
		NET_UNLOCK();
		break;

	case SIOCSIFXFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
#ifdef INET6
		if ((ISSET(ifr->ifr_flags, IFXF_AUTOCONF6) ||
		    ISSET(ifr->ifr_flags, IFXF_AUTOCONF6TEMP)) &&
		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6) &&
		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)) {
			error = in6_ifattach(ifp);
			if (error != 0) {
				NET_UNLOCK();
				break;
			}
		}

		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags |= IFXF_INET6_NOSOII;

		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
#endif	/* INET6 */

#ifdef MPLS
		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags |= IFXF_MPLS;
			ifp->if_ll_output = ifp->if_output;
			ifp->if_output = mpls_output;
			splx(s);
		}
		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags &= ~IFXF_MPLS;
			ifp->if_output = ifp->if_ll_output;
			ifp->if_ll_output = NULL;
			splx(s);
		}
#endif	/* MPLS */

#ifndef SMALL_KERNEL
		if (ifp->if_capabilities & IFCAP_WOL) {
			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags |= IFXF_WOL;
				error = ifp->if_wol(ifp, 1);
				splx(s);
			}
			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags &= ~IFXF_WOL;
				error = ifp->if_wol(ifp, 0);
				splx(s);
			}
		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
			ifr->ifr_flags &= ~IFXF_WOL;
			error = ENOTSUP;
		}
#endif
		if (ISSET(ifr->ifr_flags, IFXF_LRO) !=
		    ISSET(ifp->if_xflags, IFXF_LRO))
			error = ifsetlro(ifp, ISSET(ifr->ifr_flags, IFXF_LRO));
		if (error == 0)
			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
			    (ifr->ifr_flags & ~IFXF_CANTCHANGE);

		if (!ISSET(ifp->if_flags, IFF_UP) &&
		    ((!ISSET(oif_xflags, IFXF_AUTOCONF4) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF4)) ||
		    (!ISSET(oif_xflags, IFXF_AUTOCONF6) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6)) ||
		    (!ISSET(oif_xflags, IFXF_AUTOCONF6TEMP) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)))) {
			ifr->ifr_flags = ifp->if_flags | IFF_UP;
			goto forceup;
		}

		NET_UNLOCK();
		break;

	case SIOCSIFFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
forceup:
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
		    (ifr->ifr_flags & ~IFF_CANTCHANGE);
		error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, data);
		if (error != 0) {
			ifp->if_flags = oif_flags;
			if (cmd == SIOCSIFXFLAGS)
				ifp->if_xflags = oif_xflags;
		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
			s = splnet();
			if (ISSET(ifp->if_flags, IFF_UP))
				if_up(ifp);
			else
				if_down(ifp);
			splx(s);
		}
		NET_UNLOCK();
		break;

	case SIOCSIFMETRIC:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		ifp->if_metric = ifr->ifr_metric;
		NET_UNLOCK();
		break;

	case SIOCSIFMTU:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		NET_UNLOCK();
		if (error == 0)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFDESCR:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifdescrbuf,
		    IFDESCRSIZE, &bytesdone);
		if (error == 0) {
			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
		}
		break;

	case SIOCSIFRTLABEL:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
		    RTLABEL_LEN, &bytesdone);
		if (error == 0) {
			rtlabel_unref(ifp->if_rtlabelid);
			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
		}
		break;

	case SIOCSIFPRIORITY:
		if ((error = suser(p)) != 0)
			break;
		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
			error = EINVAL;
			break;
		}
		ifp->if_priority = ifr->ifr_metric;
		break;

	case SIOCSIFRDOMAIN:
		if ((error = suser(p)) != 0)
			break;
		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
		if (!error || error == EEXIST) {
			NET_LOCK();
			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
			NET_UNLOCK();
		}
		break;

	case SIOCAIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = if_addgroup(ifp, ifgr->ifgr_group);
		if (error == 0) {
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
		}
		NET_UNLOCK();
		break;

	case SIOCDIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == ENOTTY)
			error = 0;
		if (error == 0)
			error = if_delgroup(ifp, ifgr->ifgr_group);
		NET_UNLOCK();
		break;

	case SIOCSIFLLADDR:
		if ((error = suser(p)))
			break;
		if ((ifp->if_sadl == NULL) ||
		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_CARP:
		case IFT_XETHER:
		case IFT_ISO88025:
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
			if (error == 0)
				error = if_setlladdr(ifp,
				    ifr->ifr_addr.sa_data);
			break;
		default:
			error = ENODEV;
		}

		if (error == 0)
			ifnewlladdr(ifp);
		NET_UNLOCK();
		if (error == 0)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFLLPRIO:
		if ((error = suser(p)))
			break;
		if (ifr->ifr_llprio < IFQ_MINPRIO ||
		    ifr->ifr_llprio > IFQ_MAXPRIO) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		ifp->if_llprio = ifr->ifr_llprio;
		NET_UNLOCK();
		break;

	case SIOCGIFSFFPAGE:
		error = suser(p);
		if (error != 0)
			break;

		error = if_sffpage_check(data);
		if (error != 0)
			break;

		/* don't take NET_LOCK because i2c reads take a long time */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSIFMEDIA:
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		/* net lock is not needed */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSETKALIVE:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
	case SIOCSLIFPHYTTL:
	case SIOCSLIFPHYDF:
	case SIOCSLIFPHYECN:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSTXHPRIO:
	case SIOCSRXHPRIO:
	case SIOCSIFPAIR:
	case SIOCSIFPARENT:
	case SIOCDIFPARENT:
	case SIOCSETMPWCFG:
	case SIOCSETLABEL:
	case SIOCDELLABEL:
	case SIOCSPWE3CTRLWORD:
	case SIOCSPWE3FAT:
	case SIOCSPWE3NEIGHBOR:
	case SIOCDPWE3NEIGHBOR:
#if NBRIDGE > 0
	case SIOCBRDGADD:
	case SIOCBRDGDEL:
	case SIOCBRDGSIFFLGS:
	case SIOCBRDGSCACHE:
	case SIOCBRDGADDS:
	case SIOCBRDGDELS:
	case SIOCBRDGSADDR:
	case SIOCBRDGSTO:
	case SIOCBRDGDADDR:
	case SIOCBRDGFLUSH:
	case SIOCBRDGADDL:
	case SIOCBRDGSIFPROT:
	case SIOCBRDGARL:
	case SIOCBRDGFRL:
	case SIOCBRDGSPRI:
	case SIOCBRDGSHT:
	case SIOCBRDGSFD:
	case SIOCBRDGSMA:
	case SIOCBRDGSIFPRIO:
	case SIOCBRDGSIFCOST:
	case SIOCBRDGSTXHC:
	case SIOCBRDGSPROTO:
#endif
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	default:
		error = pru_control(so, cmd, data, ifp);
		if (error != EOPNOTSUPP)
			break;
		switch (cmd) {
		case SIOCAIFADDR:
		case SIOCDIFADDR:
		case SIOCSIFADDR:
		case SIOCSIFNETMASK:
		case SIOCSIFDSTADDR:
		case SIOCSIFBRDADDR:
#ifdef INET6
		case SIOCAIFADDR_IN6:
		case SIOCDIFADDR_IN6:
#endif
			error = suser(p);
			break;
		default:
			error = 0;
			break;
		}
		if (error)
			break;
		NET_LOCK();
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		NET_UNLOCK();
		break;
	}

	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags) {
		/* if_up() and if_down() already sent an update, skip here */
		if (((oif_flags ^ ifp->if_flags) & IFF_UP) == 0)
			rtm_ifchg(ifp);
	}

	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
		getmicrotime(&ifp->if_lastchange);

	KERNEL_UNLOCK();

	if_put(ifp);

	return (error);
}
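/*
 * Example (sketch): the matching userland side for one of the
 * read-only requests dispatched above to ifioctl_get():
 *
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	if (ioctl(s, SIOCGIFFLAGS, &ifr) == 0 &&
 *	    (ifr.ifr_flags & IFF_UP))
 *		...the interface is configured up...
 */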
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;

	switch(cmd) {
	case SIOCGIFCONF:
		NET_LOCK_SHARED();
		error = ifconf(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_LOCK_SHARED();
		error = if_getgroupmembers(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGATTR:
		NET_LOCK_SHARED();
		error = if_getgroupattribs(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGLIST:
		NET_LOCK_SHARED();
		error = if_getgrouplist(data);
		NET_UNLOCK_SHARED();
		return (error);
	}

	KERNEL_LOCK();

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL) {
		KERNEL_UNLOCK();
		return (ENXIO);
	}

	NET_LOCK_SHARED();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid && rtlabel_id2name(ifp->if_rtlabelid,
		    ifrtlabelbuf, RTLABEL_LEN) != NULL) {
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		panic("invalid ioctl %lu", cmd);
	}

	NET_UNLOCK_SHARED();
	KERNEL_UNLOCK();

	if_put(ifp);

	return (error);
}

static int
if_sffpage_check(const caddr_t data)
{
	const struct if_sffpage *sff = (const struct if_sffpage *)data;

	switch (sff->sff_addr) {
	case IFSFF_ADDR_EEPROM:
	case IFSFF_ADDR_DDM:
		break;
	default:
		return (EINVAL);
	}

	return (0);
}

int
if_txhprio_l2_check(int hdrprio)
{
	switch (hdrprio) {
	case IF_HDRPRIO_PACKET:
		return (0);
	default:
		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
			return (0);
		break;
	}

	return (EINVAL);
}

int
if_txhprio_l3_check(int hdrprio)
{
	switch (hdrprio) {
	case IF_HDRPRIO_PACKET:
	case IF_HDRPRIO_PAYLOAD:
		return (0);
	default:
		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
			return (0);
		break;
	}

	return (EINVAL);
}

int
if_rxhprio_l2_check(int hdrprio)
{
	switch (hdrprio) {
	case IF_HDRPRIO_PACKET:
	case IF_HDRPRIO_OUTER:
		return (0);
	default:
		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
			return (0);
		break;
	}

	return (EINVAL);
}

int
if_rxhprio_l3_check(int hdrprio)
{
	switch (hdrprio) {
	case IF_HDRPRIO_PACKET:
	case IF_HDRPRIO_PAYLOAD:
	case IF_HDRPRIO_OUTER:
		return (0);
	default:
		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
			return (0);
		break;
	}

	return (EINVAL);
}
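/*
 * Example (sketch): the canonical two-pass use of SIOCGIFCONF served
 * by ifconf() below; a first call with ifc_len == 0 reports the
 * required buffer size, the second call fills the buffer:
 *
 *	struct ifconf ifc;
 *
 *	ifc.ifc_len = 0;
 *	ifc.ifc_buf = NULL;
 *	ioctl(s, SIOCGIFCONF, &ifc);
 *	ifc.ifc_buf = malloc(ifc.ifc_len);
 *	ioctl(s, SIOCGIFCONF, &ifc);
 */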
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 */
int
ifconf(caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
	if (space == 0) {
		TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
			struct sockaddr *sa;

			if (TAILQ_EMPTY(&ifp->if_addrlist))
				space += sizeof (ifr);
			else
				TAILQ_FOREACH(ifa,
				    &ifp->if_addrlist, ifa_list) {
					sa = ifa->ifa_addr;
					if (sa->sa_len > sizeof(*sa))
						space += sa->sa_len -
						    sizeof(*sa);
					space += sizeof(ifr);
				}
		}
		ifc->ifc_len = space;
		return (0);
	}

	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (space < sizeof(ifr))
			break;
		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
			    sizeof(ifr));
			if (error)
				break;
			space -= sizeof (ifr), ifrp++;
		} else
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				struct sockaddr *sa = ifa->ifa_addr;

				if (space < sizeof(ifr))
					break;
				if (sa->sa_len <= sizeof(*sa)) {
					ifr.ifr_addr = *sa;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp, sizeof (ifr));
					ifrp++;
				} else {
					space -= sa->sa_len - sizeof(*sa);
					if (space < sizeof (ifr))
						break;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp,
					    sizeof(ifr.ifr_name));
					if (error == 0)
						error = copyout((caddr_t)sa,
						    (caddr_t)&ifrp->ifr_addr,
						    sa->sa_len);
					ifrp = (struct ifreq *)(sa->sa_len +
					    (caddr_t)&ifrp->ifr_addr);
				}
				if (error)
					break;
				space -= sizeof (ifr);
			}
	}
	ifc->ifc_len -= space;
	return (error);
}

void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}

void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}

void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		counters_read(ifp->if_counters, counters, nitems(counters),
		    NULL);

		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
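/*
 * Example (sketch): a driver that called if_counters_alloc() at attach
 * time feeds the per-CPU counters that if_getdata() folds back into
 * struct if_data:
 *
 *	counters_inc(ifp->if_counters, ifc_ierrors);
 *	counters_pkt(ifp->if_counters, ifc_ipackets, ifc_ibytes,
 *	    m->m_pkthdr.len);
 */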
/*
 * Dummy functions replaced in ifnet during detach (if protocols decide
 * to fiddle with the if while it is being detached).
 */
void
if_detached_qstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}

int
if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
{
	return ENODEV;
}

/*
 * Create interface group without members
 */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group	*ifg;

	if ((ifg = malloc(sizeof(*ifg), M_IFGROUP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 1;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}

/*
 * Add a group to an interface
 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_group	*ifg = NULL;
	struct ifg_member	*ifgm;
	size_t			 namelen;

	namelen = strlen(groupname);
	if (namelen == 0 || namelen >= IFNAMSIZ ||
	    (groupname[namelen - 1] >= '0' && groupname[namelen - 1] <= '9'))
		return (EINVAL);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	if ((ifgl = malloc(sizeof(*ifgl), M_IFGROUP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = malloc(sizeof(*ifgm), M_IFGROUP, M_NOWAIT)) == NULL) {
		free(ifgl, M_IFGROUP, sizeof(*ifgl));
		return (ENOMEM);
	}

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL) {
		ifg = if_creategroup(groupname);
		if (ifg == NULL) {
			free(ifgl, M_IFGROUP, sizeof(*ifgl));
			free(ifgm, M_IFGROUP, sizeof(*ifgm));
			return (ENOMEM);
		}
	} else
		ifg->ifg_refcnt++;
	KASSERT(ifg->ifg_refcnt != 0);

	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_addmember(groupname);
#endif

	return (0);
}

/*
 * Remove a group from an interface
 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_member	*ifgm;

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
		    ifgm_next);
		free(ifgm, M_IFGROUP, sizeof(*ifgm));
	}

#if NPF > 0
	pfi_group_delmember(groupname);
#endif

	KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		free(ifgl->ifgl_group, M_IFGROUP, sizeof(*ifgl->ifgl_group));
	}

	free(ifgl, M_IFGROUP, sizeof(*ifgl));

	return (0);
}

/*
 * Stores all groups from an interface in memory pointed
 * to by data
 */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int			 len, error;
	struct ifg_list		*ifgl;
	struct ifg_req		 ifgrq, *ifgp;
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
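/*
 * Example: if_addgroup() rejects group names that end in a digit so a
 * group can never collide with an interface name; "egress" and "wan"
 * are valid group names, "wan0" is not.
 */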
/*
 * Stores all members of a group in memory pointed to by data.
 */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	struct ifg_req ifgrq, *ifgp;
	int len, error;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

int
if_getgroupattribs(caddr_t data)
{
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;

	return (0);
}

int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	int demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}

/*
 * Stores all groups in memory pointed to by data.
 */
int
if_getgrouplist(caddr_t data)
{
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	struct ifg_req ifgrq, *ifgp;
	int len, error;

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

void
if_group_routechange(const struct sockaddr *dst, const struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin_const(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin_const(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6_const(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6_const(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}

int
if_group_egress_build(void)
{
	struct ifnet *ifp;
	struct ifg_group *ifg;
	struct ifg_member *ifgm, *next;
	struct sockaddr_in sa_in;
#ifdef INET6
	struct sockaddr_in6 sa_in6;
#endif
	struct rtentry *rt;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
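/*
 * Editorial example (hypothetical userland code): the "egress" group
 * maintained by if_group_egress_build() above can be enumerated with
 * SIOCGIFGMEMB, which lands in if_getgroupmembers(); the same sizing
 * handshake applies:
 *
 *	struct ifgroupreq ifgr;
 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifgr, 0, sizeof(ifgr));
 *	strlcpy(ifgr.ifgr_name, IFG_EGRESS, sizeof(ifgr.ifgr_name));
 *	if (ioctl(s, SIOCGIFGMEMB, &ifgr) == -1)
 *		err(1, "SIOCGIFGMEMB");
 *	if ((ifgr.ifgr_groups = calloc(1, ifgr.ifgr_len)) == NULL)
 *		err(1, NULL);
 *	if (ioctl(s, SIOCGIFGMEMB, &ifgr) == -1)
 *		err(1, "SIOCGIFGMEMB");
 *
 * Each returned ifgrq_member then names an interface carrying a
 * default route.
 */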
/*
 * Set/clear promiscuous mode on interface ifp based on the truth value
 * of pswitch.  The calls are reference counted so that only the first
 * "on" request actually has an effect, as does the final "off" request.
 * Results are undefined if the "off" and "on" requests are not matched.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	unsigned short oif_flags;
	int oif_pcount, error;

	NET_ASSERT_LOCKED();	/* modifying if_flags and if_pcount */

	oif_flags = ifp->if_flags;
	oif_pcount = ifp->if_pcount;
	if (pswitch) {
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
	} else {
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
	}

	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = ifp->if_flags;
	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
	if (error) {
		ifp->if_flags = oif_flags;
		ifp->if_pcount = oif_pcount;
	}

	return (error);
}
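/*
 * Editorial example (hypothetical userland code): bpf(4) is the usual
 * consumer of the reference counting in ifpromisc() above; each
 * BIOCPROMISC descriptor takes one if_pcount reference, and the
 * interface only leaves IFF_PROMISC once the last one goes away.
 * "em0" is a placeholder name:
 *
 *	int fd = open("/dev/bpf", O_RDWR);
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	if (ioctl(fd, BIOCSETIF, &ifr) == -1 ||
 *	    ioctl(fd, BIOCPROMISC, NULL) == -1)
 *		err(1, "bpf");
 *
 * Closing the descriptor later drops the promiscuous reference again.
 */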
/* Set/clear LRO flag and restart interface if needed. */
int
ifsetlro(struct ifnet *ifp, int on)
{
	struct ifreq ifrq;
	int error = 0;
	int s = splnet();
	struct if_parent parent;

	memset(&parent, 0, sizeof(parent));
	if ((*ifp->if_ioctl)(ifp, SIOCGIFPARENT, (caddr_t)&parent) != -1) {
		struct ifnet *ifp0 = if_unit(parent.ifp_parent);

		if (ifp0 != NULL) {
			ifsetlro(ifp0, on);
			if_put(ifp0);
		}
	}

	if (!ISSET(ifp->if_capabilities, IFCAP_LRO)) {
		error = ENOTSUP;
		goto out;
	}

	NET_ASSERT_LOCKED();	/* for ioctl */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	if (on && !ISSET(ifp->if_xflags, IFXF_LRO)) {
		if (ifp->if_type == IFT_ETHER && ether_brport_isset(ifp)) {
			error = EBUSY;
			goto out;
		}
		SET(ifp->if_xflags, IFXF_LRO);
	} else if (!on && ISSET(ifp->if_xflags, IFXF_LRO))
		CLR(ifp->if_xflags, IFXF_LRO);
	else
		goto out;

	/* restart interface */
	if (ISSET(ifp->if_flags, IFF_UP)) {
		/* go down for a moment... */
		CLR(ifp->if_flags, IFF_UP);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

		/* ... and up again */
		SET(ifp->if_flags, IFF_UP);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
out:
	splx(s);

	return error;
}

void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	NET_ASSERT_LOCKED_EXCLUSIVE();
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}

void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	NET_ASSERT_LOCKED_EXCLUSIVE();
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}

void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa,
    struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}

#ifdef DDB
/* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
#endif /* DDB */
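/*
 * Editorial note: per the comment above, ifa_print_all() is meant for
 * the in-kernel debugger, e.g.:
 *
 *	ddb> call ifa_print_all
 */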
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
	int i_am_router = (atomic_load_int(&ip6_forwarding) != 0);
#endif
	struct ifreq ifrq;
	short up;

	NET_ASSERT_LOCKED();	/* for ioctl and in6 */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!i_am_router) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
}

void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}

void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}

void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}

void
if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
{
	extern int ticks;

	memset(rxr, 0, sizeof(*rxr));

	rxr->rxr_adjusted = ticks;
	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
	rxr->rxr_hwm = hwm;
}

static inline void
if_rxr_adjust_cwm(struct if_rxring *rxr)
{
	extern int ticks;

	if (rxr->rxr_alive >= rxr->rxr_lwm)
		return;
	else if (rxr->rxr_cwm < rxr->rxr_hwm)
		rxr->rxr_cwm++;

	rxr->rxr_adjusted = ticks;
}

void
if_rxr_livelocked(struct if_rxring *rxr)
{
	extern int ticks;

	if (ticks - rxr->rxr_adjusted >= 1) {
		if (rxr->rxr_cwm > rxr->rxr_lwm)
			rxr->rxr_cwm--;

		rxr->rxr_adjusted = ticks;
	}
}

u_int
if_rxr_get(struct if_rxring *rxr, u_int max)
{
	extern int ticks;
	u_int diff;

	if (ticks - rxr->rxr_adjusted >= 1) {
		/* we're free to try for an adjustment */
		if_rxr_adjust_cwm(rxr);
	}

	if (rxr->rxr_alive >= rxr->rxr_cwm)
		return (0);

	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
	rxr->rxr_alive += diff;

	return (diff);
}

int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}

int
if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
    struct if_rxring *rxr)
{
	struct if_rxring_info ifr;

	memset(&ifr, 0, sizeof(ifr));

	if (name != NULL)
		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_size = size;
	ifr.ifr_info = *rxr;

	return (if_rxr_info_ioctl(ifri, 1, &ifr));
}

/*
 * Network stack input queues.
 */

void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}

int
niq_enqueue(struct niqueue *niq, struct mbuf *m)
{
	int rv;

	rv = mq_enqueue(&niq->ni_q, m);
	if (rv == 0)
		schednetisr(niq->ni_isr);
	else
		if_congestion();

	return (rv);
}

int
niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
{
	int rv;

	rv = mq_enlist(&niq->ni_q, ml);
	if (rv == 0)
		schednetisr(niq->ni_isr);
	else
		if_congestion();

	return (rv);
}

__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}

struct taskq *
net_tq(unsigned int ifindex)
{
	struct softnet *sn;
	static int nettaskqs;

	if (nettaskqs == 0)
		nettaskqs = min(NET_TASKQ, ncpus);

	sn = &softnets[ifindex % nettaskqs];

	return (sn->sn_taskq);
}

void
net_tq_barriers(const char *wmesg)
{
	struct task barriers[NET_TASKQ];
	struct refcnt r = REFCNT_INITIALIZER();
	int i;

	for (i = 0; i < nitems(barriers); i++) {
		task_set(&barriers[i], (void (*)(void *))refcnt_rele_wake,
		    &r);
		refcnt_take(&r);
		task_add(softnets[i].sn_taskq, &barriers[i]);
	}

	refcnt_finalize(&r, wmesg);
}
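/*
 * Editorial example (hypothetical driver code): the usual fill loop
 * built around the if_rxring accounting above, with sc_rxr, NRXDESC and
 * mydrv_fill_rx_desc() standing in for a real driver's softc members;
 * if_rxr_put() is the if_var.h macro that returns unfilled slots:
 *
 *	u_int slots;
 *
 *	for (slots = if_rxr_get(&sc->sc_rxr, NRXDESC); slots > 0;
 *	    slots--) {
 *		if (mydrv_fill_rx_desc(sc) != 0)
 *			break;
 *	}
 *	if_rxr_put(&sc->sc_rxr, slots);
 */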
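/*
 * Editorial example (hypothetical caller): net_tq() above hashes an
 * interface index onto one of the softnet task queues, so deferred
 * per-interface work is typically queued with task(9) as:
 *
 *	task_set(&sc->sc_deferred, mydrv_deferred, sc);
 *	task_add(net_tq(ifp->if_index), &sc->sc_deferred);
 */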