/*- * Copyright (c) 2025 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Emmanuel Nyarko. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * NPF route extension. */ #ifdef _KERNEL #include __KERNEL_RCSID(0, "$NetBSD: npf_ext_route.c,v 1.1 2026/04/08 00:33:07 joe Exp $"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif #include "npf_impl.h" NPF_EXT_MODULE(npf_ext_route, ""); #define NPFEXT_ROUTE_VER 1 #define NPF_LOGADDR(buf, a) \ snprintf(buf, sizeof(buf), "%u.%u.%u.%u", \ ((a)->s_addr >> 24) & 0xFF, \ ((a)->s_addr >> 16) & 0xFF, \ ((a)->s_addr >> 8) & 0xFF, \ (a)->s_addr & 0xFF) static void * npf_ext_route_id; typedef struct { char ifname[IFNAMSIZ]; } npf_ext_route_t; static int npf_route_ctor(npf_rproc_t *rp, const nvlist_t* params) { npf_ext_route_t *meta; const char *ifname; meta = kmem_zalloc(sizeof(*meta), KM_SLEEP); ifname = nvlist_get_string(params, "route-interface"); if (!ifname) return EINVAL; /* XXX use something like npf_ifmap */ strlcpy(meta->ifname, ifname, IFNAMSIZ); npf_rproc_assign(rp, meta); return 0; } static void npf_route_dtor(npf_rproc_t *rp, void *meta) { kmem_free(meta, sizeof(npf_ext_route_t)); } static void npf_chcksum(struct ifnet *ifp1, struct mbuf *m0, struct ip *ip1, int *sw_csum) { int hlen; struct mbuf *m = m0; struct ip *ip = ip1; struct ifnet *ifp = ifp1; int csum; /* NB: This code is copied from ip_output */ hlen = ip->ip_hl << 2; ip->ip_sum = 0; m->m_pkthdr.csum_data |= hlen << 16; /* Maybe skip checksums on loopback interfaces. */ if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) { m->m_pkthdr.csum_flags |= M_CSUM_IPv4; } *sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx; csum = *sw_csum; if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) { /* * Perform any checksums that the hardware can't do * for us. * * XXX Does any hardware require the {th,uh}_sum * XXX fields to be 0? */ if (csum & M_CSUM_IPv4) { KASSERT(IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)); ip->ip_sum = in_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4; } if (csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { if (IN_NEED_CHECKSUM(ifp, csum & (M_CSUM_TCPv4|M_CSUM_UDPv4))) { in_undefer_cksum_tcpudp(m); } m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4); } } } static int npf_fragment(npf_t *npf, struct ifnet *ifp, struct ip *ip1, struct mbuf **m0, struct sockaddr_in *dst) { int error; struct ip *ip = ip1; struct mbuf *m = *m0; /* * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. */ if (ntohs(ip->ip_off) & IP_DF) { npf_stats_inc(npf, NPF_STAT_NOFRAGMENT); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, ifp->if_mtu); return EINVAL; } /* * We can't use HW checksumming if we're about to fragment the packet. * * XXX Some hardware can do this. */ if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { if (IN_NEED_CHECKSUM(ifp, m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4))) { in_undefer_cksum_tcpudp(m); } m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4); } error = ip_fragment(m, ifp, ifp->if_mtu); if (error) { m = NULL; return error; } for (; m; m = *m0) { *m0 = m->m_nextpkt; m->m_nextpkt = NULL; if (error) { m_freem(m); continue; } KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0); error = ip_if_output(ifp, m, sintocsa(dst), NULL); } if (error == 0) { npf_stats_inc(npf, NPF_STAT_FRAGMENTS); } return error; } /* * Ensure sending address is valid. * if the packet could be dropped without error (protocol dependent). */ static int ip_ifaddrvalid(const struct in_ifaddr *ia) { if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY) return 0; if (ia->ia4_flags & IN_IFF_DUPLICATED) return EADDRINUSE; else if (ia->ia4_flags & (IN_IFF_TENTATIVE | IN_IFF_DETACHED)) return EADDRNOTAVAIL; return 0; } /* * search for the source address structure to * maintain output statistics, and verify address * validity */ static int npf_validate_saddr(npf_t * npf, struct ip *ip, struct ifnet* ifp) { int error = 0; struct in_ifaddr *ia = NULL; union { struct sockaddr sa; struct sockaddr_in sin; } usrc; struct psref psref_ia; KASSERT(ia == NULL); sockaddr_in_init(&usrc.sin, &ip->ip_src, 0); ia = ifatoia(ifaof_ifpforaddr_psref(&usrc.sa, ifp, &psref_ia)); /* * Ensure we only send from a valid address. * A NULL address is valid because the packet could be * generated from a packet filter. */ if (ia != NULL && (error = ip_ifaddrvalid(ia)) != 0) { char buf[32]; NPF_LOGADDR(buf, &ip->ip_src); log(LOG_ERR, "refusing to send from invalid address %s (pid %d)\n", buf, curproc->p_pid); npf_stats_inc(npf, NPF_STAT_NOREROUTE); } ia4_release(ia, &psref_ia); return error; } #if defined(INET6) /* this code is copied from ip6_output*/ static void npf_validate_s6addr(struct mbuf *m0, struct ifnet *ifp1, int *sw_csum) { struct in6_ifaddr *ia6; struct mbuf *m = m0; struct ifnet *ifp = ifp1; struct ip6_hdr *ip6; int csum; int s; ip6 = mtod(m, struct ip6_hdr *); s = pserialize_read_enter(); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. */ ia6->ia_ifa.ifa_data.ifad_outbytes += m->m_pkthdr.len; } pserialize_read_exit(s); /* check sum */ *sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx; csum = *sw_csum; if ((csum & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) { if (IN6_NEED_CHECKSUM(ifp, csum & (M_CSUM_UDPv6|M_CSUM_TCPv6))) { in6_undefer_cksum_tcpudp(m); } m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6); } } #endif /* main routing function for kernel module */ static bool npf_route(npf_cache_t *npc, void *meta, const npf_match_info_t __unused *mi, int *decision) { struct mbuf *m0 = nbuf_head_mbuf(npc->npc_nbuf); const npf_ext_route_t *route = meta; npf_t *npf = npf_getkernctx(); struct ifnet *ifp; int error; int sw_csum; union { struct sockaddr_in v4; struct sockaddr_in6 v6; } dst; /* Skip, if already blocking. * also when routing is applied to a stateful rule, incoming packets * are routed since the procedure becomes attached to the connection * and hence will not be desirable */ if (*decision == NPF_DECISION_BLOCK || (mi->mi_di == PFIL_IN)) { return true; } /* global lock for interface lookup */ KERNEL_LOCK(1, NULL); ifp = ifunit(route->ifname); if (ifp == NULL) { /* XXX: oops */ goto bad; } if (npf_iscached(npc, NPC_IP6)) { #if defined(INET6) struct ip6_hdr *ip6 = npc->npc_ip.v6; sockaddr_in6_init(&dst.v6, &ip6->ip6_dst, 0, 0, 0); if (IN6_IS_SCOPE_EMBEDDABLE(&dst.v6.sin6_addr)) { error = in6_setscope(&dst.v6.sin6_addr, ifp, NULL); if (error) { goto bad; } } npf_validate_s6addr(m0, ifp, &sw_csum); if (m0->m_pkthdr.len <= ifp->if_mtu) { if (__predict_false(sw_csum & M_CSUM_TSOv6)) { /* * TSO6 is required by a packet, but disabled for * the interface. */ error = ip6_tso_output(ifp, ifp, m0, &dst.v6, NULL); } else error = ip6_if_output(ifp, ifp, m0, &dst.v6, NULL); if (error) { goto bad; } } else { /* router not allowed to fragmenrt */ npf_stats_inc(npf, NPF_STAT_NOFRAGMENT); icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); } #endif } else if (npf_iscached(npc, NPC_IP4)) { struct ip *ip = npc->npc_ip.v4; struct mbuf *m = m0; KASSERT(ip != NULL); KASSERT(m != NULL); /* * NB: This code is copied from ip_output and re-arranged * checks fragmentation, checksum and source address validity */ sockaddr_in_init(&dst.v4, &ip->ip_dst, 0); error = npf_validate_saddr(npf, ip, ifp); if (error) goto bad; if (ntohs(ip->ip_len) > ifp->if_mtu) goto fragment; npf_chcksum(ifp, m, ip, &sw_csum); /* Send it */ if (__predict_false(sw_csum & M_CSUM_TSOv4)) { /* * TSO4 is required by a packet, but disabled for * the interface. */ error = ip_tso_output(ifp, m, sintocsa(&dst.v4), NULL); } else error = ip_if_output(ifp, m, sintocsa(&dst.v4), NULL); if (error) { goto bad; } goto done; fragment: error = npf_fragment(npf, ifp, ip, &m0, &dst.v4); if (error) { goto bad; } } /* * for routing procedures, we reverse the returns * because we need the kernel to stop processing the mbuf * after we leave the filtering context */ done: npf_stats_inc(npf, NPF_STAT_REROUTE); m0 = NULL; KERNEL_UNLOCK_ONE(NULL); return false; bad: npf_stats_inc(npf, NPF_STAT_NOREROUTE); m_freem(m0); m0 = NULL; KERNEL_UNLOCK_ONE(NULL); return true; } __dso_public int npf_ext_route_init(npf_t *npf) { static const npf_ext_ops_t npf_route_ops = { .version = NPFEXT_ROUTE_VER, .ctx = NULL, .ctor = npf_route_ctor, .dtor = npf_route_dtor, .proc = npf_route }; npf_ext_route_id = npf_ext_register(npf, "route", &npf_route_ops); return npf_ext_route_id ? 0 : EEXIST; } __dso_public int npf_ext_route_fini(npf_t *npf) { return npf_ext_unregister(npf, npf_ext_route_id); } #ifdef _KERNEL static int npf_ext_route_modcmd(modcmd_t cmd, void *arg) { npf_t *npf = npf_getkernctx(); switch (cmd) { case MODULE_CMD_INIT: return npf_ext_route_init(npf); case MODULE_CMD_FINI: return npf_ext_route_fini(npf); case MODULE_CMD_AUTOUNLOAD: /* Allow auto-unload only if NPF permits it. */ return npf_autounload_p() ? 0 : EBUSY; default: return ENOTTY; } return 0; } #endif