// https://syzkaller.appspot.com/bug?id=ecc13ed3cdced06f4d20c13ae9973406c7192692 // autogenerated by syzkaller (https://github.com/google/syzkaller) #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static unsigned long long procid; static __thread int clone_ongoing; static __thread int skip_segv; static __thread jmp_buf segv_env; static void segv_handler(int sig, siginfo_t* info, void* ctx) { if (__atomic_load_n(&clone_ongoing, __ATOMIC_RELAXED) != 0) { exit(sig); } uintptr_t addr = (uintptr_t)info->si_addr; const uintptr_t prog_start = 1 << 20; const uintptr_t prog_end = 100 << 20; int skip = __atomic_load_n(&skip_segv, __ATOMIC_RELAXED) != 0; int valid = addr < prog_start || addr > prog_end; if (skip && valid) { _longjmp(segv_env, 1); } exit(sig); } static void install_segv_handler(void) { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_handler = SIG_IGN; syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8); syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8); memset(&sa, 0, sizeof(sa)); sa.sa_sigaction = segv_handler; sa.sa_flags = SA_NODEFER | SA_SIGINFO; sigaction(SIGSEGV, &sa, NULL); sigaction(SIGBUS, &sa, NULL); } #define NONFAILING(...) \ ({ \ int ok = 1; \ __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \ if (_setjmp(segv_env) == 0) { \ __VA_ARGS__; \ } else \ ok = 0; \ __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \ ok; \ }) static void sleep_ms(uint64_t ms) { usleep(ms * 1000); } static uint64_t current_time_ms(void) { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts)) exit(1); return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; } static void use_temporary_dir(void) { char tmpdir_template[] = "./syzkaller.XXXXXX"; char* tmpdir = mkdtemp(tmpdir_template); if (!tmpdir) exit(1); if (chmod(tmpdir, 0777)) exit(1); if (chdir(tmpdir)) exit(1); } static bool write_file(const char* file, const char* what, ...) { char buf[1024]; va_list args; va_start(args, what); vsnprintf(buf, sizeof(buf), what, args); va_end(args); buf[sizeof(buf) - 1] = 0; int len = strlen(buf); int fd = open(file, O_WRONLY | O_CLOEXEC); if (fd == -1) return false; if (write(fd, buf, len) != len) { int err = errno; close(fd); errno = err; return false; } close(fd); return true; } struct nlmsg { char* pos; int nesting; struct nlattr* nested[8]; char buf[4096]; }; static void netlink_init(struct nlmsg* nlmsg, int typ, int flags, const void* data, int size) { memset(nlmsg, 0, sizeof(*nlmsg)); struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf; hdr->nlmsg_type = typ; hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags; memcpy(hdr + 1, data, size); nlmsg->pos = (char*)(hdr + 1) + NLMSG_ALIGN(size); } static void netlink_attr(struct nlmsg* nlmsg, int typ, const void* data, int size) { struct nlattr* attr = (struct nlattr*)nlmsg->pos; attr->nla_len = sizeof(*attr) + size; attr->nla_type = typ; if (size > 0) memcpy(attr + 1, data, size); nlmsg->pos += NLMSG_ALIGN(attr->nla_len); } static void netlink_nest(struct nlmsg* nlmsg, int typ) { struct nlattr* attr = (struct nlattr*)nlmsg->pos; attr->nla_type = typ; nlmsg->pos += sizeof(*attr); nlmsg->nested[nlmsg->nesting++] = attr; } static void netlink_done(struct nlmsg* nlmsg) { struct nlattr* attr = nlmsg->nested[--nlmsg->nesting]; attr->nla_len = nlmsg->pos - (char*)attr; } static int netlink_send_ext(struct nlmsg* nlmsg, int sock, uint16_t reply_type, int* reply_len, bool dofail) { if (nlmsg->pos > nlmsg->buf + sizeof(nlmsg->buf) || nlmsg->nesting) exit(1); struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf; hdr->nlmsg_len = nlmsg->pos - nlmsg->buf; struct sockaddr_nl addr; memset(&addr, 0, sizeof(addr)); addr.nl_family = AF_NETLINK; ssize_t n = sendto(sock, nlmsg->buf, hdr->nlmsg_len, 0, (struct sockaddr*)&addr, sizeof(addr)); if (n != (ssize_t)hdr->nlmsg_len) { if (dofail) exit(1); return -1; } n = recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); if (reply_len) *reply_len = 0; if (n < 0) { if (dofail) exit(1); return -1; } if (n < (ssize_t)sizeof(struct nlmsghdr)) { errno = EINVAL; if (dofail) exit(1); return -1; } if (hdr->nlmsg_type == NLMSG_DONE) return 0; if (reply_len && hdr->nlmsg_type == reply_type) { *reply_len = n; return 0; } if (n < (ssize_t)(sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))) { errno = EINVAL; if (dofail) exit(1); return -1; } if (hdr->nlmsg_type != NLMSG_ERROR) { errno = EINVAL; if (dofail) exit(1); return -1; } errno = -((struct nlmsgerr*)(hdr + 1))->error; return -errno; } static int netlink_send(struct nlmsg* nlmsg, int sock) { return netlink_send_ext(nlmsg, sock, 0, NULL, true); } static int netlink_query_family_id(struct nlmsg* nlmsg, int sock, const char* family_name, bool dofail) { struct genlmsghdr genlhdr; memset(&genlhdr, 0, sizeof(genlhdr)); genlhdr.cmd = CTRL_CMD_GETFAMILY; netlink_init(nlmsg, GENL_ID_CTRL, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(nlmsg, CTRL_ATTR_FAMILY_NAME, family_name, strnlen(family_name, GENL_NAMSIZ - 1) + 1); int n = 0; int err = netlink_send_ext(nlmsg, sock, GENL_ID_CTRL, &n, dofail); if (err < 0) { return -1; } uint16_t id = 0; struct nlattr* attr = (struct nlattr*)(nlmsg->buf + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr))); for (; (char*)attr < nlmsg->buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) { if (attr->nla_type == CTRL_ATTR_FAMILY_ID) { id = *(uint16_t*)(attr + 1); break; } } if (!id) { errno = EINVAL; return -1; } recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); return id; } static void netlink_add_device_impl(struct nlmsg* nlmsg, const char* type, const char* name, bool up) { struct ifinfomsg hdr; memset(&hdr, 0, sizeof(hdr)); if (up) hdr.ifi_flags = hdr.ifi_change = IFF_UP; netlink_init(nlmsg, RTM_NEWLINK, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr)); if (name) netlink_attr(nlmsg, IFLA_IFNAME, name, strlen(name)); netlink_nest(nlmsg, IFLA_LINKINFO); netlink_attr(nlmsg, IFLA_INFO_KIND, type, strlen(type)); } static void netlink_device_change(struct nlmsg* nlmsg, int sock, const char* name, bool up, const char* master, const void* mac, int macsize, const char* new_name) { struct ifinfomsg hdr; memset(&hdr, 0, sizeof(hdr)); if (up) hdr.ifi_flags = hdr.ifi_change = IFF_UP; hdr.ifi_index = if_nametoindex(name); netlink_init(nlmsg, RTM_NEWLINK, 0, &hdr, sizeof(hdr)); if (new_name) netlink_attr(nlmsg, IFLA_IFNAME, new_name, strlen(new_name)); if (master) { int ifindex = if_nametoindex(master); netlink_attr(nlmsg, IFLA_MASTER, &ifindex, sizeof(ifindex)); } if (macsize) netlink_attr(nlmsg, IFLA_ADDRESS, mac, macsize); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static int netlink_add_addr(struct nlmsg* nlmsg, int sock, const char* dev, const void* addr, int addrsize) { struct ifaddrmsg hdr; memset(&hdr, 0, sizeof(hdr)); hdr.ifa_family = addrsize == 4 ? AF_INET : AF_INET6; hdr.ifa_prefixlen = addrsize == 4 ? 24 : 120; hdr.ifa_scope = RT_SCOPE_UNIVERSE; hdr.ifa_index = if_nametoindex(dev); netlink_init(nlmsg, RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &hdr, sizeof(hdr)); netlink_attr(nlmsg, IFA_LOCAL, addr, addrsize); netlink_attr(nlmsg, IFA_ADDRESS, addr, addrsize); return netlink_send(nlmsg, sock); } static void netlink_add_addr4(struct nlmsg* nlmsg, int sock, const char* dev, const char* addr) { struct in_addr in_addr; inet_pton(AF_INET, addr, &in_addr); int err = netlink_add_addr(nlmsg, sock, dev, &in_addr, sizeof(in_addr)); if (err < 0) { } } static void netlink_add_addr6(struct nlmsg* nlmsg, int sock, const char* dev, const char* addr) { struct in6_addr in6_addr; inet_pton(AF_INET6, addr, &in6_addr); int err = netlink_add_addr(nlmsg, sock, dev, &in6_addr, sizeof(in6_addr)); if (err < 0) { } } static void netlink_add_neigh(struct nlmsg* nlmsg, int sock, const char* name, const void* addr, int addrsize, const void* mac, int macsize) { struct ndmsg hdr; memset(&hdr, 0, sizeof(hdr)); hdr.ndm_family = addrsize == 4 ? AF_INET : AF_INET6; hdr.ndm_ifindex = if_nametoindex(name); hdr.ndm_state = NUD_PERMANENT; netlink_init(nlmsg, RTM_NEWNEIGH, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr)); netlink_attr(nlmsg, NDA_DST, addr, addrsize); netlink_attr(nlmsg, NDA_LLADDR, mac, macsize); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static struct nlmsg nlmsg; static int tunfd = -1; #define TUN_IFACE "syz_tun" #define LOCAL_MAC 0xaaaaaaaaaaaa #define REMOTE_MAC 0xaaaaaaaaaabb #define LOCAL_IPV4 "172.20.20.170" #define REMOTE_IPV4 "172.20.20.187" #define LOCAL_IPV6 "fe80::aa" #define REMOTE_IPV6 "fe80::bb" #define IFF_NAPI 0x0010 static void initialize_tun(void) { tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK); if (tunfd == -1) { printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n"); printf("otherwise fuzzing or reproducing might not work as intended\n"); return; } const int kTunFd = 200; if (dup2(tunfd, kTunFd) < 0) exit(1); close(tunfd); tunfd = kTunFd; struct ifreq ifr; memset(&ifr, 0, sizeof(ifr)); strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) { exit(1); } char sysctl[64]; sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/accept_dad", TUN_IFACE); write_file(sysctl, "0"); sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/router_solicitations", TUN_IFACE); write_file(sysctl, "0"); int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock == -1) exit(1); netlink_add_addr4(&nlmsg, sock, TUN_IFACE, LOCAL_IPV4); netlink_add_addr6(&nlmsg, sock, TUN_IFACE, LOCAL_IPV6); uint64_t macaddr = REMOTE_MAC; struct in_addr in_addr; inet_pton(AF_INET, REMOTE_IPV4, &in_addr); netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in_addr, sizeof(in_addr), &macaddr, ETH_ALEN); struct in6_addr in6_addr; inet_pton(AF_INET6, REMOTE_IPV6, &in6_addr); netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in6_addr, sizeof(in6_addr), &macaddr, ETH_ALEN); macaddr = LOCAL_MAC; netlink_device_change(&nlmsg, sock, TUN_IFACE, true, 0, &macaddr, ETH_ALEN, NULL); close(sock); } #define WIFI_INITIAL_DEVICE_COUNT 2 #define WIFI_MAC_BASE \ { \ 0x08, 0x02, 0x11, 0x00, 0x00, 0x00 \ } #define WIFI_IBSS_BSSID \ { \ 0x50, 0x50, 0x50, 0x50, 0x50, 0x50 \ } #define WIFI_IBSS_SSID \ { \ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 \ } #define WIFI_DEFAULT_FREQUENCY 2412 #define WIFI_DEFAULT_SIGNAL 0 #define WIFI_DEFAULT_RX_RATE 1 #define HWSIM_CMD_REGISTER 1 #define HWSIM_CMD_FRAME 2 #define HWSIM_CMD_NEW_RADIO 4 #define HWSIM_ATTR_SUPPORT_P2P_DEVICE 14 #define HWSIM_ATTR_PERM_ADDR 22 #define IF_OPER_UP 6 struct join_ibss_props { int wiphy_freq; bool wiphy_freq_fixed; uint8_t* mac; uint8_t* ssid; int ssid_len; }; static int set_interface_state(const char* interface_name, int on) { struct ifreq ifr; int sock = socket(AF_INET, SOCK_DGRAM, 0); if (sock < 0) { return -1; } memset(&ifr, 0, sizeof(ifr)); strcpy(ifr.ifr_name, interface_name); int ret = ioctl(sock, SIOCGIFFLAGS, &ifr); if (ret < 0) { close(sock); return -1; } if (on) ifr.ifr_flags |= IFF_UP; else ifr.ifr_flags &= ~IFF_UP; ret = ioctl(sock, SIOCSIFFLAGS, &ifr); close(sock); if (ret < 0) { return -1; } return 0; } static int nl80211_set_interface(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32_t ifindex, uint32_t iftype, bool dofail) { struct genlmsghdr genlhdr; memset(&genlhdr, 0, sizeof(genlhdr)); genlhdr.cmd = NL80211_CMD_SET_INTERFACE; netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex)); netlink_attr(nlmsg, NL80211_ATTR_IFTYPE, &iftype, sizeof(iftype)); int err = netlink_send_ext(nlmsg, sock, 0, NULL, dofail); if (err < 0) { } return err; } static int nl80211_join_ibss(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32_t ifindex, struct join_ibss_props* props, bool dofail) { struct genlmsghdr genlhdr; memset(&genlhdr, 0, sizeof(genlhdr)); genlhdr.cmd = NL80211_CMD_JOIN_IBSS; netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex)); netlink_attr(nlmsg, NL80211_ATTR_SSID, props->ssid, props->ssid_len); netlink_attr(nlmsg, NL80211_ATTR_WIPHY_FREQ, &(props->wiphy_freq), sizeof(props->wiphy_freq)); if (props->mac) netlink_attr(nlmsg, NL80211_ATTR_MAC, props->mac, ETH_ALEN); if (props->wiphy_freq_fixed) netlink_attr(nlmsg, NL80211_ATTR_FREQ_FIXED, NULL, 0); int err = netlink_send_ext(nlmsg, sock, 0, NULL, dofail); if (err < 0) { } return err; } static int get_ifla_operstate(struct nlmsg* nlmsg, int ifindex, bool dofail) { struct ifinfomsg info; memset(&info, 0, sizeof(info)); info.ifi_family = AF_UNSPEC; info.ifi_index = ifindex; int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock == -1) { return -1; } netlink_init(nlmsg, RTM_GETLINK, 0, &info, sizeof(info)); int n; int err = netlink_send_ext(nlmsg, sock, RTM_NEWLINK, &n, dofail); close(sock); if (err) { return -1; } struct rtattr* attr = IFLA_RTA(NLMSG_DATA(nlmsg->buf)); for (; RTA_OK(attr, n); attr = RTA_NEXT(attr, n)) { if (attr->rta_type == IFLA_OPERSTATE) return *((int32_t*)RTA_DATA(attr)); } return -1; } static int await_ifla_operstate(struct nlmsg* nlmsg, char* interface, int operstate, bool dofail) { int ifindex = if_nametoindex(interface); while (true) { usleep(1000); int ret = get_ifla_operstate(nlmsg, ifindex, dofail); if (ret < 0) return ret; if (ret == operstate) return 0; } return 0; } static int nl80211_setup_ibss_interface(struct nlmsg* nlmsg, int sock, int nl80211_family_id, char* interface, struct join_ibss_props* ibss_props, bool dofail) { int ifindex = if_nametoindex(interface); if (ifindex == 0) { return -1; } int ret = nl80211_set_interface(nlmsg, sock, nl80211_family_id, ifindex, NL80211_IFTYPE_ADHOC, dofail); if (ret < 0) { return -1; } ret = set_interface_state(interface, 1); if (ret < 0) { return -1; } ret = nl80211_join_ibss(nlmsg, sock, nl80211_family_id, ifindex, ibss_props, dofail); if (ret < 0) { return -1; } return 0; } static int hwsim80211_create_device(struct nlmsg* nlmsg, int sock, int hwsim_family, uint8_t mac_addr[ETH_ALEN]) { struct genlmsghdr genlhdr; memset(&genlhdr, 0, sizeof(genlhdr)); genlhdr.cmd = HWSIM_CMD_NEW_RADIO; netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(nlmsg, HWSIM_ATTR_SUPPORT_P2P_DEVICE, NULL, 0); netlink_attr(nlmsg, HWSIM_ATTR_PERM_ADDR, mac_addr, ETH_ALEN); int err = netlink_send(nlmsg, sock); if (err < 0) { } return err; } static void initialize_wifi_devices(void) { int rfkill = open("/dev/rfkill", O_RDWR); if (rfkill == -1) exit(1); struct rfkill_event event = {0}; event.type = RFKILL_TYPE_ALL; event.op = RFKILL_OP_CHANGE_ALL; if (write(rfkill, &event, sizeof(event)) != (ssize_t)(sizeof(event))) exit(1); close(rfkill); uint8_t mac_addr[6] = WIFI_MAC_BASE; int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (sock < 0) exit(1); int hwsim_family_id = netlink_query_family_id(&nlmsg, sock, "MAC80211_HWSIM", true); int nl80211_family_id = netlink_query_family_id(&nlmsg, sock, "nl80211", true); if (hwsim_family_id < 0 || nl80211_family_id < 0) exit(1); uint8_t ssid[] = WIFI_IBSS_SSID; uint8_t bssid[] = WIFI_IBSS_BSSID; struct join_ibss_props ibss_props = {.wiphy_freq = WIFI_DEFAULT_FREQUENCY, .wiphy_freq_fixed = true, .mac = bssid, .ssid = ssid, .ssid_len = sizeof(ssid)}; for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) { mac_addr[5] = device_id; int ret = hwsim80211_create_device(&nlmsg, sock, hwsim_family_id, mac_addr); if (ret < 0) exit(1); char interface[6] = "wlan0"; interface[4] += device_id; if (nl80211_setup_ibss_interface(&nlmsg, sock, nl80211_family_id, interface, &ibss_props, true) < 0) exit(1); } for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) { char interface[6] = "wlan0"; interface[4] += device_id; int ret = await_ifla_operstate(&nlmsg, interface, IF_OPER_UP, true); if (ret < 0) exit(1); } close(sock); } static int runcmdline(char* cmdline) { int ret = system(cmdline); if (ret) { } return ret; } static int read_tun(char* data, int size) { if (tunfd < 0) return -1; int rv = read(tunfd, data, size); if (rv < 0) { if (errno == EAGAIN || errno == EBADF || errno == EBADFD) return -1; exit(1); } return rv; } static void flush_tun() { char data[1000]; while (read_tun(&data[0], sizeof(data)) != -1) { } } #define MAX_FDS 30 #define ADDR_TEXT 0x0000 #define ADDR_GDT 0x1000 #define ADDR_LDT 0x1800 #define ADDR_PML4 0x2000 #define ADDR_PDP 0x3000 #define ADDR_PD 0x4000 #define ADDR_STACK0 0x0f80 #define ADDR_VAR_HLT 0x2800 #define ADDR_VAR_SYSRET 0x2808 #define ADDR_VAR_SYSEXIT 0x2810 #define ADDR_VAR_IDT 0x3800 #define ADDR_VAR_TSS64 0x3a00 #define ADDR_VAR_TSS64_CPL3 0x3c00 #define ADDR_VAR_TSS16 0x3d00 #define ADDR_VAR_TSS16_2 0x3e00 #define ADDR_VAR_TSS16_CPL3 0x3f00 #define ADDR_VAR_TSS32 0x4800 #define ADDR_VAR_TSS32_2 0x4a00 #define ADDR_VAR_TSS32_CPL3 0x4c00 #define ADDR_VAR_TSS32_VM86 0x4e00 #define ADDR_VAR_VMXON_PTR 0x5f00 #define ADDR_VAR_VMCS_PTR 0x5f08 #define ADDR_VAR_VMEXIT_PTR 0x5f10 #define ADDR_VAR_VMWRITE_FLD 0x5f18 #define ADDR_VAR_VMWRITE_VAL 0x5f20 #define ADDR_VAR_VMXON 0x6000 #define ADDR_VAR_VMCS 0x7000 #define ADDR_VAR_VMEXIT_CODE 0x9000 #define ADDR_VAR_USER_CODE 0x9100 #define ADDR_VAR_USER_CODE2 0x9120 #define SEL_LDT (1 << 3) #define SEL_CS16 (2 << 3) #define SEL_DS16 (3 << 3) #define SEL_CS16_CPL3 ((4 << 3) + 3) #define SEL_DS16_CPL3 ((5 << 3) + 3) #define SEL_CS32 (6 << 3) #define SEL_DS32 (7 << 3) #define SEL_CS32_CPL3 ((8 << 3) + 3) #define SEL_DS32_CPL3 ((9 << 3) + 3) #define SEL_CS64 (10 << 3) #define SEL_DS64 (11 << 3) #define SEL_CS64_CPL3 ((12 << 3) + 3) #define SEL_DS64_CPL3 ((13 << 3) + 3) #define SEL_CGATE16 (14 << 3) #define SEL_TGATE16 (15 << 3) #define SEL_CGATE32 (16 << 3) #define SEL_TGATE32 (17 << 3) #define SEL_CGATE64 (18 << 3) #define SEL_CGATE64_HI (19 << 3) #define SEL_TSS16 (20 << 3) #define SEL_TSS16_2 (21 << 3) #define SEL_TSS16_CPL3 ((22 << 3) + 3) #define SEL_TSS32 (23 << 3) #define SEL_TSS32_2 (24 << 3) #define SEL_TSS32_CPL3 ((25 << 3) + 3) #define SEL_TSS32_VM86 (26 << 3) #define SEL_TSS64 (27 << 3) #define SEL_TSS64_HI (28 << 3) #define SEL_TSS64_CPL3 ((29 << 3) + 3) #define SEL_TSS64_CPL3_HI (30 << 3) #define MSR_IA32_FEATURE_CONTROL 0x3a #define MSR_IA32_VMX_BASIC 0x480 #define MSR_IA32_SMBASE 0x9e #define MSR_IA32_SYSENTER_CS 0x174 #define MSR_IA32_SYSENTER_ESP 0x175 #define MSR_IA32_SYSENTER_EIP 0x176 #define MSR_IA32_STAR 0xC0000081 #define MSR_IA32_LSTAR 0xC0000082 #define MSR_IA32_VMX_PROCBASED_CTLS2 0x48B #define NEXT_INSN $0xbadc0de #define PREFIX_SIZE 0xba1d const char kvm_asm16_cpl3[] = "\x0f\x20\xc0\x66\x83\xc8\x01\x0f\x22\xc0\xb8\xa0\x00\x0f\x00\xd8\xb8\x2b" "\x00\x8e\xd8\x8e\xc0\x8e\xe0\x8e\xe8\xbc\x00\x01\xc7\x06\x00\x01\x1d\xba" "\xc7\x06\x02\x01\x23\x00\xc7\x06\x04\x01\x00\x01\xc7\x06\x06\x01\x2b\x00" "\xcb"; const char kvm_asm32_paged[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0"; const char kvm_asm32_vm86[] = "\x66\xb8\xb8\x00\x0f\x00\xd8\xea\x00\x00\x00\x00\xd0\x00"; const char kvm_asm32_paged_vm86[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\x66\xb8\xb8\x00\x0f\x00\xd8" "\xea\x00\x00\x00\x00\xd0\x00"; const char kvm_asm64_enable_long[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00" "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8"; const char kvm_asm64_init_vm[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00" "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8\x48\xc7\xc1\x3a\x00\x00\x00\x0f" "\x32\x48\x83\xc8\x05\x0f\x30\x0f\x20\xe0\x48\x0d\x00\x20\x00\x00\x0f\x22" "\xe0\x48\xc7\xc1\x80\x04\x00\x00\x0f\x32\x48\xc7\xc2\x00\x60\x00\x00\x89" "\x02\x48\xc7\xc2\x00\x70\x00\x00\x89\x02\x48\xc7\xc0\x00\x5f\x00\x00\xf3" "\x0f\xc7\x30\x48\xc7\xc0\x08\x5f\x00\x00\x66\x0f\xc7\x30\x0f\xc7\x30\x48" "\xc7\xc1\x81\x04\x00\x00\x0f\x32\x48\x83\xc8\x00\x48\x21\xd0\x48\xc7\xc2" "\x00\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x82\x04\x00\x00\x0f\x32\x48\x83" "\xc8\x00\x48\x21\xd0\x48\xc7\xc2\x02\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc2" "\x1e\x40\x00\x00\x48\xc7\xc0\x81\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x83" "\x04\x00\x00\x0f\x32\x48\x0d\xff\x6f\x03\x00\x48\x21\xd0\x48\xc7\xc2\x0c" "\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x84\x04\x00\x00\x0f\x32\x48\x0d\xff" "\x17\x00\x00\x48\x21\xd0\x48\xc7\xc2\x12\x40\x00\x00\x0f\x79\xd0\x48\xc7" "\xc2\x04\x2c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2" "\x00\x28\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2\x02" "\x0c\x00\x00\x48\xc7\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc0\x58\x00" "\x00\x00\x48\xc7\xc2\x00\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x0c\x00" "\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x08" "\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x0c\x00\x00\x0f\x79\xd0\x48\xc7" "\xc0\xd8\x00\x00\x00\x48\xc7\xc2\x0c\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2" "\x02\x2c\x00\x00\x48\xc7\xc0\x00\x05\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00" "\x4c\x00\x00\x48\xc7\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x6c" "\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x12\x6c\x00" "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x0f\x20\xc0\x48\xc7\xc2\x00" "\x6c\x00\x00\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xd8\x48\xc7\xc2\x02\x6c\x00" "\x00\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xe0\x48\xc7\xc2\x04\x6c\x00\x00\x48" "\x89\xc0\x0f\x79\xd0\x48\xc7\xc2\x06\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00" "\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00" "\x0f\x79\xd0\x48\xc7\xc2\x0a\x6c\x00\x00\x48\xc7\xc0\x00\x3a\x00\x00\x0f" "\x79\xd0\x48\xc7\xc2\x0c\x6c\x00\x00\x48\xc7\xc0\x00\x10\x00\x00\x0f\x79" "\xd0\x48\xc7\xc2\x0e\x6c\x00\x00\x48\xc7\xc0\x00\x38\x00\x00\x0f\x79\xd0" "\x48\xc7\xc2\x14\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48" "\xc7\xc2\x16\x6c\x00\x00\x48\x8b\x04\x25\x10\x5f\x00\x00\x0f\x79\xd0\x48" "\xc7\xc2\x00\x00\x00\x00\x48\xc7\xc0\x01\x00\x00\x00\x0f\x79\xd0\x48\xc7" "\xc2\x02\x00\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2" "\x00\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02" "\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x20" "\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x20\x00" "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x77\x02\x00\x00" "\x0f\x32\x48\xc1\xe2\x20\x48\x09\xd0\x48\xc7\xc2\x00\x2c\x00\x00\x48\x89" "\xc0\x0f\x79\xd0\x48\xc7\xc2\x04\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00" "\x0f\x79\xd0\x48\xc7\xc2\x0a\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f" "\x79\xd0\x48\xc7\xc2\x0e\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79" "\xd0\x48\xc7\xc2\x10\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0" "\x48\xc7\xc2\x16\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48" "\xc7\xc2\x14\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7" "\xc2\x00\x60\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2" "\x02\x60\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2\x1c" "\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x1e\x20" "\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x20\x20\x00" "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x22\x20\x00\x00" "\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00\x08\x00\x00\x48" "\xc7\xc0\x58\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02\x08\x00\x00\x48\xc7" "\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x08\x00\x00\x48\xc7\xc0" "\x58\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x08\x00\x00\x48\xc7\xc0\x58" "\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x08\x00\x00\x48\xc7\xc0\x58\x00" "\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x08\x00\x00\x48\xc7\xc0\x58\x00\x00" "\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x08\x00\x00\x48\xc7\xc0\x00\x00\x00\x00" "\x0f\x79\xd0\x48\xc7\xc2\x0e\x08\x00\x00\x48\xc7\xc0\xd8\x00\x00\x00\x0f" "\x79\xd0\x48\xc7\xc2\x12\x68\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79" "\xd0\x48\xc7\xc2\x14\x68\x00\x00\x48\xc7\xc0\x00\x3a\x00\x00\x0f\x79\xd0" "\x48\xc7\xc2\x16\x68\x00\x00\x48\xc7\xc0\x00\x10\x00\x00\x0f\x79\xd0\x48" "\xc7\xc2\x18\x68\x00\x00\x48\xc7\xc0\x00\x38\x00\x00\x0f\x79\xd0\x48\xc7" "\xc2\x00\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2" "\x02\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x04" "\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x48" "\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x48\x00" "\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x48\x00\x00" "\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x48\x00\x00\x48" "\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0e\x48\x00\x00\x48\xc7" "\xc0\xff\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x48\x00\x00\x48\xc7\xc0" "\xff\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x12\x48\x00\x00\x48\xc7\xc0\xff" "\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x14\x48\x00\x00\x48\xc7\xc0\x93\x40" "\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x16\x48\x00\x00\x48\xc7\xc0\x9b\x20\x00" "\x00\x0f\x79\xd0\x48\xc7\xc2\x18\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00" "\x0f\x79\xd0\x48\xc7\xc2\x1a\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f" "\x79\xd0\x48\xc7\xc2\x1c\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79" "\xd0\x48\xc7\xc2\x1e\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79\xd0" "\x48\xc7\xc2\x20\x48\x00\x00\x48\xc7\xc0\x82\x00\x00\x00\x0f\x79\xd0\x48" "\xc7\xc2\x22\x48\x00\x00\x48\xc7\xc0\x8b\x00\x00\x00\x0f\x79\xd0\x48\xc7" "\xc2\x1c\x68\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2" "\x1e\x68\x00\x00\x48\xc7\xc0\x00\x91\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x20" "\x68\x00\x00\x48\xc7\xc0\x02\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x28" "\x00\x00\x48\xc7\xc0\x00\x05\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x28\x00" "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x28\x00\x00" "\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0e\x28\x00\x00\x48" "\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x28\x00\x00\x48\xc7" "\xc0\x00\x00\x00\x00\x0f\x79\xd0\x0f\x20\xc0\x48\xc7\xc2\x00\x68\x00\x00" "\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xd8\x48\xc7\xc2\x02\x68\x00\x00\x48\x89" "\xc0\x0f\x79\xd0\x0f\x20\xe0\x48\xc7\xc2\x04\x68\x00\x00\x48\x89\xc0\x0f" "\x79\xd0\x48\xc7\xc0\x18\x5f\x00\x00\x48\x8b\x10\x48\xc7\xc0\x20\x5f\x00" "\x00\x48\x8b\x08\x48\x31\xc0\x0f\x78\xd0\x48\x31\xc8\x0f\x79\xd0\x0f\x01" "\xc2\x48\xc7\xc2\x00\x44\x00\x00\x0f\x78\xd0\xf4"; const char kvm_asm64_vm_exit[] = "\x48\xc7\xc3\x00\x44\x00\x00\x0f\x78\xda\x48\xc7\xc3\x02\x44\x00\x00\x0f" "\x78\xd9\x48\xc7\xc0\x00\x64\x00\x00\x0f\x78\xc0\x48\xc7\xc3\x1e\x68\x00" "\x00\x0f\x78\xdb\xf4"; const char kvm_asm64_cpl3[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00" "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8\x48\xc7\xc0\x6b\x00\x00\x00\x8e" "\xd8\x8e\xc0\x8e\xe0\x8e\xe8\x48\xc7\xc4\x80\x0f\x00\x00\x48\xc7\x04\x24" "\x1d\xba\x00\x00\x48\xc7\x44\x24\x04\x63\x00\x00\x00\x48\xc7\x44\x24\x08" "\x80\x0f\x00\x00\x48\xc7\x44\x24\x0c\x6b\x00\x00\x00\xcb"; #define KVM_SMI _IO(KVMIO, 0xb7) #define CR0_PE 1 #define CR0_MP (1 << 1) #define CR0_EM (1 << 2) #define CR0_TS (1 << 3) #define CR0_ET (1 << 4) #define CR0_NE (1 << 5) #define CR0_WP (1 << 16) #define CR0_AM (1 << 18) #define CR0_NW (1 << 29) #define CR0_CD (1 << 30) #define CR0_PG (1 << 31) #define CR4_VME 1 #define CR4_PVI (1 << 1) #define CR4_TSD (1 << 2) #define CR4_DE (1 << 3) #define CR4_PSE (1 << 4) #define CR4_PAE (1 << 5) #define CR4_MCE (1 << 6) #define CR4_PGE (1 << 7) #define CR4_PCE (1 << 8) #define CR4_OSFXSR (1 << 8) #define CR4_OSXMMEXCPT (1 << 10) #define CR4_UMIP (1 << 11) #define CR4_VMXE (1 << 13) #define CR4_SMXE (1 << 14) #define CR4_FSGSBASE (1 << 16) #define CR4_PCIDE (1 << 17) #define CR4_OSXSAVE (1 << 18) #define CR4_SMEP (1 << 20) #define CR4_SMAP (1 << 21) #define CR4_PKE (1 << 22) #define EFER_SCE 1 #define EFER_LME (1 << 8) #define EFER_LMA (1 << 10) #define EFER_NXE (1 << 11) #define EFER_SVME (1 << 12) #define EFER_LMSLE (1 << 13) #define EFER_FFXSR (1 << 14) #define EFER_TCE (1 << 15) #define PDE32_PRESENT 1 #define PDE32_RW (1 << 1) #define PDE32_USER (1 << 2) #define PDE32_PS (1 << 7) #define PDE64_PRESENT 1 #define PDE64_RW (1 << 1) #define PDE64_USER (1 << 2) #define PDE64_ACCESSED (1 << 5) #define PDE64_DIRTY (1 << 6) #define PDE64_PS (1 << 7) #define PDE64_G (1 << 8) struct tss16 { uint16_t prev; uint16_t sp0; uint16_t ss0; uint16_t sp1; uint16_t ss1; uint16_t sp2; uint16_t ss2; uint16_t ip; uint16_t flags; uint16_t ax; uint16_t cx; uint16_t dx; uint16_t bx; uint16_t sp; uint16_t bp; uint16_t si; uint16_t di; uint16_t es; uint16_t cs; uint16_t ss; uint16_t ds; uint16_t ldt; } __attribute__((packed)); struct tss32 { uint16_t prev, prevh; uint32_t sp0; uint16_t ss0, ss0h; uint32_t sp1; uint16_t ss1, ss1h; uint32_t sp2; uint16_t ss2, ss2h; uint32_t cr3; uint32_t ip; uint32_t flags; uint32_t ax; uint32_t cx; uint32_t dx; uint32_t bx; uint32_t sp; uint32_t bp; uint32_t si; uint32_t di; uint16_t es, esh; uint16_t cs, csh; uint16_t ss, ssh; uint16_t ds, dsh; uint16_t fs, fsh; uint16_t gs, gsh; uint16_t ldt, ldth; uint16_t trace; uint16_t io_bitmap; } __attribute__((packed)); struct tss64 { uint32_t reserved0; uint64_t rsp[3]; uint64_t reserved1; uint64_t ist[7]; uint64_t reserved2; uint32_t reserved3; uint32_t io_bitmap; } __attribute__((packed)); static void fill_segment_descriptor(uint64_t* dt, uint64_t* lt, struct kvm_segment* seg) { uint16_t index = seg->selector >> 3; uint64_t limit = seg->g ? seg->limit >> 12 : seg->limit; uint64_t sd = (limit & 0xffff) | (seg->base & 0xffffff) << 16 | (uint64_t)seg->type << 40 | (uint64_t)seg->s << 44 | (uint64_t)seg->dpl << 45 | (uint64_t)seg->present << 47 | (limit & 0xf0000ULL) << 48 | (uint64_t)seg->avl << 52 | (uint64_t)seg->l << 53 | (uint64_t)seg->db << 54 | (uint64_t)seg->g << 55 | (seg->base & 0xff000000ULL) << 56; dt[index] = sd; lt[index] = sd; } static void fill_segment_descriptor_dword(uint64_t* dt, uint64_t* lt, struct kvm_segment* seg) { fill_segment_descriptor(dt, lt, seg); uint16_t index = seg->selector >> 3; dt[index + 1] = 0; lt[index + 1] = 0; } static void setup_syscall_msrs(int cpufd, uint16_t sel_cs, uint16_t sel_cs_cpl3) { char buf[sizeof(struct kvm_msrs) + 5 * sizeof(struct kvm_msr_entry)]; memset(buf, 0, sizeof(buf)); struct kvm_msrs* msrs = (struct kvm_msrs*)buf; struct kvm_msr_entry* entries = msrs->entries; msrs->nmsrs = 5; entries[0].index = MSR_IA32_SYSENTER_CS; entries[0].data = sel_cs; entries[1].index = MSR_IA32_SYSENTER_ESP; entries[1].data = ADDR_STACK0; entries[2].index = MSR_IA32_SYSENTER_EIP; entries[2].data = ADDR_VAR_SYSEXIT; entries[3].index = MSR_IA32_STAR; entries[3].data = ((uint64_t)sel_cs << 32) | ((uint64_t)sel_cs_cpl3 << 48); entries[4].index = MSR_IA32_LSTAR; entries[4].data = ADDR_VAR_SYSRET; ioctl(cpufd, KVM_SET_MSRS, msrs); } static void setup_32bit_idt(struct kvm_sregs* sregs, char* host_mem, uintptr_t guest_mem) { sregs->idt.base = guest_mem + ADDR_VAR_IDT; sregs->idt.limit = 0x1ff; uint64_t* idt = (uint64_t*)(host_mem + sregs->idt.base); for (int i = 0; i < 32; i++) { struct kvm_segment gate; gate.selector = i << 3; switch (i % 6) { case 0: gate.type = 6; gate.base = SEL_CS16; break; case 1: gate.type = 7; gate.base = SEL_CS16; break; case 2: gate.type = 3; gate.base = SEL_TGATE16; break; case 3: gate.type = 14; gate.base = SEL_CS32; break; case 4: gate.type = 15; gate.base = SEL_CS32; break; case 5: gate.type = 11; gate.base = SEL_TGATE32; break; } gate.limit = guest_mem + ADDR_VAR_USER_CODE2; gate.present = 1; gate.dpl = 0; gate.s = 0; gate.g = 0; gate.db = 0; gate.l = 0; gate.avl = 0; fill_segment_descriptor(idt, idt, &gate); } } static void setup_64bit_idt(struct kvm_sregs* sregs, char* host_mem, uintptr_t guest_mem) { sregs->idt.base = guest_mem + ADDR_VAR_IDT; sregs->idt.limit = 0x1ff; uint64_t* idt = (uint64_t*)(host_mem + sregs->idt.base); for (int i = 0; i < 32; i++) { struct kvm_segment gate; gate.selector = (i * 2) << 3; gate.type = (i & 1) ? 14 : 15; gate.base = SEL_CS64; gate.limit = guest_mem + ADDR_VAR_USER_CODE2; gate.present = 1; gate.dpl = 0; gate.s = 0; gate.g = 0; gate.db = 0; gate.l = 0; gate.avl = 0; fill_segment_descriptor_dword(idt, idt, &gate); } } struct kvm_text { uintptr_t typ; const void* text; uintptr_t size; }; struct kvm_opt { uint64_t typ; uint64_t val; }; #define KVM_SETUP_PAGING (1 << 0) #define KVM_SETUP_PAE (1 << 1) #define KVM_SETUP_PROTECTED (1 << 2) #define KVM_SETUP_CPL3 (1 << 3) #define KVM_SETUP_VIRT86 (1 << 4) #define KVM_SETUP_SMM (1 << 5) #define KVM_SETUP_VM (1 << 6) static volatile long syz_kvm_setup_cpu(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5, volatile long a6, volatile long a7) { const int vmfd = a0; const int cpufd = a1; char* const host_mem = (char*)a2; const struct kvm_text* const text_array_ptr = (struct kvm_text*)a3; const uintptr_t text_count = a4; const uintptr_t flags = a5; const struct kvm_opt* const opt_array_ptr = (struct kvm_opt*)a6; uintptr_t opt_count = a7; const uintptr_t page_size = 4 << 10; const uintptr_t ioapic_page = 10; const uintptr_t guest_mem_size = 24 * page_size; const uintptr_t guest_mem = 0; (void)text_count; int text_type = text_array_ptr[0].typ; const void* text = text_array_ptr[0].text; uintptr_t text_size = text_array_ptr[0].size; for (uintptr_t i = 0; i < guest_mem_size / page_size; i++) { struct kvm_userspace_memory_region memreg; memreg.slot = i; memreg.flags = 0; memreg.guest_phys_addr = guest_mem + i * page_size; if (i == ioapic_page) memreg.guest_phys_addr = 0xfec00000; memreg.memory_size = page_size; memreg.userspace_addr = (uintptr_t)host_mem + i * page_size; ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg); } struct kvm_userspace_memory_region memreg; memreg.slot = 1 + (1 << 16); memreg.flags = 0; memreg.guest_phys_addr = 0x30000; memreg.memory_size = 64 << 10; memreg.userspace_addr = (uintptr_t)host_mem; ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg); struct kvm_sregs sregs; if (ioctl(cpufd, KVM_GET_SREGS, &sregs)) return -1; struct kvm_regs regs; memset(®s, 0, sizeof(regs)); regs.rip = guest_mem + ADDR_TEXT; regs.rsp = ADDR_STACK0; sregs.gdt.base = guest_mem + ADDR_GDT; sregs.gdt.limit = 256 * sizeof(uint64_t) - 1; uint64_t* gdt = (uint64_t*)(host_mem + sregs.gdt.base); struct kvm_segment seg_ldt; memset(&seg_ldt, 0, sizeof(seg_ldt)); seg_ldt.selector = SEL_LDT; seg_ldt.type = 2; seg_ldt.base = guest_mem + ADDR_LDT; seg_ldt.limit = 256 * sizeof(uint64_t) - 1; seg_ldt.present = 1; seg_ldt.dpl = 0; seg_ldt.s = 0; seg_ldt.g = 0; seg_ldt.db = 1; seg_ldt.l = 0; sregs.ldt = seg_ldt; uint64_t* ldt = (uint64_t*)(host_mem + sregs.ldt.base); struct kvm_segment seg_cs16; memset(&seg_cs16, 0, sizeof(seg_cs16)); seg_cs16.selector = SEL_CS16; seg_cs16.type = 11; seg_cs16.base = 0; seg_cs16.limit = 0xfffff; seg_cs16.present = 1; seg_cs16.dpl = 0; seg_cs16.s = 1; seg_cs16.g = 0; seg_cs16.db = 0; seg_cs16.l = 0; struct kvm_segment seg_ds16 = seg_cs16; seg_ds16.selector = SEL_DS16; seg_ds16.type = 3; struct kvm_segment seg_cs16_cpl3 = seg_cs16; seg_cs16_cpl3.selector = SEL_CS16_CPL3; seg_cs16_cpl3.dpl = 3; struct kvm_segment seg_ds16_cpl3 = seg_ds16; seg_ds16_cpl3.selector = SEL_DS16_CPL3; seg_ds16_cpl3.dpl = 3; struct kvm_segment seg_cs32 = seg_cs16; seg_cs32.selector = SEL_CS32; seg_cs32.db = 1; struct kvm_segment seg_ds32 = seg_ds16; seg_ds32.selector = SEL_DS32; seg_ds32.db = 1; struct kvm_segment seg_cs32_cpl3 = seg_cs32; seg_cs32_cpl3.selector = SEL_CS32_CPL3; seg_cs32_cpl3.dpl = 3; struct kvm_segment seg_ds32_cpl3 = seg_ds32; seg_ds32_cpl3.selector = SEL_DS32_CPL3; seg_ds32_cpl3.dpl = 3; struct kvm_segment seg_cs64 = seg_cs16; seg_cs64.selector = SEL_CS64; seg_cs64.l = 1; struct kvm_segment seg_ds64 = seg_ds32; seg_ds64.selector = SEL_DS64; struct kvm_segment seg_cs64_cpl3 = seg_cs64; seg_cs64_cpl3.selector = SEL_CS64_CPL3; seg_cs64_cpl3.dpl = 3; struct kvm_segment seg_ds64_cpl3 = seg_ds64; seg_ds64_cpl3.selector = SEL_DS64_CPL3; seg_ds64_cpl3.dpl = 3; struct kvm_segment seg_tss32; memset(&seg_tss32, 0, sizeof(seg_tss32)); seg_tss32.selector = SEL_TSS32; seg_tss32.type = 9; seg_tss32.base = ADDR_VAR_TSS32; seg_tss32.limit = 0x1ff; seg_tss32.present = 1; seg_tss32.dpl = 0; seg_tss32.s = 0; seg_tss32.g = 0; seg_tss32.db = 0; seg_tss32.l = 0; struct kvm_segment seg_tss32_2 = seg_tss32; seg_tss32_2.selector = SEL_TSS32_2; seg_tss32_2.base = ADDR_VAR_TSS32_2; struct kvm_segment seg_tss32_cpl3 = seg_tss32; seg_tss32_cpl3.selector = SEL_TSS32_CPL3; seg_tss32_cpl3.base = ADDR_VAR_TSS32_CPL3; struct kvm_segment seg_tss32_vm86 = seg_tss32; seg_tss32_vm86.selector = SEL_TSS32_VM86; seg_tss32_vm86.base = ADDR_VAR_TSS32_VM86; struct kvm_segment seg_tss16 = seg_tss32; seg_tss16.selector = SEL_TSS16; seg_tss16.base = ADDR_VAR_TSS16; seg_tss16.limit = 0xff; seg_tss16.type = 1; struct kvm_segment seg_tss16_2 = seg_tss16; seg_tss16_2.selector = SEL_TSS16_2; seg_tss16_2.base = ADDR_VAR_TSS16_2; seg_tss16_2.dpl = 0; struct kvm_segment seg_tss16_cpl3 = seg_tss16; seg_tss16_cpl3.selector = SEL_TSS16_CPL3; seg_tss16_cpl3.base = ADDR_VAR_TSS16_CPL3; seg_tss16_cpl3.dpl = 3; struct kvm_segment seg_tss64 = seg_tss32; seg_tss64.selector = SEL_TSS64; seg_tss64.base = ADDR_VAR_TSS64; seg_tss64.limit = 0x1ff; struct kvm_segment seg_tss64_cpl3 = seg_tss64; seg_tss64_cpl3.selector = SEL_TSS64_CPL3; seg_tss64_cpl3.base = ADDR_VAR_TSS64_CPL3; seg_tss64_cpl3.dpl = 3; struct kvm_segment seg_cgate16; memset(&seg_cgate16, 0, sizeof(seg_cgate16)); seg_cgate16.selector = SEL_CGATE16; seg_cgate16.type = 4; seg_cgate16.base = SEL_CS16 | (2 << 16); seg_cgate16.limit = ADDR_VAR_USER_CODE2; seg_cgate16.present = 1; seg_cgate16.dpl = 0; seg_cgate16.s = 0; seg_cgate16.g = 0; seg_cgate16.db = 0; seg_cgate16.l = 0; seg_cgate16.avl = 0; struct kvm_segment seg_tgate16 = seg_cgate16; seg_tgate16.selector = SEL_TGATE16; seg_tgate16.type = 3; seg_cgate16.base = SEL_TSS16_2; seg_tgate16.limit = 0; struct kvm_segment seg_cgate32 = seg_cgate16; seg_cgate32.selector = SEL_CGATE32; seg_cgate32.type = 12; seg_cgate32.base = SEL_CS32 | (2 << 16); struct kvm_segment seg_tgate32 = seg_cgate32; seg_tgate32.selector = SEL_TGATE32; seg_tgate32.type = 11; seg_tgate32.base = SEL_TSS32_2; seg_tgate32.limit = 0; struct kvm_segment seg_cgate64 = seg_cgate16; seg_cgate64.selector = SEL_CGATE64; seg_cgate64.type = 12; seg_cgate64.base = SEL_CS64; int kvmfd = open("/dev/kvm", O_RDWR); char buf[sizeof(struct kvm_cpuid2) + 128 * sizeof(struct kvm_cpuid_entry2)]; memset(buf, 0, sizeof(buf)); struct kvm_cpuid2* cpuid = (struct kvm_cpuid2*)buf; cpuid->nent = 128; ioctl(kvmfd, KVM_GET_SUPPORTED_CPUID, cpuid); ioctl(cpufd, KVM_SET_CPUID2, cpuid); close(kvmfd); const char* text_prefix = 0; int text_prefix_size = 0; char* host_text = host_mem + ADDR_TEXT; if (text_type == 8) { if (flags & KVM_SETUP_SMM) { if (flags & KVM_SETUP_PROTECTED) { sregs.cs = seg_cs16; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16; sregs.cr0 |= CR0_PE; } else { sregs.cs.selector = 0; sregs.cs.base = 0; } *(host_mem + ADDR_TEXT) = 0xf4; host_text = host_mem + 0x8000; ioctl(cpufd, KVM_SMI, 0); } else if (flags & KVM_SETUP_VIRT86) { sregs.cs = seg_cs32; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32; sregs.cr0 |= CR0_PE; sregs.efer |= EFER_SCE; setup_syscall_msrs(cpufd, SEL_CS32, SEL_CS32_CPL3); setup_32bit_idt(&sregs, host_mem, guest_mem); if (flags & KVM_SETUP_PAGING) { uint64_t pd_addr = guest_mem + ADDR_PD; uint64_t* pd = (uint64_t*)(host_mem + ADDR_PD); pd[0] = PDE32_PRESENT | PDE32_RW | PDE32_USER | PDE32_PS; sregs.cr3 = pd_addr; sregs.cr4 |= CR4_PSE; text_prefix = kvm_asm32_paged_vm86; text_prefix_size = sizeof(kvm_asm32_paged_vm86) - 1; } else { text_prefix = kvm_asm32_vm86; text_prefix_size = sizeof(kvm_asm32_vm86) - 1; } } else { sregs.cs.selector = 0; sregs.cs.base = 0; } } else if (text_type == 16) { if (flags & KVM_SETUP_CPL3) { sregs.cs = seg_cs16; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16; text_prefix = kvm_asm16_cpl3; text_prefix_size = sizeof(kvm_asm16_cpl3) - 1; } else { sregs.cr0 |= CR0_PE; sregs.cs = seg_cs16; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16; } } else if (text_type == 32) { sregs.cr0 |= CR0_PE; sregs.efer |= EFER_SCE; setup_syscall_msrs(cpufd, SEL_CS32, SEL_CS32_CPL3); setup_32bit_idt(&sregs, host_mem, guest_mem); if (flags & KVM_SETUP_SMM) { sregs.cs = seg_cs32; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32; *(host_mem + ADDR_TEXT) = 0xf4; host_text = host_mem + 0x8000; ioctl(cpufd, KVM_SMI, 0); } else if (flags & KVM_SETUP_PAGING) { sregs.cs = seg_cs32; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32; uint64_t pd_addr = guest_mem + ADDR_PD; uint64_t* pd = (uint64_t*)(host_mem + ADDR_PD); pd[0] = PDE32_PRESENT | PDE32_RW | PDE32_USER | PDE32_PS; sregs.cr3 = pd_addr; sregs.cr4 |= CR4_PSE; text_prefix = kvm_asm32_paged; text_prefix_size = sizeof(kvm_asm32_paged) - 1; } else if (flags & KVM_SETUP_CPL3) { sregs.cs = seg_cs32_cpl3; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32_cpl3; } else { sregs.cs = seg_cs32; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32; } } else { sregs.efer |= EFER_LME | EFER_SCE; sregs.cr0 |= CR0_PE; setup_syscall_msrs(cpufd, SEL_CS64, SEL_CS64_CPL3); setup_64bit_idt(&sregs, host_mem, guest_mem); sregs.cs = seg_cs32; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32; uint64_t pml4_addr = guest_mem + ADDR_PML4; uint64_t* pml4 = (uint64_t*)(host_mem + ADDR_PML4); uint64_t pdpt_addr = guest_mem + ADDR_PDP; uint64_t* pdpt = (uint64_t*)(host_mem + ADDR_PDP); uint64_t pd_addr = guest_mem + ADDR_PD; uint64_t* pd = (uint64_t*)(host_mem + ADDR_PD); pml4[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | pdpt_addr; pdpt[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | pd_addr; pd[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | PDE64_PS; sregs.cr3 = pml4_addr; sregs.cr4 |= CR4_PAE; if (flags & KVM_SETUP_VM) { sregs.cr0 |= CR0_NE; *((uint64_t*)(host_mem + ADDR_VAR_VMXON_PTR)) = ADDR_VAR_VMXON; *((uint64_t*)(host_mem + ADDR_VAR_VMCS_PTR)) = ADDR_VAR_VMCS; memcpy(host_mem + ADDR_VAR_VMEXIT_CODE, kvm_asm64_vm_exit, sizeof(kvm_asm64_vm_exit) - 1); *((uint64_t*)(host_mem + ADDR_VAR_VMEXIT_PTR)) = ADDR_VAR_VMEXIT_CODE; text_prefix = kvm_asm64_init_vm; text_prefix_size = sizeof(kvm_asm64_init_vm) - 1; } else if (flags & KVM_SETUP_CPL3) { text_prefix = kvm_asm64_cpl3; text_prefix_size = sizeof(kvm_asm64_cpl3) - 1; } else { text_prefix = kvm_asm64_enable_long; text_prefix_size = sizeof(kvm_asm64_enable_long) - 1; } } struct tss16 tss16; memset(&tss16, 0, sizeof(tss16)); tss16.ss0 = tss16.ss1 = tss16.ss2 = SEL_DS16; tss16.sp0 = tss16.sp1 = tss16.sp2 = ADDR_STACK0; tss16.ip = ADDR_VAR_USER_CODE2; tss16.flags = (1 << 1); tss16.cs = SEL_CS16; tss16.es = tss16.ds = tss16.ss = SEL_DS16; tss16.ldt = SEL_LDT; struct tss16* tss16_addr = (struct tss16*)(host_mem + seg_tss16_2.base); memcpy(tss16_addr, &tss16, sizeof(tss16)); memset(&tss16, 0, sizeof(tss16)); tss16.ss0 = tss16.ss1 = tss16.ss2 = SEL_DS16; tss16.sp0 = tss16.sp1 = tss16.sp2 = ADDR_STACK0; tss16.ip = ADDR_VAR_USER_CODE2; tss16.flags = (1 << 1); tss16.cs = SEL_CS16_CPL3; tss16.es = tss16.ds = tss16.ss = SEL_DS16_CPL3; tss16.ldt = SEL_LDT; struct tss16* tss16_cpl3_addr = (struct tss16*)(host_mem + seg_tss16_cpl3.base); memcpy(tss16_cpl3_addr, &tss16, sizeof(tss16)); struct tss32 tss32; memset(&tss32, 0, sizeof(tss32)); tss32.ss0 = tss32.ss1 = tss32.ss2 = SEL_DS32; tss32.sp0 = tss32.sp1 = tss32.sp2 = ADDR_STACK0; tss32.ip = ADDR_VAR_USER_CODE; tss32.flags = (1 << 1) | (1 << 17); tss32.ldt = SEL_LDT; tss32.cr3 = sregs.cr3; tss32.io_bitmap = offsetof(struct tss32, io_bitmap); struct tss32* tss32_addr = (struct tss32*)(host_mem + seg_tss32_vm86.base); memcpy(tss32_addr, &tss32, sizeof(tss32)); memset(&tss32, 0, sizeof(tss32)); tss32.ss0 = tss32.ss1 = tss32.ss2 = SEL_DS32; tss32.sp0 = tss32.sp1 = tss32.sp2 = ADDR_STACK0; tss32.ip = ADDR_VAR_USER_CODE; tss32.flags = (1 << 1); tss32.cr3 = sregs.cr3; tss32.es = tss32.ds = tss32.ss = tss32.gs = tss32.fs = SEL_DS32; tss32.cs = SEL_CS32; tss32.ldt = SEL_LDT; tss32.cr3 = sregs.cr3; tss32.io_bitmap = offsetof(struct tss32, io_bitmap); struct tss32* tss32_cpl3_addr = (struct tss32*)(host_mem + seg_tss32_2.base); memcpy(tss32_cpl3_addr, &tss32, sizeof(tss32)); struct tss64 tss64; memset(&tss64, 0, sizeof(tss64)); tss64.rsp[0] = ADDR_STACK0; tss64.rsp[1] = ADDR_STACK0; tss64.rsp[2] = ADDR_STACK0; tss64.io_bitmap = offsetof(struct tss64, io_bitmap); struct tss64* tss64_addr = (struct tss64*)(host_mem + seg_tss64.base); memcpy(tss64_addr, &tss64, sizeof(tss64)); memset(&tss64, 0, sizeof(tss64)); tss64.rsp[0] = ADDR_STACK0; tss64.rsp[1] = ADDR_STACK0; tss64.rsp[2] = ADDR_STACK0; tss64.io_bitmap = offsetof(struct tss64, io_bitmap); struct tss64* tss64_cpl3_addr = (struct tss64*)(host_mem + seg_tss64_cpl3.base); memcpy(tss64_cpl3_addr, &tss64, sizeof(tss64)); if (text_size > 1000) text_size = 1000; if (text_prefix) { memcpy(host_text, text_prefix, text_prefix_size); void* patch = memmem(host_text, text_prefix_size, "\xde\xc0\xad\x0b", 4); if (patch) *((uint32_t*)patch) = guest_mem + ADDR_TEXT + ((char*)patch - host_text) + 6; uint16_t magic = PREFIX_SIZE; patch = memmem(host_text, text_prefix_size, &magic, sizeof(magic)); if (patch) *((uint16_t*)patch) = guest_mem + ADDR_TEXT + text_prefix_size; } memcpy((void*)(host_text + text_prefix_size), text, text_size); *(host_text + text_prefix_size + text_size) = 0xf4; memcpy(host_mem + ADDR_VAR_USER_CODE, text, text_size); *(host_mem + ADDR_VAR_USER_CODE + text_size) = 0xf4; *(host_mem + ADDR_VAR_HLT) = 0xf4; memcpy(host_mem + ADDR_VAR_SYSRET, "\x0f\x07\xf4", 3); memcpy(host_mem + ADDR_VAR_SYSEXIT, "\x0f\x35\xf4", 3); *(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_FLD) = 0; *(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_VAL) = 0; if (opt_count > 2) opt_count = 2; for (uintptr_t i = 0; i < opt_count; i++) { uint64_t typ = opt_array_ptr[i].typ; uint64_t val = opt_array_ptr[i].val; switch (typ % 9) { case 0: sregs.cr0 ^= val & (CR0_MP | CR0_EM | CR0_ET | CR0_NE | CR0_WP | CR0_AM | CR0_NW | CR0_CD); break; case 1: sregs.cr4 ^= val & (CR4_VME | CR4_PVI | CR4_TSD | CR4_DE | CR4_MCE | CR4_PGE | CR4_PCE | CR4_OSFXSR | CR4_OSXMMEXCPT | CR4_UMIP | CR4_VMXE | CR4_SMXE | CR4_FSGSBASE | CR4_PCIDE | CR4_OSXSAVE | CR4_SMEP | CR4_SMAP | CR4_PKE); break; case 2: sregs.efer ^= val & (EFER_SCE | EFER_NXE | EFER_SVME | EFER_LMSLE | EFER_FFXSR | EFER_TCE); break; case 3: val &= ((1 << 8) | (1 << 9) | (1 << 10) | (1 << 12) | (1 << 13) | (1 << 14) | (1 << 15) | (1 << 18) | (1 << 19) | (1 << 20) | (1 << 21)); regs.rflags ^= val; tss16_addr->flags ^= val; tss16_cpl3_addr->flags ^= val; tss32_addr->flags ^= val; tss32_cpl3_addr->flags ^= val; break; case 4: seg_cs16.type = val & 0xf; seg_cs32.type = val & 0xf; seg_cs64.type = val & 0xf; break; case 5: seg_cs16_cpl3.type = val & 0xf; seg_cs32_cpl3.type = val & 0xf; seg_cs64_cpl3.type = val & 0xf; break; case 6: seg_ds16.type = val & 0xf; seg_ds32.type = val & 0xf; seg_ds64.type = val & 0xf; break; case 7: seg_ds16_cpl3.type = val & 0xf; seg_ds32_cpl3.type = val & 0xf; seg_ds64_cpl3.type = val & 0xf; break; case 8: *(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_FLD) = (val & 0xffff); *(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_VAL) = (val >> 16); break; default: exit(1); } } regs.rflags |= 2; fill_segment_descriptor(gdt, ldt, &seg_ldt); fill_segment_descriptor(gdt, ldt, &seg_cs16); fill_segment_descriptor(gdt, ldt, &seg_ds16); fill_segment_descriptor(gdt, ldt, &seg_cs16_cpl3); fill_segment_descriptor(gdt, ldt, &seg_ds16_cpl3); fill_segment_descriptor(gdt, ldt, &seg_cs32); fill_segment_descriptor(gdt, ldt, &seg_ds32); fill_segment_descriptor(gdt, ldt, &seg_cs32_cpl3); fill_segment_descriptor(gdt, ldt, &seg_ds32_cpl3); fill_segment_descriptor(gdt, ldt, &seg_cs64); fill_segment_descriptor(gdt, ldt, &seg_ds64); fill_segment_descriptor(gdt, ldt, &seg_cs64_cpl3); fill_segment_descriptor(gdt, ldt, &seg_ds64_cpl3); fill_segment_descriptor(gdt, ldt, &seg_tss32); fill_segment_descriptor(gdt, ldt, &seg_tss32_2); fill_segment_descriptor(gdt, ldt, &seg_tss32_cpl3); fill_segment_descriptor(gdt, ldt, &seg_tss32_vm86); fill_segment_descriptor(gdt, ldt, &seg_tss16); fill_segment_descriptor(gdt, ldt, &seg_tss16_2); fill_segment_descriptor(gdt, ldt, &seg_tss16_cpl3); fill_segment_descriptor_dword(gdt, ldt, &seg_tss64); fill_segment_descriptor_dword(gdt, ldt, &seg_tss64_cpl3); fill_segment_descriptor(gdt, ldt, &seg_cgate16); fill_segment_descriptor(gdt, ldt, &seg_tgate16); fill_segment_descriptor(gdt, ldt, &seg_cgate32); fill_segment_descriptor(gdt, ldt, &seg_tgate32); fill_segment_descriptor_dword(gdt, ldt, &seg_cgate64); if (ioctl(cpufd, KVM_SET_SREGS, &sregs)) return -1; if (ioctl(cpufd, KVM_SET_REGS, ®s)) return -1; return 0; } #define XT_TABLE_SIZE 1536 #define XT_MAX_ENTRIES 10 struct xt_counters { uint64_t pcnt, bcnt; }; struct ipt_getinfo { char name[32]; unsigned int valid_hooks; unsigned int hook_entry[5]; unsigned int underflow[5]; unsigned int num_entries; unsigned int size; }; struct ipt_get_entries { char name[32]; unsigned int size; uint64_t entrytable[XT_TABLE_SIZE / sizeof(uint64_t)]; }; struct ipt_replace { char name[32]; unsigned int valid_hooks; unsigned int num_entries; unsigned int size; unsigned int hook_entry[5]; unsigned int underflow[5]; unsigned int num_counters; struct xt_counters* counters; uint64_t entrytable[XT_TABLE_SIZE / sizeof(uint64_t)]; }; struct ipt_table_desc { const char* name; struct ipt_getinfo info; struct ipt_replace replace; }; static struct ipt_table_desc ipv4_tables[] = { {.name = "filter"}, {.name = "nat"}, {.name = "mangle"}, {.name = "raw"}, {.name = "security"}, }; static struct ipt_table_desc ipv6_tables[] = { {.name = "filter"}, {.name = "nat"}, {.name = "mangle"}, {.name = "raw"}, {.name = "security"}, }; #define IPT_BASE_CTL 64 #define IPT_SO_SET_REPLACE (IPT_BASE_CTL) #define IPT_SO_GET_INFO (IPT_BASE_CTL) #define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1) struct arpt_getinfo { char name[32]; unsigned int valid_hooks; unsigned int hook_entry[3]; unsigned int underflow[3]; unsigned int num_entries; unsigned int size; }; struct arpt_get_entries { char name[32]; unsigned int size; uint64_t entrytable[XT_TABLE_SIZE / sizeof(uint64_t)]; }; struct arpt_replace { char name[32]; unsigned int valid_hooks; unsigned int num_entries; unsigned int size; unsigned int hook_entry[3]; unsigned int underflow[3]; unsigned int num_counters; struct xt_counters* counters; uint64_t entrytable[XT_TABLE_SIZE / sizeof(uint64_t)]; }; struct arpt_table_desc { const char* name; struct arpt_getinfo info; struct arpt_replace replace; }; static struct arpt_table_desc arpt_tables[] = { {.name = "filter"}, }; #define ARPT_BASE_CTL 96 #define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL) #define ARPT_SO_GET_INFO (ARPT_BASE_CTL) #define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1) static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level) { int fd = socket(family, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { switch (errno) { case EAFNOSUPPORT: case ENOPROTOOPT: case ENOENT: return; } exit(1); } for (int i = 0; i < num_tables; i++) { struct ipt_table_desc* table = &tables[i]; strcpy(table->info.name, table->name); strcpy(table->replace.name, table->name); socklen_t optlen = sizeof(table->info); if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) { switch (errno) { case EPERM: case ENOENT: case ENOPROTOOPT: continue; } exit(1); } if (table->info.size > sizeof(table->replace.entrytable)) exit(1); if (table->info.num_entries > XT_MAX_ENTRIES) exit(1); struct ipt_get_entries entries; memset(&entries, 0, sizeof(entries)); strcpy(entries.name, table->name); entries.size = table->info.size; optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size; if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen)) exit(1); table->replace.valid_hooks = table->info.valid_hooks; table->replace.num_entries = table->info.num_entries; table->replace.size = table->info.size; memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry)); memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow)); memcpy(table->replace.entrytable, entries.entrytable, table->info.size); } close(fd); } static void reset_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level) { int fd = socket(family, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { switch (errno) { case EAFNOSUPPORT: case ENOPROTOOPT: case ENOENT: return; } exit(1); } for (int i = 0; i < num_tables; i++) { struct ipt_table_desc* table = &tables[i]; if (table->info.valid_hooks == 0) continue; struct ipt_getinfo info; memset(&info, 0, sizeof(info)); strcpy(info.name, table->name); socklen_t optlen = sizeof(info); if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen)) exit(1); if (memcmp(&table->info, &info, sizeof(table->info)) == 0) { struct ipt_get_entries entries; memset(&entries, 0, sizeof(entries)); strcpy(entries.name, table->name); entries.size = table->info.size; optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size; if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen)) exit(1); if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0) continue; } struct xt_counters counters[XT_MAX_ENTRIES]; table->replace.num_counters = info.num_entries; table->replace.counters = counters; optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size; if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen)) exit(1); } close(fd); } static void checkpoint_arptables(void) { int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { switch (errno) { case EAFNOSUPPORT: case ENOPROTOOPT: case ENOENT: return; } exit(1); } for (unsigned i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) { struct arpt_table_desc* table = &arpt_tables[i]; strcpy(table->info.name, table->name); strcpy(table->replace.name, table->name); socklen_t optlen = sizeof(table->info); if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) { switch (errno) { case EPERM: case ENOENT: case ENOPROTOOPT: continue; } exit(1); } if (table->info.size > sizeof(table->replace.entrytable)) exit(1); if (table->info.num_entries > XT_MAX_ENTRIES) exit(1); struct arpt_get_entries entries; memset(&entries, 0, sizeof(entries)); strcpy(entries.name, table->name); entries.size = table->info.size; optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size; if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen)) exit(1); table->replace.valid_hooks = table->info.valid_hooks; table->replace.num_entries = table->info.num_entries; table->replace.size = table->info.size; memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry)); memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow)); memcpy(table->replace.entrytable, entries.entrytable, table->info.size); } close(fd); } static void reset_arptables() { int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { switch (errno) { case EAFNOSUPPORT: case ENOPROTOOPT: case ENOENT: return; } exit(1); } for (unsigned i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) { struct arpt_table_desc* table = &arpt_tables[i]; if (table->info.valid_hooks == 0) continue; struct arpt_getinfo info; memset(&info, 0, sizeof(info)); strcpy(info.name, table->name); socklen_t optlen = sizeof(info); if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen)) exit(1); if (memcmp(&table->info, &info, sizeof(table->info)) == 0) { struct arpt_get_entries entries; memset(&entries, 0, sizeof(entries)); strcpy(entries.name, table->name); entries.size = table->info.size; optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size; if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen)) exit(1); if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0) continue; } else { } struct xt_counters counters[XT_MAX_ENTRIES]; table->replace.num_counters = info.num_entries; table->replace.counters = counters; optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size; if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen)) exit(1); } close(fd); } #define NF_BR_NUMHOOKS 6 #define EBT_TABLE_MAXNAMELEN 32 #define EBT_CHAIN_MAXNAMELEN 32 #define EBT_BASE_CTL 128 #define EBT_SO_SET_ENTRIES (EBT_BASE_CTL) #define EBT_SO_GET_INFO (EBT_BASE_CTL) #define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO + 1) #define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES + 1) #define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO + 1) struct ebt_replace { char name[EBT_TABLE_MAXNAMELEN]; unsigned int valid_hooks; unsigned int nentries; unsigned int entries_size; struct ebt_entries* hook_entry[NF_BR_NUMHOOKS]; unsigned int num_counters; struct ebt_counter* counters; char* entries; }; struct ebt_entries { unsigned int distinguisher; char name[EBT_CHAIN_MAXNAMELEN]; unsigned int counter_offset; int policy; unsigned int nentries; char data[0] __attribute__((aligned(__alignof__(struct ebt_replace)))); }; struct ebt_table_desc { const char* name; struct ebt_replace replace; char entrytable[XT_TABLE_SIZE]; }; static struct ebt_table_desc ebt_tables[] = { {.name = "filter"}, {.name = "nat"}, {.name = "broute"}, }; static void checkpoint_ebtables(void) { int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { switch (errno) { case EAFNOSUPPORT: case ENOPROTOOPT: case ENOENT: return; } exit(1); } for (size_t i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) { struct ebt_table_desc* table = &ebt_tables[i]; strcpy(table->replace.name, table->name); socklen_t optlen = sizeof(table->replace); if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace, &optlen)) { switch (errno) { case EPERM: case ENOENT: case ENOPROTOOPT: continue; } exit(1); } if (table->replace.entries_size > sizeof(table->entrytable)) exit(1); table->replace.num_counters = 0; table->replace.entries = table->entrytable; optlen = sizeof(table->replace) + table->replace.entries_size; if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace, &optlen)) exit(1); } close(fd); } static void reset_ebtables() { int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { switch (errno) { case EAFNOSUPPORT: case ENOPROTOOPT: case ENOENT: return; } exit(1); } for (unsigned i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) { struct ebt_table_desc* table = &ebt_tables[i]; if (table->replace.valid_hooks == 0) continue; struct ebt_replace replace; memset(&replace, 0, sizeof(replace)); strcpy(replace.name, table->name); socklen_t optlen = sizeof(replace); if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen)) exit(1); replace.num_counters = 0; table->replace.entries = 0; for (unsigned h = 0; h < NF_BR_NUMHOOKS; h++) table->replace.hook_entry[h] = 0; if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) { char entrytable[XT_TABLE_SIZE]; memset(&entrytable, 0, sizeof(entrytable)); replace.entries = entrytable; optlen = sizeof(replace) + replace.entries_size; if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen)) exit(1); if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0) continue; } for (unsigned j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) { if (table->replace.valid_hooks & (1 << h)) { table->replace.hook_entry[h] = (struct ebt_entries*)table->entrytable + j; j++; } } table->replace.entries = table->entrytable; optlen = sizeof(table->replace) + table->replace.entries_size; if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen)) exit(1); } close(fd); } static void checkpoint_net_namespace(void) { checkpoint_ebtables(); checkpoint_arptables(); checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP); checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6); } static void reset_net_namespace(void) { reset_ebtables(); reset_arptables(); reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP); reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6); } static void mount_cgroups(const char* dir, const char** controllers, int count) { if (mkdir(dir, 0777)) { return; } char enabled[128] = {0}; int i = 0; for (; i < count; i++) { if (mount("none", dir, "cgroup", 0, controllers[i])) { continue; } umount(dir); strcat(enabled, ","); strcat(enabled, controllers[i]); } if (enabled[0] == 0) { if (rmdir(dir) && errno != EBUSY) exit(1); return; } if (mount("none", dir, "cgroup", 0, enabled + 1)) { if (rmdir(dir) && errno != EBUSY) exit(1); } if (chmod(dir, 0777)) { } } static void mount_cgroups2(const char** controllers, int count) { if (mkdir("/syzcgroup/unified", 0777)) { return; } if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) { if (rmdir("/syzcgroup/unified") && errno != EBUSY) exit(1); return; } if (chmod("/syzcgroup/unified", 0777)) { } int control = open("/syzcgroup/unified/cgroup.subtree_control", O_WRONLY); if (control == -1) return; int i; for (i = 0; i < count; i++) if (write(control, controllers[i], strlen(controllers[i])) < 0) { } close(control); } static void setup_cgroups() { const char* unified_controllers[] = {"+cpu", "+io", "+pids"}; const char* net_controllers[] = {"net", "net_prio", "devices", "blkio", "freezer"}; const char* cpu_controllers[] = {"cpuset", "cpuacct", "hugetlb", "rlimit", "memory"}; if (mkdir("/syzcgroup", 0777)) { return; } mount_cgroups2(unified_controllers, sizeof(unified_controllers) / sizeof(unified_controllers[0])); mount_cgroups("/syzcgroup/net", net_controllers, sizeof(net_controllers) / sizeof(net_controllers[0])); mount_cgroups("/syzcgroup/cpu", cpu_controllers, sizeof(cpu_controllers) / sizeof(cpu_controllers[0])); write_file("/syzcgroup/cpu/cgroup.clone_children", "1"); write_file("/syzcgroup/cpu/cpuset.memory_pressure_enabled", "1"); } static void setup_cgroups_loop() { int pid = getpid(); char file[128]; char cgroupdir[64]; snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid); if (mkdir(cgroupdir, 0777)) { } snprintf(file, sizeof(file), "%s/pids.max", cgroupdir); write_file(file, "32"); snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); write_file(file, "%d", pid); snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid); if (mkdir(cgroupdir, 0777)) { } snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); write_file(file, "%d", pid); snprintf(file, sizeof(file), "%s/memory.soft_limit_in_bytes", cgroupdir); write_file(file, "%d", 299 << 20); snprintf(file, sizeof(file), "%s/memory.limit_in_bytes", cgroupdir); write_file(file, "%d", 300 << 20); snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid); if (mkdir(cgroupdir, 0777)) { } snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); write_file(file, "%d", pid); } static void setup_cgroups_test() { char cgroupdir[64]; snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid); if (symlink(cgroupdir, "./cgroup")) { } snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid); if (symlink(cgroupdir, "./cgroup.cpu")) { } snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid); if (symlink(cgroupdir, "./cgroup.net")) { } } static void setup_common() { if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) { } } static void setup_binderfs() { if (mkdir("/dev/binderfs", 0777)) { } if (mount("binder", "/dev/binderfs", "binder", 0, NULL)) { } } static void loop(); static void sandbox_common() { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); setsid(); struct rlimit rlim; rlim.rlim_cur = rlim.rlim_max = (200 << 20); setrlimit(RLIMIT_AS, &rlim); rlim.rlim_cur = rlim.rlim_max = 32 << 20; setrlimit(RLIMIT_MEMLOCK, &rlim); rlim.rlim_cur = rlim.rlim_max = 136 << 20; setrlimit(RLIMIT_FSIZE, &rlim); rlim.rlim_cur = rlim.rlim_max = 1 << 20; setrlimit(RLIMIT_STACK, &rlim); rlim.rlim_cur = rlim.rlim_max = 128 << 20; setrlimit(RLIMIT_CORE, &rlim); rlim.rlim_cur = rlim.rlim_max = 256; setrlimit(RLIMIT_NOFILE, &rlim); if (unshare(CLONE_NEWNS)) { } if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) { } if (unshare(CLONE_NEWIPC)) { } if (unshare(0x02000000)) { } if (unshare(CLONE_NEWUTS)) { } if (unshare(CLONE_SYSVSEM)) { } typedef struct { const char* name; const char* value; } sysctl_t; static const sysctl_t sysctls[] = { {"/proc/sys/kernel/shmmax", "16777216"}, {"/proc/sys/kernel/shmall", "536870912"}, {"/proc/sys/kernel/shmmni", "1024"}, {"/proc/sys/kernel/msgmax", "8192"}, {"/proc/sys/kernel/msgmni", "1024"}, {"/proc/sys/kernel/msgmnb", "1024"}, {"/proc/sys/kernel/sem", "1024 1048576 500 1024"}, }; unsigned i; for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++) write_file(sysctls[i].name, sysctls[i].value); } static int wait_for_loop(int pid) { if (pid < 0) exit(1); int status = 0; while (waitpid(-1, &status, __WALL) != pid) { } return WEXITSTATUS(status); } static void drop_caps(void) { struct __user_cap_header_struct cap_hdr = {}; struct __user_cap_data_struct cap_data[2] = {}; cap_hdr.version = _LINUX_CAPABILITY_VERSION_3; cap_hdr.pid = getpid(); if (syscall(SYS_capget, &cap_hdr, &cap_data)) exit(1); const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE); cap_data[0].effective &= ~drop; cap_data[0].permitted &= ~drop; cap_data[0].inheritable &= ~drop; if (syscall(SYS_capset, &cap_hdr, &cap_data)) exit(1); } static int do_sandbox_none(void) { if (unshare(CLONE_NEWPID)) { } int pid = fork(); if (pid != 0) return wait_for_loop(pid); setup_common(); sandbox_common(); drop_caps(); if (unshare(CLONE_NEWNET)) { } write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535"); initialize_tun(); initialize_wifi_devices(); setup_binderfs(); loop(); exit(1); } #define FS_IOC_SETFLAGS _IOW('f', 2, long) static void remove_dir(const char* dir) { int iter = 0; DIR* dp = 0; retry: const int umount_flags = MNT_FORCE | UMOUNT_NOFOLLOW; while (umount2(dir, umount_flags) == 0) { } dp = opendir(dir); if (dp == NULL) { if (errno == EMFILE) { exit(1); } exit(1); } struct dirent* ep = 0; while ((ep = readdir(dp))) { if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0) continue; char filename[FILENAME_MAX]; snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name); while (umount2(filename, umount_flags) == 0) { } struct stat st; if (lstat(filename, &st)) exit(1); if (S_ISDIR(st.st_mode)) { remove_dir(filename); continue; } int i; for (i = 0;; i++) { if (unlink(filename) == 0) break; if (errno == EPERM) { int fd = open(filename, O_RDONLY); if (fd != -1) { long flags = 0; if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) { } close(fd); continue; } } if (errno == EROFS) { break; } if (errno != EBUSY || i > 100) exit(1); if (umount2(filename, umount_flags)) exit(1); } } closedir(dp); for (int i = 0;; i++) { if (rmdir(dir) == 0) break; if (i < 100) { if (errno == EPERM) { int fd = open(dir, O_RDONLY); if (fd != -1) { long flags = 0; if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) { } close(fd); continue; } } if (errno == EROFS) { break; } if (errno == EBUSY) { if (umount2(dir, umount_flags)) exit(1); continue; } if (errno == ENOTEMPTY) { if (iter < 100) { iter++; goto retry; } } } exit(1); } } static void kill_and_wait(int pid, int* status) { kill(-pid, SIGKILL); kill(pid, SIGKILL); for (int i = 0; i < 100; i++) { if (waitpid(-1, status, WNOHANG | __WALL) == pid) return; usleep(1000); } DIR* dir = opendir("/sys/fs/fuse/connections"); if (dir) { for (;;) { struct dirent* ent = readdir(dir); if (!ent) break; if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue; char abort[300]; snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name); int fd = open(abort, O_WRONLY); if (fd == -1) { continue; } if (write(fd, abort, 1) < 0) { } close(fd); } closedir(dir); } else { } while (waitpid(-1, status, __WALL) != pid) { } } static void setup_loop() { setup_cgroups_loop(); checkpoint_net_namespace(); } static void reset_loop() { reset_net_namespace(); } static void setup_test() { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); setpgrp(); setup_cgroups_test(); write_file("/proc/self/oom_score_adj", "1000"); flush_tun(); if (symlink("/dev/binderfs", "./binderfs")) { } } static void close_fds() { for (int fd = 3; fd < MAX_FDS; fd++) close(fd); } static const char* setup_binfmt_misc() { if (mount(0, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, 0)) { return NULL; } if (!write_file("/proc/sys/fs/binfmt_misc/register", ":syz0:M:0:\x01::./file0:") || !write_file("/proc/sys/fs/binfmt_misc/register", ":syz1:M:1:\x02::./file0:POC")) return "write(/proc/sys/fs/binfmt_misc/register) failed"; return NULL; } static const char* setup_usb() { if (chmod("/dev/raw-gadget", 0666)) return "failed to chmod /dev/raw-gadget"; return NULL; } #define NL802154_CMD_SET_SHORT_ADDR 11 #define NL802154_ATTR_IFINDEX 3 #define NL802154_ATTR_SHORT_ADDR 10 static const char* setup_802154() { const char* error = NULL; int sock_generic = -1; int sock_route = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock_route == -1) { error = "socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE) failed"; goto fail; } sock_generic = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (sock_generic == -1) { error = "socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC) failed"; goto fail; } { int nl802154_family_id = netlink_query_family_id(&nlmsg, sock_generic, "nl802154", true); if (nl802154_family_id < 0) { error = "netlink_query_family_id failed"; goto fail; } for (int i = 0; i < 2; i++) { char devname[] = "wpan0"; devname[strlen(devname) - 1] += i; uint64_t hwaddr = 0xaaaaaaaaaaaa0002 + (i << 8); uint16_t shortaddr = 0xaaa0 + i; int ifindex = if_nametoindex(devname); struct genlmsghdr genlhdr; memset(&genlhdr, 0, sizeof(genlhdr)); genlhdr.cmd = NL802154_CMD_SET_SHORT_ADDR; netlink_init(&nlmsg, nl802154_family_id, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(&nlmsg, NL802154_ATTR_IFINDEX, &ifindex, sizeof(ifindex)); netlink_attr(&nlmsg, NL802154_ATTR_SHORT_ADDR, &shortaddr, sizeof(shortaddr)); if (netlink_send(&nlmsg, sock_generic) < 0) { error = "NL802154_CMD_SET_SHORT_ADDR failed"; goto fail; } netlink_device_change(&nlmsg, sock_route, devname, true, 0, &hwaddr, sizeof(hwaddr), 0); if (i == 0) { netlink_add_device_impl(&nlmsg, "lowpan", "lowpan0", false); netlink_done(&nlmsg); netlink_attr(&nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); if (netlink_send(&nlmsg, sock_route) < 0) { error = "netlink: adding device lowpan0 type lowpan link wpan0"; goto fail; } } } } fail: close(sock_route); close(sock_generic); return error; } #define SWAP_FILE "./swap-file" #define SWAP_FILE_SIZE (128 * 1000 * 1000) static const char* setup_swap() { swapoff(SWAP_FILE); unlink(SWAP_FILE); int fd = open(SWAP_FILE, O_CREAT | O_WRONLY | O_CLOEXEC, 0600); if (fd == -1) return "swap file open failed"; fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, SWAP_FILE_SIZE); close(fd); char cmdline[64]; sprintf(cmdline, "mkswap %s", SWAP_FILE); if (runcmdline(cmdline)) return "mkswap failed"; if (swapon(SWAP_FILE, SWAP_FLAG_PREFER) == 1) return "swapon failed"; return NULL; } static void execute_one(void); #define WAIT_FLAGS __WALL static void loop(void) { setup_loop(); int iter = 0; for (;; iter++) { char cwdbuf[32]; sprintf(cwdbuf, "./%d", iter); if (mkdir(cwdbuf, 0777)) exit(1); reset_loop(); int pid = fork(); if (pid < 0) exit(1); if (pid == 0) { if (chdir(cwdbuf)) exit(1); setup_test(); execute_one(); close_fds(); exit(0); } int status = 0; uint64_t start = current_time_ms(); for (;;) { sleep_ms(10); if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) break; if (current_time_ms() - start < 5000) continue; kill_and_wait(pid, &status); break; } remove_dir(cwdbuf); } } uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff}; void execute_one(void) { intptr_t res = 0; if (write(1, "executing program\n", sizeof("executing program\n") - 1)) { } NONFAILING(memcpy((void*)0x20000040, "/dev/kvm\000", 9)); res = syscall(__NR_openat, /*fd=*/0xffffffffffffff9cul, /*file=*/0x20000040ul, /*flags=*/0ul, /*mode=*/0ul); if (res != -1) r[0] = res; res = syscall(__NR_ioctl, /*fd=*/r[0], /*cmd=*/0xae01, /*type=*/0ul); if (res != -1) r[1] = res; syscall(__NR_ioctl, /*fd=*/r[1], /*cmd=*/0x4020ae46, /*arg=*/0ul); res = syscall(__NR_ioctl, /*fd=*/r[1], /*cmd=*/0xae41, /*id=*/0ul); if (res != -1) r[2] = res; NONFAILING(syz_kvm_setup_cpu( /*fd=*/-1, /*cpufd=*/r[2], /*usermem=*/0x20000000, /*text=*/0, /*ntext=*/0, /*flags=KVM_SETUP_VM|KVM_SETUP_SMM|KVM_SETUP_PROTECTED*/ 0x64, /*opts=*/0, /*nopt=*/0)); syscall(__NR_ioctl, /*fd=*/r[2], /*cmd=*/0x40186366, /*arg=*/0ul); } int main(void) { syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul, /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1, /*offset=*/0ul); syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul, /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul, /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1, /*offset=*/0ul); syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul, /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1, /*offset=*/0ul); setup_cgroups(); const char* reason; (void)reason; if ((reason = setup_binfmt_misc())) printf("the reproducer may not work as expected: binfmt_misc setup failed: " "%s\n", reason); if ((reason = setup_usb())) printf("the reproducer may not work as expected: USB injection setup " "failed: %s\n", reason); if ((reason = setup_802154())) printf("the reproducer may not work as expected: 802154 injection setup " "failed: %s\n", reason); if ((reason = setup_swap())) printf("the reproducer may not work as expected: swap setup failed: %s\n", reason); install_segv_handler(); for (procid = 0; procid < 5; procid++) { if (fork() == 0) { use_temporary_dir(); do_sandbox_none(); } } sleep(1000000); return 0; }