// https://syzkaller.appspot.com/bug?id=fa407a6613bdd94160cb12d618549a364290e406
// autogenerated by syzkaller (http://github.com/google/syzkaller)
//
// NOTE(review): this file had been collapsed onto a handful of physical lines
// and every #include had lost its header name. The include list below was
// rebuilt from the identifiers the code uses; it is not guaranteed to be the
// exact list syzkaller emitted. Runtime logic is unchanged.

#define _GNU_SOURCE

#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/capability.h>
#include <netinet/in.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

// Terminates the whole process via the exit_group syscall. The empty loop
// afterwards keeps the function from ever returning (it is declared noreturn)
// should the syscall itself return.
__attribute__((noreturn)) static void doexit(int status)
{
  volatile unsigned i;
  syscall(__NR_exit_group, status);
  for (i = 0;; i++) {
  }
}

// Exit statuses used by fail() and exitf() below.
const int kFailStatus = 67;
const int kRetryStatus = 69;

// Prints a printf-style message plus the errno value captured on entry to
// stderr, then exits. ENOMEM and EAGAIN exit with kRetryStatus; every other
// errno exits with kFailStatus.
static void fail(const char* msg, ...)
{
  // Capture errno first: the stdio calls below may overwrite it.
  int e = errno;
  va_list args;
  va_start(args, msg);
  vfprintf(stderr, msg, args);
  va_end(args);
  fprintf(stderr, " (errno %d)\n", e);
  doexit((e == ENOMEM || e == EAGAIN) ? kRetryStatus : kFailStatus);
}

// Same reporting as fail(), but always exits with kRetryStatus.
static void exitf(const char* msg, ...)
{
  int e = errno;
  va_list args;
  va_start(args, msg);
  vfprintf(stderr, msg, args);
  va_end(args);
  fprintf(stderr, " (errno %d)\n", e);
  doexit(kRetryStatus);
}

// Returns CLOCK_MONOTONIC time in milliseconds; exits via fail() if the clock
// cannot be read.
static uint64_t current_time_ms()
{
  struct timespec ts;

  if (clock_gettime(CLOCK_MONOTONIC, &ts))
    fail("clock_gettime failed");
  return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

// Creates a fresh "./syzkaller.XXXXXX" directory, makes it mode 0777 and
// changes into it. Any failure is fatal.
static void use_temporary_dir()
{
  char tmpdir_template[] = "./syzkaller.XXXXXX";
  char* tmpdir = mkdtemp(tmpdir_template);
  if (!tmpdir)
    fail("failed to mkdtemp");
  if (chmod(tmpdir, 0777))
    fail("failed to chmod");
  if (chdir(tmpdir))
    fail("failed to chdir");
}

static void loop();

// Older libc headers may not provide this flag; guarded so that a libc that
// already defines it is not redefined.
#ifndef CLONE_NEWCGROUP
#define CLONE_NEWCGROUP 0x02000000
#endif

// Process setup for the sandboxed child: die with the parent, start a new
// process group and session, cap resource usage, and try to unshare several
// namespaces. None of the return values are acted upon (best effort).
static void sandbox_common()
{
  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  setpgrp();
  setsid();

  struct rlimit rlim;
  rlim.rlim_cur = rlim.rlim_max = 128 << 20; // 128 MiB address space
  setrlimit(RLIMIT_AS, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 8 << 20; // 8 MiB locked memory
  setrlimit(RLIMIT_MEMLOCK, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 1 << 20; // 1 MiB file size
  setrlimit(RLIMIT_FSIZE, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 1 << 20; // 1 MiB stack
  setrlimit(RLIMIT_STACK, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 0; // no core dumps
  setrlimit(RLIMIT_CORE, &rlim);

  // Each unshare is allowed to fail; the empty bodies only consume the result.
  if (unshare(CLONE_NEWNS)) {
  }
  if (unshare(CLONE_NEWIPC)) {
  }
  if (unshare(CLONE_NEWCGROUP)) {
  }
  if (unshare(CLONE_NEWUTS)) {
  }
  if (unshare(CLONE_SYSVSEM)) {
  }
}

// Formats a printf-style string (truncated to a 1024-byte buffer) and writes
// it to an already existing file with a single write() call.
// Returns true only if the file could be opened and the whole string was
// written.
static bool write_file(const char* file, const char* what, ...)
{
  char buf[1024];
  va_list args;
  va_start(args, what);
  vsnprintf(buf, sizeof(buf), what, args);
  va_end(args);
  buf[sizeof(buf) - 1] = 0;
  int len = strlen(buf);

  int fd = open(file, O_WRONLY | O_CLOEXEC);
  if (fd == -1)
    return false;
  if (write(fd, buf, len) != len) {
    close(fd);
    return false;
  }
  close(fd);
  return true;
}

// uid/gid of the process that called do_sandbox_namespace(); written into the
// child's uid_map/gid_map.
static int real_uid;
static int real_gid;
// Stack handed to clone() for the sandboxed child.
__attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20];

// Entry point of the cloned sandbox child. Maps uid/gid 0 to the real ids,
// enters a new network namespace, builds a minimal root on a tmpfs (with
// /dev, /proc and selinuxfs), switches into it, drops CAP_SYS_PTRACE and then
// runs loop(). `arg` is not used. Never returns normally: every failure goes
// through fail(), and the end of the function calls doexit(1).
static int namespace_sandbox_proc(void* arg)
{
  sandbox_common();

  // The result of the setgroups write is ignored; the two map writes are
  // mandatory.
  write_file("/proc/self/setgroups", "deny");
  if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid))
    fail("write of /proc/self/uid_map failed");
  if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid))
    fail("write of /proc/self/gid_map failed");

  if (unshare(CLONE_NEWNET))
    fail("unshare(CLONE_NEWNET)");

  // Build the new root filesystem under ./syz-tmp/newroot.
  if (mkdir("./syz-tmp", 0777))
    fail("mkdir(syz-tmp) failed");
  if (mount("", "./syz-tmp", "tmpfs", 0, NULL))
    fail("mount(tmpfs) failed");
  if (mkdir("./syz-tmp/newroot", 0777))
    fail("mkdir failed");
  if (mkdir("./syz-tmp/newroot/dev", 0700))
    fail("mkdir failed");
  unsigned mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
  if (mount("/dev", "./syz-tmp/newroot/dev", NULL, mount_flags, NULL))
    fail("mount(dev) failed");
  if (mkdir("./syz-tmp/newroot/proc", 0700))
    fail("mkdir failed");
  if (mount(NULL, "./syz-tmp/newroot/proc", "proc", 0, NULL))
    fail("mount(proc) failed");
  if (mkdir("./syz-tmp/newroot/selinux", 0700))
    fail("mkdir failed");
  const char* selinux_path = "./syz-tmp/newroot/selinux";
  // Try /selinux first and fall back to /sys/fs/selinux; fatal only if both
  // bind mounts fail.
  if (mount("/selinux", selinux_path, NULL, mount_flags, NULL) &&
      mount("/sys/fs/selinux", selinux_path, NULL, mount_flags, NULL))
    fail("mount(selinuxfs) failed");

  if (mkdir("./syz-tmp/pivot", 0777))
    fail("mkdir failed");
  if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) {
    // pivot_root failed: just enter the tmpfs so the chroot below still works.
    if (chdir("./syz-tmp"))
      fail("chdir failed");
  } else {
    // pivot_root succeeded: detach the old root mounted at ./pivot.
    if (chdir("/"))
      fail("chdir failed");
    if (umount2("./pivot", MNT_DETACH))
      fail("umount failed");
  }
  if (chroot("./newroot"))
    fail("chroot failed");
  if (chdir("/"))
    fail("chdir failed");

  // Clear CAP_SYS_PTRACE from the effective, permitted and inheritable sets.
  struct __user_cap_header_struct cap_hdr = {};
  struct __user_cap_data_struct cap_data[2] = {};
  cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
  cap_hdr.pid = getpid();
  if (syscall(SYS_capget, &cap_hdr, &cap_data))
    fail("capget failed");
  cap_data[0].effective &= ~(1 << CAP_SYS_PTRACE);
  cap_data[0].permitted &= ~(1 << CAP_SYS_PTRACE);
  cap_data[0].inheritable &= ~(1 << CAP_SYS_PTRACE);
  if (syscall(SYS_capset, &cap_hdr, &cap_data))
    fail("capset failed");

  loop();
  doexit(1);
}

// Clones a child into new user and pid namespaces, running
// namespace_sandbox_proc() on sandbox_stack. Returns the child's pid; a clone
// failure is fatal. executor_pid and enable_tun are only packed into the
// clone argument, which the child does not read.
static int do_sandbox_namespace(int executor_pid, bool enable_tun)
{
  int pid;

  real_uid = getuid();
  real_gid = getgid();
  // Make the lowest 4096 bytes of the stack inaccessible (presumably an
  // overflow guard; the mprotect result is not checked).
  mprotect(sandbox_stack, 4096, PROT_NONE);
  void* arg = (void*)(long)((executor_pid << 1) | enable_tun);
  // The child's stack pointer starts 64 bytes below the top of sandbox_stack.
  pid = clone(namespace_sandbox_proc,
              &sandbox_stack[sizeof(sandbox_stack) - 64],
              CLONE_NEWUSER | CLONE_NEWPID, arg);
  if (pid < 0)
    fail("sandbox clone failed");
  return pid;
}

// Local definitions of the structures exchanged with the kernel through the
// IPT_SO_* socket options below.
// NOTE(review): layouts presumably mirror <linux/netfilter_ipv4/ip_tables.h>
// with a fixed 1024-byte entry table - confirm against the kernel headers.
struct ipt_getinfo {
  char name[32];
  unsigned int valid_hooks;
  unsigned int hook_entry[5];
  unsigned int underflow[5];
  unsigned int num_entries;
  unsigned int size;
};

struct ipt_get_entries {
  char name[32];
  unsigned int size;
  unsigned int pad;
  char entrytable[1024];
};

struct xt_counters {
  uint64_t pcnt, bcnt;
};

struct ipt_replace {
  char name[32];
  unsigned int valid_hooks;
  unsigned int num_entries;
  unsigned int size;
  unsigned int hook_entry[5];
  unsigned int underflow[5];
  unsigned int num_counters;
  struct xt_counters* counters;
  char entrytable[1024];
};

// Per-table snapshot: what checkpoint_net_namespace() read (info, entries)
// and the ready-to-send replacement request used by reset_net_namespace().
struct ipt_table_desc {
  const char* name;
  struct ipt_getinfo info;
  struct ipt_get_entries entries;
  struct ipt_replace replace;
  struct xt_counters counters[10];
};

static struct ipt_table_desc ipv4_tables[] = {
    {.name = "filter"}, {.name = "nat"},      {.name = "mangle"},
    {.name = "raw"},    {.name = "security"},
};

// The set and get option numbers both start at IPT_BASE_CTL; they are used
// with setsockopt and getsockopt respectively.
#define IPT_BASE_CTL 64
#define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
#define IPT_SO_GET_INFO (IPT_BASE_CTL)
#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)

// Records the current state of every table in ipv4_tables so that
// reset_net_namespace() can restore it later. Tables for which GET_INFO fails
// with EPERM, ENOENT or ENOPROTOOPT are skipped (their info stays zeroed);
// any other failure, or a table too large for the fixed buffers, is fatal.
static void checkpoint_net_namespace(void)
{
  socklen_t optlen;
  unsigned i;
  int fd;

  fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  if (fd == -1)
    fail("socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
  for (i = 0; i < sizeof(ipv4_tables) / sizeof(ipv4_tables[0]); i++) {
    struct ipt_table_desc* table = &ipv4_tables[i];
    strcpy(table->info.name, table->name);
    strcpy(table->entries.name, table->name);
    strcpy(table->replace.name, table->name);
    optlen = sizeof(table->info);
    if (getsockopt(fd, SOL_IP, IPT_SO_GET_INFO, &table->info, &optlen)) {
      switch (errno) {
      case EPERM:
      case ENOENT:
      case ENOPROTOOPT:
        continue;
      }
      fail("getsockopt(IPT_SO_GET_INFO)");
    }
    if (table->info.size > sizeof(table->entries.entrytable))
      fail("table size is too large: %u", table->info.size);
    if (table->info.num_entries >
        sizeof(table->counters) / sizeof(table->counters[0]))
      fail("too many counters: %u", table->info.num_entries);
    table->entries.size = table->info.size;
    // Request only the header plus the bytes the table actually occupies.
    optlen = sizeof(table->entries) - sizeof(table->entries.entrytable) +
             table->info.size;
    if (getsockopt(fd, SOL_IP, IPT_SO_GET_ENTRIES, &table->entries, &optlen))
      fail("getsockopt(IPT_SO_GET_ENTRIES)");
    // Pre-build the replace request from the snapshot.
    table->replace.valid_hooks = table->info.valid_hooks;
    table->replace.num_entries = table->info.num_entries;
    table->replace.counters = table->counters;
    table->replace.size = table->info.size;
    memcpy(table->replace.hook_entry, table->info.hook_entry,
           sizeof(table->replace.hook_entry));
    memcpy(table->replace.underflow, table->info.underflow,
           sizeof(table->replace.underflow));
    memcpy(table->replace.entrytable, table->entries.entrytable,
           table->info.size);
  }
  close(fd);
}

// Restores every checkpointed table whose current info or entries differ from
// the snapshot taken by checkpoint_net_namespace(). Tables whose snapshot has
// valid_hooks == 0 are skipped. Any socket option failure is fatal.
static void reset_net_namespace(void)
{
  struct ipt_get_entries entries;
  struct ipt_getinfo info;
  socklen_t optlen;
  unsigned i;
  int fd;

  memset(&info, 0, sizeof(info));
  memset(&entries, 0, sizeof(entries));
  fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  if (fd == -1)
    fail("socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
  for (i = 0; i < sizeof(ipv4_tables) / sizeof(ipv4_tables[0]); i++) {
    struct ipt_table_desc* table = &ipv4_tables[i];
    if (table->info.valid_hooks == 0)
      continue;
    strcpy(info.name, table->name);
    optlen = sizeof(info);
    if (getsockopt(fd, SOL_IP, IPT_SO_GET_INFO, &info, &optlen))
      fail("getsockopt(IPT_SO_GET_INFO)");
    if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
      // Info matches; compare the actual entries before deciding to skip.
      strcpy(entries.name, table->name);
      entries.size = table->info.size;
      optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
      if (getsockopt(fd, SOL_IP, IPT_SO_GET_ENTRIES, &entries, &optlen))
        fail("getsockopt(IPT_SO_GET_ENTRIES)");
      if (memcmp(&table->entries, &entries, optlen) == 0)
        continue;
    }
    // num_counters is taken from the table's *current* entry count.
    table->replace.num_counters = info.num_entries;
    optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) +
             table->replace.size;
    if (setsockopt(fd, SOL_IP, IPT_SO_SET_REPLACE, &table->replace, optlen))
      fail("setsockopt(IPT_SO_SET_REPLACE)");
  }
  close(fd);
}

// Recursively deletes `dir` and everything below it. Entries that are busy
// (EBUSY) are lazily unmounted and retried; EROFS is tolerated. If rmdir
// reports ENOTEMPTY the directory is rescanned, up to 100 times. Unrecoverable
// errors terminate the process through exitf().
static void remove_dir(const char* dir)
{
  DIR* dp;
  struct dirent* ep;
  int iter = 0;

retry:
  dp = opendir(dir);
  if (dp == NULL) {
    if (errno == EMFILE) {
      exitf("opendir(%s) failed due to NOFILE, exiting", dir);
    }
    exitf("opendir(%s) failed", dir);
  }
  while ((ep = readdir(dp))) {
    if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
      continue;
    char filename[FILENAME_MAX];
    snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
    struct stat st;
    if (lstat(filename, &st))
      exitf("lstat(%s) failed", filename);
    if (S_ISDIR(st.st_mode)) {
      remove_dir(filename);
      continue;
    }
    int i;
    for (i = 0;; i++) {
      if (unlink(filename) == 0)
        break;
      if (errno == EROFS) {
        break;
      }
      if (errno != EBUSY || i > 100)
        exitf("unlink(%s) failed", filename);
      // EBUSY: the name is presumably a mount point; detach it and retry.
      if (umount2(filename, MNT_DETACH))
        exitf("umount(%s) failed", filename);
    }
  }
  closedir(dp);

  int i;
  for (i = 0;; i++) {
    if (rmdir(dir) == 0)
      break;
    if (i < 100) {
      if (errno == EROFS) {
        break;
      }
      if (errno == EBUSY) {
        if (umount2(dir, MNT_DETACH))
          exitf("umount(%s) failed", dir);
        continue;
      }
      if (errno == ENOTEMPTY) {
        // Something reappeared in the directory; rescan it from the top.
        if (iter < 100) {
          iter++;
          goto retry;
        }
      }
    }
    exitf("rmdir(%s) failed", dir);
  }
}

static void test();

// Runs test() forever, each time in a freshly forked child working inside its
// own "./<iter>" directory. A child that has not exited after 5 seconds is
// killed together with its process group. After each run the directory is
// removed and the iptables state is restored.
void loop()
{
  int iter;

  checkpoint_net_namespace();
  for (iter = 0;; iter++) {
    char cwdbuf[256];
    sprintf(cwdbuf, "./%d", iter);
    if (mkdir(cwdbuf, 0777))
      fail("failed to mkdir");
    int pid = fork();
    if (pid < 0)
      fail("loop fork failed");
    if (pid == 0) {
      prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
      setpgrp();
      if (chdir(cwdbuf))
        fail("failed to chdir");
      test();
      doexit(0);
    }

    int status = 0;
    uint64_t start = current_time_ms();
    for (;;) {
      int res = waitpid(-1, &status, __WALL | WNOHANG);
      if (res == pid)
        break;
      usleep(1000);
      if (current_time_ms() - start > 5 * 1000) {
        // Timed out: kill the child's process group and the child itself,
        // then reap until the child has been collected.
        kill(-pid, SIGKILL);
        kill(pid, SIGKILL);
        while (waitpid(-1, &status, __WALL) != pid) {
        }
        break;
      }
    }
    remove_dir(cwdbuf);
    reset_net_namespace();
  }
}

uint64_t procid;

// The reproducer payload: a single unshare() call.
// NOTE(review): 0x40040000 looks like CLONE_NEWNET | CLONE_SYSVSEM - confirm
// against <sched.h>.
void test()
{
  syscall(__NR_unshare, 0x40040000);
}

// Forks 8 worker processes. Each worker loops forever: return to the starting
// directory, create a temporary directory, launch a sandboxed child and wait
// for it to exit. The parent just sleeps.
int main()
{
  char* cwd = get_current_dir_name();

  for (procid = 0; procid < 8; procid++) {
    if (fork() == 0) {
      for (;;) {
        if (chdir(cwd))
          fail("failed to chdir");
        use_temporary_dir();
        int pid = do_sandbox_namespace(procid, false);
        int status = 0;
        while (waitpid(pid, &status, __WALL) != pid) {
        }
      }
    }
  }
  sleep(1000000);
  return 0;
}