用户命名空间:如何仅为给定程序挂载文件夹

用户命名空间:如何仅为给定程序挂载文件夹

我想在没有 root 访问权限的情况下,在非 FHS 系统 (NixOS) 上伪造一个 FHS 系统。为此,我需要使用用户命名空间在根目录下挂载一些文件夹(例如把 /tmp/mylib 挂载到 /lib)(我没有看到任何其他解决方案)。

不幸的是,我找不到如何让它工作:我试图遵循本教程,但是当我复制代码时它失败了(我什至无法启动 bash):

$ gcc userns_child_exec.c -lcap -o userns_child_exec
$ id
uid=1000(myname) gid=100(users) groups=100(users),1(wheel),17(audio),20(lp),57(networkmanager),59(scanner),131(docker),998(vboxusers),999(adbusers)

$ ./userns_child_exec -U -M '0 1000 1' -G '0 100 1' bash
write /proc/535313/gid_map: Operation not permitted
bash: initialize_job_control: no job control in background: Bad file descriptor

[nix-shell:~/Documents/Logiciels/Nix_bidouille/2022_04_26_-_nix_fake_FHS_user_namespace/demo]$ 
[root@bestos:~/Documents/Logiciels/Nix_bidouille/2022_04_26_-_nix_fake_FHS_user_namespace/demo]# 
exit

(注意显示了bash的提示,但是之后我什么也打不出来,直接退出了)

知道如何让它发挥作用吗?

代码:

/* userns_child_exec.c

   Copyright 2013, Michael Kerrisk
   Licensed under GNU General Public License v2 or later

   Create a child process that executes a shell command in new
   namespace(s); allow UID and GID mappings to be specified when
   creating a user namespace.
*/
#define _GNU_SOURCE
#include <sched.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <signal.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <errno.h>

/* errExit(msg): report a failed call and terminate.
   Calls perror(msg), which prints 'msg' followed by the text for the
   current value of 'errno', then ends the calling process with
   exit(EXIT_FAILURE). The do { ... } while (0) wrapper makes the macro
   expand to a single statement, so it is safe in an unbraced if/else. */

#define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \
                        } while (0)

/* Arguments handed from main() to childFunc() through clone()'s
   'arg' pointer. */

struct child_args {
    char **argv;        /* Command to be executed by child, with arguments;
                           given unchanged to execvp() */
    int    pipe_fd[2];  /* Pipe used to synchronize parent and child:
                           the child reads pipe_fd[0] and proceeds once
                           it sees end-of-file, which happens when the
                           parent closes pipe_fd[1] */
};

/* Set to 1 by the -v option; when nonzero, main() prints the child's
   PID and a message just before terminating. */
static int verbose;

/* Print the usage message for this program on stderr and terminate
   with EXIT_FAILURE. 'pname' is the program name shown in the first
   line of the message. This function does not return. */

static void
usage(char *pname)
{
    /* Help text for the options and the map-string syntax. Each entry
       is printed with a four-space indent, including the entries that
       consist only of a newline. */
    static const char *const help_lines[] = {
        "-i          New IPC namespace\n",
        "-m          New mount namespace\n",
        "-n          New network namespace\n",
        "-p          New PID namespace\n",
        "-u          New UTS namespace\n",
        "-U          New user namespace\n",
        "-M uid_map  Specify UID map for user namespace\n",
        "-G gid_map  Specify GID map for user namespace\n",
        "            If -M or -G is specified, -U is required\n",
        "-v          Display verbose messages\n",
        "\n",
        "Map strings for -M and -G consist of records of the form:\n",
        "\n",
        "    ID-inside-ns   ID-outside-ns   len\n",
        "\n",
        "A map string can contain multiple records, separated by commas;\n",
        "the commas are replaced by newlines before writing to map files.\n",
    };
    const size_t line_count = sizeof(help_lines) / sizeof(help_lines[0]);
    size_t k;

    fprintf(stderr, "Usage: %s [options] cmd [arg...]\n\n", pname);
    fputs("Create a child process that executes a shell command "
          "in a new user namespace,\n"
          "and possibly also other new namespace(s).\n\n", stderr);
    fputs("Options can be:\n\n", stderr);

    for (k = 0; k < line_count; k++)
        fprintf(stderr, "    %s", help_lines[k]);

    exit(EXIT_FAILURE);
}

/* Write a UID or GID mapping to the mapping file 'map_file'.

   'mapping' holds one or more records of the form

       ID-inside-ns    ID-outside-ns   length

   Records in the mapping file are newline-delimited. Newlines are
   awkward to supply on a command line, so records in 'mapping' may be
   separated by commas instead: every comma is overwritten with a
   newline (the caller's buffer is modified in place) before the whole
   string is written to 'map_file' with one write() call.

   If the file cannot be opened, or the complete string is not written,
   a message is printed on stderr and the process exits with
   EXIT_FAILURE. */

static void
update_map(char *mapping, char *map_file)
{
    size_t nbytes = strlen(mapping);    /* Bytes to write to the file */
    ssize_t nwritten;
    char *comma;
    int map_fd;

    /* Turn each record separator ',' into '\n' */

    for (comma = strchr(mapping, ',');
            comma != NULL;
            comma = strchr(comma + 1, ','))
        *comma = '\n';

    map_fd = open(map_file, O_RDWR);
    if (map_fd == -1) {
        fprintf(stderr, "open %s: %s\n", map_file, strerror(errno));
        exit(EXIT_FAILURE);
    }

    /* Anything other than a complete write counts as failure */

    nwritten = write(map_fd, mapping, nbytes);
    if (nwritten < 0 || (size_t) nwritten != nbytes) {
        fprintf(stderr, "write %s: %s\n", map_file, strerror(errno));
        exit(EXIT_FAILURE);
    }

    close(map_fd);
}

/* Start function for the cloned child. 'arg' points to the
   struct child_args prepared by main(). The child waits for the parent
   to finish writing the UID and GID maps, then executes the requested
   command. On success execvp() does not return; on any failure the
   child exits with EXIT_FAILURE. */

static int
childFunc(void *arg)
{
    struct child_args *ca = arg;
    char dummy;
    ssize_t nread;

    /* The parent signals "mappings written" by closing its write end of
       the pipe (see the comment in main()). We only see end-of-file if
       no write descriptor remains open, so drop our own copy first. */

    close(ca->pipe_fd[1]);

    /* Block until end-of-file; a byte of data or an error is a failure */

    nread = read(ca->pipe_fd[0], &dummy, 1);
    if (nread != 0) {
        fprintf(stderr, "Failure in child: read from pipe returned != 0\n");
        exit(EXIT_FAILURE);
    }

    /* Execute a shell command */

    execvp(ca->argv[0], ca->argv);
    errExit("execvp");
}

/* Size, in bytes, of the buffer used as the cloned child's stack */
#define STACK_SIZE (1024 * 1024)

/* Space for child's stack. main() passes the address just past the end
   of this buffer (child_stack + STACK_SIZE) as clone()'s stack
   argument. */
static char child_stack[STACK_SIZE];    /* Space for child's stack */

/* Write 'str' (in practice "deny") to /proc/PID/setgroups of the child
   'child_pid'.

   Since Linux 3.19 a process that lacks CAP_SETGID in the parent user
   namespace may write /proc/PID/gid_map only after setgroups(2) has
   been permanently disabled in the new user namespace by writing
   "deny" to this file; otherwise the gid_map write fails with EPERM.

   Failures here are reported but are not fatal: on kernels that predate
   the file, open() fails with ENOENT and no restriction applies, so
   that case is silently ignored. Any real problem will surface when the
   caller subsequently writes gid_map. */

static void
proc_setgroups_write(pid_t child_pid, const char *str)
{
    char setgroups_path[PATH_MAX];
    int fd;

    snprintf(setgroups_path, PATH_MAX, "/proc/%ld/setgroups",
            (long) child_pid);

    fd = open(setgroups_path, O_RDWR);
    if (fd == -1) {
        if (errno != ENOENT)
            fprintf(stderr, "open %s: %s\n", setgroups_path,
                    strerror(errno));
        return;
    }

    if (write(fd, str, strlen(str)) == -1)
        fprintf(stderr, "write %s: %s\n", setgroups_path,
                strerror(errno));

    close(fd);
}

/* Parse the options, create a child in the requested new namespace(s),
   write the UID/GID maps for the child if -M/-G were given, release the
   child so that it executes the command, and wait for it to finish.

   Exits with EXIT_SUCCESS once the child has been reaped (the child's
   own exit status is not propagated), or with EXIT_FAILURE on a usage
   error or a failed system call. */

int
main(int argc, char *argv[])
{
    int flags, opt;
    pid_t child_pid;
    struct child_args args;
    char *uid_map, *gid_map;
    char map_path[PATH_MAX];

    /* Parse command-line options. The initial '+' character in
       the final getopt() argument prevents GNU-style permutation
       of command-line options. That's useful, since sometimes
       the 'command' to be executed by this program itself
       has command-line options. We don't want getopt() to treat
       those as options to this program. */

    flags = 0;
    verbose = 0;
    gid_map = NULL;
    uid_map = NULL;
    while ((opt = getopt(argc, argv, "+imnpuUM:G:v")) != -1) {
        switch (opt) {
        case 'i': flags |= CLONE_NEWIPC;        break;
        case 'm': flags |= CLONE_NEWNS;         break;
        case 'n': flags |= CLONE_NEWNET;        break;
        case 'p': flags |= CLONE_NEWPID;        break;
        case 'u': flags |= CLONE_NEWUTS;        break;
        case 'v': verbose = 1;                  break;
        case 'M': uid_map = optarg;             break;
        case 'G': gid_map = optarg;             break;
        case 'U': flags |= CLONE_NEWUSER;       break;
        default:  usage(argv[0]);               /* Does not return */
        }
    }

    /* -M or -G without -U is nonsensical */

    if ((uid_map != NULL || gid_map != NULL) &&
            !(flags & CLONE_NEWUSER))
        usage(argv[0]);

    /* A command is mandatory: without one, argv[optind] is the NULL
       terminator and the child would call execvp() with a NULL file */

    if (optind >= argc)
        usage(argv[0]);

    args.argv = &argv[optind];

    /* We use a pipe to synchronize the parent and child, in order to
       ensure that the parent sets the UID and GID maps before the child
       calls execve(). This ensures that the child maintains its
       capabilities during the execve() in the common case where we
       want to map the child's effective user ID to 0 in the new user
       namespace. Without this synchronization, the child would lose
       its capabilities if it performed an execve() with nonzero
       user IDs (see the capabilities(7) man page for details of the
       transformation of a process's capabilities during execve()). */

    if (pipe(args.pipe_fd) == -1)
        errExit("pipe");

    /* Create the child in new namespace(s). The stack argument is the
       address just past the end of 'child_stack'. */

    child_pid = clone(childFunc, child_stack + STACK_SIZE,
                      flags | SIGCHLD, &args);
    if (child_pid == -1)
        errExit("clone");

    /* Parent falls through to here */

    if (verbose)
        printf("%s: PID of child created by clone() is %ld\n",
                argv[0], (long) child_pid);

    /* Update the UID and GID maps in the child */

    if (uid_map != NULL) {
        snprintf(map_path, PATH_MAX, "/proc/%ld/uid_map",
                (long) child_pid);
        update_map(uid_map, map_path);
    }
    if (gid_map != NULL) {

        /* An unprivileged writer must disable setgroups(2) in the new
           user namespace before it is allowed to write gid_map
           (Linux 3.19 and later); see proc_setgroups_write(). */

        proc_setgroups_write(child_pid, "deny");

        snprintf(map_path, PATH_MAX, "/proc/%ld/gid_map",
                (long) child_pid);
        update_map(gid_map, map_path);
    }

    /* Close the write end of the pipe, to signal to the child that we
       have updated the UID and GID maps */

    close(args.pipe_fd[1]);

    if (waitpid(child_pid, NULL, 0) == -1)      /* Wait for child */
        errExit("waitpid");

    if (verbose)
        printf("%s: terminating\n", argv[0]);

    exit(EXIT_SUCCESS);
}

编辑

实际上,这很奇怪:写入 gid 映射时出错,但写入 uid 映射却成功了:

[leo@bestos:~]$ cat /proc/582197/gid_map 

[leo@bestos:~]$ cat /proc/582197/uid_map 
         0       1000          1

[leo@bestos:~]$ ll /proc/582197/gid_map 
-rw-r--r-- 1 leo users 0 mai   18 09:09 /proc/582197/gid_map

[leo@bestos:~]$ ll /proc/582197/uid_map 
-rw-r--r-- 1 leo users 0 mai   18 09:09 /proc/582197/uid_map

答案1

您正在阅读的教程写于 2013 年,早于 2015 年内核 3.19 为处理 gid 映射而加入的一项重要附加限制。引自 user_namespaces(7) 手册页:

在写入 /proc/[pid]/gid_map 之前,先向 /proc/[pid]/setgroups 文件写入 "deny",将在该用户命名空间中永久禁用 setgroups(2),并允许在父用户命名空间中不具备 CAP_SETGID 能力的情况下写入 /proc/[pid]/gid_map。

/proc/[pid]/setgroups 文件是在 Linux 3.19 中添加的,但因为它解决了一个安全问题,所以被向后移植到了许多更早的稳定内核系列。该问题涉及权限形如 "rwx---rwx" 的文件。此类文件给予“组”的权限比给予“其他”的权限要少。这意味着使用 setgroups(2) 丢弃组,可能使进程获得它以前没有的文件访问权限。在用户命名空间出现之前,这不是一个问题 [...] 这使得原本无特权的用户可以丢弃组,从而获得他们以前没有的文件访问权限。 [...]

因此,您必须添加代码,在写入 gid_map 之前,先把单词 deny 写入正确的文件,其路径可以用 snprintf(map_path, PATH_MAX, "/proc/%ld/setgroups", (long) child_pid); 构造。

整个代码可以用这个普遍存在的命令替换:

unshare --user --map-root-user --mount -- bash

(其中隐含了 --setgroups=deny)

同样,如果没有特权,则只能映射一个 uid/gid。因此,挂载完成之后,要模拟原始用户(尽管并不完全),唯一可行的办法就是映射回原始用户。这同样可以用足够新的 unshare 版本来完成,即再 unshare 出第二层(级联的)用户命名空间:

# unshare --user --map-user=1000 --map-group=100 -- bash

那么这个命名空间中将只有一个 uid。甚至 root 也不再存在(它和任何其他未映射的 uid 一样,被显示为映射到 nobody)。


笔记

与其他命名空间和功能之间还存在其他交互,下面是一个例子:

在拥有某进程 PID 命名空间的用户命名空间中持有 CAP_SYS_ADMIN,允许(自 Linux 3.8 起)该进程挂载 /proc 文件系统。

因此,添加 --pid --fork 以满足上述限制之后,以后如有需要,就可以把 /proc 挂载到现有的 /proc 之上;不过通常只有在一开始就使用了 --pid 时才需要这样做(也可以通过添加 --mount-proc 方便地完成)。

同样,由于与网络命名空间的交互,挂载 /sys 需要 --net。


把以上内容结合起来,用于 OP 的例子,即用 /tmp/o 替换 /lib 的内容:

unshare --user --map-root-user --mount -- \
    sh -c 'mount --bind /tmp/o /lib; exec unshare --user --map-user=1000 --map-group=100 -- bash'

注意:一旦完成第一次映射,就无法再正常使用大多数特权命令:用户命名空间中要么只有单个 UID 0 可用,要么(在下一层嵌套的用户命名空间中)只有单个 UID 1000 可用。特权命令需要在两个 UID(其中之一通常是 root)之间切换,而其中一个并不可用,因此通常会在某个系统调用中以 EINVAL 失败。

要做得更好,首先需要特权命令的帮助,并需要 root 访问权限来配置额外的权限。例如,要由无特权用户引导一个完整的容器,通常需要 setuid root 的命令 newuidmap 和 newgidmap。

答案2

只是为了补充 A.B 的出色答案,并让 A.B 所写的注释更加显眼:如果文件夹需要挂载到一个尚不存在的文件夹上,可以在 unshare 内部使用 chroot:

$ unshare --user --map-root-user --mount-proc --pid --fork
# cd /tmp/ && mkdir mychroot && cd mychroot
# for folder in $(ls / | grep -v sys); do echo "$folder"; mkdir "./$folder"; mount --rbind "/$folder" "./$folder"; done; mkdir sys; mount --rbind /sys sys/
# mkdir lib
# chroot .
# ls /
bin  boot  dev  etc  home  lib  mnt  nix  opt  proc  root  run  srv  sys  tmp  usr  var

请注意,这里不能使用 sudo,所有操作都是以普通用户身份执行的。我会试着看看 newuidmap 能否在这里提供帮助。

(注意:NixOS 用户可能需要使用 /run/current-system/sw/bin/mount 来代替 mount,参见 https://github.com/NixOS/nixpkgs/issues/42117)

相关内容