Add support for Linux namespaces using clone(2)

Ugly, ugly code yet it compiles and the tests pass. I declare success!

To start a port in a new namespace:

    % see clone(2) for CLONE_NEW* flags
    1> Port = alcove_drv:start([
            {ns, "ipc"},
            {ns, "net"},
            {ns, "uts"},
            {ns, "ns"},
            {ns, "pid"}
            ]).

    2> alcove:execvp(Port, "/sbin/ifconfig", ["/sbin/ifconfig", "-a"]).
    ok

    3> IF = alcove:stdout(Port).
    <<"lo        Link encap:Local Loopback  \n          LOOPBACK MTU:16436  Metric:1\n          RX packets:0 errors:0 droppe"...>>

TODO:
* figure out how to test this
* add ifdef'ed support for user namespaces
* test fork() still works
This commit is contained in:
Michael Santos
2014-03-02 15:40:23 -05:00
parent 54be79ec8a
commit 0adecc2a12
5 changed files with 112 additions and 58 deletions

View File

@@ -12,17 +12,20 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <signal.h>
#include "alcove.h"
#include "err.h"
#include <signal.h>
#define ALCOVE_MSG_CALL 0
#define ALCOVE_MSG_CAST (htons(1))
#define ALCOVE_MSG_CHILDIN (htons(2))
#define ALCOVE_MSG_CHILDOUT (htons(3))
#define ALCOVE_MSG_CHILDERR (htons(4))
#define PIPE_READ 0
#define PIPE_WRITE 1
static int alcove_fork(alcove_state_t *);
static void alcove_ctl(int fd);
@@ -30,6 +33,9 @@ static void alcove_select(alcove_state_t *);
static alcove_msg_t *alcove_msg(int);
static void usage(alcove_state_t *);
static int alcove_fork_child(void *arg);
static int alcove_fork_parent(void *arg);
static ssize_t alcove_child_proxy(int, int, u_int16_t);
static int alcove_write(int fd, u_int16_t, ETERM *);
static ssize_t alcove_read(int, void *, ssize_t);
@@ -72,8 +78,20 @@ main(int argc, char *argv[])
signal(SIGCHLD, gotsig);
while ( (ch = getopt(argc, argv, "hv")) != -1) {
while ( (ch = getopt(argc, argv, "n:hv")) != -1) {
switch (ch) {
case 'n':
#ifdef HAVE_NAMESPACES
if (!strncmp("ipc", optarg, 3)) ap->ns |= CLONE_NEWIPC;
else if (!strncmp("net", optarg, 3)) ap->ns |= CLONE_NEWNET;
else if (!strncmp("ns", optarg, 2)) ap->ns |= CLONE_NEWNS;
else if (!strncmp("pid", optarg, 3)) ap->ns |= CLONE_NEWPID;
else if (!strncmp("uts", optarg, 3)) ap->ns |= CLONE_NEWUTS;
else usage(ap);
#else
usage(ap);
#endif
break;
case 'v':
ap->verbose++;
break;
@@ -93,61 +111,90 @@ main(int argc, char *argv[])
/*
 * alcove_fork: create the control socketpair and the stdin/stdout/stderr
 * pipes, then start the child process: via clone(2) with the requested
 * namespace flags when HAVE_NAMESPACES is defined, via fork(2) otherwise.
 * Returns 0; failures of calloc/socketpair/pipe/clone/fork are reported
 * through erl_err_sys().
 *
 * NOTE(review): this span appears to interleave the removed and the added
 * lines of the change (both the local ctl/fdin/fdout/fderr arrays and the
 * ap->fd.* members are used, and PIPE_READ/PIPE_WRITE are defined inside
 * the body), so it is not compilable as shown -- confirm against the
 * post-commit file.
 * NOTE(review): in the clone() branch the return value is only tested for
 * < 0 and is not stored in ap->pid, and child_stack is not freed in the
 * lines visible here -- verify.
 */
static int
alcove_fork(alcove_state_t *ap)
{
int ctl[2] = {0};
int fdin[2] = {0};
int fdout[2] = {0};
int fderr[2] = {0};
#ifdef HAVE_NAMESPACES
/* Stack handed to clone(); stack_top points one past the end of the
 * allocation and is what gets passed to clone() below. */
const int STACK_SIZE = 65536;
char *child_stack = NULL;
char *stack_top;
if (socketpair(AF_UNIX, SOCK_STREAM, 0, ctl) < 0)
child_stack = calloc(STACK_SIZE, 1);
if (!child_stack)
erl_err_sys("calloc");
stack_top = child_stack + STACK_SIZE;
#endif
/* Control channel between the two processes. */
if (socketpair(AF_UNIX, SOCK_STREAM, 0, ap->fd.ctl) < 0)
erl_err_sys("socketpair");
if ( (pipe(fdin) < 0)
|| (pipe(fdout) < 0)
|| (pipe(fderr) < 0))
/* One pipe each for the child's stdin, stdout and stderr. */
if ( (pipe(ap->fd.in) < 0)
|| (pipe(ap->fd.out) < 0)
|| (pipe(ap->fd.err) < 0))
erl_err_sys("pipe");
ap->pid = fork();
#ifdef HAVE_NAMESPACES
/* ap->ns carries the CLONE_NEW* flags; SIGCHLD is OR'ed in alongside them. */
if (clone(alcove_fork_child, stack_top, ap->ns|SIGCHLD, ap) < 0)
erl_err_sys("clone");
#define PIPE_READ 0
#define PIPE_WRITE 1
(void)alcove_fork_parent(ap);
#else
ap->pid = fork();
switch (ap->pid) {
case -1:
erl_err_sys("fork");
case 0:
if ( (close(ctl[PIPE_WRITE]) < 0)
|| (close(fdin[PIPE_WRITE]) < 0)
|| (close(fdout[PIPE_READ]) < 0)
|| (close(fderr[PIPE_READ]) < 0))
erl_err_sys("close");
if ( (dup2(fdin[PIPE_READ], STDIN_FILENO) < 0)
|| (dup2(fdout[PIPE_WRITE], STDOUT_FILENO) < 0)
|| (dup2(fderr[PIPE_WRITE], STDERR_FILENO) < 0))
erl_err_sys("dup2");
alcove_ctl(ctl[0]);
/* Child side of the fork. */
(void)alcove_fork_child(ap);
break;
default:
if ( (close(ctl[PIPE_READ]) < 0)
|| (close(fdin[PIPE_READ]) < 0)
|| (close(fdout[PIPE_WRITE]) < 0)
|| (close(fderr[PIPE_WRITE]) < 0))
erl_err_sys("close");
ap->ctl = ctl[PIPE_WRITE];
ap->fdin = fdin[PIPE_WRITE];
ap->fdout = fdout[PIPE_READ];
ap->fderr = fderr[PIPE_READ];
/* Parent side of the fork. */
(void)alcove_fork_parent(ap);
break;
}
#endif
return 0;
}
/*
 * alcove_fork_child: child-side setup after clone()/fork().
 *
 * arg is the alcove_state_t holding the descriptor pairs. The function
 * closes the four descriptor ends this process does not use, redirects
 * stdin/stdout/stderr onto the remaining pipe ends and then hands the
 * read side of the control socket to alcove_ctl(). Returns 0.
 *
 * The signature matches the callback type expected by clone(2).
 */
static int
alcove_fork_child(void *arg)
{
    alcove_state_t *state = arg;
    int rv;

    /* Close the unused ends, stopping at the first failure so errno
     * still describes the call that failed. */
    rv = close(state->fd.ctl[PIPE_WRITE]);
    if (rv >= 0)
        rv = close(state->fd.in[PIPE_WRITE]);
    if (rv >= 0)
        rv = close(state->fd.out[PIPE_READ]);
    if (rv >= 0)
        rv = close(state->fd.err[PIPE_READ]);
    if (rv < 0)
        erl_err_sys("close");

    /* Wire the pipes up as the standard descriptors. */
    rv = dup2(state->fd.in[PIPE_READ], STDIN_FILENO);
    if (rv >= 0)
        rv = dup2(state->fd.out[PIPE_WRITE], STDOUT_FILENO);
    if (rv >= 0)
        rv = dup2(state->fd.err[PIPE_WRITE], STDERR_FILENO);
    if (rv < 0)
        erl_err_sys("dup2");

    alcove_ctl(state->fd.ctl[PIPE_READ]);

    return 0;
}
/*
 * alcove_fork_parent: parent-side setup after clone()/fork().
 *
 * arg is the alcove_state_t holding the descriptor pairs. The function
 * closes the four descriptor ends used by the other process and records
 * the remaining ends in ap->ctl, ap->fdin, ap->fdout and ap->fderr.
 * Returns 0.
 */
static int
alcove_fork_parent(void *arg)
{
    alcove_state_t *state = arg;
    int rv;

    /* Close the unused ends, stopping at the first failure so errno
     * still describes the call that failed. */
    rv = close(state->fd.ctl[PIPE_READ]);
    if (rv >= 0)
        rv = close(state->fd.in[PIPE_READ]);
    if (rv >= 0)
        rv = close(state->fd.out[PIPE_WRITE]);
    if (rv >= 0)
        rv = close(state->fd.err[PIPE_WRITE]);
    if (rv < 0)
        erl_err_sys("close");

    /* Publish the descriptors kept by this process. */
    state->ctl   = state->fd.ctl[PIPE_WRITE];
    state->fdin  = state->fd.in[PIPE_WRITE];
    state->fdout = state->fd.out[PIPE_READ];
    state->fderr = state->fd.err[PIPE_READ];

    return 0;
}
static void
alcove_ctl(int fd)
{
@@ -449,13 +496,11 @@ usage(alcove_state_t *ap)
(void)fprintf(stderr, "%s %s\n",
__progname, ALCOVE_VERSION);
(void)fprintf(stderr,
"usage: %s -n <name> <options>\n"
" -n <name> container name\n"
" -o <path> error log\n"
" -P <path> LXC path\n"
" -v verbose mode\n"
" -d <option> debug: nodaemonize, nocloseallfds\n"
" -t <type> container type (permanent, transient, temporary)\n",
"usage: %s <options>\n"
#ifdef HAVE_NAMESPACES
" -n <namespace> new namespace: ipc, net, ns, pid, uts\n"
#endif
" -v verbose mode\n",
__progname
);

View File

@@ -12,6 +12,9 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#define _GNU_SOURCE
#include <sched.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
@@ -41,6 +44,13 @@ typedef struct {
int fdin;
int fdout;
int fderr;
int ns;
struct {
int ctl[2];
int in[2];
int out[2];
int err[2];
} fd;
} alcove_state_t;
typedef struct {

View File

@@ -12,10 +12,10 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "alcove.h"
#include "alcove_cmd.h"
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
@@ -25,9 +25,6 @@
#include <sys/types.h>
#include "alcove.h"
#include "alcove_cmd.h"
static char **alcove_list_to_argv(ETERM *);
static void alcove_free_argv(char **);

View File

@@ -1,4 +1,6 @@
%% Compiler and linker flags for the port executable.
%% The "linux"-tagged entries add -DHAVE_NAMESPACES, the macro that guards the
%% clone(2)/namespace code paths in the C sources; the untagged entries are
%% the same flags without that define.
%% NOTE(review): presumably the tagged entries apply only when the build
%% platform matches "linux" -- confirm against the rebar port compiler docs.
%% NOTE(review): -DHAVE_NAMESPACES also appears in EXE_LDFLAGS, where a
%% preprocessor define looks unnecessary -- verify.
{port_env, [
{"linux", "EXE_LDFLAGS", "$ALCOVE_LDFLAGS -DHAVE_NAMESPACES -lpthread $EXE_LDFLAGS"},
{"linux", "EXE_CFLAGS", "$EXE_CFLAGS -DHAVE_NAMESPACES $ALCOVE_CFLAGS"},
{"EXE_LDFLAGS", "$ALCOVE_LDFLAGS -lpthread $EXE_LDFLAGS"},
{"EXE_CFLAGS", "$EXE_CFLAGS $ALCOVE_CFLAGS"}
]}.

View File

@@ -78,28 +78,28 @@ getopts(Options) when is_list(Options) ->
Exec = proplists:get_value(exec, Options, "sudo"),
Progname = proplists:get_value(progname, Options, progname()),
Expand = lists:map(fun
Opt = lists:map(fun
(verbose) -> {verbose, 1};
({ns, N}) -> {namespace, N};
(N) when is_atom(N) -> {N, true};
({_,_} = N) -> N
end, Options),
Opt = lists:ukeysort(1, Expand),
Switches = lists:append([ optarg(N) || N <- Opt ]),
[Cmd|Argv] = [ N || N <- string:tokens(Exec, " ") ++ [Progname|Switches], N /= ""],
[find_executable(Cmd)|Argv].
%% Translate one normalised option tuple into command line arguments.
%%   {verbose, Level}  -> a single "-v...v" switch with Level copies of "v"
%%   {namespace, Name} -> "-n" followed by Name
%%   anything else     -> no arguments ("")
optarg({verbose, Level}) ->
    Flags = string:copies("v", Level),
    switch(Flags);
optarg({namespace, Name}) ->
    switch("n", Name);
optarg(_Other) ->
    "".
%% Build a one-element argument list holding Flag prefixed with "-".
switch(Flag) ->
    Option = lists:concat(["-", Flag]),
    [Option].
%switch(Switch, Arg) when is_binary(Arg) ->
% switch(Switch, binary_to_list(Arg));
%switch(Switch, Arg) ->
% [lists:concat(["-", Switch]), Arg].
%% Build a two-element argument list: Flag prefixed with "-", then its value.
%% A binary value is converted to a list first; any other value is passed
%% through unchanged.
switch(Flag, <<_/binary>> = Value) ->
    switch(Flag, binary_to_list(Value));
switch(Flag, Value) ->
    Option = lists:concat(["-", Flag]),
    [Option, Value].
find_executable(Exe) ->
case os:find_executable(Exe) of