delta filesystem prototype

classic Classic list List threaded Threaded
54 messages Options
123
Reply | Threaded
Open this post in threaded view
|

delta filesystem prototype

Miklos Szeredi
Here is my first try at a "delta" filesystem.  It takes two
directories, one of which is a read-only base, and the other is where
the differences are stored.  It stores data, metadata and directory
modifications without copying up whole files from the read-only
branch.

The layout of the delta store may look similar to the writable branch
of a union fs, but this is basically just coincidence (it was easier
to start out this way).

Currently it's implemented with FUSE and it's not optimized at all, so
performance may suck in some cases.  But I think this is a useful
concept and a better model than trying to fit writable branches into
a union filesystem.

Comments and bug reports are welcome.

Thanks,
Miklos


/*
  Delta filesystem
  Copyright (C) 2009  Miklos Szeredi <[hidden email]>

  This program can be distributed under the terms of the GNU GPL.

  gcc -Wall `pkg-config fuse --cflags --libs` deltafs.c -o deltafs

  usage: deltafs [opts] baseroot deltaroot mountpoint
*/

#define FUSE_USE_VERSION 26
#define _GNU_SOURCE

#include <fuse.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <dirent.h>
#include <string.h>
#include <assert.h>
#include <err.h>
#include <sys/time.h>

/*
 * File data is tracked in blocks of BLOCK_SIZE bytes; BLOCK_MASK selects
 * the offset-within-block bits (used to assert block alignment).
 */
#define BLOCK_SIZE 4096
#define BLOCK_MASK 4095ULL

/*
 * MAGIC_SEQ is inserted between an object's delta path and the suffix
 * "merge" or "redir" to name that object's bookkeeping entries.
 * REDIR_NULL is the redirect symlink target that makes the base path
 * empty (see check_redirect()).
 */
#define MAGIC_SEQ "\\$@"
#define REDIR_NULL "(null)"

/*
 * Path prefixes of the read-only base tree and of the delta store.
 * NOTE(review): they are assigned outside the part of the file shown
 * here (presumably from the command line) -- confirm.
 */
static char *baseroot = NULL;
static char *deltaroot = NULL;

/* Fixed-size buffer used for every path built by this file. */
typedef char pathstr[4096];

/*
 * The set of real paths that belong to one object of the mounted tree.
 * Filled in by get_deltaheader() / get_deltaheader_len().
 */
struct deltaheader {
        pathstr dmpath;         /* merge marker/bitmap: built by get_mergepath() */
        pathstr ddpath;         /* object in the delta store: get_deltapath() */
        pathstr drpath;         /* redirect symlink: get_redirpath() */
        pathstr bpath;          /* object in the base tree: get_basepath();
                                   empty string when redirected to "(null)" */
};

/*
 * Probe whether anything exists at path, without following a final symlink.
 *
 * Returns 1 when lstat() succeeds, 0 when it fails with ENOENT, and -EIO
 * (after printing a warning) for any other lstat() failure.
 */
static int check_exist(const char *path)
{
        struct stat st;

        if (lstat(path, &st) != -1)
                return 1;

        if (errno == ENOENT)
                return 0;

        warn("lstat %s", path);
        return -EIO;
}

/*
 * Build the delta-store name of an object: deltaroot followed by at most
 * the first len bytes of path.  The result is truncated to fit a pathstr.
 */
static void get_deltapath(const char *path, size_t len, pathstr dpath)
{
        int prefix = (int) len;

        snprintf(dpath, sizeof(pathstr), "%s%.*s", deltaroot, prefix, path);
}

/*
 * Build the name of the merge marker for an object: the delta-store name
 * of the first len bytes of path, followed by MAGIC_SEQ and "merge".
 * The result is truncated to fit a pathstr.
 */
static void get_mergepath(const char *path, size_t len, pathstr mpath)
{
        int prefix = (int) len;

        snprintf(mpath, sizeof(pathstr), "%s%.*s" MAGIC_SEQ "merge",
                 deltaroot, prefix, path);
}

/*
 * Build the name of the redirect symlink for an object: the delta-store
 * name of the first len bytes of path, followed by MAGIC_SEQ and "redir".
 * The result is truncated to fit a pathstr.
 */
static void get_redirpath(const char *path, size_t len, pathstr rpath)
{
        int prefix = (int) len;

        snprintf(rpath, sizeof(pathstr), "%s%.*s" MAGIC_SEQ "redir",
                 deltaroot, prefix, path);
}

/*
 * Join a directory path and an entry name with a '/' into newpath.
 * The result is truncated to fit a pathstr.
 */
static void create_path(pathstr newpath, const char *path, const char *name)
{
        const size_t cap = sizeof(pathstr);

        snprintf(newpath, cap, "%s/%s", path, name);
}

/*
 * Scan backwards from the character just before s and return a pointer
 * to the nearest '/'.  The scan never goes below beg: when no slash is
 * found, beg itself is returned.  Callers pass s > beg.
 */
static const char *last_slash(const char *s, const char *beg)
{
        const char *p = s - 1;

        while (p > beg && *p != '/')
                p--;

        return p;
}

/*
 * Read the redirect symlink rpath and, if there is one, compute the base
 * path it implies.
 *
 * rpath: redirect symlink to read
 * end:   remainder of the original path below the redirected object
 * len:   maximum number of bytes of end to append
 * bpath: output; set to "" for a "(null)" redirect, otherwise to
 *        baseroot + link target (minus its leading '*') + end
 *
 * Returns 1 if a redirect was found, 0 if rpath does not exist, and -EIO
 * on any other readlink() failure or when the target does not start
 * with '*'.
 */
static int check_redirect(const char *rpath, const char *end, size_t len,
                          pathstr bpath)
{
        pathstr target;
        int n;

        n = readlink(rpath, target, sizeof(target) - 1);
        if (n == -1) {
                if (errno == ENOENT)
                        return 0;

                warn("readlink %s", rpath);
                return -EIO;
        }
        target[n] = '\0';       /* readlink() does not terminate */

        if (strcmp(target, REDIR_NULL) == 0) {
                bpath[0] = '\0';
                return 1;
        }

        if (target[0] != '*') {
                warnx("invalid redirect in %s: %s\n", rpath, target);
                return -EIO;
        }

        snprintf(bpath, sizeof(pathstr), "%s%s%.*s", baseroot, target + 1,
                 (int) len, end);
        return 1;
}

static int get_basepath(struct deltaheader *head, const char *path, size_t len)
{
        int res;
        const char *s = path + strlen(path);

        res = check_redirect(head->drpath, "", 0, head->bpath);
        if (res < 0)
                return res;

        while (!res) {
                pathstr rpath;

                s = last_slash(s, path);
                if (s == path) {
                        snprintf(head->bpath, sizeof(pathstr), "%s%.*s",
                                 baseroot, (int) len, path);
                        break;
                }

                get_redirpath(path, s - path, rpath);
                res = check_redirect(rpath, s, len - (s - path), head->bpath);
                if (res < 0)
                        return res;
        }

        return 0;
}

/*
 * Fill in every path of head for the object named by the first len bytes
 * of path.  Returns the result of get_basepath(): 0 or a negative errno.
 */
static int get_deltaheader_len(struct deltaheader *head, const char *path,
                               unsigned int len)
{
        /* the three delta-side names are independent of each other */
        get_redirpath(path, len, head->drpath);
        get_mergepath(path, len, head->dmpath);
        get_deltapath(path, len, head->ddpath);

        /* needs head->drpath, so it has to come last */
        return get_basepath(head, path, len);
}

/*
 * Fill in head for the whole of path.  Returns 0 or a negative errno.
 */
static int get_deltaheader(struct deltaheader *head, const char *path)
{
        size_t whole = strlen(path);

        return get_deltaheader_len(head, path, whole);
}

/*
 * Open path read-only, read up to size bytes at offset into buf and
 * close the file again.
 *
 * Returns the number of bytes read, or -errno if open() or pread() fails.
 */
static int read_file(char *path, char *buf, loff_t offset, size_t size)
{
        int nread;
        int fd = open(path, O_RDONLY);

        if (fd == -1)
                return -errno;

        nread = pread(fd, buf, size, offset);
        if (nread == -1)
                nread = -errno; /* capture before close() can change errno */

        close(fd);
        return nread;
}

/*
 * Open the existing file path write-only, write size bytes from buf at
 * offset and close the file again.
 *
 * Returns the number of bytes written, or -errno if open() or pwrite()
 * fails.
 */
static int write_file(char *path, const char *buf, loff_t offset, size_t size)
{
        int nwritten;
        int fd = open(path, O_WRONLY);

        if (fd == -1)
                return -errno;

        nwritten = pwrite(fd, buf, size, offset);
        if (nwritten == -1)
                nwritten = -errno;      /* capture before close() */

        close(fd);
        return nwritten;
}

/*
 * Look up the bitmap bit of the block containing offset.  The bitmap is
 * the content of head->dmpath: one bit per 4096-byte block, eight blocks
 * per byte, lowest bit first.
 *
 * Returns 1 if the bit is set, 0 if it is clear, if the byte lies beyond
 * the end of the bitmap or if the bitmap file does not exist, and -EIO
 * (with a warning) on any other open/read failure.
 */
static int is_bitmap_delta(struct deltaheader *head, off_t offset)
{
        off_t block = offset >> 12;
        size_t byte_pos = block >> 3;
        unsigned char bit = 1 << (block & 7);
        unsigned char byte = 0;
        int nread;
        int fd;

        fd = open(head->dmpath, O_RDONLY);
        if (fd == -1) {
                if (errno != ENOENT) {
                        warn("open %s read-only", head->dmpath);
                        return -EIO;
                }
                return 0;
        }

        nread = pread(fd, &byte, 1, byte_pos);
        if (nread == -1)
                warn("read from %s pos %zi", head->dmpath, byte_pos);
        close(fd);

        if (nread == -1)
                return -EIO;

        return (byte & bit) != 0;
}

/*
 * Set the bitmap bit of the block containing offset in head->dmpath
 * (read-modify-write of a single byte).  The bitmap file must already
 * exist.
 *
 * Returns 0 on success and -EIO (with a warning) if the file cannot be
 * opened, read or written.
 */
static int set_bitmap_delta(struct deltaheader *head, off_t offset)
{
        off_t block = offset >> 12;
        size_t byte_pos = block >> 3;
        unsigned char bit = 1 << (block & 7);
        unsigned char byte = 0;
        int status = -EIO;
        int fd;

        fd = open(head->dmpath, O_RDWR);
        if (fd == -1) {
                warn("open %s read-write", head->dmpath);
                return -EIO;
        }

        if (pread(fd, &byte, 1, byte_pos) == -1) {
                warn("read from %s pos %zi", head->dmpath, byte_pos);
        } else {
                /* a read beyond EOF leaves byte at 0, which is what we want */
                byte |= bit;
                if (pwrite(fd, &byte, 1, byte_pos) == -1)
                        warn("write to %s pos %zi", head->dmpath, byte_pos);
                else
                        status = 0;
        }

        close(fd);
        return status;
}

/*
 * Read at most one block starting at the block-aligned offset.  The data
 * comes from the delta file when the block's bitmap bit is set and from
 * the base file otherwise.
 *
 * Returns the number of bytes read (at most BLOCK_SIZE) or a negative
 * errno.
 */
static int read_block(struct deltaheader *head, char *buf, off_t offset,
                      size_t size)
{
        size_t want = size > BLOCK_SIZE ? BLOCK_SIZE : size;
        int in_delta;

        assert((offset & BLOCK_MASK) == 0);

        in_delta = is_bitmap_delta(head, offset);
        if (in_delta < 0)
                return in_delta;

        return read_file(in_delta ? head->ddpath : head->bpath,
                         buf, offset, want);
}

/*
 * Write at most one block to the delta file at the block-aligned offset
 * and mark the block as present in the bitmap.
 *
 * Returns the number of bytes written (at most BLOCK_SIZE), -EIO on a
 * short write or bitmap failure, or the negative errno from write_file().
 */
static int write_block(struct deltaheader *head, const char *buf, off_t offset,
                       size_t size)
{
        int res;
        int res2;

        assert((offset & BLOCK_MASK) == 0);

        if (size > BLOCK_SIZE)
                size = BLOCK_SIZE;

        res = write_file(head->ddpath, buf, offset, size);
        if (res < 0)
                return res;

        if ((size_t) res != size) {
                /*
                 * Report the file that was actually written; the previous
                 * code passed an uninitialized pointer to warnx() here.
                 */
                warnx("short write at %lli %zu bytes to %s\n",
                      (long long) offset, size, head->ddpath);
                return -EIO;
        }

        /* only flip the bit once the data is in the delta file */
        res2 = set_bitmap_delta(head, offset);
        if (res2)
                return res2;

        return res;
}

static int extend_file(struct deltaheader *head, off_t oldsize, off_t size)
{
        int res;
        off_t offset = oldsize;

        if ((oldsize & BLOCK_MASK) != 0) {
                offset &= ~BLOCK_MASK;

                res = is_bitmap_delta(head, offset);
                if (res < 0)
                        return res;

                if (!res) {
                        char buf[BLOCK_SIZE];
                        size_t num = oldsize - offset;

                        memset(buf, 0, sizeof(buf));
                        res = read_block(head, buf, offset, num);
                        if (res < 0)
                                return res;

                        num = size - offset;
                        res = write_block(head, buf, offset, num);
                        if (res < 0)
                                return res;
                        }
                offset += BLOCK_SIZE;
        }
        while (offset < size) {
                set_bitmap_delta(head, offset);
                offset += BLOCK_SIZE;
        }
        return 0;
}


/*
 * Append a heap-allocated copy of name to the string array *arrayp and
 * increment *nump.  The array is grown by one slot with realloc().
 * Terminates the program via err() if either allocation fails.
 */
static void add_name(char ***arrayp, unsigned *nump, const char *name)
{
        unsigned newcount = *nump + 1;
        char **grown;
        char *dup;

        grown = realloc(*arrayp, newcount * sizeof(char *));
        dup = strdup(name);
        if (!grown || !dup)
                err(1, "memory alocation failed");

        grown[newcount - 1] = dup;
        *arrayp = grown;
        *nump = newcount;
}

/*
 * Linear search: returns 1 if name equals one of the first num strings
 * of array, 0 otherwise.  array is not touched when num is 0.
 */
static int check_name_array(char **array, unsigned num, const char *name)
{
        unsigned i = 0;

        while (i < num) {
                if (!strcmp(array[i], name))
                        return 1;
                i++;
        }
        return 0;
}

/*
 * Release a string array built with add_name(): each of the num strings
 * and then the array itself.  A NULL array with num == 0 is fine.
 */
static void free_names(char **array, unsigned num)
{
        while (num > 0)
                free(array[--num]);

        free(array);
}

/*
 * Copy owner, group, access time and modification time from stbuf onto
 * path.  Neither call follows a symlink at path.  The file mode is not
 * touched here.
 *
 * Returns 0 on success, -EIO (with a warning) on failure.
 */
static int set_attr(const char *path, struct stat *stbuf)
{
        struct timespec times[2];

        if (lchown(path, stbuf->st_uid, stbuf->st_gid) == -1) {
                warn("chown %s", path);
                return -EIO;
        }

        times[0] = stbuf->st_atim;      /* access time */
        times[1] = stbuf->st_mtim;      /* modification time */
        if (utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW) == -1) {
                warn("utimes %s", path);
                return -EIO;
        }

        return 0;
}

/*
 * Create the (empty, mode 0600) merge marker file head->dmpath.
 *
 * Returns 0 on success, -EIO (with a warning) if mknod() fails, which
 * includes the case where the file already exists.
 */
static int create_merge(struct deltaheader *head)
{
        if (mknod(head->dmpath, 0600 | S_IFREG, 0) != -1)
                return 0;

        warn("create %s", head->dmpath);
        return -EIO;
}

/*
 * Make sure every ancestor directory of path exists in the delta store.
 *
 * First the ancestors are examined from the nearest one upwards until one
 * is found in the delta store (or the root is reached).  Every missing
 * directory below that point is then created with the mode, owner and
 * times of the corresponding base directory, together with its merge
 * marker.
 *
 * Returns 0 if the nearest existing delta ancestor is a directory without
 * a merge marker, 1 otherwise, and a negative errno on failure.
 */
static int create_parent(const char *path)
{
        int res;
        struct deltaheader head;
        const char *s = path + strlen(path);
        struct stat stbuf;

        /* walk up: find the deepest ancestor already present in the delta */
        while (1) {
                s = last_slash(s, path);
                if (s == path)
                        break;

                get_deltapath(path, s - path, head.ddpath);
                res = lstat(head.ddpath, &stbuf);
                if (res == 0) {
                        if (!S_ISDIR(stbuf.st_mode)) {
                                warnx("%s not a directory", head.ddpath);
                                return -EIO;
                        }

                        get_mergepath(path, s - path, head.dmpath);
                        res = check_exist(head.dmpath);
                        if (res < 0)
                                return res;
                        if (!res)
                                return 0;

                        break;
                } else if (errno != ENOENT) {
                        /* name the path lstat() was actually called on */
                        warn("stat %s", head.ddpath);
                        return -EIO;
                }
        }

        /* walk down: create each missing directory below that ancestor */
        while (1) {
                s = strchr(s + 1, '/');
                if (!s)
                        break;

                res = get_deltaheader_len(&head, path, s - path);
                if (res < 0)
                        return res;

                res = lstat(head.bpath, &stbuf);
                if (res == -1) {
                        warn("stat %s", head.bpath);
                        return -EIO;
                }
                if (!S_ISDIR(stbuf.st_mode)) {
                        warnx("%s not a directory", head.bpath);
                        return -EIO;
                }
                res = mkdir(head.ddpath, stbuf.st_mode);
                if (res == -1) {
                        warn("mkdir %s", head.ddpath);
                        return -EIO;
                }

                res = set_attr(head.ddpath, &stbuf);
                if (res < 0)
                        return res;

                res = create_merge(&head);
                if (res < 0)
                        return res;
        }

        return 1;
}

/*
 * Returns 1 if name (of length namelen) is a bookkeeping entry, i.e. it
 * is longer than 8 characters and MAGIC_SEQ starts 8 characters before
 * its end (MAGIC_SEQ followed by a 5-character suffix); 0 otherwise.
 */
static int is_magic(const char *name, unsigned namelen)
{
        const char *tail;

        if (namelen < 9)
                return 0;

        tail = name + namelen - 8;
        return memcmp(tail, MAGIC_SEQ, 3) == 0;
}

/*
 * Returns 1 if name (of length namelen) is a redirect entry: a magic
 * name (see is_magic()) whose last five characters are "redir".
 */
static int is_redir(const char *name, unsigned namelen)
{
        return is_magic(name, namelen) &&
               memcmp(name + namelen - 5, "redir", 5) == 0;
}

static int remove_empty_dir(struct deltaheader *head)
{
        int res;
        DIR *dp;
        struct dirent *de;

        res = check_exist(head->dmpath);
        if (res < 0)
                return res;
        if (!res)
                return 0;

        dp = opendir(head->ddpath);
        if (dp == NULL) {
                warn("opendir %s", head->ddpath);
                return -EIO;
        }

        while ((de = readdir(dp)) != NULL) {
                unsigned namelen = strlen(de->d_name);

                if (is_redir(de->d_name, namelen)) {
                        pathstr linkpath;

                        create_path(linkpath, head->ddpath, de->d_name);
                        res = unlink(linkpath);
                        if (res == -1) {
                                warn("unlink %s", linkpath);
                                closedir(dp);
                                return -EIO;
                        }
                }
        }
        closedir(dp);
        res = rmdir(head->ddpath);
        if (res == -1) {
                warn("rmdir %s", head->ddpath);
                return -EIO;
        }
        res = unlink(head->dmpath);
        if (res == -1) {
                warn("unlink %s", head->dmpath);
                return -EIO;
        }

        return 0;
}


/*
 * Second half of unlink/rmdir: make sure the object at path is no longer
 * visible through the base tree either.
 *
 * Returns 0 on success or a negative errno.
 */
static int remove_base(struct deltaheader *head, const char *path)
{
        int res;

        /* drop the object's own redirect, if it has one */
        res = unlink(head->drpath);
        if (res == -1 && errno != ENOENT) {
                warn("unlink %s", head->drpath);
                return -EIO;
        }

        /* recompute bpath now that the own redirect is gone */
        res = get_basepath(head, path, strlen(path));
        if (res < 0)
                return res;

        res = check_exist(head->bpath);
        if (res < 0)
                return res;

        if (res) {
                /* something is still there in the base tree: hide it */
                res = create_parent(path);
                if (res < 0)
                        return res;

                /* create_parent() returned 0: no "(null)" redirect is made */
                if (res) {
                        res = symlink(REDIR_NULL, head->drpath);
                        if (res == -1) {
                                warn("symlink %s", head->drpath);
                                return -EIO;
                        }
                }
        }

        /* does nothing unless head->dmpath still exists */
        return remove_empty_dir(head);
}

/*
 * Check that the directory at path contains nothing but "." and "..".
 *
 * Returns 0 if it is empty, -ENOTEMPTY if it has any other entry, and
 * -errno if it cannot be opened.
 */
static int check_empty_dir(const char *path)
{
        DIR *dir = opendir(path);
        struct dirent *ent;
        int status = 0;

        if (dir == NULL)
                return -errno;

        while (status == 0 && (ent = readdir(dir)) != NULL) {
                int is_dot = strcmp(ent->d_name, ".") == 0;
                int is_dotdot = strcmp(ent->d_name, "..") == 0;

                if (!is_dot && !is_dotdot)
                        status = -ENOTEMPTY;
        }
        closedir(dir);

        return status;
}

static int check_empty_merged(struct deltaheader *head)
{
        DIR *dp;
        struct dirent *de;
        char **narray = NULL;
        unsigned nnum = 0;
        int res = 0;

        dp = opendir(head->ddpath);
        if (dp == NULL) {
                warn("opendir %s", head->ddpath);
                return -EIO;
        }

        while ((de = readdir(dp)) != NULL) {
                unsigned namelen = strlen(de->d_name);

                if (is_redir(de->d_name, namelen)) {
                        int res;
                        pathstr buf;
                        pathstr linkpath;

                        create_path(linkpath, head->ddpath, de->d_name);
                        res = readlink(linkpath, buf, sizeof(buf) - 1);
                        if (res == -1) {
                                warn("readlink %s", linkpath);
                                res = -EIO;
                                break;
                        }
                        buf[res] = '\0';
                        de->d_name[namelen - 8] = '\0';
                        if (strcmp(buf, REDIR_NULL) == 0) {
                                add_name(&narray, &nnum, de->d_name);
                        } else {
                                res = -ENOTEMPTY;
                                break;
                        }
                } else if (!is_magic(de->d_name, namelen)) {
                        if (strcmp(de->d_name, ".") != 0 &&
                            strcmp(de->d_name, "..") != 0) {
                                res = -ENOTEMPTY;
                                break;
                        }
                }
        }
        closedir(dp);
        if (res < 0)
                goto out;

        dp = opendir(head->bpath);
        if (dp == NULL) {
                res = -errno;
                goto out;
        } else {
                while ((de = readdir(dp)) != NULL) {
                        if (strcmp(de->d_name, ".") != 0 &&
                            strcmp(de->d_name, "..") != 0 &&
                            !check_name_array(narray, nnum, de->d_name)) {
                                res = -ENOTEMPTY;
                                break;
                        }
                }
                closedir(dp);
        }

out:
        free_names(narray, nnum);

        return res;
}

static int copy_up(struct deltaheader *head, const char *path,
                   struct stat *stbuf)
{
        int res;

        res = create_parent(path);
        if (res < 0)
                return res;

        if (S_ISDIR(stbuf->st_mode)) {
                res = mkdir(head->ddpath, stbuf->st_mode);
                if (res == -1) {
                        warn("mkdir %s", head->ddpath);
                        return -EIO;
                }
        } else if (S_ISREG(stbuf->st_mode)) {
                int fd;

                fd = open(head->ddpath, O_WRONLY | O_CREAT | O_EXCL,
                          stbuf->st_mode);
                if (fd == -1) {
                        warn("create %s", head->ddpath);
                        return -EIO;
                }
                if (stbuf->st_size != 0)
                        ftruncate(fd, stbuf->st_size);
                close(fd);
        } else if (S_ISLNK(stbuf->st_mode)) {
                pathstr linkbuf;

                res = readlink(head->bpath, linkbuf, sizeof(linkbuf) - 1);
                if (res == -1)
                        return -errno;

                linkbuf[res] = '\0';

                res = symlink(linkbuf, head->ddpath);
                if (res == -1) {
                        warn("symlink %s", head->ddpath);
                        return -EIO;
                }
        } else {
                res = mknod(head->ddpath, stbuf->st_mode, stbuf->st_rdev);
                if (res == -1) {
                        warn("mknod %s", head->ddpath);
                        return -EIO;
                }
        }

        res = set_attr(head->ddpath, stbuf);
        if (res < 0)
                return res;

        res = create_merge(head);
        if (res < 0)
                return res;

        if ((!S_ISREG(stbuf->st_mode) && !S_ISDIR(stbuf->st_mode)) ||
            (S_ISREG(stbuf->st_mode) && stbuf->st_size == 0)) {
                res = unlink(head->drpath);
                if (res == -1 && errno != ENOENT) {
                        warn("unlink %s", head->drpath);
                        return -EIO;
                }
        }
        return 0;
}

/*
 * getattr handler: stat the object at path.
 *
 * The delta object wins if it exists.  Otherwise the base object is used,
 * unless it is hidden: either the base path is empty (redirect to
 * "(null)") or the parent exists in the delta store without a merge
 * marker.
 *
 * Returns 0 with stbuf filled in, or a negative errno.
 */
static int delta_getattr(const char *path, struct stat *stbuf)
{
        int res;
        struct deltaheader head;

        res = get_deltaheader(&head, path);
        if (res < 0)
                return res;

        res = lstat(head.ddpath, stbuf);
        if (res == -1) {
                char *s;
                pathstr parentpath;

                if (errno != ENOENT) {
                        warn("stat %s", head.ddpath);
                        return -EIO;
                }
                /* empty base path: the object has been removed */
                if (!head.bpath[0])
                        return -ENOENT;

                s = strrchr(path, '/');
                assert(s != NULL);
                /* parent is the root: skip the parent checks */
                if (s == path)
                        goto check;

                get_deltapath(path, s - path, parentpath);
                res = check_exist(parentpath);
                if (res < 0)
                        return res;

                if (res) {
                        pathstr mpath;

                        /* parent exists in the delta but is not merged with
                           the base, so base entries do not show through */
                        get_mergepath(path, s - path, mpath);
                        res = check_exist(mpath);
                        if (res < 0)
                                return res;

                        if (!res)
                                return -ENOENT;
                }
check:
                res = lstat(head.bpath, stbuf);
        }
        /* only reached with res == -1 when the base lstat() failed */
        if (res == -1)
                return -errno;

        return 0;
}

/*
 * readlink handler: store the NUL-terminated target of the symlink at
 * path in buf (at most size - 1 characters).  The delta object is tried
 * first; the base object is used only if the delta one does not exist.
 *
 * Returns 0 on success or a negative errno.
 */
static int delta_readlink(const char *path, char *buf, size_t size)
{
        struct deltaheader head;
        int n;

        n = get_deltaheader(&head, path);
        if (n < 0)
                return n;

        n = readlink(head.ddpath, buf, size - 1);
        if (n == -1 && errno != ENOENT) {
                warn("readlink %s", head.ddpath);
                return -EIO;
        }
        if (n == -1)
                n = readlink(head.bpath, buf, size - 1);
        if (n == -1)
                return -errno;

        buf[n] = '\0';
        return 0;
}

/*
 * readdir handler: list the directory at path.
 *
 * If the directory does not exist in the delta store, the base directory
 * is listed as is.  Otherwise the delta entries are reported first
 * (bookkeeping names are hidden; a redirect that does not point to
 * "(null)" contributes the name it belongs to).  If the directory is the
 * root or has a merge marker, the base entries that are neither already
 * reported nor removed by a "(null)" redirect are added.
 *
 * Returns 0 on success or a negative errno.
 */
static int delta_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
                         off_t offset, struct fuse_file_info *fi)
{
        int res;
        DIR *dp;
        struct dirent *de;
        struct deltaheader head;
        char **parray = NULL;   /* names reported from the delta side */
        char **narray = NULL;   /* names removed by a "(null)" redirect */
        unsigned pnum = 0;
        unsigned nnum = 0;
        unsigned ctr;
        int merge = 0;

        (void) offset;
        (void) fi;

        res = get_deltaheader(&head, path);
        if (res < 0)
                return res;

        dp = opendir(head.ddpath);
        if (dp == NULL) {
                if (errno != ENOENT) {
                        warn("opendir %s", head.ddpath);
                        return -EIO;
                }
                /* nothing in the delta store: plain listing of the base */
                dp = opendir(head.bpath);
                if (dp == NULL)
                        return -errno;

                while ((de = readdir(dp)) != NULL) {
                        if (filler(buf, de->d_name, NULL, 0))
                                break;
                }
                closedir(dp);
                return 0;
        }
        if (strcmp(path, "/") == 0) {
                merge = 1;
        } else {
                res = check_exist(head.dmpath);
                if (res < 0) {
                        closedir(dp);
                        return res;
                }
                if (res)
                        merge = 1;
        }

        while ((de = readdir(dp)) != NULL) {
                unsigned namelen = strlen(de->d_name);

                if (!is_magic(de->d_name, namelen)) {
                        if (!check_name_array(parray, pnum, de->d_name))
                                add_name(&parray, &pnum, de->d_name);
                } else if (is_redir(de->d_name, namelen)) {
                        /*
                         * Distinct names on purpose: locals called "res"
                         * and "buf" used to shadow the function's own, so
                         * the -EIO below was lost and the handler returned
                         * a stale value.
                         */
                        int linklen;
                        pathstr target;
                        pathstr linkpath;

                        create_path(linkpath, head.ddpath, de->d_name);
                        linklen = readlink(linkpath, target,
                                           sizeof(target) - 1);
                        if (linklen == -1) {
                                warn("readlink %s", linkpath);
                                closedir(dp);
                                res = -EIO;
                                goto out;
                        }
                        target[linklen] = '\0';
                        /* strip MAGIC_SEQ "redir" to get the entry name */
                        de->d_name[namelen - 8] = '\0';
                        if (strcmp(target, REDIR_NULL) == 0)
                                add_name(&narray, &nnum, de->d_name);
                        else if (!check_name_array(parray, pnum, de->d_name))
                                add_name(&parray, &pnum, de->d_name);
                }
        }
        closedir(dp);

        res = 0;
        for (ctr = 0; ctr < pnum; ctr++) {
                if (filler(buf, parray[ctr], NULL, 0))
                        goto out;
        }

        if (!merge)
                goto out;

        dp = opendir(head.bpath);
        if (dp == NULL) {
                /* a missing base directory just adds nothing */
                if (errno != ENOENT) {
                        warn("opendir %s", head.bpath);
                        res = -EIO;
                        goto out;
                }
        } else {
                while ((de = readdir(dp)) != NULL) {
                        if (!check_name_array(parray, pnum, de->d_name) &&
                            !check_name_array(narray, nnum, de->d_name)) {
                                if (filler(buf, de->d_name, NULL, 0))
                                        break;
                        }
                }
                closedir(dp);
        }

out:
        free_names(parray, pnum);
        free_names(narray, nnum);

        return res;
}

static int delta_mknod(const char *path, mode_t mode, dev_t rdev)
{
        int res;
        struct deltaheader head;
        struct fuse_context *ctx;

        res = get_deltaheader(&head, path);
        if (res < 0)
                return res;

        res = create_parent(path);
        if (res < 0)
                return res;

        res = mknod(head.ddpath, mode, rdev);
        if (res == -1)
                return -errno;

        ctx = fuse_get_context();
        res = lchown(head.ddpath, ctx->uid, ctx->gid);
        if (res == -1) {
                warn("chown %s", head.ddpath);
                return -EIO;
        }

        res = unlink(head.drpath);
        if (res == -1 && errno != ENOENT) {
                warn("unlink %s", head.drpath);
                return -EIO;
        }

        return 0;
}

static int delta_mkdir(const char *path, mode_t mode)
{
        int res;
        struct deltaheader head;
        struct fuse_context *ctx;

        res = get_deltaheader(&head, path);
        if (res < 0)
                return res;

        res = create_parent(path);
        if (res < 0)
                return res;

        res = mkdir(head.ddpath, mode);
        if (res == -1)
                return -errno;

        ctx = fuse_get_context();
        res = lchown(head.ddpath, ctx->uid, ctx->gid);
        if (res == -1) {
                warn("chown %s", head.ddpath);
                return -EIO;
        }

        res = unlink(head.drpath);
        if (res == -1 && errno != ENOENT) {
                warn("unlink %s", head.drpath);
                return -EIO;
        }

        return 0;
}

static int delta_symlink(const char *from, const char *to)
{
        int res;
        struct deltaheader head;
        struct fuse_context *ctx;

        res = get_deltaheader(&head, to);
        if (res < 0)
                return res;

        res = create_parent(to);
        if (res < 0)
                return res;

        res = symlink(from, head.ddpath);
        if (res == -1)
                return -errno;

        ctx = fuse_get_context();
        res = lchown(head.ddpath, ctx->uid, ctx->gid);
        if (res == -1) {
                warn("chown %s", head.ddpath);
                return -EIO;
        }

        res = unlink(head.drpath);
        if (res == -1 && errno != ENOENT) {
                warn("unlink %s", head.drpath);
                return -EIO;
        }

        return 0;
}

/*
 * unlink handler: remove the delta object and its merge marker if they
 * exist, then let remove_base() hide whatever remains in the base tree.
 *
 * Returns 0 on success or a negative errno.
 */
static int delta_unlink(const char *path)
{
        struct deltaheader head;
        int res;

        res = get_deltaheader(&head, path);
        if (res < 0)
                return res;

        if (unlink(head.ddpath) == -1) {
                /* not being in the delta store at all is fine */
                if (errno != ENOENT) {
                        warn("unlink %s", head.ddpath);
                        return -EIO;
                }
        } else if (unlink(head.dmpath) == -1 && errno != ENOENT) {
                warn("unlink %s", head.dmpath);
                return -EIO;
        }

        return remove_base(&head, path);
}

/*
 * rmdir handler.  What has to be checked depends on how removing the
 * delta directory goes; see the branches below.  On success
 * remove_base() finishes the job.
 *
 * Returns 0 on success or a negative errno (-ENOTEMPTY if the directory
 * is not empty as seen through the mount).
 */
static int delta_rmdir(const char *path)
{
        int res;
        struct deltaheader head;

        res = get_deltaheader(&head, path);
        if (res < 0)
                return res;

        res = rmdir(head.ddpath);
        if (res != -1) {
                /* delta directory was empty and is now gone */
                res = unlink(head.dmpath);
                if (res == -1) {
                        if (errno != ENOENT) {
                                warn("unlink %s", head.dmpath);
                                return -EIO;
                        }
                } else {
                        /* it was merged: the base side must be empty too */
                        res = check_empty_dir(head.bpath);
                        if (res < 0)
                                return res;
                }
        } else if (errno == ENOENT) {
                /* directory exists only in the base tree */
                res = check_empty_dir(head.bpath);
                if (res < 0)
                        return res;
        } else if (errno == ENOTEMPTY) {
                /* without a merge marker the delta entries are real ones */
                res = check_exist(head.dmpath);
                if (res < 0)
                        return res;
                if (!res)
                        return -ENOTEMPTY;

                /* merged: it may contain nothing but bookkeeping entries */
                res = check_empty_merged(&head);
                if (res < 0)
                        return res;
        } else {
                warn("rmdir %s", head.ddpath);
                return -EIO;
        }

        return remove_base(&head, path);
}

/*
 * FUSE rename handler: move `from` to `to`.
 *
 * Outline of the steps below:
 *  1. Inspect the destination.  A directory that exists only in the base
 *     must be empty; a delta directory that has a dmpath file must be
 *     empty according to check_empty_merged() and is then removed with
 *     remove_empty_dir().
 *  2. Make sure the destination's parent exists (create_parent).
 *  3. Rename the delta entry (ddpath) and its dmpath file.  If the source
 *     has no delta entry, any delta entry at the destination is removed
 *     instead.
 *  4. Unless the source had a delta entry without a dmpath file, move the
 *     drpath symlink along; if the source had none, create one at the
 *     destination whose target is "*" followed by the source's path
 *     relative to baseroot.
 *  5. Call remove_base() for the source path.
 *
 * Returns 0 or a negative errno value.
 */
static int delta_rename(const char *from, const char *to)
{
        int res;
        struct deltaheader hfrom;
        struct deltaheader hto;
        /* Cleared when the source has a delta entry but no dmpath file */
        int need_redir = 1;
        struct stat stbuf;

        res = get_deltaheader(&hfrom, from);
        if (res < 0)
                return res;

        res = get_deltaheader(&hto, to);
        if (res < 0)
                return res;

        /* Step 1: validate the destination */
        res = lstat(hto.ddpath, &stbuf);
        if (res == -1) {
                if (errno != ENOENT) {
                        warn("stat %s", hto.ddpath);
                        return -EIO;
                }
                /* No delta entry: look at the base instead */
                res = lstat(hto.bpath, &stbuf);
                if (res == -1) {
                        if (errno != ENOENT) {
                                warn("stat %s", hto.bpath);
                                return -EIO;
                        }
                } else {
                        if (S_ISDIR(stbuf.st_mode)) {
                                res = check_empty_dir(hto.bpath);
                                if (res < 0)
                                        return res;
                        }
                }
        } else {
                if (S_ISDIR(stbuf.st_mode)) {
                        res = check_exist(hto.dmpath);
                        if (res < 0)
                                return res;

                        /*
                         * Only directories with a dmpath file are handled
                         * here; others are left for rename() below, which
                         * reports ENOTEMPTY itself.
                         */
                        if (res) {
                                res = check_empty_merged(&hto);
                                if (res < 0)
                                        return res;

                                res = remove_empty_dir(&hto);
                                if (res < 0)
                                        return res;
                        }
                }
        }

        /* Step 2 */
        res = create_parent(to);
        if (res < 0)
                return res;

        /* Step 3: move the delta entry and its dmpath file */
        res = rename(hfrom.ddpath, hto.ddpath);
        if (res != -1) {
                res = rename(hfrom.dmpath, hto.dmpath);
                if (res == -1) {
                        if (errno != ENOENT) {
                                warn("rename %s %s", hfrom.dmpath, hto.dmpath);
                                return -EIO;
                        }
                        /* Source had no dmpath file: skip the drpath step */
                        need_redir = 0;
                }
        } else if (errno == ENOENT) {
                /*
                 * Source has no delta entry.  Clear out whatever delta
                 * entry the destination has: try unlink() first and fall
                 * back to rmdir() when it turns out to be a directory.
                 */
                res = unlink(hto.ddpath);
                if (res == -1) {
                        if (errno == EISDIR) {
                                res = rmdir(hto.ddpath);
                                if (res == -1) {
                                        if (errno == ENOTEMPTY)
                                                return -ENOTEMPTY;
                                        warn("rmdir %s", hto.ddpath);
                                        return -EIO;
                                }
                        } else if (errno != ENOENT) {
                                warn("unlink %s", hto.ddpath);
                                return -EIO;
                        }
                }
        } else if (errno == ENOTEMPTY) {
                return -ENOTEMPTY;
        } else {
                warn("rename %s %s", hfrom.ddpath, hto.ddpath);
                return -EIO;
        }

        /* Step 4: carry over or create the drpath symlink */
        if (need_redir) {
                res = rename(hfrom.drpath, hto.drpath);
                if (res == -1) {
                        pathstr link;

                        if (errno != ENOENT) {
                                warn("rename %s %s", hfrom.drpath, hto.drpath);
                                return -EIO;
                        }

                        /* Drop a stale drpath at the destination, if any */
                        res = unlink(hto.drpath);
                        if (res == -1 && errno != ENOENT) {
                                warn("unlink %s", hto.drpath);
                                return -EIO;
                        }

                        /* Target is "*" + source path relative to baseroot */
                        snprintf(link, sizeof(pathstr), "*%s",
                                 hfrom.bpath + strlen(baseroot));
                        res = symlink(link, hto.drpath);
                        if (res == -1) {
                                warn("symlink %s", hto.drpath);
                                return -EIO;
                        }
                }
        }

        /* Step 5 */
        return remove_base(&hfrom, from);
}

/*
 * FUSE chmod handler.
 *
 * Applies the mode directly to the delta entry if there is one.
 * Otherwise the base entry is examined: if its permission bits already
 * match nothing is done, else copy_up() is called with the base
 * attributes carrying the new permission bits.
 */
static int delta_chmod(const char *path, mode_t mode)
{
        struct deltaheader hdr;
        struct stat st;
        int rc;

        rc = get_deltaheader(&hdr, path);
        if (rc < 0)
                return rc;

        if (chmod(hdr.ddpath, mode) != 0) {
                if (errno != ENOENT) {
                        warn("chmod %s", hdr.ddpath);
                        return -EIO;
                }

                /* No delta entry yet, compare against the base */
                if (lstat(hdr.bpath, &st) == -1)
                        return -errno;

                mode &= 07777;
                if ((st.st_mode & 07777) != mode) {
                        /* Keep the file type, swap in the new bits */
                        st.st_mode = (st.st_mode & S_IFMT) | mode;
                        return copy_up(&hdr, path, &st);
                }
        }

        return 0;
}

/*
 * FUSE chown handler.
 *
 * Changes ownership of the delta entry if there is one (without
 * following symlinks).  Otherwise the base entry is examined and
 * copy_up() is called only if the requested owner or group actually
 * differs.  An id of -1 means "leave unchanged".
 */
static int delta_chown(const char *path, uid_t uid, gid_t gid)
{
        struct deltaheader hdr;
        struct stat st;
        int rc;
        int uid_differs;
        int gid_differs;

        rc = get_deltaheader(&hdr, path);
        if (rc < 0)
                return rc;

        if (lchown(hdr.ddpath, uid, gid) == 0)
                return 0;

        if (errno != ENOENT) {
                warn("chown %s", hdr.ddpath);
                return -EIO;
        }

        /* No delta entry yet, compare against the base */
        if (lstat(hdr.bpath, &st) == -1)
                return -errno;

        uid_differs = uid != (uid_t) -1 && st.st_uid != uid;
        gid_differs = gid != (gid_t) -1 && st.st_gid != gid;
        if (!uid_differs && !gid_differs)
                return 0;

        if (uid != (uid_t) -1)
                st.st_uid = uid;
        if (gid != (gid_t) -1)
                st.st_gid = gid;

        return copy_up(&hdr, path, &st);
}

/*
 * FUSE truncate handler.
 *
 * With a delta entry present, the delta file is truncated directly;
 * truncating to zero also drops the dmpath and drpath files.  Without
 * one, the base entry is copied up with the new size (unless the size
 * is unchanged).
 *
 * When the file grows, extend_file() is called for the range between
 * the old and new size -- always after a copy up, otherwise only if a
 * dmpath file exists.
 */
static int delta_truncate(const char *path, off_t size)
{
        struct deltaheader hdr;
        struct stat st;
        off_t prev_size;
        int rc;

        rc = get_deltaheader(&hdr, path);
        if (rc < 0)
                return rc;

        if (lstat(hdr.ddpath, &st) != -1) {
                prev_size = st.st_size;

                if (truncate(hdr.ddpath, size) == -1) {
                        warn("truncate %s", hdr.ddpath);
                        return -EIO;
                }

                if (size == 0) {
                        if (unlink(hdr.dmpath) == -1 && errno != ENOENT) {
                                warn("unlink %s", hdr.dmpath);
                                return -EIO;
                        }
                        if (unlink(hdr.drpath) == -1 && errno != ENOENT) {
                                warn("unlink %s", hdr.drpath);
                                return -EIO;
                        }
                }

                if (size <= prev_size)
                        return 0;

                /* Growing: only files with a dmpath file need extending */
                rc = check_exist(hdr.dmpath);
                if (rc < 0)
                        return rc;
                if (rc == 0)
                        return 0;
        } else {
                if (errno != ENOENT) {
                        warn("stat %s", hdr.ddpath);
                        return -EIO;
                }

                if (lstat(hdr.bpath, &st) == -1)
                        return -errno;

                if (size == st.st_size)
                        return 0;

                /* Copy up with the requested size already in place */
                prev_size = st.st_size;
                st.st_size = size;
                rc = copy_up(&hdr, path, &st);
                if (rc < 0)
                        return rc;

                if (size <= prev_size)
                        return 0;
        }

        rc = extend_file(&hdr, prev_size, size);
        return rc < 0 ? rc : 0;
}

static int delta_utimens(const char *path, const struct timespec ts[2])
{
        int res;
        struct deltaheader head;
        struct stat stbuf;

        res = get_deltaheader(&head, path);
        if (res < 0)
                return res;

        res = utimensat(AT_FDCWD, head.ddpath, ts, AT_SYMLINK_NOFOLLOW);
        if (res == 0)
                return 0;

        if (errno != ENOENT) {
                warn("utimes %s", head.ddpath);
                return -EIO;
        }
        res = lstat(head.bpath, &stbuf);
        if (res == -1)
                return -errno;

        if ((ts[0].tv_nsec == UTIME_OMIT ||
             (ts[0].tv_nsec == stbuf.st_atim.tv_nsec &&
              ts[0].tv_sec == stbuf.st_atim.tv_sec)) ||
            (ts[1].tv_nsec == UTIME_OMIT ||
             (ts[1].tv_nsec == stbuf.st_mtim.tv_nsec &&
              ts[1].tv_sec == stbuf.st_mtim.tv_sec)))
                return 0;

        if (ts[0].tv_nsec != UTIME_OMIT)
                stbuf.st_atim = ts[0];
        if (ts[1].tv_nsec != UTIME_OMIT)
                stbuf.st_mtim = ts[1];

        return copy_up(&head, path, &stbuf);
}

static int delta_read(const char *path, char *buf, size_t size, off_t offset,
                    struct fuse_file_info *fi)
{
        int res;
        size_t num = 0;
        struct deltaheader head;

        (void) fi;

        res = get_deltaheader(&head, path);
        if (res < 0)
                return res;

        res = check_exist(head.ddpath);
        if (res < 0)
                return res;

        if (!res)
                return read_file(head.bpath, buf, offset, size);

        res = check_exist(head.dmpath);
        if (res < 0)
                return res;
        if (!res)
                return read_file(head.ddpath, buf, offset, size);

        while (size) {
                res = read_block(&head, buf, offset, size);
                if (res < 0)
                        break;

                num += res;
                offset += res;
                buf += res;
                size -= res;

                if (res != BLOCK_SIZE)
                        break;
        }

        return num ? num : res;
}

/*
 * FUSE write handler.
 *
 * If there is no delta entry yet, the base entry is copied up first.  A
 * delta entry without a dmpath file is written to directly.  Otherwise
 * the write is handled per block:
 *  - a full, aligned block goes straight to write_block()
 *  - a partial write to a block for which is_bitmap_delta() returns zero
 *    is done as read-modify-write through read_block()/write_block()
 *  - a partial write to a block for which it returns non-zero is written
 *    into the delta file directly
 *
 * The request must not cross a block boundary (enforced by assert).
 *
 * NOTE(review): `fi` is unused here and, unlike in delta_read(), not
 * explicitly voided.
 */
static int delta_write(const char *path, const char *buf, size_t size,
                     off_t offset, struct fuse_file_info *fi)
{
        int res;
        struct deltaheader head;
        struct stat stbuf;
        off_t blkoffset;

        res = get_deltaheader(&head, path);
        if (res < 0)
                return res;

        res = lstat(head.ddpath, &stbuf);
        if (res == -1) {
                if (errno != ENOENT) {
                        warn("stat %s", head.ddpath);
                        return -EIO;
                }
                /* No delta entry: copy up using the base attributes */
                res = lstat(head.bpath, &stbuf);
                if (res == -1)
                        return -errno;

                res = copy_up(&head, path, &stbuf);
                if (res < 0)
                        return res;
        } else {
                res = check_exist(head.dmpath);
                if (res < 0)
                        return res;
                /* No dmpath file: plain write into the delta file */
                if (!res)
                        return write_file(head.ddpath, buf, offset, size);
        }

        /* Writing past the current end: extend up to the write offset */
        if (offset > stbuf.st_size) {
                res = extend_file(&head, stbuf.st_size, offset);
                if (res < 0)
                        return res;
        }

        /* A single request must stay within one block */
        assert((offset & BLOCK_MASK) + size <= BLOCK_SIZE);

        if ((offset & BLOCK_MASK) == 0 && size == BLOCK_SIZE)
                return write_block(&head, buf, offset, size);

        /* Partial block: find the start of the containing block */
        blkoffset = offset & ~BLOCK_MASK;
        res = is_bitmap_delta(&head, blkoffset);
        if (res < 0)
                return res;

        if (!res) {
                char blkbuf[BLOCK_SIZE];
                /*
                 * NOTE(review): stbuf.st_size is the size from before any
                 * extend_file() call above, and this value is neither
                 * clamped to BLOCK_SIZE nor guarded against st_size being
                 * smaller than blkoffset; presumably read_block() limits
                 * how much it stores into blkbuf -- confirm.
                 */
                size_t num = stbuf.st_size - blkoffset;
                off_t newsize;

                res = read_block(&head, blkbuf, blkoffset, num);
                if (res < 0)
                        return res;

                /* Overlay the new data on the old block contents */
                memcpy(blkbuf + (offset & BLOCK_MASK), buf, size);

                /* Write back up to the (possibly grown) end of file */
                newsize = stbuf.st_size;
                if (offset + size > newsize)
                        newsize = offset + size;
                num = newsize - blkoffset;
                res = write_block(&head, blkbuf, blkoffset, num);
                if (res < 0)
                        return res;
        } else {
                res = write_file(head.ddpath, buf, offset, size);
                if (res < 0)
                        return res;
        }

        return size;
}

/*
 * Table of FUSE callbacks implemented by this filesystem; handed to
 * fuse_main() in main().
 */
static struct fuse_operations delta_oper = {
        .getattr = delta_getattr,
        .readlink = delta_readlink,
        .readdir = delta_readdir,
        .mknod = delta_mknod,
        .mkdir = delta_mkdir,
        .symlink = delta_symlink,
        .unlink = delta_unlink,
        .rmdir = delta_rmdir,
        .rename = delta_rename,
        .chmod = delta_chmod,
        .chown = delta_chown,
        .truncate = delta_truncate,
        .utimens = delta_utimens,
        .read = delta_read,
        .write = delta_write,
};

static int delta_opt_proc(void *data, const char *arg, int key,
                          struct fuse_args *outargs)
{
        (void) data;
        (void) outargs;

        if (key == FUSE_OPT_KEY_NONOPT) {
                if (baseroot == NULL) {
                        baseroot = strdup(arg);
                        assert(baseroot != NULL);
                        return 0;
                } else if (deltaroot == NULL) {
                        deltaroot = strdup(arg);
                        assert(deltaroot != NULL);
                        return 0;
                }
        }
        return 1;
}

int main(int argc, char *argv[])
{
        int res;
        struct fuse_args args = FUSE_ARGS_INIT(argc, argv);

        umask(0);
        res = fuse_opt_parse(&args, NULL, NULL, delta_opt_proc);
        if (res == -1)
                exit(1);
        fuse_opt_add_arg(&args, "-s");
        fuse_opt_add_arg(&args, "-oallow_other,default_permissions");
        if (res == -1)
                exit(1);

        if (baseroot == NULL || deltaroot == NULL) {
                fprintf(stderr,
                        "usage: %s [opts] baseroot deltaroot mountpoint\n",
                        argv[0]);
                exit(1);
        }

        res = fuse_main(args.argc, args.argv, &delta_oper, NULL);
        fuse_opt_free_args(&args);

        return res;
}

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Goswin von Brederlow-2
Miklos Szeredi <[hidden email]> writes:

> Here is my first try at a "delta" filesystem.  It takes two
> directories, one of which is a read-only base, and the other is where
> the differences are stored.  It stores data, metadata and directory
> modifications without copying up whole files from the read-only
> branch.
>
> The layout of the delta store may look similar to the writable branch
> of a union fs, but this is basically just coincidence (it was easier
> to start out this way).
>
> Currently it's implemented with fuse and it's not optimized at all, so
> performance may suck in some cases.  But I think this is a useful
> concept and a better model, than trying to fit writable branches into
> a union filesystem.
>
> Comments, bug reports are welcome.
>
> Thanks,
> Miklos

Wouldn't it make more sense to start with unionfs-fuse and add a delta
feature to it? unionfs-fuse already has all you need except that it
will copy the whole file (if on a read-only branch) on write.

MfG
        Goswin

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Bernd Schubert
On Saturday 28 February 2009, Goswin von Brederlow wrote:

> Miklos Szeredi <[hidden email]> writes:
> > Here is my first try at a "delta" filesystem.  It takes two
> > directories, one of which is a read-only base, and the other is where
> > the differences are stored.  It stores data, metadata and directory
> > modifications without copying up whole files from the read-only
> > branch.
> >
> > The layout of the delta store may look similar to the writable branch
> > of a union fs, but this is basically just coincidence (it was easier
> > to start out this way).
> >
> > Currently it's implemented with fuse and it's not optimized at all, so
> > performance may suck in some cases.  But I think this is a useful
> > concept and a better model, than trying to fit writable branches into
> > a union filesystem.
> >
> > Comments, bug reports are welcome.
> >
> > Thanks,
> > Miklos
>
> Wouldn't it make more sense to start with unionfs-fuse and add a delta
> feature to it? unionfs-fuse already has all you need except that it
> will copy the whole file (if on a read-only branch) on write.

Well yes, but it would need to be configurable by the user. IMHO, the 'delta'
approach has a big problem - what happens if the admin decides to modify the
underlying ro-branch, which is a distribution chroot seen by all clients as
their '/'? Files may be modified or even deleted on this branch at any time
when the admin does an update.
This is also exactly the reason why none of the kernel unionfs
implementations fit my needs and why I started to work on unionfs-fuse. Only,
the delta approach makes it even worse ;)


Cheers,
Bernd

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

LCID Fire
In reply to this post by Miklos Szeredi
Miklos Szeredi wrote:

> Here is my first try at a "delta" filesystem.  It takes two
> directories, one of which is a read-only base, and the other is where
> the differences are stored.  It stores data, metadata and directory
> modifications without copying up whole files from the read-only
> branch.
>
> The layout of the delta store may look similar to the writable branch
> of a union fs, but this is basically just coincidence (it was easier
> to start out this way).
>
> Currently it's implemented with fuse and it's not optimized at all, so
> performance may suck in some cases.  But I think this is a useful
> concept and a better model, than trying to fit writable branches into
> a union filesystem.
>
> Comments, bug reports are welcome.
Well just as a quick idea - wouldn't it be possible to create a fuse
filesystem on top of a git repository? They are pretty much already
doing deltas all the time...

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Mike Hommey
On Sun, Mar 01, 2009 at 09:09:59AM +0100, LCID Fire wrote:

> Miklos Szeredi wrote:
> > Here is my first try at a "delta" filesystem.  It takes two
> > directories, one of which is a read-only base, and the other is where
> > the differences are stored.  It stores data, metadata and directory
> > modifications without copying up whole files from the read-only
> > branch.
> >
> > The layout of the delta store may look similar to the writable branch
> > of a union fs, but this is basically just coincidence (it was easier
> > to start out this way).
> >
> > Currently it's implemented with fuse and it's not optimized at all, so
> > performance may suck in some cases.  But I think this is a useful
> > concept and a better model, than trying to fit writable branches into
> > a union filesystem.
> >
> > Comments, bug reports are welcome.
> Well just as a quick idea - wouldn't it be possible to create a fuse
> filesystem on top of a git repository? They are pretty much already
> doing deltas all the time...

There is one already, except it hasn't been updated in a while. The
other problem is that git doesn't have a stable API, which makes
building something like this, at least in C, a big problem.

Mike

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Goswin von Brederlow-2
In reply to this post by Bernd Schubert
Bernd Schubert <[hidden email]> writes:

> On Saturday 28 February 2009, Goswin von Brederlow wrote:
>> Miklos Szeredi <[hidden email]> writes:
>> > Here is my first try at a "delta" filesystem.  It takes two
>> > directories, one of which is a read-only base, and the other is where
>> > the differences are stored.  It stores data, metadata and directory
>> > modifications without copying up whole files from the read-only
>> > branch.
>> >
>> > The layout of the delta store may look similar to the writable branch
>> > of a union fs, but this is basically just coincidence (it was easier
>> > to start out this way).
>> >
>> > Currently it's implemented with fuse and it's not optimized at all, so
>> > performance may suck in some cases.  But I think this is a useful
>> > concept and a better model, than trying to fit writable branches into
>> > a union filesystem.
>> >
>> > Comments, bug reports are welcome.
>> >
>> > Thanks,
>> > Miklos
>>
>> Wouldn't it make more sense to start with unionfs-fuse and add a delta
>> feature to it? unionfs-fuse already has all you need except that it
>> will copy the whole file (if on a read-only branch) on write.
>
> Well yes, but it would need to be configurable by the user. IMMHO, the 'delta'
> ansatz has a big problem - what happens if the admin decides to modify the
> underlying ro-branch, which is a distribution chroot seen by all clients as
> their '/'? Any time files may be modified or even deleted on this branch when
> the admin does an update.
> This is also the exactly the problem why none of the kernel unionfs
> implementation fits my needs and why I started to work on unionfs-fuse. Only,
> the delta approach makes it even worse ;)
>
>
> Cheers,
> Bernd

Obviously with delta storage the underlying branches really MUST be
read-only. As for configuration I see no problem in classifying a
branch as RO, RW or D. If the user wants a modifiable RO branch then
he can not have any D branch. Easy enough.

MfG
        Goswin

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Goswin von Brederlow-2
In reply to this post by LCID Fire
LCID Fire <[hidden email]> writes:

> Miklos Szeredi wrote:
>> Here is my first try at a "delta" filesystem.  It takes two
>> directories, one of which is a read-only base, and the other is where
>> the differences are stored.  It stores data, metadata and directory
>> modifications without copying up whole files from the read-only
>> branch.
>>
>> The layout of the delta store may look similar to the writable branch
>> of a union fs, but this is basically just coincidence (it was easier
>> to start out this way).
>>
>> Currently it's implemented with fuse and it's not optimized at all, so
>> performance may suck in some cases.  But I think this is a useful
>> concept and a better model, than trying to fit writable branches into
>> a union filesystem.
>>
>> Comments, bug reports are welcome.
> Well just as a quick idea - wouldn't it be possible to create a fuse
> filesystem on top of a git repository? They are pretty much already
> doing deltas all the time...

http://www.sfgoth.com/~mitch/linux/gitfs/

MfG
        Goswin

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

J. R. Okajima
In reply to this post by Miklos Szeredi

Miklos Szeredi:
> Here is my first try at a "delta" filesystem.  It takes two
        :::
> Comments, bug reports are welcome.

As I wrote before, it is unclear how you implement mmap.
I am afraid you need to copy up the entire file to support MAP_SHARED.

Since you don't care about the inode number, hard links will not work
correctly. For instance,
$ cd /base
$ echo a > a
$ ln a b
$ cd /deltafs
$ echo a >> a
$ cat b

By the way, are you going to review the aufs2-tmp-ro branch which I made
in response to you?


J. R. Okajima

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Goswin von Brederlow-2
[hidden email] writes:

> Miklos Szeredi:
>> Here is my first try at a "delta" filesystem.  It takes two
> :::
>> Comments, bug reports are welcome.
>
> As I wrote before, it is unclear how do you implment mmap.
> I am afrid you need to copyup the entire file to support MAP_SHARED.

Why? When a page is accessed the filesystem gets a read request and
reads it either from the RO branch or the delta branch. Why should it
need to copy up the full file?

MfG
        Goswin

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

J. R. Okajima

Goswin von Brederlow:
> Why? When a page is accessed the filesystem gets a read request and
> reads it either from the RO branch or the delta branch. Why should it
> need to copy up the full file?

For example,
- you have two mmap for a single file
- the first mapping is done, it may map the file on the lower rdonly
  layer
- the other mapping modifies the contents
- can the first mapping see the new content?

Of course, it may depend on the implementation.


J. R. Okajima

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Dave Kleikamp-3
On Tue, 2009-03-03 at 22:11 +0900, [hidden email] wrote:

> Goswin von Brederlow:
> > Why? When a page is accessed the filesystem gets a read request and
> > reads it either from the RO branch or the delta branch. Why should it
> > need to copy up the full file?
>
> For example,
> - you have two mmap for a single file
> - the first mapping is done, it may map the file on the lower rdonly
>   layer
> - the other mapping modifies the contents
> - can the first mapping see the new content?

The lower read-only file would not appear to user-space as the same file
at all.  It would have its own address space.  It clearly would not see
the new content.

There wouldn't be any problem with reading pages from the lower file for
the upper file one at a time as they are faulted.

Shaggy
--
David Kleikamp
IBM Linux Technology Center


------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

J. R. Okajima

Dave Kleikamp:
> The lower read-only file would not appear to user-space as the same file
> at all.  It would have its own address space.  It clearly would not see
> the new content.
>
> There wouldn't be any problem with reading pages from the lower file for
> the upper file one at a time as they are faulted.

I am afraid that I cannot understand fully what you wrote (due to my
poor English).
Do you mean that the entire file copyup will be necessary?


J. R. Okajima

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Dave Kleikamp-3
On Wed, 2009-03-04 at 00:50 +0900, [hidden email] wrote:

> Dave Kleikamp:
> > The lower read-only file would not appear to user-space as the same file
> > at all.  It would have its own address space.  It clearly would not see
> > the new content.
> >
> > There wouldn't be any problem with reading pages from the lower file for
> > the upper file one at a time as they are faulted.
>
> I am afraid that I cannot understand fully what you wrote (due to my
> poor English).
> Do you mean that the entire file copyup will be necessary?

No.  I was saying the opposite.  Nothing that happens to the upper
address space would be visible to the lower address space.  The upper
file could read from the lower file system on-demand as pages are
faulted.  There is no need to copy everything at once.

Shaggy
--
David Kleikamp
IBM Linux Technology Center


------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

J. R. Okajima

Dave Kleikamp:
> No.  I was saying the opposite.  Nothing that happens to the upper
> address space would be visible to the lower address space.  The upper
> file could read from the lower file system on-demand as pages are
> faulted.  There is no need to copy everything at once.

So you mean,
- you have two mmap for a single file
- the first mapping is done, it may map the file on the lower rdonly
  layer
- the other mapping modifies the contents
- when a page in the first mapping is accessed again, the page is read from
  the upper layer.
Right?


J. R. Okajima


------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Dave Kleikamp-3
On Wed, 2009-03-04 at 01:02 +0900, [hidden email] wrote:
> Dave Kleikamp:
> > No.  I was saying the opposite.  Nothing that happens to the upper
> > address space would be visible to the lower address space.  The upper
> > file could read from the lower file system on-demand as pages are
> > faulted.  There is no need to copy everything at once.
>
> So you mean,
> - you have two mmap for a single file

Explain what you mean by a single file.  If there are two mmaps to the
upper file, they will see the same changes.  If one is to the upper
file, and one is to the lower file, they will NOT be mmap'ed to the same
file.

> - the first mapping is done, it may map the file on the lower rdonly
>   layer

This mapping will only ever see the lower contents

> - the other mapping modifies the contents

The upper mapping will contain data pages with modified content.  Only
those pages accessed will be copied (if necessary) from the lower file.

> - when a page in the first mapping is accessed again, the page is read from
>   the upper layer.

No.  The first mapping is not even aware of the second mapping.  It
continues to see the read-only data

> Right?
No

Shaggy
--
David Kleikamp
IBM Linux Technology Center


------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

J. R. Okajima

Dave Kleikamp:
> > - you have two mmap for a single file
>
> Explain what you mean by a single file.  If there are two mmaps to the
> upper file, they will see the same changes.  If one is to the upper
> file, and one is to the lower file, they will NOT be mmap'ed to the same
> file.

A regular file on the lower readonly layer. Not a single block/page is
copied-up yet.


> No.  The first mapping is not even aware of the second mapping.  It
> continues to see the read-only data

Should the first mapping see the latest (modified) content?


J. R. Okajima

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Dave Kleikamp-3
On Wed, 2009-03-04 at 01:19 +0900, [hidden email] wrote:

> Dave Kleikamp:
> > > - you have two mmap for a single file
> >
> > Explain what you mean by a single file.  If there are two mmaps to the
> > upper file, they will see the same changes.  If one is to the upper
> > file, and one is to the lower file, they will NOT be mmap'ed to the same
> > file.
>
> A regular file on the lower readonly layer. Not a single block/page is
> copied-up yet.

I understand the file physically resides on the lower layer.  The delta
file system will present a new file on a different path that initially
has the contents of the lower file.  If you are mmapping the file
presented by the delta file system, then both mmaps will see the same
modified data.

Any mmaps to the original path of the read-only file system will not see
any modified data.

dd if=/dev/zero of=/ro/a bs=4096 count=1000
# /ro/a contains 1000 pages of zeros
mount -o remount,ro /ro # make sure it's read-only
mount -t deltafs /ro /rw
 mmap(addr1, "/ro/a", ...);
 mmap(addr2, "/rw/a", ...);
 mmap(addr3, "/rw/a", ...);

If you modify the file through addr3, addr2 will see the changes.  The
address space for /rw/a is shared.  However, addr1 will only see the
read-only data.  /ro/a will not be modified.

deltafs (does this filesystem have a name yet?) does not need to copy
all 1000 pages from the lower file.  It can read from the lower file as
needed when a page is accessed.  The modified data will be present in
the upper address space's page cache.  I assume it will eventually be
written as a "delta" in the upper file system.

> > No.  The first mapping is not even aware of the second mapping.  It
> > continues to see the read-only data
>
> Should the first mapping see the latest (modified) content?

If you're asking about the case of addr2 and addr3 above, then yes.  For
addr1, no.

--
David Kleikamp
IBM Linux Technology Center


------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

J. R. Okajima

Dave Kleikamp:
> I understand the file physically resides on the lower layer.  The delta
> file system will present a new file on a different path that initially
> has the contents of the lower file.  If you are mmapping the file
> presented by the delta file system, then both mmaps will see the same
> modified data.

Ok, I will read fuse (instead of deltafs.c) again.
Thank you.


J. R. Okajima

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Miklos Szeredi
In reply to this post by Goswin von Brederlow-2
On Sun, 01 Mar 2009, Goswin von Brederlow wrote:
> Obviously with delta storage the underlying branches really MUST be
> read-only. As for configuration I see no problem in classifying a
> branch as RO, RW or D. If the user wants a modifiable RO branch then
> he can not have any D branch. Easy enough.

Not necessarily.  A delta filesystem could support automatic or manual
merges very well.  This is not really possible with a writable union.

Miklos

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
Reply | Threaded
Open this post in threaded view
|

Re: delta filesystem prototype

Goswin von Brederlow-2
In reply to this post by J. R. Okajima
[hidden email] writes:

> Goswin von Brederlow:
>> Why? When a page is accessed the filesystem gets a read request and
>> reads it either from the RO branch or the delta branch. Why should it
>> need to copy up the full file?
>
> For example,
> - you have two mmap for a single file

On open you have to create an internal FD structure that keeps track
of the delta information and the read-only and delta file descriptors
(one or both can be -1). If the same file is opened again you have to
use the same internal FD structure so both share the same delta
information.

> - the first mapping is done, it may map the file on the lower rdonly
>   layer
> - the other mapping modifies the contents
> - can the first mapping see the new content?

Both mmaps would be to the internal FD and thus share any change made
by one of them.

> Of course, it may depend on the implementation.

Sure. If you don't create an internal FD structure and share it
between open calls things will break. Not just mmap.

MfG
        Goswin

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/fuse-devel
123