diff --git a/archive/doc.go b/archive/doc.go new file mode 100644 index 0000000000000000000000000000000000000000..f3c4789069fae9ffe4a8dbe2f02f940e98488471 --- /dev/null +++ b/archive/doc.go @@ -0,0 +1,4 @@ +/* +Package archive provides secure functions for working with archive files. +*/ +package archive diff --git a/archive/errors.go b/archive/errors.go new file mode 100644 index 0000000000000000000000000000000000000000..29dff74dbe33ced1cae3fce0bacbc4dd048e6d46 --- /dev/null +++ b/archive/errors.go @@ -0,0 +1,16 @@ +package archive + +import "fmt" + +// InvalidFilenameError is the error returned by extract functions when a file name inside an +// archive contains directory traversals (../) or other sequences that attempt to manipulate the +// final destination path. +type InvalidFilenameError struct { + // Name contains the invalid file name. + Name string +} + +// Error returns a formatted error message. +func (e *InvalidFilenameError) Error() string { + return fmt.Sprintf("invalid filename: %s", e.Name) +} diff --git a/archive/reader.go b/archive/reader.go new file mode 100644 index 0000000000000000000000000000000000000000..f90694d46a6e224f0d5e2dca245ae2325fd341ac --- /dev/null +++ b/archive/reader.go @@ -0,0 +1,24 @@ +package archive + +import ( + "context" + "io" +) + +// CtxReader provides a context-aware io.Reader for extraction functions. +type CtxReader struct { + ctx context.Context + r io.Reader +} + +func (r *CtxReader) Read(p []byte) (int, error) { + if err := r.ctx.Err(); err != nil { + return 0, err + } + return r.r.Read(p) +} + +// NewReader returns a context-aware io.Reader. +func NewReader(ctx context.Context, r io.Reader) io.Reader { + return &CtxReader{ctx, r} +} diff --git a/archive/tar/extract.go b/archive/tar/extract.go new file mode 100644 index 0000000000000000000000000000000000000000..ebd5a23efdc50e64a19c3553b7e4ae9dba1dac3d --- /dev/null +++ b/archive/tar/extract.go @@ -0,0 +1,137 @@ +package tar + +import ( + "archive/tar" + "compress/bzip2" + "compress/gzip" + "context" + "errors" + "io" + "io/fs" + "os" + "path/filepath" + "strings" + + "gitlab.com/gitlab-org/labkit/archive" +) + +const ( + gzipMagicBytes = "\x1f\x8b" + bzip2MagicBytes = "\x42\x5a\x68" +) + +// Extract safely extracts an io.ReaderAt assumed to contain a tarball archive to the given +// destination path. If the destination path does not exist, it is automatically created. +// +// Gzip (tar.gz) and Bzip2 (tar.bz2) compression is automatically detected and decompressed. If no +// compression is detected, it is assumed to be a standard tarball with no compression. +// +// If the archive contains file names with Zip Slip-style directory traversal (../) and other +// sequences that attempt to have the file extracted outside of the intended destination directory, +// the function will stop extraction and return archive.InvalidFilenameError. +// +// The function only supports extraction of directories and regular files. +// Symbolic links, device files, named pipes, unix domain sockets, and other irregular file types +// are ignored as they are considered uncommon and can present a security risk. +// +// The function accepts a context in order to make it possible to cancel or time out the archive +// extraction, however, this is not a bullet proof protection against resource exhaustion attacks +// like Zip Bombs. If this is a concern, it is recommended to isolate archive extraction into its +// own process and limit resources with operating system controls like ulimit. +// +// No clean-up of extraction is performed if an error occurs, or if the context is caneled or times +// out. +// +// References: +// Zip Slip: https://snyk.io/research/zip-slip-vulnerability +// Zip Bomb: https://en.wikipedia.org/wiki/Zip_bomb +// Zip Symlink vulnerability: https://effortlesssecurity.in/2020/08/05/zip-symlink-vulnerability/ +// ulimit: https://ss64.com/bash/ulimit.html +func Extract(ctx context.Context, r io.ReadSeeker, dest string) error { + if err := os.MkdirAll(dest, 0750); err != nil { + return err + } + cleanDest := filepath.Clean(dest) + string(os.PathSeparator) + + stream, err := detectCompression(r) + if err != nil { + return err + } + + tr := tar.NewReader(stream) + + for { + if err := ctx.Err(); err != nil { + return err + } + + header, err := tr.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return err + } + if header.Name == "./" { + continue + } + + path := filepath.Join(dest, header.Name) // #nosec G305 + if !strings.HasPrefix(path, cleanDest) { + return &archive.InvalidFilenameError{Name: header.Name} + } + + if err := writeEntry(ctx, tr, header, path); err != nil { + return err + } + } + return nil +} + +func detectCompression(r io.ReadSeeker) (io.Reader, error) { + peek := make([]byte, 3) + if _, err := r.Read(peek); err != nil { + return nil, err + } + if _, err := r.Seek(0, 0); err != nil { + return nil, err + } + if string(peek) == bzip2MagicBytes { + return bzip2.NewReader(r), nil + } + if strings.HasPrefix(string(peek), gzipMagicBytes) { + return gzip.NewReader(r) + } + return r, nil +} + +func writeEntry(ctx context.Context, tr io.Reader, h *tar.Header, dest string) error { + switch h.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + case tar.TypeReg: + if err := writeFile(ctx, tr, h, dest); err != nil { + return err + } + } + + return nil +} + +func writeFile(ctx context.Context, tr io.Reader, h *tar.Header, dest string) error { + if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { + return err + } + df, err := os.OpenFile(dest, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, fs.FileMode(h.Mode)) + if err != nil { + return err + } + defer df.Close() + + if _, err := io.Copy(df, archive.NewReader(ctx, tr)); err != nil { + return err + } + return nil +} diff --git a/archive/tar/extract_test.go b/archive/tar/extract_test.go new file mode 100644 index 0000000000000000000000000000000000000000..45bd57bd41ed945aca8bb6b51d1af2f72ba9ff5b --- /dev/null +++ b/archive/tar/extract_test.go @@ -0,0 +1,211 @@ +package tar_test + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/labkit/archive" + "gitlab.com/gitlab-org/labkit/archive/tar" +) + +func TestExtractTar(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/simple.tar") + require.NoError(t, err) + defer f.Close() + + require.NoError(t, tar.Extract(context.Background(), f, dest)) + + c, err := os.ReadFile(filepath.Join(dest, "hello.txt")) + require.NoError(t, err) + require.Equal(t, "Hello World!\n", string(c)) + + c, err = os.ReadFile(filepath.Join(dest, "subdir", "hello2.txt")) + require.NoError(t, err) + require.Equal(t, "Hello World!\n", string(c)) +} + +func TestExtractTarGzip(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/simple.tar.gz") + require.NoError(t, err) + defer f.Close() + + require.NoError(t, tar.Extract(context.Background(), f, dest)) + + c, err := os.ReadFile(filepath.Join(dest, "hello.txt")) + require.NoError(t, err) + require.Equal(t, "Hello World!\n", string(c)) + + c, err = os.ReadFile(filepath.Join(dest, "subdir", "hello2.txt")) + require.NoError(t, err) + require.Equal(t, "Hello World!\n", string(c)) +} + +func TestExtractTarBzip2(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/simple.tar.bz2") + require.NoError(t, err) + defer f.Close() + + require.NoError(t, tar.Extract(context.Background(), f, dest)) + + c, err := os.ReadFile(filepath.Join(dest, "hello.txt")) + require.NoError(t, err) + require.Equal(t, "Hello World!\n", string(c)) + + c, err = os.ReadFile(filepath.Join(dest, "subdir", "hello2.txt")) + require.NoError(t, err) + require.Equal(t, "Hello World!\n", string(c)) +} + +func TestExtractTarZipSlip(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/zipslip.tar") + require.NoError(t, err) + defer f.Close() + + err = tar.Extract(context.Background(), f, filepath.Join(dest, "extract")) + + require.IsType(t, &archive.InvalidFilenameError{}, err) + require.Equal(t, "invalid filename: ../zipslip.txt", err.Error()) + _, err = os.Stat(filepath.Join(dest, "zipslip.txt")) + require.ErrorIs(t, err, os.ErrNotExist) +} + +func TestExtractTarGzipZipSlip(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/zipslip.tar.gz") + require.NoError(t, err) + defer f.Close() + + err = tar.Extract(context.Background(), f, filepath.Join(dest, "extract")) + + require.IsType(t, &archive.InvalidFilenameError{}, err) + require.Equal(t, "invalid filename: ../zipslip.txt", err.Error()) + _, err = os.Stat(filepath.Join(dest, "zipslip.txt")) + require.ErrorIs(t, err, os.ErrNotExist) +} + +func TestExtractTarBzip2ZipSlip(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/zipslip.tar.bz2") + require.NoError(t, err) + defer f.Close() + + err = tar.Extract(context.Background(), f, filepath.Join(dest, "extract")) + + require.IsType(t, &archive.InvalidFilenameError{}, err) + require.Equal(t, "invalid filename: ../zipslip.txt", err.Error()) + _, err = os.Stat(filepath.Join(dest, "zipslip.txt")) + require.ErrorIs(t, err, os.ErrNotExist) +} + +func TestExtractTarSymlink(t *testing.T) { + _, err := os.Create("/tmp/labkit-archive-symlink-test.txt") // #nosec G303 + require.NoError(t, err) + defer os.RemoveAll("/tmp/labkit-archive-symlink-test.txt") + + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/symlink.tar") + require.NoError(t, err) + defer f.Close() + + require.NoError(t, tar.Extract(context.Background(), f, dest)) + + _, err = os.Stat(filepath.Join(dest, "labkit-archive-symlink-test.txt")) + require.ErrorIs(t, err, os.ErrNotExist) +} + +func TestExtractTarGzipSymlink(t *testing.T) { + _, err := os.Create("/tmp/labkit-archive-symlink-test.txt") // #nosec G303 + require.NoError(t, err) + defer os.RemoveAll("/tmp/labkit-archive-symlink-test.txt") + + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/symlink.tar.gz") + require.NoError(t, err) + defer f.Close() + + require.NoError(t, tar.Extract(context.Background(), f, dest)) + + _, err = os.Stat(filepath.Join(dest, "labkit-archive-symlink-test.txt")) + require.ErrorIs(t, err, os.ErrNotExist) +} + +func TestExtractTarBzip2Symlink(t *testing.T) { + _, err := os.Create("/tmp/labkit-archive-symlink-test.txt") // #nosec G303 + require.NoError(t, err) + defer os.RemoveAll("/tmp/labkit-archive-symlink-test.txt") + + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/symlink.tar.bz2") + require.NoError(t, err) + defer f.Close() + + require.NoError(t, tar.Extract(context.Background(), f, dest)) + + _, err = os.Stat(filepath.Join(dest, "labkit-archive-symlink-test.txt")) + require.ErrorIs(t, err, os.ErrNotExist) +} + +func TestExtractWithCanceledContext(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/simple.tar") + require.NoError(t, err) + defer f.Close() + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + require.ErrorIs(t, tar.Extract(ctx, f, dest), context.Canceled) +} + +func TestExtractWithTimedOutContext(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/simple.tar") + require.NoError(t, err) + defer f.Close() + + ctx, cancel := context.WithTimeout(context.Background(), -1*time.Second) + defer cancel() + + require.ErrorIs(t, tar.Extract(ctx, f, dest), context.DeadlineExceeded) +} diff --git a/archive/tar/testdata/simple.tar b/archive/tar/testdata/simple.tar new file mode 100644 index 0000000000000000000000000000000000000000..131977164efec9b392d92d4104f41fc928b4e9ad Binary files /dev/null and b/archive/tar/testdata/simple.tar differ diff --git a/archive/tar/testdata/simple.tar.bz2 b/archive/tar/testdata/simple.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..22603beca6e84c40493e1c5d85fd4eb24de33e63 Binary files /dev/null and b/archive/tar/testdata/simple.tar.bz2 differ diff --git a/archive/tar/testdata/simple.tar.gz b/archive/tar/testdata/simple.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9e6345c724aeeb9c8b57900446cdd78402acf291 Binary files /dev/null and b/archive/tar/testdata/simple.tar.gz differ diff --git a/archive/tar/testdata/symlink.tar b/archive/tar/testdata/symlink.tar new file mode 100644 index 0000000000000000000000000000000000000000..2e6b6c99b59e881dd2d925e36be473421f0c2ec2 Binary files /dev/null and b/archive/tar/testdata/symlink.tar differ diff --git a/archive/tar/testdata/symlink.tar.bz2 b/archive/tar/testdata/symlink.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..08361592eee12067756c33c024010540c08747dd Binary files /dev/null and b/archive/tar/testdata/symlink.tar.bz2 differ diff --git a/archive/tar/testdata/symlink.tar.gz b/archive/tar/testdata/symlink.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..49260ded806a3c87ebb1471dd941b27c6f61e641 Binary files /dev/null and b/archive/tar/testdata/symlink.tar.gz differ diff --git a/archive/tar/testdata/zipslip.tar b/archive/tar/testdata/zipslip.tar new file mode 100644 index 0000000000000000000000000000000000000000..528a5b36d03aea837f7bd1a6b9bdb721b37eaa6b Binary files /dev/null and b/archive/tar/testdata/zipslip.tar differ diff --git a/archive/tar/testdata/zipslip.tar.bz2 b/archive/tar/testdata/zipslip.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..53f20b232579bc48a88d98a65a79cd8db2ed5e19 Binary files /dev/null and b/archive/tar/testdata/zipslip.tar.bz2 differ diff --git a/archive/tar/testdata/zipslip.tar.gz b/archive/tar/testdata/zipslip.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..a0e180e0127d4490348f3363edeaa904c6cb7997 Binary files /dev/null and b/archive/tar/testdata/zipslip.tar.gz differ diff --git a/archive/zip/extract.go b/archive/zip/extract.go new file mode 100644 index 0000000000000000000000000000000000000000..446c8b2040cba121e52b42701575b05d1e785cee --- /dev/null +++ b/archive/zip/extract.go @@ -0,0 +1,97 @@ +package zip + +import ( + "archive/zip" + "context" + "io" + "os" + "path/filepath" + "strings" + + "gitlab.com/gitlab-org/labkit/archive" +) + +// Extract safely extracts an io.ReaderAt assumed to contain a zip archive to the given destination +// path. If the destination path does not exist, it is automatically created. +// +// If the archive contains file names with Zip Slip-style directory traversal (../) and other +// sequences that attempt to have the file extracted outside of the intended destination directory, +// the function will stop extraction and return archive.InvalidFilenameError. +// +// The function only supports extraction of directories and regular files. +// Symbolic links, device files, named pipes, unix domain sockets, and other irregular file types +// are ignored as they are considered uncommon and can present a security risk. +// +// The function accepts a context in order to make it possible to cancel or time out the archive +// extraction, however, this is not a bullet proof protection against resource exhaustion attacks +// like Zip Bombs. If this is a concern, it is recommended to isolate archive extraction into its +// own process and limit resources with operating system controls like ulimit. +// +// No clean-up of extraction is performed if an error occurs, or if the context is caneled or times +// out. +// +// References: +// Zip Slip: https://snyk.io/research/zip-slip-vulnerability +// Zip Bomb: https://en.wikipedia.org/wiki/Zip_bomb +// Zip Symlink vulnerability: https://effortlesssecurity.in/2020/08/05/zip-symlink-vulnerability/ +// ulimit: https://ss64.com/bash/ulimit.html +func Extract(ctx context.Context, r io.ReaderAt, size int64, dest string) error { + zr, err := zip.NewReader(r, size) + if err != nil { + return err + } + if err := os.MkdirAll(dest, 0750); err != nil { + return err + } + cleanDest := filepath.Clean(dest) + string(os.PathSeparator) + for _, zf := range zr.File { + if err := ctx.Err(); err != nil { + return err + } + + path := filepath.Join(dest, zf.Name) // #nosec G305 + if !strings.HasPrefix(path, cleanDest) { + return &archive.InvalidFilenameError{Name: zf.Name} + } + + if zf.FileInfo().IsDir() { + if err := os.MkdirAll(path, 0750); err != nil { + return err + } + continue + } + + if !zf.Mode().IsRegular() { + continue + } + + if err := writeFile(ctx, zf, path); err != nil { + return err + } + } + return nil +} + +func writeFile(ctx context.Context, zf *zip.File, dest string) error { + f, err := zf.Open() + if err != nil { + return err + } + defer f.Close() + + if err := os.MkdirAll(filepath.Dir(dest), 0750); err != nil { + return err + } + + wf, err := os.OpenFile(dest, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, zf.Mode()) + if err != nil { + return err + } + defer wf.Close() + + if _, err := io.Copy(wf, archive.NewReader(ctx, f)); err != nil { + return err + } + + return nil +} diff --git a/archive/zip/extract_test.go b/archive/zip/extract_test.go new file mode 100644 index 0000000000000000000000000000000000000000..f0303a39d68139c1b5a51881f29c9785347e8a75 --- /dev/null +++ b/archive/zip/extract_test.go @@ -0,0 +1,108 @@ +package zip_test + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/labkit/archive" + "gitlab.com/gitlab-org/labkit/archive/zip" +) + +func TestExtract(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/simple.zip") + require.NoError(t, err) + defer f.Close() + fi, err := f.Stat() + require.NoError(t, err) + + require.NoError(t, zip.Extract(context.Background(), f, fi.Size(), dest)) + + c, err := os.ReadFile(filepath.Join(dest, "hello.txt")) + require.NoError(t, err) + require.Equal(t, "Hello World!\n", string(c)) + + c, err = os.ReadFile(filepath.Join(dest, "subdir", "hello2.txt")) + require.NoError(t, err) + require.Equal(t, "Hello World!\n", string(c)) +} + +func TestExtractWithZipSlip(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/zipslip.zip") + require.NoError(t, err) + defer f.Close() + fi, err := f.Stat() + require.NoError(t, err) + + err = zip.Extract(context.Background(), f, fi.Size(), filepath.Join(dest, "extract")) + require.IsType(t, &archive.InvalidFilenameError{}, err) + require.Equal(t, "invalid filename: ../zipslip.txt", err.Error()) + _, err = os.Stat(filepath.Join(dest, "zipslip.txt")) + require.ErrorIs(t, err, os.ErrNotExist) +} + +func TestExtractWithSymlink(t *testing.T) { + _, err := os.Create("/tmp/labkit-archive-symlink-test.txt") // #nosec G303 + require.NoError(t, err) + defer os.RemoveAll("/tmp/labkit-archive-symlink-test.txt") + + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/symlink.zip") + require.NoError(t, err) + defer f.Close() + fi, err := f.Stat() + require.NoError(t, err) + + require.NoError(t, zip.Extract(context.Background(), f, fi.Size(), dest)) + + _, err = os.Stat(filepath.Join(dest, "labkit-archive-symlink-test.txt")) + require.ErrorIs(t, err, os.ErrNotExist) +} + +func TestExtractWithCanceledContext(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/simple.zip") + require.NoError(t, err) + defer f.Close() + fi, err := f.Stat() + require.NoError(t, err) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + require.ErrorIs(t, zip.Extract(ctx, f, fi.Size(), dest), context.Canceled) +} + +func TestExtractWithTimedOutContext(t *testing.T) { + dest, err := os.MkdirTemp("", "labkit-test") + require.NoError(t, err) + defer os.RemoveAll(dest) + + f, err := os.Open("testdata/simple.zip") + require.NoError(t, err) + defer f.Close() + fi, err := f.Stat() + require.NoError(t, err) + + ctx, cancel := context.WithTimeout(context.Background(), -1*time.Second) + defer cancel() + + require.ErrorIs(t, zip.Extract(ctx, f, fi.Size(), dest), context.DeadlineExceeded) +} diff --git a/archive/zip/testdata/simple.zip b/archive/zip/testdata/simple.zip new file mode 100644 index 0000000000000000000000000000000000000000..69d774f052d586803bde8b88478324c8bedeaf24 Binary files /dev/null and b/archive/zip/testdata/simple.zip differ diff --git a/archive/zip/testdata/symlink.zip b/archive/zip/testdata/symlink.zip new file mode 100644 index 0000000000000000000000000000000000000000..ba954cacdd8129f3a0d461d7d9406bdc94ea14ef Binary files /dev/null and b/archive/zip/testdata/symlink.zip differ diff --git a/archive/zip/testdata/zipslip.zip b/archive/zip/testdata/zipslip.zip new file mode 100644 index 0000000000000000000000000000000000000000..d449c33b2088e83c4b63a6d297002f3eda9c824d Binary files /dev/null and b/archive/zip/testdata/zipslip.zip differ diff --git a/commitlint.config.js b/commitlint.config.js index f70377828c5d60e5c08fb635e8b8794a4ae0851d..63a6b3ca7ceb23983e0347f0de932878a58d695b 100644 --- a/commitlint.config.js +++ b/commitlint.config.js @@ -36,6 +36,7 @@ module.exports = { 2, 'always', [ + 'archive', 'correlation', 'errortracking', 'log',