Skip to content

Python:tarfile

Python의 tar archive library.

Examples

How to extract an entire tar archive to the current working directory:

import tarfile
# The with-block closes the archive even if extraction raises part-way through.
# NOTE: extractall() writes whatever paths the archive contains; only run it on
# archives that come from a trusted source.
with tarfile.open("sample.tar.gz") as tar:
    tar.extractall()

How to extract a subset of a tar archive with TarFile.extractall() using a generator function instead of a list:

import os
import tarfile

def py_files(members):
    """Yield, in order, the members whose name has a ``.py`` extension.

    ``members`` is any iterable of objects exposing a ``name`` attribute.
    """
    for member in members:
        _, extension = os.path.splitext(member.name)
        if extension != ".py":
            continue
        yield member

# The with-block closes the archive even if extraction raises part-way through.
with tarfile.open("sample.tar.gz") as tar:
    # py_files(tar) is a generator over the archive's members; only the
    # members it yields are extracted.
    tar.extractall(members=py_files(tar))

How to create an uncompressed tar archive from a list of filenames:

import tarfile
# Mode "w" creates the archive without compression.
tar = tarfile.open("sample.tar", "w")
try:
    for name in ["foo", "bar", "quux"]:
        tar.add(name)
finally:
    # Release the file handle even if add() raises (e.g. a listed file is missing).
    tar.close()

The same example using the with statement:

import tarfile
# The with-block closes the archive automatically when the body finishes.
with tarfile.open("sample.tar", "w") as archive:
    for filename in ("foo", "bar", "quux"):
        archive.add(filename)

How to read a gzip compressed tar archive and display some member information:

import tarfile
# "r:gz" opens the archive for reading with gzip decompression; the with-block
# closes it even if iteration fails.
with tarfile.open("sample.tar.gz", "r:gz") as tar:
    for tarinfo in tar:
        if tarinfo.isreg():
            kind = "a regular file."
        elif tarinfo.isdir():
            kind = "a directory."
        else:
            kind = "something else."
        # Python 3 requires the print() call form; one line is emitted per member.
        print(f"{tarinfo.name} is {tarinfo.size} bytes in size and is {kind}")

How to create an archive and reset the user information using the filter parameter in TarFile.add():

import tarfile
def reset(tarinfo):
    """Set the member's owner to root (uid/gid 0) and return the same object."""
    tarinfo.uid = 0
    tarinfo.gid = 0
    tarinfo.uname = "root"
    tarinfo.gname = "root"
    return tarinfo
# "w:gz" writes a gzip-compressed archive; the with-block closes it even on error.
with tarfile.open("sample.tar.gz", "w:gz") as tar:
    # reset() rewrites the ownership fields of what gets added.
    tar.add("foo", filter=reset)

recc tar_archive.py file

recc에서 사용했던 tar 관련 유틸리티 함수 모음:

# -*- coding: utf-8 -*-

import os
from io import BytesIO
from tarfile import DIRTYPE, REGTYPE, TarFile, TarInfo
from tarfile import open as tar_open
from typing import Optional


def compress_tar(
    path: str,
    mode="w",
    archive_name: Optional[str] = None,
    recursive=True,
) -> bytes:
    """Pack ``path`` into an in-memory tar archive and return the raw bytes.

    Args:
        path: File or directory to add to the archive.
        mode: ``tarfile`` open mode used for writing (``"w"`` by default).
        archive_name: Forwarded to ``TarFile.add`` as ``arcname``.
        recursive: Forwarded to ``TarFile.add`` as ``recursive``.

    Returns:
        The full contents of the archive buffer.
    """
    buffer = BytesIO()
    with tar_open(mode=mode, fileobj=buffer) as archive:
        archive.add(path, arcname=archive_name, recursive=recursive)
    # The buffer is read only after the with-block has closed the archive.
    return buffer.getvalue()


def compress_bytes(name: str, data: bytes, mode="w") -> bytes:
    """Build an in-memory tar archive holding a single member and return its bytes.

    Args:
        name: Member name stored in the archive.
        data: Payload written as the member's content.
        mode: ``tarfile`` open mode used for writing (``"w"`` by default).

    Returns:
        The full contents of the archive buffer.
    """
    buffer = BytesIO()
    with tar_open(mode=mode, fileobj=buffer) as archive:
        member = TarInfo(name=name)
        # The header size is set to the payload length before the member is added.
        member.size = len(data)
        archive.addfile(member, fileobj=BytesIO(data))
    return buffer.getvalue()


def remove_first_slash(path: str) -> str:
    """Return ``path`` without its leading ``/``; at most one slash is removed."""
    if not path.startswith("/"):
        return path
    return path[1:]


def insert_last_slash(path: str) -> str:
    """Return ``path`` with a trailing ``/``, appending one only if it is missing."""
    if path.endswith("/"):
        return path
    return path + "/"


def file_info(path: str, size: int = 0, mode: int = 0o644) -> TarInfo:
    """Create a regular-file ``TarInfo`` header for ``path``.

    Args:
        path: Member path; one leading ``/`` is stripped before use.
        size: Value stored in the header's ``size`` field.
        mode: Permission bits stored in the header.

    Returns:
        A ``TarInfo`` of type ``REGTYPE``.
    """
    member_name = remove_first_slash(path)
    entry = TarInfo(member_name)
    entry.mode = mode
    entry.size = size
    entry.type = REGTYPE
    return entry


def dir_info(path: str, mode: int = 0o644) -> TarInfo:
    """Create a directory ``TarInfo`` header for ``path``.

    The member name has one leading ``/`` removed and is given a trailing ``/``.

    Args:
        path: Directory path to describe.
        mode: Permission bits stored in the header.
            NOTE(review): the 0o644 default carries no execute/search bits;
            confirm that is intended for directory entries.

    Returns:
        A ``TarInfo`` of type ``DIRTYPE``.
    """
    relative = remove_first_slash(path)
    entry = TarInfo(insert_last_slash(relative))
    entry.mode = mode
    entry.type = DIRTYPE
    return entry


def add_dirs(tar: TarFile, path: str, mode: int = 0o644) -> None:
    """Add a directory entry to ``tar`` for every level of ``path``.

    For ``"a/b/c"`` the entries ``a/``, ``a/b/`` and ``a/b/c/`` are added in
    that order, so each directory is preceded by its parents.

    Args:
        tar: Archive that receives the directory entries via ``addfile``.
        path: Directory path; it is normalized with ``os.path.normpath`` first.
        mode: Permission bits passed to ``dir_info`` for every entry.
    """
    # os.path.split() yields only a (head, tail) pair, which would skip the
    # intermediate ancestors of paths deeper than two levels, so every
    # component is walked explicitly. Member names are built with "/".
    normalized = os.path.normpath(path).replace(os.sep, "/")
    parent_dir = ""
    for directory in normalized.split("/"):
        if not directory:
            # Empty component produced by a leading "/" on an absolute path.
            continue
        parent_dir = parent_dir + "/" + directory if parent_dir else directory
        tar.addfile(dir_info(parent_dir, mode))

TestCase

# -*- coding: utf-8 -*-

import os
from io import BytesIO
from tarfile import open as tar_open
from unittest import TestCase, main

from recc import archive
from recc.archive.tar_archive import compress_tar


class TarArchiveTestCase(TestCase):
    """Tests ``compress_tar`` using the ``recc.archive`` package directory as input."""

    def test_compress(self):
        """Archive the package directory under a prefix and check the member names."""
        prefix = "prefix/path/node"
        package_init = os.path.abspath(archive.__file__)
        package_dir = os.path.abspath(os.path.dirname(package_init))

        payload = compress_tar(package_dir, mode="w", archive_name=prefix)
        self.assertLess(0, len(payload))
        self.assertIsInstance(payload, bytes)

        # File names inside the package directory that must appear in the archive.
        module_files = [os.path.basename(archive.tar_archive.__file__)]

        with tar_open(fileobj=BytesIO(payload), mode="r") as tar:
            member_names = tar.getnames()
            self.assertIn(prefix, member_names)
            self.assertIn(f"{prefix}/__init__.py", member_names)
            for file_name in module_files:
                self.assertIn(f"{prefix}/{file_name}", member_names)


# Run the unittest entry point when this file is executed directly as a script.
if __name__ == "__main__":
    main()

See also

Favorite site