Python:tarfile
Examples
How to extract an entire tar archive to the current working directory:
import tarfile
with tarfile.open("sample.tar.gz") as tar:
    tar.extractall()
How to extract a subset of a tar archive with TarFile.extractall() using a generator function instead of a list:
import os
import tarfile
def py_files(members):
    """Yield only the members whose name has a ``.py`` extension.

    ``members`` is any iterable of objects exposing a ``name`` attribute
    (for example an open ``TarFile``, which iterates over ``TarInfo``
    objects). Members are yielded lazily, in their original order.
    """
    for tarinfo in members:
        # splitext() returns (root, ext); ext includes the leading dot.
        if os.path.splitext(tarinfo.name)[1] == ".py":
            yield tarinfo
# The context manager closes the archive even if extraction raises.
# NOTE: only extract archives you trust; for untrusted input, see the
# extraction filters described in the tarfile documentation.
with tarfile.open("sample.tar.gz") as tar:
    tar.extractall(members=py_files(tar))
How to create an uncompressed tar archive from a list of filenames:
import tarfile
# Mode "w" creates an uncompressed archive.
tar = tarfile.open("sample.tar", "w")
try:
    for name in ["foo", "bar", "quux"]:
        tar.add(name)
finally:
    # Close explicitly so the archive is finalized even if add() fails.
    tar.close()
The same example using the with statement:
import tarfile
# The with statement closes the archive automatically on exit.
with tarfile.open("sample.tar", "w") as tar:
    for name in ["foo", "bar", "quux"]:
        tar.add(name)
How to read a gzip compressed tar archive and display some member information:
import tarfile
# "r:gz" opens the archive for reading with gzip decompression.
with tarfile.open("sample.tar.gz", "r:gz") as tar:
    for tarinfo in tar:
        # end=" " keeps the member description on a single output line.
        print(tarinfo.name, "is", tarinfo.size, "bytes in size and is", end=" ")
        if tarinfo.isreg():
            print("a regular file.")
        elif tarinfo.isdir():
            print("a directory.")
        else:
            print("something else.")
How to create an archive and reset the user information using the filter parameter in TarFile.add():
import tarfile
def reset(tarinfo):
    """Overwrite the owner information of ``tarinfo`` with root's.

    Sets the numeric user and group ids to 0 and both names to "root",
    then returns the same (mutated) object, as expected from a callable
    passed as ``filter`` to ``TarFile.add()``.
    """
    tarinfo.uid = tarinfo.gid = 0
    tarinfo.uname = tarinfo.gname = "root"
    return tarinfo
# "w:gz" writes a gzip-compressed archive; the context manager closes it
# even if add() raises.
with tarfile.open("sample.tar.gz", "w:gz") as tar:
    tar.add("foo", filter=reset)
recc tar_archive.py file
recc에 사용했던 tar관련 유틸리티 함수 모음:
# -*- coding: utf-8 -*-
import os
from io import BytesIO
from tarfile import DIRTYPE, REGTYPE, TarFile, TarInfo
from tarfile import open as tar_open
from typing import Optional
def compress_tar(
    path: str,
    mode: str = "w",
    archive_name: Optional[str] = None,
    recursive: bool = True,
) -> bytes:
    """Pack the file or directory at ``path`` into an in-memory tar archive.

    Args:
        path: Filesystem path to add to the archive.
        mode: ``tarfile.open`` write mode (e.g. ``"w"`` or ``"w:gz"``).
        archive_name: Alternative name for the entry inside the archive;
            ``None`` keeps the name derived from ``path``.
        recursive: Whether a directory's contents are added as well.

    Returns:
        The raw bytes of the finished archive.
    """
    file_object = BytesIO()
    with tar_open(fileobj=file_object, mode=mode) as tar:
        tar.add(path, arcname=archive_name, recursive=recursive)
    # Read the buffer only after the archive has been closed, so everything
    # written on close is included in the result.
    return file_object.getvalue()
def compress_bytes(name: str, data: bytes, mode: str = "w") -> bytes:
    """Build an in-memory tar archive holding ``data`` as one member.

    Args:
        name: Member name to use inside the archive.
        data: Content of the member.
        mode: ``tarfile.open`` write mode (e.g. ``"w"`` or ``"w:gz"``).

    Returns:
        The raw bytes of the finished archive.
    """
    file_object = BytesIO()
    with tar_open(fileobj=file_object, mode=mode) as tar:
        info = TarInfo(name=name)
        # addfile() reads exactly info.size bytes from the given stream.
        info.size = len(data)
        tar.addfile(info, BytesIO(data))
    # Read the buffer only after the archive has been closed.
    return file_object.getvalue()
def remove_first_slash(path: str) -> str:
    """Return ``path`` without its first character if that is a ``/``.

    Only a single leading slash is removed; any other path is returned
    unchanged.
    """
    return path[1:] if path.startswith("/") else path
def insert_last_slash(path: str) -> str:
    """Return ``path`` with a trailing ``/``, appending one only if missing."""
    return path if path.endswith("/") else path + "/"
def file_info(path: str, size: int = 0, mode: int = 0o644) -> TarInfo:
    """Create a ``TarInfo`` describing a regular file.

    Args:
        path: Member name; a single leading ``/`` is stripped.
        size: Size of the file content in bytes.
        mode: Permission bits stored for the member.

    Returns:
        A new ``TarInfo`` of type ``REGTYPE``.
    """
    info = TarInfo(remove_first_slash(path))
    info.type = REGTYPE
    info.size = size
    info.mode = mode
    return info
def dir_info(path: str, mode: int = 0o644) -> TarInfo:
    """Create a ``TarInfo`` describing a directory.

    Args:
        path: Member name; a single leading ``/`` is stripped and a
            trailing ``/`` is appended if missing.
        mode: Permission bits stored for the member.
            NOTE(review): the 0o644 default carries no execute bit, which
            directories normally need in order to be traversed; confirm
            that this default is intended.

    Returns:
        A new ``TarInfo`` of type ``DIRTYPE``.
    """
    info = TarInfo(insert_last_slash(remove_first_slash(path)))
    info.type = DIRTYPE
    info.mode = mode
    return info
def add_dirs(tar: TarFile, path: str, mode: int = 0o644) -> None:
    """Add a directory entry to ``tar`` for ``path`` and every ancestor.

    For ``"a/b/c"`` the entries ``a/``, ``a/b/`` and ``a/b/c/`` are added,
    in that order.

    Args:
        tar: Archive opened for writing.
        path: Directory path; it is normalized first, and empty or ``"."``
            components (e.g. from a leading slash or an empty path) are
            skipped.
        mode: Permission bits stored for every added directory entry.
    """
    # os.path.split() only yields (head, tail), which would skip the upper
    # ancestors of deeper paths, so walk every component instead. Member
    # names are built with "/" regardless of the platform separator.
    normalized = os.path.normpath(path).replace(os.sep, "/")
    parent_dir = ""
    for directory in normalized.split("/"):
        if directory in ("", "."):
            continue
        parent_dir = f"{parent_dir}/{directory}" if parent_dir else directory
        tar.addfile(dir_info(parent_dir, mode))
TestCase
# -*- coding: utf-8 -*-
import os
from io import BytesIO
from tarfile import open as tar_open
from unittest import TestCase, main
from recc import archive
from recc.archive.tar_archive import compress_tar
class TarArchiveTestCase(TestCase):
    """Tests for the in-memory tar helpers in ``recc.archive.tar_archive``."""

    def test_compress(self):
        """Archive the ``recc.archive`` package directory and check its names."""
        prefix = "prefix/path/node"
        node_init_path = os.path.abspath(archive.__file__)
        node_dir = os.path.abspath(os.path.dirname(node_init_path))

        node_data = compress_tar(node_dir, mode="w", archive_name=prefix)
        self.assertIsInstance(node_data, bytes)
        self.assertGreater(len(node_data), 0)

        # File names expected to be present under the archive prefix.
        archive_files = [os.path.basename(archive.tar_archive.__file__)]

        with tar_open(fileobj=BytesIO(node_data), mode="r") as tar:
            names = tar.getnames()

        self.assertIn(prefix, names)
        self.assertIn(f"{prefix}/__init__.py", names)
        for f in archive_files:
            self.assertIn(f"{prefix}/{f}", names)
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    main()