# Source code for qcportal.compression
from __future__ import annotations
import lzma
from enum import Enum
from typing import Optional, Tuple, Any
import msgpack
import zstandard
[docs]
class CompressionEnum(str, Enum):
    """
    Identifies the compression method applied to a piece of data

    Only the method is described here (none, lzma, or zstd), not the compression level.
    Members are also plain strings, so they compare equal to their string values.
    """

    # Data is stored as-is
    none = "none"

    # Compressed via the lzma module
    lzma = "lzma"

    # Compressed via the zstandard module
    zstd = "zstd"
[docs]
def get_compressed_ext(compression_type: str) -> str:
    """
    Returns the file extension corresponding to a compression method

    The argument is compared against the members of CompressionEnum. Since those members are
    also strings, either an enum member or its plain string value is accepted.

    Returns an empty string for no compression, ".xz" for lzma, and ".zstd" for zstd.

    Raises TypeError if the compression type is not recognized.
    """
    if compression_type == CompressionEnum.none:
        return ""
    elif compression_type == CompressionEnum.lzma:
        # lzma.compress() writes the .xz container format unless told otherwise
        return ".xz"
    elif compression_type == CompressionEnum.zstd:
        return ".zstd"
    else:
        # Shouldn't ever happen, unless we change CompressionEnum but not the rest of this function
        raise TypeError(f"Unknown compression type: {compression_type}")
[docs]
def compress(
    input_data: Any,
    compression_type: CompressionEnum = CompressionEnum.zstd,
    compression_level: Optional[int] = None,
) -> Tuple[bytes, CompressionEnum, int]:
    """Packs data with msgpack, then compresses the packed bytes

    When compression_level is None, a default level is picked based on the compression type
    and on the size of the serialized data. With no compression, the level is always reported as 0.

    Returns a tuple of (payload bytes, compression type, compression level actually used). The
    returned level may differ from the compression_level argument.

    Raises TypeError if the compression type is not recognized.
    """
    packed = msgpack.packb(input_data, use_bin_type=True)

    # Nothing to compress: hand back the serialized bytes with a level of 0
    if compression_type == CompressionEnum.none:
        return (packed, compression_type, 0)

    # Payloads above roughly 15MB get a lower default level
    is_large = len(packed) > 15 * 1048576

    # LZMA: default level is 1 for large payloads, 6 otherwise
    if compression_type == CompressionEnum.lzma:
        level = compression_level
        if level is None:
            level = 1 if is_large else 6
        return (lzma.compress(packed, preset=level), compression_type, level)

    # ZStandard: default level is 6 for large payloads, 16 otherwise
    if compression_type == CompressionEnum.zstd:
        level = compression_level
        if level is None:
            level = 6 if is_large else 16
        return (zstandard.compress(packed, level=level), compression_type, level)

    # Shouldn't ever happen, unless we change CompressionEnum but not the rest of this function
    raise TypeError(f"Unknown compression type: {compression_type}")
[docs]
def decompress(compressed_data: bytes, compression_type: CompressionEnum) -> Any:
    """
    Reverses the given compression method, then unpacks the result with msgpack

    Returns the deserialized python object.

    Raises TypeError if the compression type is not recognized.
    """
    # Uncompressed payloads go straight to the deserializer
    if compression_type == CompressionEnum.none:
        return msgpack.unpackb(compressed_data, raw=False)

    if compression_type == CompressionEnum.lzma:
        return msgpack.unpackb(lzma.decompress(compressed_data), raw=False)

    if compression_type == CompressionEnum.zstd:
        return msgpack.unpackb(zstandard.decompress(compressed_data), raw=False)

    # Shouldn't ever happen, unless we change CompressionEnum but not the rest of this function
    raise TypeError(f"Unknown compression type: {compression_type}")