290 lines
10 KiB
Python
290 lines
10 KiB
Python
"""
|
|
Functions for encoding EBML elements and their values.
|
|
|
|
Note: this module does not encode Document or MasterElement objects; they are
|
|
special cases, handled in `core.py`.
|
|
"""
|
|
__author__ = "David Randall Stokes, Connor Flanigan"
|
|
__copyright__ = "Copyright 2021, Mide Technology Corporation"
|
|
__credits__ = "David Randall Stokes, Connor Flanigan, Becker Awqatty, Derek Witt"
|
|
|
|
__all__ = ['encodeBinary', 'encodeDate', 'encodeFloat', 'encodeId', 'encodeInt',
|
|
'encodeSize', 'encodeString', 'encodeUInt', 'encodeUnicode']
|
|
|
|
import datetime
|
|
import sys
|
|
import warnings
|
|
|
|
from .decoding import _struct_uint64, _struct_int64
|
|
from .decoding import _struct_float32, _struct_float64
|
|
|
|
# ==============================================================================
|
|
#
|
|
# ==============================================================================
|
|
|
|
# If no length is given, use the platform's size of a float.
|
|
DEFAULT_FLOAT_SIZE = 4 if sys.maxsize <= 2147483647 else 8
|
|
|
|
LENGTH_PREFIXES = [0,
|
|
0x80,
|
|
0x4000,
|
|
0x200000,
|
|
0x10000000,
|
|
0x0800000000,
|
|
0x040000000000,
|
|
0x02000000000000,
|
|
0x0100000000000000
|
|
]
|
|
|
|
# Translation table for removing invalid EBML string characters (32 < x < 127)
|
|
STRING_CHARACTERS = (b"?"*32 + bytearray(range(32, 127))).ljust(256, b'?')
|
|
|
|
# ==============================================================================
|
|
#
|
|
# ==============================================================================
|
|
|
|
|
|
def getLength(val):
|
|
""" Calculate the encoded length of a value.
|
|
@param val: A value to be encoded, generally either an ID or a size for
|
|
an EBML element
|
|
@return The minimum length, in bytes, that can be used to represent val
|
|
"""
|
|
# Brute force it. Ugly but faster than calculating it.
|
|
if val <= 126:
|
|
return 1
|
|
elif val <= 16382:
|
|
return 2
|
|
elif val <= 2097150:
|
|
return 3
|
|
elif val <= 268435454:
|
|
return 4
|
|
elif val <= 34359738366:
|
|
return 5
|
|
elif val <= 4398046511102:
|
|
return 6
|
|
elif val <= 562949953421310:
|
|
return 7
|
|
else:
|
|
return 8
|
|
|
|
|
|
def encodeSize(val, length=None):
|
|
""" Encode an element size.
|
|
|
|
@param val: The size to encode. If `None`, the EBML 'unknown' size
|
|
will be returned (1 or `length` bytes, all bits 1).
|
|
@keyword length: An explicit length for the encoded size. If `None`,
|
|
the size will be encoded at the minimum length required.
|
|
@return: an encoded size for an EBML element.
|
|
@raise ValueError: raised if the length is invalid, or the length cannot
|
|
be encoded.
|
|
"""
|
|
if val is None:
|
|
# 'unknown' size: all bits 1.
|
|
length = 1 if (length is None or length == -1) else length
|
|
return b'\xff' * length
|
|
|
|
length = getLength(val) if (length is None or length == -1) else length
|
|
try:
|
|
prefix = LENGTH_PREFIXES[length]
|
|
return encodeUInt(val | prefix, length)
|
|
except (IndexError, TypeError):
|
|
raise ValueError("Cannot encode element size %s" % length)
|
|
|
|
|
|
# ==============================================================================
|
|
# --- Encoding
|
|
# ==============================================================================
|
|
|
|
def encodeId(eid, length=None):
|
|
""" Encode an element ID.
|
|
|
|
@param eid: The EBML ID to encode.
|
|
@keyword length: An explicit length for the encoded data. A `ValueError`
|
|
will be raised if the length is too short to encode the value.
|
|
@return: The binary representation of ID, left-padded with ``0x00`` if
|
|
`length` is not `None`.
|
|
@return: The encoded version of the ID.
|
|
@raise ValueError: raised if length is less than one or more than 4.
|
|
"""
|
|
if length is not None:
|
|
if length < 1 or length > 4:
|
|
raise ValueError("Cannot encode an ID 0x%0x to length %d" %
|
|
(eid, length))
|
|
return encodeUInt(eid, length)
|
|
|
|
|
|
def encodeUInt(val, length=None):
|
|
""" Encode an unsigned integer.
|
|
|
|
@param val: The unsigned integer value to encode.
|
|
@keyword length: An explicit length for the encoded data. A `ValueError`
|
|
will be raised if the length is too short to encode the value.
|
|
@return: The binary representation of val as an unsigned integer,
|
|
left-padded with ``0x00`` if `length` is not `None`.
|
|
@raise ValueError: raised if val is longer than length.
|
|
"""
|
|
if isinstance(val, float):
|
|
fval, val = val, int(val)
|
|
if fval != val:
|
|
warnings.warn('encodeUInt: float value {} encoded as {}'.format(fval, val))
|
|
|
|
pad = b'\x00'
|
|
packed = _struct_uint64.pack(val).lstrip(pad) or pad
|
|
|
|
if length is None:
|
|
return packed
|
|
if len(packed) > length:
|
|
raise ValueError("Encoded length (%d) greater than specified length "
|
|
"(%d)" % (len(packed), length))
|
|
return packed.rjust(length, pad)
|
|
|
|
|
|
def encodeInt(val, length=None):
|
|
""" Encode a signed integer.
|
|
|
|
@param val: The signed integer value to encode.
|
|
@keyword length: An explicit length for the encoded data. A `ValueError`
|
|
will be raised if the length is too short to encode the value.
|
|
@return: The binary representation of val as a signed integer,
|
|
left-padded with either ```0x00`` (for positive values) or ``0xFF``
|
|
(for negative) if `length` is not `None`.
|
|
@raise ValueError: raised if val is longer than length.
|
|
"""
|
|
if isinstance(val, float):
|
|
fval, val = val, int(val)
|
|
if fval != val:
|
|
warnings.warn('encodeInt: float value {} encoded as {}'.format(fval, val))
|
|
|
|
if val >= 0:
|
|
pad = b'\x00'
|
|
packed = _struct_int64.pack(val).lstrip(pad) or pad
|
|
if packed[0] & 0b10000000:
|
|
packed = pad + packed
|
|
else:
|
|
pad = b'\xff'
|
|
packed = _struct_int64.pack(val).lstrip(pad) or pad
|
|
if not packed[0] & 0b10000000:
|
|
packed = pad + packed
|
|
|
|
if length is None:
|
|
return packed
|
|
if len(packed) > length:
|
|
raise ValueError("Encoded length (%d) greater than specified length "
|
|
"(%d)" % (len(packed), length))
|
|
return packed.rjust(length, pad)
|
|
|
|
|
|
def encodeFloat(val, length=None):
|
|
""" Encode a floating point value.
|
|
|
|
@param val: The floating point value to encode.
|
|
@keyword length: An explicit length for the encoded data. Must be
|
|
`None`, 0, 4, or 8; otherwise, a `ValueError` will be raised.
|
|
@return: The binary representation of val as a float, left-padded with
|
|
``0x00`` if `length` is not `None`.
|
|
@raise ValueError: raised if val not length 0, 4, or 8
|
|
"""
|
|
if length is None:
|
|
if val is None or val == 0.0:
|
|
return b''
|
|
else:
|
|
length = DEFAULT_FLOAT_SIZE
|
|
|
|
if length == 0:
|
|
return b''
|
|
if length == 4:
|
|
return _struct_float32.pack(val)
|
|
elif length == 8:
|
|
return _struct_float64.pack(val)
|
|
else:
|
|
raise ValueError("Cannot encode float of length %d; only 0, 4, or 8" %
|
|
length)
|
|
|
|
|
|
def encodeBinary(val, length=None):
|
|
""" Encode binary data.
|
|
|
|
@param val: A string or bytearray containing the data to encode.
|
|
@keyword length: An explicit length for the encoded data. A
|
|
`ValueError` will be raised if `length` is shorter than the
|
|
actual length of the binary data.
|
|
@return: The binary representation of value as binary data, left-padded
|
|
with ``0x00`` if `length` is not `None`.
|
|
@raise ValueError: raised if val is longer than length.
|
|
"""
|
|
if isinstance(val, str):
|
|
val = val.encode('utf_8')
|
|
elif val is None:
|
|
val = b''
|
|
|
|
if length is None:
|
|
return val
|
|
elif len(val) <= length:
|
|
return val.ljust(length, b'\x00')
|
|
else:
|
|
raise ValueError("Length of data (%d) exceeds specified length (%d)" %
|
|
(len(val), length))
|
|
|
|
|
|
def encodeString(val, length=None):
|
|
""" Encode an ASCII string.
|
|
|
|
@param val: The string (or bytearray) to encode.
|
|
@keyword length: An explicit length for the encoded data. Longer
|
|
strings will be truncated.
|
|
@keyword length: An explicit length for the encoded data. The result
|
|
will be truncated if the length is less than that of the original.
|
|
@return: The binary representation of val as a string, truncated or
|
|
left-padded with ``0x00`` if `length` is not `None`.
|
|
"""
|
|
if isinstance(val, str):
|
|
val = val.encode('ascii', 'replace')
|
|
|
|
if length is not None:
|
|
val = val[:length]
|
|
|
|
return encodeBinary(val.translate(STRING_CHARACTERS), length)
|
|
|
|
|
|
def encodeUnicode(val, length=None):
|
|
""" Encode a Unicode string.
|
|
|
|
@param val: The Unicode string to encode.
|
|
@keyword length: An explicit length for the encoded data. The result
|
|
will be truncated if the length is less than that of the original.
|
|
@return: The binary representation of val as a string, truncated or
|
|
left-padded with ``0x00`` if `length` is not `None`.
|
|
"""
|
|
val = val.encode('utf_8')
|
|
|
|
if length is not None:
|
|
val = val[:length]
|
|
|
|
return encodeBinary(val, length)
|
|
|
|
|
|
def encodeDate(val, length=None):
|
|
""" Encode a `datetime` object as an EBML date (i.e. nanoseconds since
|
|
2001-01-01T00:00:00).
|
|
|
|
@param val: The `datetime.datetime` object value to encode.
|
|
@keyword length: An explicit length for the encoded data. Must be
|
|
`None` or 8; otherwise, a `ValueError` will be raised.
|
|
@return: The binary representation of val as an 8-byte dateTime.
|
|
@raise ValueError: raised if the length of the input is not 8 bytes.
|
|
"""
|
|
if length is None:
|
|
length = 8
|
|
elif length != 8:
|
|
raise ValueError("Dates must be of length 8")
|
|
|
|
if val is None:
|
|
val = datetime.datetime.utcnow()
|
|
|
|
delta = val - datetime.datetime(2001, 1, 1, tzinfo=None)
|
|
nanoseconds = (delta.microseconds +
|
|
((delta.seconds + (delta.days * 86400)) * 1000000)) * 1000
|
|
return encodeInt(nanoseconds, length)
|