2025-10-17 20:02:29 +08:00

290 lines
10 KiB
Python

"""
Functions for encoding EBML elements and their values.
Note: this module does not encode Document or MasterElement objects; they are
special cases, handled in `core.py`.
"""
__author__ = "David Randall Stokes, Connor Flanigan"
__copyright__ = "Copyright 2021, Mide Technology Corporation"
__credits__ = "David Randall Stokes, Connor Flanigan, Becker Awqatty, Derek Witt"
__all__ = ['encodeBinary', 'encodeDate', 'encodeFloat', 'encodeId', 'encodeInt',
'encodeSize', 'encodeString', 'encodeUInt', 'encodeUnicode']
import datetime
import sys
import warnings
from .decoding import _struct_uint64, _struct_int64
from .decoding import _struct_float32, _struct_float64
# ==============================================================================
#
# ==============================================================================
# If no length is given, use the platform's size of a float.
DEFAULT_FLOAT_SIZE = 4 if sys.maxsize <= 2147483647 else 8
LENGTH_PREFIXES = [0,
0x80,
0x4000,
0x200000,
0x10000000,
0x0800000000,
0x040000000000,
0x02000000000000,
0x0100000000000000
]
# Translation table for removing invalid EBML string characters (32 < x < 127)
STRING_CHARACTERS = (b"?"*32 + bytearray(range(32, 127))).ljust(256, b'?')
# ==============================================================================
#
# ==============================================================================
def getLength(val):
""" Calculate the encoded length of a value.
@param val: A value to be encoded, generally either an ID or a size for
an EBML element
@return The minimum length, in bytes, that can be used to represent val
"""
# Brute force it. Ugly but faster than calculating it.
if val <= 126:
return 1
elif val <= 16382:
return 2
elif val <= 2097150:
return 3
elif val <= 268435454:
return 4
elif val <= 34359738366:
return 5
elif val <= 4398046511102:
return 6
elif val <= 562949953421310:
return 7
else:
return 8
def encodeSize(val, length=None):
""" Encode an element size.
@param val: The size to encode. If `None`, the EBML 'unknown' size
will be returned (1 or `length` bytes, all bits 1).
@keyword length: An explicit length for the encoded size. If `None`,
the size will be encoded at the minimum length required.
@return: an encoded size for an EBML element.
@raise ValueError: raised if the length is invalid, or the length cannot
be encoded.
"""
if val is None:
# 'unknown' size: all bits 1.
length = 1 if (length is None or length == -1) else length
return b'\xff' * length
length = getLength(val) if (length is None or length == -1) else length
try:
prefix = LENGTH_PREFIXES[length]
return encodeUInt(val | prefix, length)
except (IndexError, TypeError):
raise ValueError("Cannot encode element size %s" % length)
# ==============================================================================
# --- Encoding
# ==============================================================================
def encodeId(eid, length=None):
""" Encode an element ID.
@param eid: The EBML ID to encode.
@keyword length: An explicit length for the encoded data. A `ValueError`
will be raised if the length is too short to encode the value.
@return: The binary representation of ID, left-padded with ``0x00`` if
`length` is not `None`.
@return: The encoded version of the ID.
@raise ValueError: raised if length is less than one or more than 4.
"""
if length is not None:
if length < 1 or length > 4:
raise ValueError("Cannot encode an ID 0x%0x to length %d" %
(eid, length))
return encodeUInt(eid, length)
def encodeUInt(val, length=None):
""" Encode an unsigned integer.
@param val: The unsigned integer value to encode.
@keyword length: An explicit length for the encoded data. A `ValueError`
will be raised if the length is too short to encode the value.
@return: The binary representation of val as an unsigned integer,
left-padded with ``0x00`` if `length` is not `None`.
@raise ValueError: raised if val is longer than length.
"""
if isinstance(val, float):
fval, val = val, int(val)
if fval != val:
warnings.warn('encodeUInt: float value {} encoded as {}'.format(fval, val))
pad = b'\x00'
packed = _struct_uint64.pack(val).lstrip(pad) or pad
if length is None:
return packed
if len(packed) > length:
raise ValueError("Encoded length (%d) greater than specified length "
"(%d)" % (len(packed), length))
return packed.rjust(length, pad)
def encodeInt(val, length=None):
""" Encode a signed integer.
@param val: The signed integer value to encode.
@keyword length: An explicit length for the encoded data. A `ValueError`
will be raised if the length is too short to encode the value.
@return: The binary representation of val as a signed integer,
left-padded with either ```0x00`` (for positive values) or ``0xFF``
(for negative) if `length` is not `None`.
@raise ValueError: raised if val is longer than length.
"""
if isinstance(val, float):
fval, val = val, int(val)
if fval != val:
warnings.warn('encodeInt: float value {} encoded as {}'.format(fval, val))
if val >= 0:
pad = b'\x00'
packed = _struct_int64.pack(val).lstrip(pad) or pad
if packed[0] & 0b10000000:
packed = pad + packed
else:
pad = b'\xff'
packed = _struct_int64.pack(val).lstrip(pad) or pad
if not packed[0] & 0b10000000:
packed = pad + packed
if length is None:
return packed
if len(packed) > length:
raise ValueError("Encoded length (%d) greater than specified length "
"(%d)" % (len(packed), length))
return packed.rjust(length, pad)
def encodeFloat(val, length=None):
""" Encode a floating point value.
@param val: The floating point value to encode.
@keyword length: An explicit length for the encoded data. Must be
`None`, 0, 4, or 8; otherwise, a `ValueError` will be raised.
@return: The binary representation of val as a float, left-padded with
``0x00`` if `length` is not `None`.
@raise ValueError: raised if val not length 0, 4, or 8
"""
if length is None:
if val is None or val == 0.0:
return b''
else:
length = DEFAULT_FLOAT_SIZE
if length == 0:
return b''
if length == 4:
return _struct_float32.pack(val)
elif length == 8:
return _struct_float64.pack(val)
else:
raise ValueError("Cannot encode float of length %d; only 0, 4, or 8" %
length)
def encodeBinary(val, length=None):
""" Encode binary data.
@param val: A string or bytearray containing the data to encode.
@keyword length: An explicit length for the encoded data. A
`ValueError` will be raised if `length` is shorter than the
actual length of the binary data.
@return: The binary representation of value as binary data, left-padded
with ``0x00`` if `length` is not `None`.
@raise ValueError: raised if val is longer than length.
"""
if isinstance(val, str):
val = val.encode('utf_8')
elif val is None:
val = b''
if length is None:
return val
elif len(val) <= length:
return val.ljust(length, b'\x00')
else:
raise ValueError("Length of data (%d) exceeds specified length (%d)" %
(len(val), length))
def encodeString(val, length=None):
""" Encode an ASCII string.
@param val: The string (or bytearray) to encode.
@keyword length: An explicit length for the encoded data. Longer
strings will be truncated.
@keyword length: An explicit length for the encoded data. The result
will be truncated if the length is less than that of the original.
@return: The binary representation of val as a string, truncated or
left-padded with ``0x00`` if `length` is not `None`.
"""
if isinstance(val, str):
val = val.encode('ascii', 'replace')
if length is not None:
val = val[:length]
return encodeBinary(val.translate(STRING_CHARACTERS), length)
def encodeUnicode(val, length=None):
""" Encode a Unicode string.
@param val: The Unicode string to encode.
@keyword length: An explicit length for the encoded data. The result
will be truncated if the length is less than that of the original.
@return: The binary representation of val as a string, truncated or
left-padded with ``0x00`` if `length` is not `None`.
"""
val = val.encode('utf_8')
if length is not None:
val = val[:length]
return encodeBinary(val, length)
def encodeDate(val, length=None):
""" Encode a `datetime` object as an EBML date (i.e. nanoseconds since
2001-01-01T00:00:00).
@param val: The `datetime.datetime` object value to encode.
@keyword length: An explicit length for the encoded data. Must be
`None` or 8; otherwise, a `ValueError` will be raised.
@return: The binary representation of val as an 8-byte dateTime.
@raise ValueError: raised if the length of the input is not 8 bytes.
"""
if length is None:
length = 8
elif length != 8:
raise ValueError("Dates must be of length 8")
if val is None:
val = datetime.datetime.utcnow()
delta = val - datetime.datetime(2001, 1, 1, tzinfo=None)
nanoseconds = (delta.microseconds +
((delta.seconds + (delta.days * 86400)) * 1000000)) * 1000
return encodeInt(nanoseconds, length)