306 lines
10 KiB
Python
306 lines
10 KiB
Python
"""
|
|
Classes for various means of encoding/decoding binary data to/from XML.
|
|
|
|
Note: the class docstrings will be shown in the `ebml2xml` help text.
|
|
"""
|
|
|
|
import base64
|
|
from io import BytesIO, StringIO
|
|
|
|
|
|
# ==============================================================================
|
|
#
|
|
# ==============================================================================
|
|
|
|
class BinaryCodec:
    """ Base class for binary encoders/decoders, rendering and reading
        `BinaryElement` contents as text.

        :cvar NAME: The codec's name, written to the rendered XML as
            the `encoding` attribute. Also used as the `--encoding`
            argument in the command-line tools. Must be unique, and
            should be lowercase.
        :type NAME: str
    """
    # Subclasses override this with their own unique, lowercase identifier.
    NAME = ""

    def __init__(self, **kwargs):
        """ Create the codec. The base implementation accepts (and
            discards) any keyword arguments; subclasses may define their
            own optional keyword arguments, or skip overriding this
            method entirely.
        """

    def encode(self, data, stream=None, indent='', offset=0, **kwargs):
        """ Render binary data as text. Must be overridden; the base
            implementation always raises `NotImplementedError`.

            :param data: The binary payload of an EBML `BinaryElement`.
            :param stream: Optional stream that receives the encoded
                text. Every implementation should accept and honor it.
            :param indent: Text placed in front of each row of output.
                Relevant to codecs that break their output into rows.
            :param offset: Position of the source EBML element within
                its file, for codecs that emit line numbers/positions.
            :returns: The encoded text if `stream` is not given;
                otherwise, the number of bytes written to `stream`.
            :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    @classmethod
    def decode(cls, data, stream=None):
        """ Turn text (e.g., read from an XML file) back into binary
            data. Must be overridden; the base implementation always
            raises `NotImplementedError`. Because this is a
            `classmethod`, implementations should cope with text
            produced by any encoder configuration (with or without
            indentation, line breaks, offset metadata, etc.).

            :param data: The text data from an XML file.
            :param stream: Optional stream that receives the decoded
                binary data.
            :returns: The decoded binary data if `stream` is not given;
                otherwise, the number of bytes written to `stream`.
            :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
|
|
|
|
|
|
# ==============================================================================
|
|
#
|
|
# ==============================================================================
|
|
|
|
class Base64Codec(BinaryCodec):
    """ Encoder/decoder for binary data as base64 formatted text to/from text.
    """
    NAME = "base64"

    def __init__(self, cols=76, **kwargs):
        """ Constructor.

            :param cols: The length of each line of base64 data, excluding
                any indentation specified when encoding. If 0 or `None`,
                data will be written as a single continuous block with no
                newlines.

            Additional keyword arguments will be accepted (to maintain
            compatibility with other codecs) but ignored.
        """
        self.cols = cols

    def encode(self, data, stream=None, indent='', **kwargs):
        """ Convert binary data to base64 text.

            :param data: The binary data from an EBML `BinaryElement`.
                A `str` is accepted and encoded as UTF-8 first.
            :param stream: An optional stream to which to write the encoded
                data.
            :param indent: Indentation before each row of text (`str` or
                `bytes`). Used if the codec was instantiated with `cols`
                specified.
            :returns: If no `stream`, the encoded data as text. If `stream`,
                the value returned by writing the text to it (the number
                of characters written, for standard text streams).

            Additional keyword arguments will be accepted (to maintain
            compatibility with other codecs) but ignored.
        """
        if isinstance(indent, bytes):
            indent = indent.decode()
        if isinstance(data, str):
            data = data.encode('utf8')

        text = base64.encodebytes(data).decode()

        if self.cols == 76:
            # `encodebytes()` already breaks its output into 76-character
            # lines (each ending in a newline); reuse those line breaks.
            text = "\n" + text
            if indent:
                text = text.replace('\n', '\n' + indent)
        else:
            text = text.replace('\n', '')
            # A falsy `cols` (0 or `None`) means one continuous block.
            # Testing only for `None` would let 0 through to `range()`
            # as a step, which raises `ValueError`.
            if self.cols:
                text = ''.join(
                    '\n' + indent + text[start:start + self.cols]
                    for start in range(0, len(text), self.cols))

        if stream is None:
            return text
        return stream.write(text)

    @classmethod
    def decode(cls, data, stream=None):
        """ Decode binary data in base64 (e.g., from an XML file). Note: this
            is a `classmethod`, and works regardless of how the encoded data
            was formatted (e.g., with indentations and/or line breaks).

            :param data: The base64 data from an XML file (`str` or
                `bytes`).
            :param stream: A stream to which to write the decoded data.
            :returns: If no `stream`, the decoded binary data. If `stream`,
                the number of bytes written.
        """
        if not data:
            # Nothing to decode, so nothing is written to `stream`.
            return b'' if stream is None else 0

        if isinstance(data, str):
            data = data.encode('utf8')

        decoded = base64.decodebytes(data)

        if stream is None:
            return decoded
        return stream.write(decoded)
|
|
|
|
|
|
# ==============================================================================
|
|
#
|
|
# ==============================================================================
|
|
|
|
class HexCodec(BinaryCodec):
    """ Encoder/decoder for binary data as hexadecimal format to/from text.
        Encoded text is multiple columns of bytes/words (default is 16 columns,
        2 bytes per column), with an optional file offset at the start of each
        row.
    """
    # The name shown in the encoded XML element's `encoding` attribute
    NAME = "hex"

    def __init__(self, width=2, cols=32, offsets=True, **kwargs):
        """ Constructor.

            :param width: The number of bytes displayed per column when
                encoding. If 0 or `None`, no spaces are inserted between
                bytes.
            :param cols: The number of bytes written per row when encoding
                (the default of 32, with a `width` of 2, produces 16
                two-byte columns). If 0 or `None`, data will be written as
                a single continuous block with no newlines.
            :param offsets: If `True`, each line will start with its offset
                (in decimal). Applicable if `cols` is a non-zero number.

            Additional keyword arguments will be accepted (to maintain
            compatibility with other codecs) but ignored.
        """
        self.width = width
        self.cols = cols
        self.offsets = bool(offsets and cols)

    def encode(self, data, stream=None, offset=0, indent='', **kwargs):
        """ Convert binary data to hexadecimal text.

            :param data: The binary data from an EBML `BinaryElement`.
                A `str` is accepted and encoded as UTF-8 first.
            :param stream: An optional stream to which to write the encoded
                data.
            :param offset: A starting number for the displayed offsets column.
                For showing the data's offset in an EBML file.
            :param indent: Indentation before each row of hex text (`str`
                or `bytes`).
            :returns: If no `stream`, the encoded data as text. If `stream`,
                the number of bytes written.
        """
        if not isinstance(indent, str):
            indent = indent.decode()
        if isinstance(data, str):
            # Iterating a `str` yields characters, which cannot be
            # formatted with `{:02x}`; convert to bytes first.
            data = data.encode('utf8')

        out = StringIO() if stream is None else stream
        write = out.write

        cols = self.cols
        width = self.width
        newline = bool(cols)
        offsets = self.offsets and newline

        numbytes = 0
        for i, b in enumerate(data):
            if newline and not i % cols:
                # Start of a row: line break, indentation, optional offset.
                numbytes += write('\n')
                numbytes += write(indent)
                if offsets:
                    numbytes += write('[{:06d}] '.format(i + offset))
            elif width and not i % width:
                # Column boundary within a row. When `cols` is unset this
                # also fires at i == 0, so single-block output starts with
                # a space. A falsy `width` disables separators instead of
                # dividing by zero.
                numbytes += write(' ')
            numbytes += write('{:02x}'.format(b))

        if stream is None:
            return out.getvalue()

        return numbytes

    @classmethod
    def decode(cls, data, stream=None):
        """ Decode binary data in hexadecimal (e.g., from an XML file). Note:
            this is a `classmethod`, and works regardless of how the encoded
            data was formatted (e.g., number of columns, with or without
            offsets, etc.).

            :param data: The hexadecimal data from an XML file (`str` or
                `bytes`).
            :param stream: A stream to which to write the decoded data.
            :returns: If no `stream`, the decoded binary data. If `stream`,
                the number of bytes written.
        """
        if not data:
            # Nothing to decode, so nothing is written to `stream`.
            return b'' if stream is None else 0

        if isinstance(data, str):
            data = data.encode('utf8')

        out = BytesIO() if stream is None else stream

        numbytes = 0
        for word in data.split():
            if b'[' in word or b']' in word:
                # Bracketed words are row offsets, not data.
                continue
            # Each pair of hex digits is one byte; write a word at a time.
            numbytes += out.write(b''.join(
                int(word[i:i+2], 16).to_bytes(1, 'big')
                for i in range(0, len(word), 2)))

        if stream is None:
            return out.getvalue()

        return numbytes
|
|
|
|
|
|
# ==============================================================================
|
|
#
|
|
# ==============================================================================
|
|
|
|
class IgnoreCodec(BinaryCodec):
    """ Suppresses writing binary data as text.
    """
    NAME = "ignore"

    @staticmethod
    def encode(data, stream=None, **kwargs):
        """ Discard the binary data instead of rendering it as text.

            :param data: The binary data from an EBML `BinaryElement`.
                Ignored.
            :param stream: An optional stream. Nothing is written to it.
            :returns: If no `stream`, an empty string. If `stream`, 0
                (the number of bytes written).

            Additional keyword arguments will be accepted (to maintain
            compatibility with other codecs) but ignored.
        """
        # Test identity, not truthiness, to match the other codecs: a
        # stream object that evaluates as false is still a stream.
        return '' if stream is None else 0

    @staticmethod
    def decode(data, stream=None, **kwargs):
        """ Discard the text instead of decoding it to binary data.

            :param data: The text data from an XML file. Ignored.
            :param stream: An optional stream. Nothing is written to it.
            :returns: If no `stream`, empty `bytes`. If `stream`, 0
                (the number of bytes written).

            Additional keyword arguments will be accepted but ignored.
        """
        return b'' if stream is None else 0
|
|
|
|
|
|
# ==============================================================================
|
|
#
|
|
# ==============================================================================
|
|
|
|
# Registry of the available codecs, keyed by name. Dictionaries preserve
# insertion order in Python 3.7 and later, so the first entry is the one the
# CLI will use as its default. User-implemented codecs should be added to
# this dictionary.
BINARY_CODECS = {
    'base64': Base64Codec,
    'hex': HexCodec,
    'ignore': IgnoreCodec,
}
|