2025-10-17 20:02:29 +08:00

306 lines
10 KiB
Python

"""
Classes for various means of encoding/decoding binary data to/from XML.
Note: the class docstrings will be shown in the `ebml2xml` help text.
"""
import base64
from io import BytesIO, StringIO
# ==============================================================================
#
# ==============================================================================
class BinaryCodec:
    """ Base class for binary encoders/decoders, rendering and reading
    `BinaryElement` contents as text.
    :cvar NAME: The codec's name, written to the rendered XML as
    the `encoding` attribute. Also used as the `--encoding`
    argument in the command-line tools. Must be unique, and
    should be lowercase.
    :type NAME: str
    """
    # NOTE: the class docstring above is left word-for-word as it was; the
    # module docstring says class docstrings appear in the `ebml2xml` help.

    NAME = ""

    def __init__(self, **kwargs):
        """ Create the codec. The base implementation accepts any keyword
            arguments and does nothing with them; subclasses should take
            only optional keyword arguments, and need not define a
            constructor at all.
        """

    def encode(self, data, stream=None, indent='', offset=0, **kwargs):
        """ Render binary data as text. Abstract: the base implementation
            always raises; subclasses supply the actual encoding.

            :param data: The binary data from an EBML `BinaryElement`.
            :param stream: Optional stream that receives the encoded text.
                Every implementation should accept and honor it.
            :param indent: Text placed before each row of output. Relevant
                to codecs instantiated with `cols` specified.
            :param offset: The offset in the file of the originating EBML
                element, for codecs that write line numbers/position info.
            :returns: The encoded text if no `stream` was given; otherwise,
                the number of bytes written to `stream`.
            :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    @classmethod
    def decode(cls, data, stream=None):
        """ Convert the text form of binary data (e.g., from an XML file)
            back to binary. Abstract: the base implementation always raises.

            This is a `classmethod`: implementations should decode the text
            regardless of the arguments used when it was encoded (with or
            without indentation and/or line breaks, metadata like offsets,
            etc.).

            :param data: The text data from an XML file.
            :param stream: Optional stream that receives the decoded data.
            :returns: The decoded binary data if no `stream` was given;
                otherwise, the number of bytes written to `stream`.
            :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
# ==============================================================================
#
# ==============================================================================
class Base64Codec(BinaryCodec):
    """ Encoder/decoder for binary data as base64 formatted text to/from text.
    """
    NAME = "base64"

    def __init__(self, cols=76, **kwargs):
        """ Constructor.

            :param cols: The length of each line of base64 data, excluding
                any indentation specified when encoding. If 0 or `None`
                (or negative), data will be written as a single continuous
                block with no newlines.

            Additional keyword arguments will be accepted (to maintain
            compatibility with other codecs) but ignored.
        """
        self.cols = cols

    def encode(self, data, stream=None, indent='', **kwargs):
        """ Convert binary data to base64 text.

            With the default `cols` of 76, the line breaks produced by
            `base64.encodebytes()` are kept: the text starts with a newline
            and every newline is followed by `indent`. With any other
            positive `cols`, each row is written as a newline, `indent`,
            and up to `cols` characters of base64. Otherwise the base64 is
            one unbroken run, and `indent` is not used.

            :param data: The binary data from an EBML `BinaryElement`. A
                `str` is encoded as UTF-8 first.
            :param stream: An optional stream to which to write the encoded
                data.
            :param indent: Indentation before each row of text (`str`, or
                `bytes` which will be decoded). Used if the codec was
                instantiated with `cols` specified.
            :returns: If no `stream`, the encoded data as text. If `stream`,
                the value returned by `stream.write()` (the amount written).

            Additional keyword arguments will be accepted (to maintain
            compatibility with other codecs) but ignored.
        """
        if isinstance(indent, bytes):
            indent = indent.decode()
        if isinstance(data, str):
            data = data.encode('utf8')

        text = base64.encodebytes(data).decode()

        if self.cols == 76:
            # Default width of a base64 line; use existing newlines
            text = "\n" + text
            if indent:
                text = text.replace('\n', '\n' + indent)
        else:
            text = text.replace('\n', '')
            # `cols` of 0 or `None` means "no wrapping". Testing only for
            # `None` let 0 through to `range()` as a zero step (ValueError),
            # and a negative step yielded no rows, dropping all the data.
            cols = self.cols or 0
            if cols > 0:
                lead = '\n' + indent
                text = ''.join(lead + text[start:start + cols]
                               for start in range(0, len(text), cols))

        if stream is None:
            return text
        return stream.write(text)

    @classmethod
    def decode(cls, data, stream=None):
        """ Decode binary data in base64 (e.g., from an XML file). Note: this
            is a `classmethod`, and works regardless of how the encoded data
            was formatted (e.g., with indentations and/or line breaks).

            :param data: The base64 data from an XML file. A `str` is
                encoded as UTF-8 before decoding.
            :param stream: A stream to which to write the decoded data.
            :returns: If no `stream`, the decoded binary data. If `stream`,
                the value returned by `stream.write()` (the number of bytes
                written). Empty `data` produces `b''` or 0, and nothing is
                written to `stream`.
        """
        if not data:
            return b'' if stream is None else 0

        if isinstance(data, str):
            data = data.encode('utf8')

        decoded = base64.decodebytes(data)

        if stream is None:
            return decoded
        return stream.write(decoded)
# ==============================================================================
#
# ==============================================================================
class HexCodec(BinaryCodec):
    """ Encoder/decoder for binary data as hexadecimal format to/from text.
    Encoded text is multiple columns of bytes/words (default is 16 columns,
    2 bytes per column), with an optional file offset at the start of each
    row.
    """
    # The name shown in the encoded XML element's `encoding` attribute
    NAME = "hex"

    def __init__(self, width=2, cols=32, offsets=True, **kwargs):
        """ Constructor.

            :param width: The number of bytes displayed per column (word)
                when encoding. If 0 or `None`, no spaces are written
                between bytes.
            :param cols: The number of bytes written per row when encoding
                (the default, 32, gives 16 columns at the default `width`
                of 2). If 0 or `None`, data will be written as a single
                continuous block with no newlines.
            :param offsets: If `True`, each line will start with its offset
                (in decimal). Applicable if `cols` is a non-zero number.

            Additional keyword arguments are accepted but ignored.
        """
        self.width = width
        self.cols = cols
        self.offsets = bool(offsets and cols)

    def encode(self, data, stream=None, offset=0, indent='', **kwargs):
        """ Convert binary data to hexadecimal text.

            :param data: The binary data from an EBML `BinaryElement`. A
                `str` is encoded as UTF-8 first (as `Base64Codec` does).
            :param stream: An optional stream to which to write the encoded
                data.
            :param offset: A starting number for the displayed offsets column.
                For showing the data's offset in an EBML file.
            :param indent: Indentation before each row of hex text (`str`,
                or `bytes` which will be decoded).
            :returns: If no `stream`, the encoded data as text. If `stream`,
                the sum of the values returned by its `write()` calls (the
                amount written).

            Additional keyword arguments are accepted but ignored.
        """
        if not isinstance(indent, str):
            indent = indent.decode()
        if isinstance(data, str):
            # Iterating a `str` yields characters, which cannot be formatted
            # as hex; convert to bytes so iteration yields integers.
            data = data.encode('utf8')

        out = StringIO() if stream is None else stream

        cols = self.cols
        width = self.width
        newline = bool(cols)
        offsets = self.offsets and newline

        numbytes = 0
        for i, b in enumerate(data):
            if newline and not i % cols:
                # Start of a row: line break, indentation, optional offset.
                numbytes += out.write('\n')
                numbytes += out.write(indent)
                if offsets:
                    numbytes += out.write('[{:06d}] '.format(i + offset))
            elif width and not i % width:
                # Start of a word within a row. With no row breaks, this
                # also fires for the very first byte, so the unbroken
                # form begins with a space. A falsy `width` skips the
                # separator entirely instead of dividing by zero.
                numbytes += out.write(' ')
            numbytes += out.write('{:02x}'.format(b))

        if stream is None:
            return out.getvalue()
        return numbytes

    @classmethod
    def decode(cls, data, stream=None):
        """ Decode binary data in hexadecimal (e.g., from an XML file). Note:
            this is a `classmethod`, and works regardless of how the encoded
            data was formatted (e.g., number of columns, with or without
            offsets, etc.).

            The text is split on whitespace; any word containing `[` or `]`
            (i.e., an offset marker) is skipped, and every other word is
            read two hex digits at a time.

            :param data: The hexadecimal data from an XML file. A `str` is
                encoded as UTF-8 before parsing.
            :param stream: A stream to which to write the decoded data.
            :returns: If no `stream`, the decoded binary data. If `stream`,
                the sum of the values returned by its `write()` calls (the
                number of bytes written). Empty `data` produces `b''` or 0.
        """
        if not data:
            return b'' if stream is None else 0

        if isinstance(data, str):
            data = data.encode('utf8')

        out = BytesIO() if stream is None else stream

        numbytes = 0
        for word in data.split():
            if b'[' in word or b']' in word:
                # Offset column, not data.
                continue
            # Convert the whole word, then write it in one call rather
            # than once per byte.
            numbytes += out.write(b''.join(
                int(word[i:i + 2], 16).to_bytes(1, 'big')
                for i in range(0, len(word), 2)))

        if stream is None:
            return out.getvalue()
        return numbytes
# ==============================================================================
#
# ==============================================================================
class IgnoreCodec(BinaryCodec):
    """ Suppresses writing binary data as text.
    """
    NAME = "ignore"

    @staticmethod
    def encode(data, stream=None, **kwargs):
        """ Discard the binary data instead of encoding it.

            :param data: The binary data; not used.
            :param stream: An optional output stream. Nothing is written
                to it; it only selects the type of the return value.
            :returns: If no `stream`, an empty string. If `stream`, 0.

            Additional keyword arguments are accepted but ignored.
        """
        # Test for `None` explicitly, as the other codecs do, so that a
        # stream object which happens to be falsy still gets a count back.
        return '' if stream is None else 0

    @staticmethod
    def decode(data, stream=None, **kwargs):
        """ Discard the text instead of decoding it.

            :param data: The text data; not used.
            :param stream: An optional output stream. Nothing is written
                to it; it only selects the type of the return value.
            :returns: If no `stream`, empty `bytes`. If `stream`, 0.

            Additional keyword arguments are accepted but ignored.
        """
        return b'' if stream is None else 0
# ==============================================================================
#
# ==============================================================================
# Registry of the available codecs, keyed by each codec's `NAME`. Order
# matters: the first entry will be the default in the CLI (or at least it
# will be in Python 3.7 and later). User-implemented codecs should be added
# to the dictionary.
BINARY_CODECS = {
    codec.NAME: codec
    for codec in (Base64Codec, HexCodec, IgnoreCodec)
}