mirror of
https://github.com/lcdr/utils.git
synced 2024-08-30 17:32:16 +00:00
230 lines
7.7 KiB
Python
230 lines
7.7 KiB
Python
"""
|
|
Module for parsing binary data into structs.
|
|
"""
|
|
import argparse
|
|
import ctypes
|
|
import math
|
|
import re
|
|
from collections import namedtuple
|
|
|
|
from pyraknet.bitstream import BitStream, c_bit
|
|
|
|
VAR_CHARS = r"[^ \t\[\]]+"
|
|
BIT = r"(BIT[0-7])?"
|
|
TYPES = "bytes", "string", "wstring", "char", "wchar", "float", "double", "s8", "u8", "s16", "u16", "s32", "u32", "s64", "u64"
|
|
TYPES_RE = "("+"|".join(TYPES)+")"
|
|
|
|
DEFINITION_SYNTAX = re.compile(r"""^
|
|
(?P<indent>\t*) # Indentation
|
|
((?P<var_assign>"""+VAR_CHARS+r""")=)? # Assign this struct a variable so the value can be back-referenced later
|
|
\[( # Start of struct information
|
|
( # A literal struct definition, as opposed to a variable back-reference
|
|
(A:(?P<address>0x[0-9a-fA-F]*"""+BIT+r"""),)? # Fixed address information, in hexadecimal. This is unnecessary for structs that directly follow the previous struct and is rarely used.
|
|
(L:(?P<length>[0-9]*"""+BIT+r""")) # The length of the struct, in decimal
|
|
)
|
|
|
|
|
(VAR:(?P<var_ref>"""+VAR_CHARS+r""")) # Back-reference to a previously scanned variable
|
|
)\] # End of struct information
|
|
\ -\ (?P<description>.*?) # Description for the struct
|
|
(,\ (?P<type>"""+TYPES_RE+r"""))? # Struct type
|
|
(,\ always\ (?P<assert>(.+?))\?)?$ # Assertion for the value, combine assertions with 'and' (useful for range checking etc)
|
|
""", re.VERBOSE)
|
|
|
|
Definition = namedtuple("Definition", ("var_assign", "address", "length", "var_ref", "description", "type", "asserts"))
|
|
Structure = namedtuple("Structure", ("description", "value"))
|
|
|
|
class StructParser:
|
|
def __init__(self, struct_defs):
|
|
"""
|
|
Set up the parser with the structure definitions.
|
|
Arguments:
|
|
struct_defs: A string of structure definitions in my custom format (currently unnamed), see the documentation of that for details.
|
|
"""
|
|
self._variables = {}
|
|
struct_defs = struct_defs.splitlines()
|
|
struct_defs = [re.search(DEFINITION_SYNTAX, struct).groupdict() for struct in struct_defs if re.search(DEFINITION_SYNTAX, struct) is not None]
|
|
|
|
self.defs = self._to_tree(iter(struct_defs))[0]
|
|
|
|
def parse(self, data):
|
|
"""
|
|
Parse the binary data, yielding structure objects.
|
|
|
|
Arguments:
|
|
data: The binary data to parse.
|
|
Yields:
|
|
Named structure tuples,
|
|
attributes:
|
|
description: The description from the structure definition used.
|
|
value: Parsed value of this structure occurrence in the binary data. The type of this is specified by the type specified in the structure definition.
|
|
Raises:
|
|
AssertionError if the value assertion per the structure definition is false.
|
|
|
|
"""
|
|
self._variables = {}
|
|
stream = BitStream(data)
|
|
yield from self._parse_struct_occurences(stream, self.defs)
|
|
|
|
if math.ceil(stream._read_offset / 8) != len(data):
|
|
print("\n\nWARNING: NOT FULLY PARSED\n\n")
|
|
|
|
def _to_tree(self, def_iter, stack_level=0, start_def=None):
|
|
current_stack = []
|
|
try:
|
|
if start_def is not None:
|
|
def_ = start_def
|
|
else:
|
|
def_ = next(def_iter)
|
|
|
|
while True:
|
|
if len(def_["indent"]) == stack_level:
|
|
def_tuple = self._to_def_tuple(def_)
|
|
current_stack.append((def_tuple, ()))
|
|
|
|
elif len(def_["indent"]) == stack_level+1:
|
|
# found a child of the previous
|
|
children, next_struct = self._to_tree(def_iter, stack_level+1, def_)
|
|
current_stack[-1] = current_stack[-1][0], children
|
|
if next_struct is None:
|
|
raise StopIteration
|
|
def_ = next_struct
|
|
continue
|
|
elif len(def_["indent"]) < stack_level:
|
|
# we're at ancestor level again, done with the children
|
|
return current_stack, def_
|
|
def_ = next(def_iter)
|
|
except StopIteration:
|
|
return current_stack, None
|
|
|
|
@staticmethod
|
|
def _to_def_tuple(def_):
|
|
if def_["address"] is not None:
|
|
split = def_["address"].split("BIT")
|
|
if split[0] != "":
|
|
bytes_ = int(split[0], 16)
|
|
else:
|
|
bytes_ = 0
|
|
if len(split) == 2:
|
|
bits = int(split[1])
|
|
else:
|
|
bits = 0
|
|
address_bits = bytes_ * 8 + bits
|
|
else:
|
|
address_bits = None
|
|
if def_["length"] is not None:
|
|
split = def_["length"].split("BIT")
|
|
if split[0] != "":
|
|
bytes_ = int(split[0])
|
|
else:
|
|
bytes_ = 0
|
|
if len(split) == 2:
|
|
bits = int(split[1])
|
|
else:
|
|
bits = 0
|
|
length_bits = bytes_ * 8 + bits
|
|
else:
|
|
length_bits = None
|
|
|
|
if def_["assert"] is not None:
|
|
asserts = def_["assert"].split(" and ")
|
|
else:
|
|
asserts = ()
|
|
|
|
if def_["var_ref"] is not None:
|
|
# if this is a variable reference we can save us the problem of finding a type
|
|
type_ = None
|
|
else:
|
|
if def_["type"] is not None:
|
|
if def_["type"] == "bytes":
|
|
type_ = bytes
|
|
elif def_["type"] == "string":
|
|
type_ = str, 1
|
|
elif def_["type"] == "wstring":
|
|
type_ = str, 2
|
|
if def_["type"] in ("char", "wchar", "float", "double"):
|
|
type_ = vars(ctypes)["c_"+def_["type"]]
|
|
# the rest of types are in the format (s|u)<bitlength>
|
|
elif def_["type"].startswith("s"):
|
|
type_ = vars(ctypes)["c_int"+def_["type"][1:]]
|
|
elif def_["type"].startswith("u"):
|
|
type_ = vars(ctypes)["c_uint"+def_["type"][1:]]
|
|
else:
|
|
# try to find a type based on the length
|
|
if length_bits == 1:
|
|
type_ = c_bit
|
|
elif length_bits == 8:
|
|
type_ = ctypes.c_byte
|
|
elif length_bits == 16:
|
|
type_ = ctypes.c_short
|
|
elif length_bits == 32:
|
|
type_ = ctypes.c_int
|
|
elif length_bits == 64:
|
|
type_ = ctypes.c_int64
|
|
else:
|
|
if length_bits % 8 == 0:
|
|
type_ = bytes
|
|
else:
|
|
raise ValueError(def_, length_bits)
|
|
|
|
return Definition(def_["var_assign"], address_bits, length_bits, def_["var_ref"], def_["description"], type_, asserts)
|
|
|
|
def _parse_struct_occurences(self, stream, defs, stack_level=0, repeat_times=1):
|
|
if len(defs) == 0:
|
|
return
|
|
|
|
for _ in range(repeat_times):
|
|
for def_, children in defs:
|
|
if def_.var_ref is not None:
|
|
value = self._variables[def_.var_ref]
|
|
else:
|
|
if def_.address != None:
|
|
stream._read_offset = def_.address
|
|
|
|
if type(def_.type) == tuple:
|
|
type_ = def_.type[0]
|
|
if type_ == str:
|
|
value = stream.read(str, char_size=def_.type[1], allocated_length=def_.length // 8)
|
|
elif def_.type == bytes:
|
|
value = stream.read(bytes, length=def_.length // 8)
|
|
else:
|
|
value = stream.read(def_.type)
|
|
self._assert_value(value, def_)
|
|
|
|
if def_.var_assign is not None:
|
|
self._variables[def_.var_assign] = value
|
|
|
|
yield Structure(def_.description, value)
|
|
|
|
yield from self._parse_struct_occurences(stream, children, stack_level+1, value)
|
|
|
|
def _assert_value(self, value, def_):
|
|
for expression in def_.asserts:
|
|
try:
|
|
globals_ = {}
|
|
globals_["__builtins__"] = {}
|
|
globals_.update(self._variables)
|
|
assert eval(str(value)+" "+expression, globals_), (value, expression) # definitely not safe, fwiw
|
|
except AssertionError:
|
|
print("ASSERTION ERROR:", str(value), "IS NOT", expression)
|
|
print("DEFINITION INFO:", def_.description)
|
|
raise
|
|
|
|
|
|
def main():
|
|
argparser = argparse.ArgumentParser(description=__doc__)
|
|
argparser.add_argument("filepath", help="path of binary file")
|
|
argparser.add_argument("definition", help="struct definition file path to parse with")
|
|
args = argparser.parse_args()
|
|
|
|
with open(args.definition) as file:
|
|
defs = file.read()
|
|
|
|
parser = StructParser(defs)
|
|
|
|
with open(args.filepath, "rb") as file:
|
|
for structure in parser.parse(file.read()):
|
|
print(structure)
|
|
|
|
if __name__ == "__main__":
|
|
main()
|