mirror of
https://github.com/lcdr/utils.git
synced 2024-08-30 17:32:16 +00:00
Backwards-incompatible changes.
Changed: Structure definition syntax changes: * The VAR: variable reference has been replaced by EVAL: which allows you to use any expression, even involving other variables. The result of the expression is used as the value (e.g. loop length). * The "always ...?" syntax for defining fatal asserts has been changed to "assert ..." * The "expect ..." syntax for non-fatal asserts has been introduced. Parser changes: * The Structure tuples yielded by parse now include a parameter "unexpected" that will be None if no expects are defined, True if any expects are False, or False if all expects are True. * It is now possible to pass pre-defined variables to parse.
This commit is contained in:
parent
993e2d3211
commit
780fd4e614
120
structparser.py
120
structparser.py
@ -3,7 +3,6 @@ Module for parsing binary data into structs.
|
||||
"""
|
||||
import argparse
|
||||
import ctypes
|
||||
import math
|
||||
import re
|
||||
from collections import namedtuple
|
||||
|
||||
@ -15,23 +14,25 @@ TYPES = "bytes", "string", "wstring", "char", "wchar", "float", "double", "s8",
|
||||
TYPES_RE = "("+"|".join(TYPES)+")"
|
||||
|
||||
DEFINITION_SYNTAX = re.compile(r"""^
|
||||
(?P<indent>\t*) # Indentation
|
||||
((?P<var_assign>"""+VAR_CHARS+r""")=)? # Assign this struct a variable so the value can be back-referenced later
|
||||
\[( # Start of struct information
|
||||
( # A literal struct definition, as opposed to a variable back-reference
|
||||
(A:(?P<address>0x[0-9a-fA-F]*"""+BIT+r"""),)? # Fixed address information, in hexadecimal. This is unnecessary for structs that directly follow the previous struct and is rarely used.
|
||||
(L:(?P<length>[0-9]*"""+BIT+r""")) # The length of the struct, in decimal
|
||||
(?P<indent>\t*) # Indentation
|
||||
((?P<var_assign>"""+VAR_CHARS+r""")=)? # Assign this struct a variable so the value can be back-referenced later
|
||||
\[( # Start of struct information
|
||||
( # A literal struct definition
|
||||
(A:(?P<address>0x[0-9a-fA-F]*"""+BIT+r"""),)? # Fixed address information, in hexadecimal. This is unnecessary for structs that directly follow the previous struct and is rarely used.
|
||||
(L:(?P<length>[0-9]*"""+BIT+r""")) # The length of the struct, in decimal
|
||||
)
|
||||
|
|
||||
(VAR:(?P<var_ref>"""+VAR_CHARS+r""")) # Back-reference to a previously scanned variable
|
||||
)\] # End of struct information
|
||||
\ -\ (?P<description>.*?) # Description for the struct
|
||||
(,\ (?P<type>"""+TYPES_RE+r"""))? # Struct type
|
||||
(,\ always\ (?P<assert>(.+?))\?)?$ # Assertion for the value, combine assertions with 'and' (useful for range checking etc)
|
||||
(EVAL:(?P<eval>.+)) # Expression to be evaluated, evaluated value acts like struct value, usually used for variables
|
||||
)\] # End of struct information
|
||||
(\ -\ (?P<description>.*?) # Description for the struct
|
||||
(,\ (?P<type>"""+TYPES_RE+r"""))? # Struct type
|
||||
(,\ expect\ (?P<expect>(.+?)))? # Expect the value to be like this expression. Struct attribute 'unexpected' will be None if no expects, True if any expects are False, or False if all expects are True.
|
||||
(,\ assert\ (?P<assert>(.+?)))? # Assert the value to be like this expression, will raise AssertionError if not True.
|
||||
)?$
|
||||
""", re.VERBOSE)
|
||||
|
||||
Definition = namedtuple("Definition", ("var_assign", "address", "length", "var_ref", "description", "type", "asserts"))
|
||||
Structure = namedtuple("Structure", ("description", "value"))
|
||||
Definition = namedtuple("Definition", ("var_assign", "address", "length", "eval", "description", "type", "expects", "asserts"))
|
||||
Structure = namedtuple("Structure", ("description", "value", "unexpected"))
|
||||
|
||||
class StructParser:
|
||||
def __init__(self, struct_defs):
|
||||
@ -42,11 +43,11 @@ class StructParser:
|
||||
"""
|
||||
self._variables = {}
|
||||
struct_defs = struct_defs.splitlines()
|
||||
struct_defs = [re.search(DEFINITION_SYNTAX, struct).groupdict() for struct in struct_defs if re.search(DEFINITION_SYNTAX, struct) is not None]
|
||||
struct_defs = [re.search(DEFINITION_SYNTAX, struct).groupdict() for struct in struct_defs if re.search(DEFINITION_SYNTAX, struct) is not None] # Filter out lines not matching the syntax
|
||||
|
||||
self.defs = self._to_tree(iter(struct_defs))[0]
|
||||
|
||||
def parse(self, data):
|
||||
def parse(self, data, variables=None):
|
||||
"""
|
||||
Parse the binary data, yielding structure objects.
|
||||
|
||||
@ -57,19 +58,22 @@ class StructParser:
|
||||
attributes:
|
||||
description: The description from the structure definition used.
|
||||
value: Parsed value of this structure occurrence in the binary data. The type of this is specified by the type specified in the structure definition.
|
||||
unexpected: None if no expects defined, True if any expects are False, False if all expects are True.
|
||||
Raises:
|
||||
AssertionError if the value assertion per the structure definition is false.
|
||||
AssertionError if any assert is False.
|
||||
|
||||
"""
|
||||
self._variables = {}
|
||||
stream = BitStream(data)
|
||||
yield from self._parse_struct_occurences(stream, self.defs)
|
||||
|
||||
if math.ceil(stream._read_offset / 8) != len(data):
|
||||
print("\n\nWARNING: NOT FULLY PARSED\n\n")
|
||||
if variables is None:
|
||||
variables = {}
|
||||
self._variables = variables
|
||||
if isinstance(data, BitStream):
|
||||
stream = data
|
||||
else:
|
||||
stream = BitStream(data)
|
||||
yield from self._parse_struct_occurrences(stream, self.defs)
|
||||
|
||||
def _to_tree(self, def_iter, stack_level=0, start_def=None):
|
||||
current_stack = []
|
||||
current_level = []
|
||||
try:
|
||||
if start_def is not None:
|
||||
def_ = start_def
|
||||
@ -79,22 +83,20 @@ class StructParser:
|
||||
while True:
|
||||
if len(def_["indent"]) == stack_level:
|
||||
def_tuple = self._to_def_tuple(def_)
|
||||
current_stack.append((def_tuple, ()))
|
||||
|
||||
current_level.append((def_tuple, ()))
|
||||
def_ = next(def_iter)
|
||||
elif len(def_["indent"]) == stack_level+1:
|
||||
# found a child of the previous
|
||||
children, next_struct = self._to_tree(def_iter, stack_level+1, def_)
|
||||
current_stack[-1] = current_stack[-1][0], children
|
||||
current_level[-1] = current_level[-1][0], children
|
||||
if next_struct is None:
|
||||
raise StopIteration
|
||||
def_ = next_struct
|
||||
continue
|
||||
elif len(def_["indent"]) < stack_level:
|
||||
# we're at ancestor level again, done with the children
|
||||
return current_stack, def_
|
||||
def_ = next(def_iter)
|
||||
return current_level, def_
|
||||
except StopIteration:
|
||||
return current_stack, None
|
||||
return current_level, None
|
||||
|
||||
@staticmethod
|
||||
def _to_def_tuple(def_):
|
||||
@ -125,13 +127,17 @@ class StructParser:
|
||||
else:
|
||||
length_bits = None
|
||||
|
||||
if def_["expect"] is not None:
|
||||
expects = def_["expect"].split(" and ")
|
||||
else:
|
||||
expects = ()
|
||||
if def_["assert"] is not None:
|
||||
asserts = def_["assert"].split(" and ")
|
||||
else:
|
||||
asserts = ()
|
||||
|
||||
if def_["var_ref"] is not None:
|
||||
# if this is a variable reference we can save us the problem of finding a type
|
||||
if def_["eval"] is not None:
|
||||
# if this is an eval we can save us the problem of finding a type
|
||||
type_ = None
|
||||
else:
|
||||
if def_["type"] is not None:
|
||||
@ -166,21 +172,18 @@ class StructParser:
|
||||
else:
|
||||
raise ValueError(def_, length_bits)
|
||||
|
||||
return Definition(def_["var_assign"], address_bits, length_bits, def_["var_ref"], def_["description"], type_, asserts)
|
||||
|
||||
def _parse_struct_occurences(self, stream, defs, stack_level=0, repeat_times=1):
|
||||
if len(defs) == 0:
|
||||
return
|
||||
return Definition(def_["var_assign"], address_bits, length_bits, def_["eval"], def_["description"], type_, expects, asserts)
|
||||
|
||||
def _parse_struct_occurrences(self, stream, defs, stack_level=0, repeat_times=1):
|
||||
for _ in range(repeat_times):
|
||||
for def_, children in defs:
|
||||
if def_.var_ref is not None:
|
||||
value = self._variables[def_.var_ref]
|
||||
if def_.eval is not None:
|
||||
value = self._eval(def_.eval)
|
||||
else:
|
||||
if def_.address != None:
|
||||
stream._read_offset = def_.address
|
||||
|
||||
if type(def_.type) == tuple:
|
||||
if isinstance(def_.type, tuple):
|
||||
type_ = def_.type[0]
|
||||
if type_ == str:
|
||||
value = stream.read(str, char_size=def_.type[1], allocated_length=def_.length // 8)
|
||||
@ -188,26 +191,31 @@ class StructParser:
|
||||
value = stream.read(bytes, length=def_.length // 8)
|
||||
else:
|
||||
value = stream.read(def_.type)
|
||||
self._assert_value(value, def_)
|
||||
|
||||
if def_.expects:
|
||||
for expression in def_.expects:
|
||||
if not self._eval(str(value)+" "+expression):
|
||||
unexpected = True
|
||||
break
|
||||
else:
|
||||
unexpected = False
|
||||
else:
|
||||
unexpected = None
|
||||
|
||||
for expression in def_.asserts:
|
||||
assert self._eval(str(value)+" "+expression), (value, expression, def_)
|
||||
|
||||
if def_.var_assign is not None:
|
||||
self._variables[def_.var_assign] = value
|
||||
yield Structure(def_.description, value, unexpected)
|
||||
|
||||
yield Structure(def_.description, value)
|
||||
if children:
|
||||
yield from self._parse_struct_occurrences(stream, children, stack_level+1, value)
|
||||
|
||||
yield from self._parse_struct_occurences(stream, children, stack_level+1, value)
|
||||
|
||||
def _assert_value(self, value, def_):
|
||||
for expression in def_.asserts:
|
||||
try:
|
||||
globals_ = {}
|
||||
globals_["__builtins__"] = {}
|
||||
globals_.update(self._variables)
|
||||
assert eval(str(value)+" "+expression, globals_), (value, expression) # definitely not safe, fwiw
|
||||
except AssertionError:
|
||||
print("ASSERTION ERROR:", str(value), "IS NOT", expression)
|
||||
print("DEFINITION INFO:", def_.description)
|
||||
raise
|
||||
def _eval(self, expression):
|
||||
globals_ = {"__builtins__":{}}
|
||||
globals_.update(self._variables)
|
||||
return eval(expression, globals_) # definitely not safe, fwiw
|
||||
|
||||
|
||||
def main():
|
||||
|
Loading…
Reference in New Issue
Block a user