2015-03-31 18:03:55 +00:00
"""
Module for parsing binary data into structs.
"""
import argparse
import re
from collections import namedtuple
2015-09-01 18:50:21 +00:00
from pyraknet . bitstream import BitStream , c_bit , c_float , c_double , c_int8 , c_uint8 , c_int16 , c_uint16 , c_int32 , c_uint32 , c_int64 , c_uint64
2015-03-31 18:03:55 +00:00
# Characters allowed in a variable name: anything except space, tab and square brackets.
VAR_CHARS = r"[^ \t\[\]]+"
# Optional bit component (0-7) that may follow a byte address or byte length.
BIT = r"(BIT[0-7])?"
# Maps the type names usable in a definition line to the type handed to the bit stream.
# Tuple entries are (type, char_size); the second item is passed as char_size when reading.
BITSTREAM_TYPES = {
    "bytes": bytes,
    "string": (str, 1),
    "wstring": (str, 2),
    "float": c_float,
    "double": c_double,
    "s8": c_int8,
    "u8": c_uint8,
    "s16": c_int16,
    "u16": c_uint16,
    "s32": c_int32,
    "u32": c_uint32,
    "s64": c_int64,
    "u64": c_uint64,
}
# Alternation of all known type names, used inside DEFINITION_SYNTAX.
TYPES_RE = "(" + "|".join(BITSTREAM_TYPES) + ")"

# Syntax of one definition line. Compiled with re.VERBOSE, so literal spaces are written as "\ "
# and unescaped whitespace outside character classes is ignored.
DEFINITION_SYNTAX = re.compile(r"""^
    (?P<indent>\t*)                                        # Indentation
    ((?P<var_assign>""" + VAR_CHARS + r""")=)?             # Assign this struct a variable so the value can be back-referenced later
    \[(                                                    # Start of struct information
        (                                                  # A literal struct definition
            (A:(?P<address>0x[0-9a-fA-F]*""" + BIT + r"""),)?  # Fixed address information, in hexadecimal. This is unnecessary for structs that directly follow the previous struct and is rarely used.
            (L:(?P<length>[0-9]*""" + BIT + r"""))             # The length of the struct, in decimal
        )
        |
        (EVAL:(?P<eval>.+))                                # Expression to be evaluated, evaluated value acts like struct value, usually used for variables
    )\]                                                    # End of struct information
    (\ -\ (?P<description>.*?)                             # Description for the struct
        (,\ (?P<type>""" + TYPES_RE + r"""))?              # Struct type
        (,\ expect\ (?P<expect>(.+?)))?                    # Expect the value to be like this expression. Struct attribute 'unexpected' will be None if no expects, True if any expects are False, or False if all expects are True.
        (,\ assert\ (?P<assert>(.+?)))?                    # Assert the value to be like this expression, will raise AssertionError if not True.
    )?$
""", re.VERBOSE)
2015-06-02 18:09:11 +00:00
# One parsed definition line: optional variable name, address and length in bits (or None),
# compiled eval expression (or None), description, value type, and the compiled expect/assert expressions.
Definition = namedtuple("Definition", ("var_assign", "address", "length", "eval", "description", "type", "expects", "asserts"))
# One parsed occurrence of a structure: nesting level, description, parsed value and the expect outcome.
Structure = namedtuple("Structure", ("level", "description", "value", "unexpected"))
2015-03-31 18:03:55 +00:00
class StructParser:
    """
    Parser that reads binary data according to a tree of textual structure definitions.

    Every line of the definition text that matches DEFINITION_SYNTAX becomes a Definition.
    Tab indentation nests a definition under the closest preceding definition one level up.
    """

    def __init__(self, struct_defs):
        """
        Set up the parser with the structure definitions.
        Arguments:
            struct_defs: A string of structure definitions in my custom format (currently unnamed), see the documentation of that for details.
        Raises:
            ValueError if a definition is indented deeper than one level below the preceding definition.
        """
        self._variables = {}
        # Run the regex once per line; lines not matching the syntax are filtered out.
        matches = (DEFINITION_SYNTAX.search(line) for line in struct_defs.splitlines())
        parsed = [match.groupdict() for match in matches if match is not None]
        self.defs = self._to_tree(iter(parsed))[0]

    def parse(self, data, variables=None):
        """
        Parse the binary data, yielding structure objects.
        Arguments:
            data: The binary data to parse. A BitStream is used as is, anything else is wrapped in a BitStream.
            variables: A dict of variables to be used in checks as defined by the structure definition, such as expects or asserts.
                Variables assigned by the definitions are stored in this dict as well.
        Yields:
            Named structure tuples,
            attributes:
                level: The indentation level from the structure definition.
                description: The description from the structure definition.
                value: Parsed value of this structure occurrence in the binary data. The type of this is specified by the type specified in the structure definition.
                unexpected: None if no expects defined, True if any expects are False, False if all expects are True.
        Raises:
            AssertionError if any assert is False.
        """
        if variables is None:
            variables = {}
        self._variables = variables
        if isinstance(data, BitStream):
            stream = data
        else:
            stream = BitStream(data)
        yield from self._parse_struct_occurrences(stream, self.defs)

    def _to_tree(self, def_iter, stack_level=0, start_def=None):
        """
        Build the nested definition tree for one indentation level.
        Arguments:
            def_iter: Iterator over the group dicts of the matched definition lines.
            stack_level: The indentation level this call collects.
            start_def: A group dict that was already taken from the iterator and should be processed first.
        Returns:
            A tuple (nodes, next_def). nodes is a list of (Definition, children) tuples, where children is
            either an empty tuple or a list of the same shape. next_def is the first group dict belonging
            to an ancestor level, or None if the iterator is exhausted.
        Raises:
            ValueError if a definition has no parent at the level directly above it.
        """
        current_level = []
        try:
            def_ = start_def if start_def is not None else next(def_iter)
            while True:
                depth = len(def_["indent"])
                if depth == stack_level:
                    current_level.append((self._to_def_tuple(def_), ()))
                    def_ = next(def_iter)
                elif depth == stack_level + 1 and current_level:
                    # found a child of the previous
                    children, next_struct = self._to_tree(def_iter, stack_level + 1, def_)
                    current_level[-1] = current_level[-1][0], children
                    if next_struct is None:
                        break
                    def_ = next_struct
                elif depth < stack_level:
                    # we're at ancestor level again, done with the children
                    return current_level, def_
                else:
                    # Without this branch an over-indented line would never be consumed and the loop would not terminate.
                    raise ValueError("definition is indented %i levels but has no parent at level %i: %r" % (depth, depth - 1, def_))
        except StopIteration:
            # the iterator ran out of definitions
            pass
        return current_level, None

    @staticmethod
    def _parse_bit_offset(text, base):
        """
        Convert a "<bytes>BIT<bits>" string (both parts optional) to a number of bits.
        Arguments:
            text: The address or length text as matched by DEFINITION_SYNTAX.
            base: Numeric base of the byte part, 16 for addresses and 10 for lengths.
        Returns:
            bytes * 8 + bits, missing parts count as 0.
        """
        byte_part, _, bit_part = text.partition("BIT")
        if base == 16 and byte_part == "0x":
            # The syntax allows an address without hex digits (e.g. "0xBIT3"); int("0x", 16) would raise.
            byte_part = ""
        bytes_ = int(byte_part, base) if byte_part else 0
        bits = int(bit_part) if bit_part else 0
        return bytes_ * 8 + bits

    @staticmethod
    def _to_def_tuple(def_):
        """
        Convert the group dict of a matched definition line to a Definition tuple.
        Arguments:
            def_: The groupdict() of a DEFINITION_SYNTAX match.
        Returns:
            A Definition with address and length in bits (None if not given), the eval expression compiled
            (None if not given), the type resolved from the type name or guessed from the length, and the
            expects and asserts compiled.
        Raises:
            ValueError if no type is given and the length is neither 1, 8, 16, 32 or 64 bits nor a whole number of bytes.
        """
        if def_["address"] is not None:
            address_bits = StructParser._parse_bit_offset(def_["address"], 16)
        else:
            address_bits = None
        if def_["length"] is not None:
            length_bits = StructParser._parse_bit_offset(def_["length"], 10)
        else:
            length_bits = None

        if def_["eval"] is not None:
            eval_ = compile(def_["eval"], "<eval>", "eval")
            type_ = None
        else:
            eval_ = None
            if def_["type"] is not None:
                type_ = BITSTREAM_TYPES[def_["type"]]
            else:
                # try to find a type based on the length
                by_length = {1: c_bit, 8: c_int8, 16: c_int16, 32: c_int32, 64: c_int64}
                if length_bits in by_length:
                    type_ = by_length[length_bits]
                elif length_bits % 8 == 0:
                    type_ = bytes
                else:
                    raise ValueError(def_, length_bits)

        # Each part of an expect/assert is the right-hand side of a check against the parsed value, so "value" is prepended.
        if def_["expect"] is not None:
            expects = [compile("value" + i, "<expect>", "eval") for i in def_["expect"].split(" and ")]
        else:
            expects = ()
        if def_["assert"] is not None:
            asserts = [compile("value" + i, "<assert>", "eval") for i in def_["assert"].split(" and ")]
        else:
            asserts = ()
        return Definition(def_["var_assign"], address_bits, length_bits, eval_, def_["description"], type_, expects, asserts)

    def _parse_struct_occurrences(self, stream, defs, stack_level=0, repeat_times=1):
        """
        Read the given definitions from the stream, yielding a Structure for each occurrence.
        Arguments:
            stream: The stream to read from.
            defs: List of (Definition, children) tuples as built by _to_tree.
            stack_level: Nesting level reported in the yielded structures.
            repeat_times: How many times the whole list of definitions is read.
        Yields:
            Structure tuples, children directly after their parent.
        Raises:
            AssertionError if any assert of a definition is False.
        """
        for _ in range(repeat_times):
            for def_, children in defs:
                if def_.eval is not None:
                    value = self._eval(def_.eval)
                else:
                    if def_.address is not None:
                        stream._read_offset = def_.address
                    if isinstance(def_.type, tuple):
                        # string types are stored as (str, char_size)
                        string_type, char_size = def_.type
                        value = stream.read(string_type, char_size=char_size, allocated_length=def_.length // 8)
                    elif def_.type == bytes:
                        value = stream.read(bytes, length=def_.length // 8)
                    else:
                        value = stream.read(def_.type)

                if def_.expects:
                    unexpected = not all(self._eval(expression, value) for expression in def_.expects)
                else:
                    unexpected = None

                for expression in def_.asserts:
                    # Raised explicitly instead of using the assert statement so the check also runs under python -O.
                    if not self._eval(expression, value):
                        raise AssertionError((value, expression, def_))

                if def_.var_assign is not None:
                    self._variables[def_.var_assign] = value
                yield Structure(stack_level, def_.description, value, unexpected)

                if children:
                    # the parsed value is the number of times the children occur
                    yield from self._parse_struct_occurrences(stream, children, stack_level + 1, value)

    def _eval(self, expression, value=None):
        """
        Evaluate an expression with the current variables and the given value available as names.
        Arguments:
            expression: The expression (source or compiled code) to evaluate.
            value: Bound to the name "value" unless a variable of the same name exists.
        Returns:
            The result of the expression.
        """
        globals_ = {"__builtins__": {}, "value": value}
        globals_.update(self._variables)
        # SECURITY: eval of text from the definition file, emptying __builtins__ is no sandbox. Only use trusted definitions.
        return eval(expression, globals_)  # definitely not safe, fwiw
2015-03-31 18:03:55 +00:00
2015-08-23 16:29:54 +00:00
# Command line usage: parse a binary file with a definition file and print every structure found.
if __name__ == "__main__":
    argparser = argparse.ArgumentParser(description=__doc__)
    argparser.add_argument("filepath", help="path of binary file")
    argparser.add_argument("definition", help="struct definition file path to parse with")
    args = argparser.parse_args()
    with open(args.definition) as file:
        defs = file.read()
    parser = StructParser(defs)
    # The data file is read as raw bytes in one go and handed to the parser.
    with open(args.filepath, "rb") as file:
        for structure in parser.parse(file.read()):
            print(structure)