2015-03-31 18:03:55 +00:00
"""
Module for parsing binary data into structs.
"""
import argparse
import re
from collections import namedtuple
2017-12-25 12:43:33 +00:00
from pyraknet . bitstream import c_bit , c_float , c_double , c_int8 , c_uint8 , c_int16 , c_uint16 , c_int32 , c_uint32 , c_int64 , c_uint64 , ReadStream
2015-03-31 18:03:55 +00:00
# Characters allowed in a variable name: anything except space, tab and square brackets.
VAR_CHARS = r"[^ \t\[\]]+"

# Syntax of a single line of a structure definition file.
# A line is an optional run of tabs (the nesting level) followed by exactly one of:
#   if <condition>:
#   while <condition>:
#   break
#   [<var>=][<type>] - <description>[, expect <expr>][, assert <expr>]
# The pattern is compiled with re.VERBOSE, so literal spaces are escaped as "\ ";
# whitespace inside the VAR_CHARS character class is still significant.
DEFINITION_SYNTAX = re.compile(r"""
^(?P<indent>\t*)                              # Indentation
(if\ (?P<if_condition>.+):
|
while\ (?P<while_condition>.+):
|
(?P<break>break)
|
((?P<var_assign>""" + VAR_CHARS + r""")=)?    # Assign this struct a variable so the value can be back-referenced later
\[
(?P<type>.*)                                  # Struct type
\]
\ -\ (?P<description>.*?)                     # Description for the struct
(,\ expect\ (?P<expect>(.+?)))?               # Expect the value to be like this expression. Struct attribute 'unexpected' will be None if no expects, True if any expects are False, or False if all expects are True.
(,\ assert\ (?P<assert>(.+?)))?               # Assert the value to be like this expression, will raise AssertionError if not True.
)$
""", re.VERBOSE)
2015-09-30 16:00:42 +00:00
# Nodes of the parsed definition tree, one per definition line.
# "condition" holds the compiled code object of the if/while expression.
IfStatement = namedtuple("IfStatement", ("condition",))
WhileStatement = namedtuple("WhileStatement", ("condition",))
BreakStatement = namedtuple("BreakStatement", ())
# A value-reading definition line. "expects" and "asserts" are sequences of compiled expressions.
# The typename "struct_token" is kept as-is so the repr of these tuples does not change.
StructDefinition = namedtuple("struct_token", ("var_assign", "type", "description", "expects", "asserts"))

# A parsed occurrence of a struct in the binary data, as yielded by StructParser.parse.
Structure = namedtuple("Structure", ("level", "description", "value", "unexpected"))
2015-03-31 18:03:55 +00:00
class StructParser:
    """
    Parser that reads binary data according to a textual structure definition.

    The definition is turned into a tree once, at construction time; parse() can then be
    called repeatedly to walk that tree over different binary inputs.
    """

    def __init__(self, struct_defs, type_handlers=None):
        """
        Set up the parser with the structure definitions.

        Arguments:
            struct_defs: A string of structure definitions in my custom format (currently unnamed), see the documentation of that for details.
            type_handlers: Parsing handlers for custom types, provided as {"type": handler_func}.
                Each handler is called with the stream and returns the parsed value.
                Entries override the built-in handlers of the same name. Defaults to no custom handlers.
        Raises:
            ValueError if the indentation of the definitions does not form a valid tree.
        """
        self._variables = {}
        # Match every line once; lines not matching the syntax are silently filtered out.
        matches = (DEFINITION_SYNTAX.search(line) for line in struct_defs.splitlines())
        parsed_defs = [match.groupdict() for match in matches if match is not None]
        self.defs = self._to_tree(iter(parsed_defs))[0]

        # Built-in handlers, keyed by the type name used between [ ] in the definition.
        self._type_handlers = {
            "bit": lambda stream: stream.read(c_bit),
            "float": lambda stream: stream.read(c_float),
            "double": lambda stream: stream.read(c_double),
            "s8": lambda stream: stream.read(c_int8),
            "u8": lambda stream: stream.read(c_uint8),
            "s16": lambda stream: stream.read(c_int16),
            "u16": lambda stream: stream.read(c_uint16),
            "s32": lambda stream: stream.read(c_int32),
            "u32": lambda stream: stream.read(c_uint32),
            "s64": lambda stream: stream.read(c_int64),
            "u64": lambda stream: stream.read(c_uint64),
            # string types
            "u8-string": lambda stream: stream.read(bytes, length_type=c_uint8),
            "u16-string": lambda stream: stream.read(bytes, length_type=c_uint16),
            "u8-wstring": lambda stream: stream.read(str, length_type=c_uint8),
            "u16-wstring": lambda stream: stream.read(str, length_type=c_uint16),
        }
        # A None default (rather than a shared {} literal) avoids the mutable-default pitfall.
        if type_handlers is not None:
            self._type_handlers.update(type_handlers)

    def parse(self, data, variables=None):
        """
        Parse the binary data, yielding structure objects.

        Arguments:
            data: The binary data to parse. Either a ReadStream, which is used directly,
                or anything ReadStream can be constructed from.
            variables: A dict of variables to be used in checks as defined by the structure definition, such as expects or asserts.
                Note that variables assigned by the definition are written into this same dict.
        Yields:
            Named structure tuples,
            attributes:
                level: The indentation level from the structure definition.
                description: The description from the structure definition.
                value: Parsed value of this structure occurrence in the binary data. The type of this is specified by the type specified in the structure definition.
                unexpected: None if no expects defined, True if any expects are False, False if all expects are True.
        Raises:
            AssertionError if any assert is False.
        """
        if variables is None:
            variables = {}
        self._variables = variables
        stream = data if isinstance(data, ReadStream) else ReadStream(data)
        yield from self._parse_struct_occurrences(stream, self.defs)

    def _to_tree(self, def_iter, stack_level=0, start_def=None):
        """
        Build the nested definition tree from a flat iterator of matched definition lines.

        Arguments:
            def_iter: Iterator over the regex group dicts of the definition lines.
            stack_level: The indentation level this call collects definitions for.
            start_def: A definition already taken from the iterator that belongs to this level, if any.
        Returns:
            A tuple (nodes, next_def): nodes is a list of (definition tuple, children) pairs for this level,
            next_def is the first definition belonging to an ancestor level, or None if the iterator is exhausted.
        Raises:
            ValueError if a line is indented without a parent, or more than one level deeper than the line before it.
        """
        current_level = []
        def_ = start_def if start_def is not None else next(def_iter, None)
        while def_ is not None:
            indent = len(def_["indent"])
            if indent == stack_level:
                current_level.append((self._to_def_tuple(def_), ()))
                def_ = next(def_iter, None)
            elif indent == stack_level + 1:
                # found a child of the previous
                if not current_level:
                    raise ValueError("indented definition without a parent definition")
                children, def_ = self._to_tree(def_iter, stack_level + 1, def_)
                current_level[-1] = current_level[-1][0], children
            elif indent < stack_level:
                # we're at ancestor level again, done with the children
                return current_level, def_
            else:
                # Deeper than a direct child: no level can own this line. Without this
                # branch the loop would never advance and spin forever.
                raise ValueError("definition indented %i levels deeper than its predecessor, at most 1 is allowed" % (indent - stack_level))
        return current_level, None

    @staticmethod
    def _to_def_tuple(def_):
        """
        Convert the regex group dict of one definition line into its definition tuple.

        Conditions, expects and asserts are compiled here, once, so they can be evaluated
        cheaply for every occurrence while parsing.

        Returns:
            An IfStatement, WhileStatement, BreakStatement or StructDefinition.
        """
        if def_["if_condition"] is not None:
            return IfStatement(compile(def_["if_condition"], "<if_condition>", "eval"))
        if def_["while_condition"] is not None:
            return WhileStatement(compile(def_["while_condition"], "<while_condition>", "eval"))
        if def_["break"] is not None:
            return BreakStatement()

        def compile_checks(source, filename):
            # "== 1 and != 3" becomes the expressions "value == 1" and "value != 3".
            if source is None:
                return ()
            return [compile("value " + check, filename, "eval") for check in source.split(" and ")]

        expects = compile_checks(def_["expect"], "<expect>")
        asserts = compile_checks(def_["assert"], "<assert>")
        return StructDefinition(def_["var_assign"], def_["type"], def_["description"], expects, asserts)

    def _parse_struct_occurrences(self, stream, defs, stack_level=0, repeat_times=1):
        """
        Walk one level of the definition tree over the stream, yielding Structure tuples.

        Arguments:
            stream: The stream handed to the type handlers.
            defs: The (definition tuple, children) pairs of this level.
            stack_level: Nesting level reported as Structure.level.
            repeat_times: How many times this level is parsed in a row.
        Returns (as the generator's return value):
            True if a break statement was hit and must propagate to the enclosing while, None otherwise.
        Raises:
            AssertionError if any assert of a definition evaluates false.
            KeyError if a definition uses a type without a registered handler.
        """
        for _ in range(repeat_times):
            for def_, children in defs:
                if isinstance(def_, IfStatement):
                    if children and self._eval(def_.condition):
                        break_ = yield from self._parse_struct_occurrences(stream, children, stack_level + 1)
                        if break_:
                            return True
                elif isinstance(def_, WhileStatement):
                    if children:
                        while self._eval(def_.condition):
                            break_ = yield from self._parse_struct_occurrences(stream, children, stack_level + 1)
                            if break_:
                                # The break is consumed by this while loop and does not propagate further.
                                break
                elif isinstance(def_, BreakStatement):
                    return True
                else:
                    value = self._type_handlers[def_.type](stream)

                    if def_.expects:
                        unexpected = not all(self._eval(expression, value) for expression in def_.expects)
                    else:
                        unexpected = None

                    for expression in def_.asserts:
                        # Raised explicitly instead of using the assert statement, so the
                        # documented AssertionError also fires when Python runs with -O.
                        if not self._eval(expression, value):
                            raise AssertionError((value, expression, def_))

                    if def_.var_assign is not None:
                        self._variables[def_.var_assign] = value
                    yield Structure(stack_level, def_.description, value, unexpected)

                    # A struct with children acts as a count: the children are parsed `value` times.
                    if children and value:
                        break_ = yield from self._parse_struct_occurrences(stream, children, stack_level + 1, value)
                        if break_:
                            return True

    def _eval(self, expression, value=None):
        """
        Evaluate a compiled definition expression.

        The expression sees `value` plus all current variables; a variable named "value" overrides the passed value.
        """
        globals_ = {"__builtins__": {}, "value": value}
        globals_.update(self._variables)
        # SECURITY: emptying __builtins__ is not a sandbox. Only use definition files from trusted sources.
        return eval(expression, globals_)  # definitely not safe, fwiw
2015-03-31 18:03:55 +00:00
2015-08-23 16:29:54 +00:00
if __name__ == "__main__":
    # Command-line use: parse a binary file with a definition file and print every structure found.
    argparser = argparse.ArgumentParser(description=__doc__)
    argparser.add_argument("filepath", help="path of binary file")
    argparser.add_argument("definition", help="struct definition file path to parse with")
    args = argparser.parse_args()
    with open(args.definition) as definition_file:
        defs = definition_file.read()
    parser = StructParser(defs)
    with open(args.filepath, "rb") as binary_file:
        for structure in parser.parse(binary_file.read()):
            print(structure)