From 375972f14d1eb171aaebf434efdc2008e4873a48 Mon Sep 17 00:00:00 2001 From: Vladislav Yarmak Date: Wed, 6 Feb 2019 15:22:47 +0200 Subject: [PATCH 1/4] 1337-diff: 3x speed improvement --- win/tools/1337-diff/1337-diff.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/win/tools/1337-diff/1337-diff.py b/win/tools/1337-diff/1337-diff.py index c9ab947..a1c196d 100755 --- a/win/tools/1337-diff/1337-diff.py +++ b/win/tools/1337-diff/1337-diff.py @@ -64,24 +64,29 @@ def feed_chunks(f, chunk_size=4096): yield buf -def zip_files_bytes(*files): +def zip_files_bytes(left, right): """ Iterate over two files, returning pair of bytes. Throw LengthMismatch if file sizes is uneven. """ class EndMarker(object): pass end_marker = EndMarker() - iterators = (itertools.chain.from_iterable(feed_chunks(f)) for f in files) - for tup in itertools.zip_longest(*iterators, fillvalue=end_marker): - if any(v is end_marker for v in tup): + left_iter = itertools.chain.from_iterable(feed_chunks(left)) + right_iter = itertools.chain.from_iterable(feed_chunks(right)) + for a, b in itertools.zip_longest(left_iter, + right_iter, + fillvalue=end_marker): + if a is end_marker or b is end_marker: raise LengthMismatchException("Length of input files inequal.") - yield tup + yield a, b def diff(left, right): - for offset, (a, b) in enumerate(zip_files_bytes(left, right)): + offset = 0 + for a, b in zip_files_bytes(left, right): if a != b: yield offset, a, b + offset += 1 def compose_diff_file(orig, patched, output, header, offset_adjustment=True): From 345ed93b01f4d73cfd2ada5baa61f1c22041eb3c Mon Sep 17 00:00:00 2001 From: Vladislav Yarmak Date: Wed, 6 Feb 2019 15:04:08 +0200 Subject: [PATCH 2/4] implement missing limit function --- win/tools/1337-diff/1337-diff.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/win/tools/1337-diff/1337-diff.py b/win/tools/1337-diff/1337-diff.py index a1c196d..499e216 100755 --- 
a/win/tools/1337-diff/1337-diff.py +++ b/win/tools/1337-diff/1337-diff.py @@ -22,6 +22,11 @@ class LengthMismatchException(ByteDiffException): pass +class DiffLimitException(ByteDiffException): + """ Thrown when the difference limit is hit """ + pass + + def check_positive_int(value): value = int(value) if value <= 0: @@ -81,19 +86,23 @@ def zip_files_bytes(left, right): yield a, b -def diff(left, right): +def diff(left, right, limit=None): offset = 0 + diff_count = 0 for a, b in zip_files_bytes(left, right): if a != b: + diff_count += 1 + if limit is not None and diff_count > limit: + raise DiffLimitException() yield offset, a, b offset += 1 -def compose_diff_file(orig, patched, output, header, offset_adjustment=True): +def compose_diff_file(orig, patched, output, header, limit=None, offset_adjustment=True): output.write(HEADER_FORMAT % (header.encode('latin-1'),)) - for offset, a, b in diff(orig, patched): - o = offset + OFFSET_ADJUSTMENT if offset_adjustment else offset - output.write(LINE_FORMAT % (o, a, b)) + adj = OFFSET_ADJUSTMENT if offset_adjustment else 0 + for offset, a, b in diff(orig, patched, limit): + output.write(LINE_FORMAT % (offset + adj, a, b)) def main(): @@ -114,10 +123,13 @@ def main(): open(args.patched_file, 'rb') as patched,\ open(output_filename, 'wb') as output: try: - compose_diff_file(orig, patched, output, header_filename) + compose_diff_file(orig, patched, output, header_filename, args.limit) except LengthMismatchException: print("Input files have inequal length. Aborting...", file=sys.stderr) + except DiffLimitException: + print("Differences limit hit.
Aborting...", + file=sys.stderr) if __name__ == '__main__': From 0e7740550c4e0fc6e21788e3c67fbd87593e22b0 Mon Sep 17 00:00:00 2001 From: Vladislav Yarmak Date: Wed, 6 Feb 2019 15:59:52 +0200 Subject: [PATCH 3/4] force kwargs for compose_diff_file() options --- win/tools/1337-diff/1337-diff.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/win/tools/1337-diff/1337-diff.py b/win/tools/1337-diff/1337-diff.py index 499e216..e8ceb97 100755 --- a/win/tools/1337-diff/1337-diff.py +++ b/win/tools/1337-diff/1337-diff.py @@ -98,7 +98,7 @@ def diff(left, right, limit=None): offset += 1 -def compose_diff_file(orig, patched, output, header, limit=None, offset_adjustment=True): +def compose_diff_file(orig, patched, output, header, *, limit=None, offset_adjustment=True): output.write(HEADER_FORMAT % (header.encode('latin-1'),)) adj = OFFSET_ADJUSTMENT if offset_adjustment else 0 for offset, a, b in diff(orig, patched, limit): @@ -123,7 +123,8 @@ def main(): open(args.patched_file, 'rb') as patched,\ open(output_filename, 'wb') as output: try: - compose_diff_file(orig, patched, output, header_filename, args.limit) + compose_diff_file(orig, patched, output, header_filename, + limit=args.limit) except LengthMismatchException: print("Input files have inequal length. 
Aborting...", file=sys.stderr) From 6e5d608b20277665392523c11d3eb16a3846263b Mon Sep 17 00:00:00 2001 From: Vladislav Yarmak Date: Wed, 6 Feb 2019 16:04:57 +0200 Subject: [PATCH 4/4] 1337-diff: fix some pep8 complaints --- win/tools/1337-diff/1337-diff.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/win/tools/1337-diff/1337-diff.py b/win/tools/1337-diff/1337-diff.py index e8ceb97..fccef5b 100755 --- a/win/tools/1337-diff/1337-diff.py +++ b/win/tools/1337-diff/1337-diff.py @@ -76,8 +76,10 @@ def zip_files_bytes(left, right): pass end_marker = EndMarker() - left_iter = itertools.chain.from_iterable(feed_chunks(left)) - right_iter = itertools.chain.from_iterable(feed_chunks(right)) + left_iter = itertools.chain.from_iterable( + feed_chunks(left)) + right_iter = itertools.chain.from_iterable( + feed_chunks(right)) for a, b in itertools.zip_longest(left_iter, right_iter, fillvalue=end_marker): @@ -98,7 +100,8 @@ def diff(left, right, limit=None): offset += 1 -def compose_diff_file(orig, patched, output, header, *, limit=None, offset_adjustment=True): +def compose_diff_file(orig, patched, output, header, *, + limit=None, offset_adjustment=True): output.write(HEADER_FORMAT % (header.encode('latin-1'),)) adj = OFFSET_ADJUSTMENT if offset_adjustment else 0 for offset, a, b in diff(orig, patched, limit):