At the moment I have tens of thousands of records of the following form:

0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000000 82557 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000001 128805 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000002 94990 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000003 121020 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000004 58111390 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000005 167079 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000006 130795 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000007 236926 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000008 24754217 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000009 75407 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000010 136461 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000011 136748 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000012 146258 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000013 381091 0x4f0DAA112142FFC4BA1B9f3B76bcd238A094D65B_000014 129815

In simple spreadsheet programs it's trivial to visualize this data for a few records as done here:

I've been trying to adapt the following code to visualize the data, but so far without success:

# Call like this:
#
# python opcode-farmer.py 'tst21' '6005600401'
#
import re
import numpy as np
import csv
import sys
import pprint
import itertools
import subprocess
import collections

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Only the final plotting step needs matplotlib; keeping the module
    # importable without it lets the parsing helpers be used on their own.
    plt = None


# Opcodes that occur fewer times than this are left out of the chart.
THRESHOLD = 30

# Text starting at an upper-case letter on a word boundary, to end of line.
_MNEMONIC_RE = re.compile(r"\b[A-Z].+")
# First whitespace character and everything after it on the line.
_OPERAND_RE = re.compile(r'\s(.*)')


def my_test_func(filename, data):
    """Write *data* to *filename* and run ``evm disasm`` on that file.

    A trailing newline is appended to the written data.

    Returns the raw output of ``subprocess.check_output`` (bytes on
    Python 3). Raises ``subprocess.CalledProcessError`` if the command exits
    with a non-zero status, and ``OSError`` if the file cannot be written or
    the ``evm`` executable cannot be started.
    """
    with open(filename, 'w') as fd:
        fd.write(data)
        fd.write('\n')
    return subprocess.check_output(['evm', 'disasm', filename])


def extract_opcodes(disassembly):
    """Return the opcode mnemonic found on each line of *disassembly*.

    *disassembly* may be ``str`` or ``bytes``; bytes are decoded as UTF-8
    first, because ``subprocess.check_output`` returns bytes on Python 3 and
    splitting bytes with a ``str`` separator raises ``TypeError``.

    For every line, the text starting at the first upper-case letter on a
    word boundary is taken and cut at its first whitespace, which drops any
    operand such as ``0x05``. Lines without such text are skipped.
    """
    if isinstance(disassembly, bytes):
        disassembly = disassembly.decode('utf-8', errors='replace')

    opcodes = []
    for line in disassembly.split('\n'):
        match = _MNEMONIC_RE.search(line)
        if match:
            # eliminate individual 0x05 specification
            opcodes.append(_OPERAND_RE.sub('', match.group(0)))
    return opcodes


def filter_by_threshold(counts, threshold):
    """Return a Counter holding only entries of *counts* >= *threshold*.

    *counts* is any mapping of label to count; the relative order of the
    surviving entries is preserved.
    """
    return collections.Counter(
        {label: n for label, n in counts.items() if n >= threshold}
    )


def plot_counts(counts):
    """Show a bar chart of *counts* (mapping of label to count).

    Returns ``False`` without plotting when *counts* is empty, ``True`` after
    the chart has been shown. Raises ``RuntimeError`` if matplotlib could not
    be imported.
    """
    if not counts:
        # zip(*[]) would yield nothing to unpack into labels/values.
        return False
    if plt is None:
        raise RuntimeError("matplotlib is required to draw the chart")

    # Transpose the data to get the x and y values
    labels, values = zip(*counts.items())
    # One x position per label: [0 1 2 ...]
    indexes = np.arange(len(labels))
    width = 1

    plt.xlabel("most common opcodes in tx")
    plt.ylabel("number of occurances")
    # Bars are centred on their x position, so the ticks go on the same
    # positions; shifting them by half a width would label the bar edges.
    plt.bar(indexes, values, width, align='center')
    plt.xticks(indexes, labels)
    plt.show()
    return True


def main(argv):
    """Disassemble the byte code named on the command line and chart it.

    *argv* is expected to be ``[program, file_name, byte_code]``. Returns a
    process exit status: 0 on success, 2 when arguments are missing.
    """
    if len(argv) < 3:
        sys.stderr.write(
            "usage: python %s <file_name> <byte_code>\n"
            % (argv[0] if argv else 'opcode-farmer.py')
        )
        return 2

    file_name = argv[1]
    byte_code = argv[2]
    status = my_test_func(file_name, byte_code)

    # Count up the values
    cnt = collections.Counter(extract_opcodes(status))
    print(cnt)

    frequent = filter_by_threshold(cnt, THRESHOLD)
    if not plot_counts(frequent):
        print("no opcode occurs at least %d times; nothing to plot" % THRESHOLD)
    return 0


if '__main__' == __name__:
    sys.exit(main(sys.argv))