#!/usr/bin/env python3
# pylint: disable=C0103,C0114,C0115,C0116,C0321
######################################################################
# DESCRIPTION: Fuzzer dictionary generator
#
# Copyright 2019-2019 by Eric Rippey. This program is free software; you
# can redistribute it and/or modify it under the terms of either the GNU Lesser
# General Public License Version 3 or the Perl Artistic License Version 2.0.
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
######################################################################

# Attempts to pull a list of keywords out of the Flex input.
# These are then put in a dictionary of "interesting" sequences.
# This will be used to help the fuzzer pick interesting inputs more quickly.

from itertools import takewhile
from os import system
from subprocess import getstatusoutput


def take_while(f, a):
    # any(a) => (a->bool)->[a]->[a]
    """Return, as a list, the leading elements of `a` for which `f` is true.

    Does the same thing as Haskell's takeWhile: collection stops at the
    first element that fails the predicate.
    """
    return list(takewhile(f, a))


def skip_while(f, a):
    # any(a) => (a->bool)->[a]->[a]
    """Return `a` with its leading elements that satisfy `f` removed.

    Basically the opposite of take_while.  `a` must be sliceable (list, str,
    ...); the result is a slice of it, so it has the same type as the input.
    """
    for index, elem in enumerate(a):
        if not f(elem):
            # One slice at the first failing element, rather than re-slicing
            # the sequence once per skipped element.
            return a[index:]
    # Every element satisfied the predicate (or `a` was empty): return an
    # empty sequence of the same type.
    return a[len(a):]


def print_lines(a):
    # printable(a) => [a]->void
    """Print each element of `a` on its own line."""
    for elem in a:
        print(elem)


def write_file(filename, contents):
    # str->str->void
    """Write `contents` to `filename` as UTF-8 text, replacing any old file."""
    with open(filename, "w", encoding="utf8") as fh:
        fh.write(contents)


def parse_line(s):
    # str->maybe str
    """Extract the first double-quoted literal from the line `s`.

    Returns None when `s` contains no double quote.  Otherwise returns the
    text between the first double quote and the next one (or the end of the
    line if there is no closing quote), with all backslashes removed.  The
    result may be the empty string.

    NOTE(review): a backslash-escaped quote inside the literal is treated as
    the closing quote, so such a literal is cut short - confirm whether the
    input can contain one.
    """
    _, quote, rest = s.partition('"')
    if not quote:
        return None
    literal, _, _ = rest.partition('"')
    return literal.replace('\\', '')


def main():
    """Generate the fuzzer dictionary from flex's trace output.

    Runs `flex -T` on ../../src/verilog.l, keeps the output lines that come
    before the first line containing 'beginning dump of nfa', extracts the
    quoted literal from each of them, and writes every distinct non-empty
    literal to its own numbered file in the 'dictionary' directory.

    Raises RuntimeError if flex or mkdir exits with a non-zero status.
    """
    status, output = getstatusoutput('flex -T ../../src/verilog.l')
    if status != 0:
        # Explicit raise instead of assert, which is stripped under -O.
        raise RuntimeError(
            'flex exited with status ' + str(status) + ':\n' + output)

    lines = take_while(lambda x: 'beginning dump of nfa' not in x,
                       output.splitlines())
    # parse_line yields None or '' for lines without a usable literal;
    # both are dropped here.
    tokens = {token for token in map(parse_line, lines) if token}

    dirname = 'dictionary'
    r = system('mkdir -p ' + dirname)
    if r != 0:
        raise RuntimeError('mkdir -p ' + dirname + ' failed with status '
                           + str(r))

    # Set iteration order of strings varies between runs (hash
    # randomization); sorting makes the file-number -> token mapping
    # reproducible.
    for i, token in enumerate(sorted(tokens)):
        write_file(dirname + '/' + str(i), token)


if __name__ == '__main__':
    main()