verilator/nodist/fuzzer/generate_dictionary

#!/usr/bin/env python3
# pylint: disable=C0103,C0114,C0115,C0116,C0321
######################################################################
# DESCRIPTION: Fuzzer dictionary generator
#
# Copyright 2019-2019 by Eric Rippey. This program is free software; you
# can redistribute it and/or modify it under the terms of either the GNU Lesser
# General Public License Version 3 or the Perl Artistic License Version 2.0.
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
######################################################################

# Attempts to pull a list of keywords out of the Flex input.
# These are then put in a dictionary of "interesting" sequences,
# which is used to help the fuzzer pick interesting inputs more quickly.

from subprocess import getstatusoutput
from os import system


def take_while(f, a):
    # any(a) => (a->bool)->[a]->[a]
    # Does the same thing as Haskell's takeWhile: collects the leading
    # elements of `a` for which `f` is truthy, and stops at the first
    # element that fails. Always returns a new list, whatever iterable
    # `a` is (a str yields a list of characters).
    prefix = []
    for item in a:
        if not f(item):
            break
        prefix.append(item)
    return prefix


def skip_while(f, a):
    # any(a) => (a->bool)->[a]->[a]
    # The counterpart of take_while (Haskell's dropWhile): drops the leading
    # elements of `a` for which `f` is truthy and returns what is left.
    # `a` must support len(), indexing and slicing (e.g. str or list); the
    # result is a slice of `a`, so it has the same type as `a`.
    #
    # Scan with an index and slice once at the end. Re-slicing after every
    # dropped element copies the remaining tail each time (quadratic).
    dropped = 0
    size = len(a)
    while dropped < size and f(a[dropped]):
        dropped += 1
    # Nothing dropped: hand back the input itself rather than a copy.
    return a[dropped:] if dropped else a


def print_lines(a):
    # printable(a) => [a]->void
    # Sends each element of `a` to print(), in order, one call per element,
    # so every element ends up on its own line of standard output.
    for line in a:
        print(line)


def write_file(filename, contents):
    # str->str->void
    # Opens `filename` for writing as UTF-8 text (creating it, or replacing
    # whatever it held before) and stores `contents` in it. The file is
    # closed again before returning.
    with open(filename, mode="w", encoding="utf8") as sink:
        sink.write(contents)


def parse_line(s):
    # str->maybe str
    # Extracts the first double-quoted literal found in `s`.
    # Returns None when `s` is empty or contains no '"'. Otherwise returns
    # the text between the first '"' and the next '"' (or the end of `s`
    # when the quote is never closed), with every backslash removed.
    # An escaped quote (\") is not treated specially: it still ends the
    # literal, so the result may be the empty string.
    #
    # Uses str.find/str.replace instead of scanning one character at a time.
    if not s:
        return None
    opening = s.find('"')
    if opening < 0:
        return None
    closing = s.find('"', opening + 1)
    if closing < 0:
        # Unterminated literal: take everything after the opening quote
        literal = s[opening + 1:]
    else:
        literal = s[opening + 1:closing]
    return literal.replace('\\', '')


def main():
    # void->void
    # Runs `flex -T` on ../../src/verilog.l, extracts the quoted literals
    # from the lines of its output that precede the 'beginning dump of nfa'
    # marker, and writes each distinct non-empty literal to its own file
    # dictionary/0, dictionary/1, ... (relative to the current directory).
    # Raises RuntimeError when flex or mkdir exits with a non-zero status.
    status, output = getstatusoutput('flex -T ../../src/verilog.l')
    # Checked explicitly rather than with assert: asserts are stripped under
    # `python -O`, which would let a failed flex run go unnoticed.
    if status != 0:
        raise RuntimeError('flex -T ../../src/verilog.l failed with status {}:\n{}'.format(
            status, output))

    # Only the part of the output before the NFA dump is scanned for literals
    header = take_while(lambda x: 'beginning dump of nfa' not in x, output.splitlines())
    # parse_line yields None or '' for lines without a usable literal; drop those
    tokens = {tok for tok in map(parse_line, header) if tok}

    dirname = 'dictionary'
    if system('mkdir -p ' + dirname) != 0:
        raise RuntimeError('mkdir -p ' + dirname + ' failed')
    # Sorted so that the file numbering is reproducible from run to run;
    # iteration order of a set of strings is not.
    for i, token in enumerate(sorted(tokens)):
        write_file(dirname + '/' + str(i), token)


# Generate the dictionary only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()