verilator/nodist/fuzzer/generate_dictionary
2022-12-11 21:58:02 -05:00

78 lines
2.2 KiB
Python
Executable File

#!/usr/bin/env python3
# pylint: disable=C0103,C0114,C0115,C0116,C0321
######################################################################
# DESCRIPTION: Fuzzer dictionary generator
#
# Copyright 2019-2019 by Eric Rippey. This program is free software; you
# can redistribute it and/or modify it under the terms of either the GNU Lesser
# General Public License Version 3 or the Perl Artistic License Version 2.0.
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
######################################################################
# Attempts to pull a list of keywords out of the Flex input
# These are then put in a dictionary of "interesting" sequences
# This will be used to help the fuzzer pick interesting inputs more quickly.
import os
from itertools import takewhile
from os import system
from subprocess import getstatusoutput
def take_while(f, a):
    """Return the leading elements of `a` that satisfy `f`, as a list.

    any(a) => (a->bool)->[a]->[a]
    Does the same thing as Haskell's takeWhile: iteration stops at the first
    element for which `f` is false; that element and everything after it are
    left out of the result.

    Args:
        f: predicate called with one element at a time.
        a: any iterable (a str is treated as a sequence of characters).

    Returns:
        A new list holding the accepted prefix (empty if the first element is
        rejected or `a` is empty).
    """
    return list(takewhile(f, a))
def skip_while(f, a):
    """Drop the leading elements of `a` that satisfy `f`.

    any(a) => (a->bool)->[a]->[a]
    The counterpart of take_while (Haskell's dropWhile): returns what is left
    of `a` starting at the first element for which `f` is false.

    Args:
        f: predicate called with one element at a time.
        a: a sequence supporting len(), indexing and slicing (list, str, ...).

    Returns:
        The remaining suffix, of the same type as `a`. When nothing is
        skipped, `a` itself is returned unchanged.
    """
    # Walk an index forward and slice once at the end; re-slicing the
    # sequence on every step would copy the tail each time (quadratic cost).
    start = 0
    end = len(a)
    while start < end and f(a[start]):
        start += 1
    return a[start:] if start else a
def print_lines(a):
    """Print every item of the iterable `a` on a line of its own.

    printable(a) => [a]->void
    """
    for item in a:
        print(item)
def write_file(filename, contents):
    """Create (or truncate) `filename` and store the text `contents` in it.

    str->str->void
    The file is opened in text mode with UTF-8 encoding and is closed again
    before the function returns.
    """
    with open(filename, mode="w", encoding="utf8") as sink:
        sink.write(contents)
def parse_line(s):
    """Extract the first double-quoted literal from a line of text.

    str->maybe str
    A backslash is treated as escaping the character that follows it: the
    backslash is dropped and the following character is kept literally, so an
    escaped quote neither opens nor closes a literal and an escaped backslash
    yields a single backslash.
    NOTE(review): this assumes the flex trace output backslash-escapes special
    characters inside quoted patterns -- confirm against real `flex -T` output.

    Args:
        s: one line of text.

    Returns:
        The unescaped text between the first unescaped '"' and the next
        unescaped '"' (or the end of the line if the literal is never closed).
        None if the line contains no unescaped '"'. The result may be the
        empty string.
    """
    chars = iter(s)

    # Find the opening quote, stepping over any escaped character on the way.
    for ch in chars:
        if ch == '\\':
            next(chars, None)
        elif ch == '"':
            break
    else:
        # Line exhausted (or empty) without seeing an opening quote.
        return None

    # Collect the literal up to the closing quote, resolving escapes.
    literal = []
    for ch in chars:
        if ch == '"':
            break
        if ch == '\\':
            # A trailing backslash with nothing after it contributes nothing.
            ch = next(chars, '')
        literal.append(ch)
    return ''.join(literal)
def main():
    """Write one dictionary file per keyword found in the flex trace output.

    Runs `flex -T ../../src/verilog.l` (path relative to the current working
    directory), keeps the output lines that come before the first line
    containing 'beginning dump of nfa', extracts a quoted literal from each of
    them with parse_line(), and writes every distinct non-empty literal to its
    own numbered file inside the 'dictionary' directory.

    Raises:
        RuntimeError: if the flex command exits with a non-zero status.
        OSError: if the directory or one of the files cannot be written.
    """
    status, output = getstatusoutput('flex -T ../../src/verilog.l')
    if status != 0:
        # Explicit check instead of assert, which is stripped under -O.
        raise RuntimeError(
            'flex exited with status ' + str(status) + ':\n' + output)

    lines = take_while(lambda x: 'beginning dump of nfa' not in x,
                       output.splitlines())
    # parse_line yields None or '' for lines without a usable literal.
    tokens = {token for token in map(parse_line, lines) if token}

    dirname = 'dictionary'
    os.makedirs(dirname, exist_ok=True)
    # Sort so that file numbering is stable from one run to the next; set
    # iteration order of strings is not.
    for i, token in enumerate(sorted(tokens)):
        write_file(os.path.join(dirname, str(i)), token)
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()