verilator/src/bisonpre
2021-04-06 18:07:28 -04:00

558 lines
19 KiB
Python
Executable File

#!/usr/bin/env python3
# pylint: disable=C0103,C0114,C0115,C0116,R0912,R0914,R0915,R1702,W0125
######################################################################
import argparse
import os
import re
import subprocess
import sys
# from pprint import pprint, pformat
######################################################################
def process():
    """Run the whole bisonpre flow: preprocess, invoke bison, post-process.

    Steps, in order:
      1. Remove stale outputs and determine the bison version.
      2. Expand the grammar extensions of ``Args.input`` into the temporary
         ``<tmp_prefix>.y`` file.
      3. Run bison on that temporary grammar.
      4. Clean the generated ``.output``, ``.c`` and ``.h`` files into their
         final names, checking the ``.output`` report for warnings.
      5. Remove the temporary generated files.

    Exits the program (``sys.exit``) if bison returns a non-zero status.

    NOTE(review): the ``.output`` and ``.h`` files are opened
    unconditionally, so this presumably relies on callers always passing
    ``-v`` and ``-d`` -- confirm against the build rules.
    """
    unlink_outputs()

    bison_version_check()
    # The --report options are only passed for bison 2.3 and newer
    supports_report = Bison_Version >= 2.3

    clean_input(Args.input, tmp_prefix() + ".y")

    # Run bison
    command = (
        Args.yacc + (" -t" if Args.debug else "") +
        (" -d" if Args.definitions else "") +
        (" -k" if Args.token_table else "") + (" -v" if Args.verbose else "") +
        (" --report=itemset --report=lookahead" if
         (Args.verbose and supports_report) else "")
        # -p required for GLR parsers; they write to -p basename, not -o
        + ((" -p " + Args.name_prefix) if Args.name_prefix else "") + " -b " +
        tmp_prefix() + " -o " + tmp_prefix() + ".c" + " " + tmp_prefix() +
        ".y")

    print(" " + command)
    status = subprocess.call(command, shell=True)
    if status != 0:
        unlink_outputs()
        # Bison_Version is a float; it must be converted before concatenation
        sys.exit("bisonpre: %Error: " + Args.yacc + " version " +
                 str(Bison_Version) + " run failed due to errors\n")

    clean_output(tmp_prefix() + ".output",
                 output_prefix() + ".output", True, False)
    warning_check(output_prefix() + ".output")

    clean_output(tmp_prefix() + ".c", output_prefix() + ".c", False, True)
    clean_output(tmp_prefix() + ".h", output_prefix() + ".h", False, True)
    unlink_tmp()
def tmp_prefix():
    """Return the path prefix used for temporary files.

    This is the final output prefix with "_pretmp" appended.
    """
    suffix = "_pretmp"
    return output_prefix() + suffix
def output_prefix():
    """Return the path prefix (no extension) of the final output files.

    When ``Args.output`` is set, everything from its last '.' onwards is
    dropped; otherwise the prefix is ``Args.file_prefix`` plus ".tab".
    """
    if not Args.output:
        return Args.file_prefix + ".tab"
    return re.sub(r'\.[^.]*$', '', Args.output)
def unlink_ok(filename):
    """Delete ``filename``, silently ignoring any OSError (e.g. missing file)."""
    try:
        os.remove(filename)
    except OSError:
        # Best effort only; a file that cannot be removed is not an error
        return
def unlink_tmp():
    """Remove the temporary .c, .h and .output files, if they exist."""
    for extension in (".c", ".h", ".output"):
        unlink_ok(tmp_prefix() + extension)
def unlink_outputs():
    """Remove the temporary files and the final .c and .h outputs."""
    unlink_tmp()
    # The final .output file is deliberately kept, as it's useful for
    # debugging errors
    for extension in (".c", ".h"):
        unlink_ok(output_prefix() + extension)
def bison_version_check():
    """Determine the bison version and store it in the global Bison_Version.

    Runs ``<Args.yacc> --version`` through the shell and takes the first
    ``<digits>.<digits>`` found in its standard output as the version.

    Exits the program (``sys.exit``) when no version number can be found, or
    when the version is older than 1.875.

    NOTE(review): the version is kept as a float because the rest of the
    script compares it numerically; a minor version such as "3.10" therefore
    compares as 3.1.
    """
    global Bison_Version  # pylint: disable=global-variable-undefined

    # The context manager closes the pipe and waits for the child to exit
    with subprocess.Popen(Args.yacc + " --version",
                          shell=True,
                          stdout=subprocess.PIPE) as sp:
        raw, _ = sp.communicate()
    out = raw.decode(errors="replace")

    match = re.search(r'([0-9]+\.[0-9]+)', out)
    if not match:
        sys.exit("bisonpre: %Error: '" + Args.yacc +
                 "' is not installed, or not working\n")

    v = float(match.group(1))
    if v < 1.875:
        # v is a float; it must be converted before concatenation
        sys.exit("bisonpre: %Error: '" + Args.yacc + "' is version " +
                 str(v) + "; version 1.875 or newer is required\n")
    Bison_Version = v
def clean_output(filename, outname, is_output, is_c):
    """Copy a bison-generated file to its final name, patching it on the way.

    Args:
        filename: generated file to read (normally a temporary file).
        outname: file to write the cleaned text to.
        is_output: true for the ``.output`` report; "State N conflicts" lines
            then get a "// line L" suffix giving the line where "state N" is
            described.
        is_c: true for generated C/C++ text; after a line containing
            BISONPRE_TOKEN_NAMES, one ``case <value>: return "<name>";`` line
            is inserted per token value collected from the file.

    In every case, references to the temporary file name are replaced by a
    name derived from ``Args.input``, and a few known compiler-warning
    workarounds are applied line by line.
    """
    print(" edit " + filename + " " + outname)

    with open(filename) as fh:
        lines = fh.readlines()

    # Pattern for the temporary base name, and the name to show instead
    basename = re.sub(r'.*/', '', tmp_prefix() + ".")
    basename = re.escape(basename)
    newbase = re.sub(r'.*/', '', Args.input)
    newbase = re.sub(r'\.y', '.', newbase)

    if is_output:
        # Remember on which line each "state N" description starts
        state_line = {}
        for lineno, line in enumerate(lines, 1):
            match = re.match(r'^state (\d+)\s*', line)
            if match:
                state_line[match.group(1)] = lineno

        out = []
        for line in lines:
            match = re.match(r'^State (\d+) (conflicts)', line)
            if match:
                line = line.rstrip()
                if match.group(1) in state_line:
                    # Line numbers are ints; convert before concatenation
                    line += " // line " + str(state_line[match.group(1)])
                line += "\n"
            out.append(line)
        lines = out

    if is_c:
        # Token value (int) -> token name
        token_values = {}
        for line in lines:
            # NOTE(review): only a "NAME = VALUE" found on the same line as
            # "enum yytokentype" is collected -- confirm this is intended.
            if re.search(r'enum\s+yytokentype',
                         line) and not re.search(r';', line):
                match = re.search(r'\b(\S+) = (\d+)', line)
                if match:
                    # Keep values as ints so %d formats them and so they
                    # sort numerically
                    token_values[int(match.group(2))] = match.group(1)

        out = []
        for line in lines:
            if _enaline(line) and re.search(r'BISONPRE_TOKEN_NAMES', line):
                out.append(line)
                for tv in sorted(token_values):
                    out.append("\tcase %d: return \"%s\";\n" %
                               (tv, token_values[tv]))
                continue
            out.append(line)
        lines = out

    with open(outname, "w") as fh:
        for line in lines:
            # Fix filename refs. A callable replacement inserts newbase
            # literally, so backslashes in it are not treated as escapes.
            line = re.sub(basename, lambda _m: newbase, line)
            # Fix bison 2.3 and GCC 4.2.1
            line = re.sub(r'\(YY_\("', '(YY_((char*)"', line)
            # Fix bison 2.3 glr-parser warning about yyerrorloc.YYTYPE::yydummy uninit
            line = re.sub(r'(YYLTYPE yyerrloc;)',
                          r'\1 yyerrloc.yydummy=0;/*bisonpre*/', line)
            # Fix bison 3.6.1 unexpected nested-comment
            line = re.sub(r'/\* "/\*.*\*/" \*/', '', line)
            fh.write(line)
def warning_check(filename):
    """Exit with an error at the first suspicious line of a bison report.

    A line is suspicious when it contains "conflicts" or "warning:", or
    starts with "useless" (all case-insensitive). The exit message gives the
    file name, the 1-based line number and the offending line.
    """
    suspicious = re.compile(r'(conflicts|warning:|^useless)',
                            flags=re.IGNORECASE)
    with open(filename) as fh:
        for linenum, line in enumerate(fh, 1):
            if suspicious.search(line) is None:
                continue
            sys.exit("%Error: " + filename + ":" + str(linenum) + ": " +
                     line + "\n")
######################################################################
def clean_input(filename, outname):
    """Expand the bisonpre grammar extensions of ``filename`` into ``outname``.

    The input is processed in successive passes over its lines:
      1. Collect rules ("rule:" and "rule<type>:", the latter rewritten to
         "rule"), their text and sub-rule names, and "%token<type>" names.
      2. Expand BISONPRE_VERSION.
      3. Expand BISONPRE_NOT.
      4. Expand BISONPRE_COPY / BISONPRE_COPY_ONCE.
      5. Remove "~name~" markers.
      6. Insert "%type" declarations after "//BISONPRE_TYPES", consuming the
         same number of following blank/comment lines so that the line
         numbering of the rest of the file is unchanged.

    Sets the globals ``Filename`` and ``Rules`` (both read by
    ``_bisonpre_copy``). Any malformed construct terminates the program via
    ``sys.exit`` with a "%Error: file:line: ..." message.
    """
    print(" edit " + filename + " " + outname)

    global Filename  # pylint: disable=global-variable-undefined
    Filename = filename

    with open(filename) as fh:
        lines = fh.readlines()

    # Find "%tokens<type>:"
    # Find "rule<type>:" and replace with just "rule:"
    global Rules  # pylint: disable=global-variable-undefined
    Rules = {}

    types = {}  # type name -> {rule name: 1}
    tokens = {}  # token name -> type name
    last_rule = None  # Rule currently being collected, if any
    section = 1  # Incremented at each "%%" line

    linesin = lines
    lines = []
    for lineno, line in enumerate(linesin, 1):
        # ^/ to prevent comments from matching
        if re.match(r'^[a-zA-Z0-9_<>]+:[^/]*[a-zA-Z]', line):
            sys.exit("%Error: " + filename + ":" + str(lineno) +
                     ": Move text on rule line to next line: " + line +
                     "\n")

        matcha = re.match(r'^([a-zA-Z0-9_]+)<(\S*)>(.*)$',
                          line,
                          flags=re.DOTALL)
        matchb = re.match(r'^([a-zA-Z0-9_]+):', line)

        if re.match(r'^%%', line):
            section += 1
            if section == 2:
                last_rule = None
        elif matcha:
            # "rule<type>..." - record the type and strip it from the line
            name = matcha.group(1)
            dtype = matcha.group(2)
            line = name + matcha.group(3)
            if name in Rules:
                sys.exit("%Error: " + filename + ":" + str(lineno) +
                         ": Redeclaring '" + name + "': " + line)
            if dtype not in types:
                types[dtype] = {}
            types[dtype][name] = 1
            Rules[name] = {
                'name': name,
                'type': dtype,
                'rules_and_productions': "",
                'subrules': {}
            }
            if last_rule:
                sys.exit("%Error: " + filename + ":" + str(lineno) +
                         ": Unterminated previous rule\n")
            last_rule = name
        elif matchb:
            # "rule:" without a type
            name = matchb.group(1)
            if name not in ('public', 'private'):
                if name in Rules:
                    sys.exit("%Error: " + filename + ":" + str(lineno) +
                             ": Redeclaring '" + name + "': " + line)
                Rules[name] = {
                    'name': name,
                    'type': "",
                    'rules_and_productions': "",
                    'subrules': {}
                }
                if last_rule:
                    sys.exit("%Error: " + filename + ":" + str(lineno) +
                             ": Unterminated previous rule\n")
                last_rule = name

        lines.append(line)

        # Now clean the line and extract some more info
        cline = re.sub(r'//.*$', '\n', line)
        if re.match(r'^\s*;', cline):
            # A line starting with ';' terminates the current rule
            if not last_rule:
                sys.exit("%Error: " + filename + ":" + str(lineno) +
                         ": Stray semicolon\n")
            last_rule = None
        elif last_rule:
            Rules[last_rule]['rules_and_productions'] += cline

        match = re.match(r'^%token\s*<(\S+)>\s*(\S+)', cline)
        if match:
            dtype = match.group(1)
            tok = match.group(2)
            if tok in tokens:
                sys.exit("%Error: " + filename + ":" + str(lineno) +
                         ": Redeclaring '" + tok + "': " + line)
            tokens[tok] = dtype

        for tok in re.split(r'[^a-zA-Z0-9_]+', cline):
            if last_rule and re.match(r'^[a-zA-Z]', tok):
                Rules[last_rule]['subrules'][tok] = 1

    # Replace BISONPRE_VERSION(ver,,...) with expanded list
    linesin = lines
    lines = []
    for lineno, line in enumerate(linesin, 1):
        if _enaline(line) and re.search(r'BISONPRE_VERSION', line):
            # Groups: 1=ver  2=(ver_max with comma)  3=ver_max  4=cmd
            match = re.search(
                r'BISONPRE_VERSION\((\S+)\s*,\s*((\S+)\s*,)?\s*([^\),]+)\)\s*$',
                line)
            if not match:
                sys.exit("%Error: " + filename + ":" + str(lineno) +
                         ": Bad form of BISONPRE_VERSION: " + line)
            ver = match.group(1)
            ver_max = match.group(3)
            cmd = match.group(4)
            if Bison_Version >= float(ver) and (
                    not ver_max or Bison_Version <= float(ver_max)):
                line = cmd + "\n"
            else:
                line = "//NOP: " + line
        lines.append(line)

    # Replace BISONPRE_NOT(type,...) with expanded list
    linesin = lines
    lines = []
    for lineno, line in enumerate(linesin, 1):
        if _enaline(line) and re.search(r'BISONPRE_NOT', line):
            # Groups: 1=text before  2=token list  3={action}  4=text after
            match = re.search(
                r'(.*)BISONPRE_NOT\((\S+)\)\s*(\{[^}]+})\s*(.*)$',
                line,
                flags=re.DOTALL)
            if not match:
                sys.exit("%Error: " + filename + ":" + str(lineno) +
                         ": Bad form of BISONPRE_NOT: " + line)
            line = match.group(1) + match.group(4)
            endtok = match.group(2)
            action = match.group(3)
            endtoks = endtok.split(',')
            for etok in endtoks:
                if etok not in tokens:
                    sys.exit("%Error: " + filename + ":" + str(lineno) +
                             ": Can't find definition for token: " + etok +
                             "\n")
            # Push it all onto one line to avoid error messages changing
            pipe = ""
            for tok in sorted(tokens.keys()):
                if tok not in endtoks and endtok != tok:
                    line += "\t" + pipe + " " + tok + " " + action
                    pipe = "|"
            line += "\n"
        lines.append(line)

    # Replace BISONPRE_COPY(type,{code})
    linesin = lines
    lines = []
    for lineno, line in enumerate(linesin, 1):
        if _enaline(line) and re.search(r'BISONPRE_COPY', line):
            line = _bisonpre_copy(line, lineno, 0)
        lines.append(line)

    # Replace ~[x]~ - must be after BISONPRE_COPY expansion
    lines = [re.sub(r'~[a-zA-Z0-9_]+~', '', line) for line in lines]

    # Find "BISONPRE_TYPES"
    linesin = lines
    lines = []
    needmore = 0  # Inserted %type lines not yet offset by a removed line
    for lineno, line in enumerate(linesin, 1):
        if _enaline(line) and re.search(r'//BISONPRE_TYPES', line):
            lines.append(line)
            for typen in sorted(types.keys()):
                if not typen:
                    continue
                line = "%type<" + typen + ">\t"
                for rule in sorted(types[typen].keys()):
                    line += " " + rule
                line += "\n"
                lines.append(line)
                needmore += 1
        elif needmore > 0:
            # Bison doesn't have a #line directive, so we need somewhere to insert into
            line = re.sub(r'^\s*//.*$', '', line)
            if not re.match(r'^\s*$', line):
                # needmore is an int; convert before concatenation
                sys.exit(
                    "%Error: " + filename + ":" + str(lineno) + ": Need " +
                    str(needmore) +
                    " more blank lines to keep line numbers are constant\n"
                )
            needmore -= 1
        else:
            lines.append(line)

    with open(outname, "w") as fh:
        fh.writelines(lines)
def _bisonpre_copy(text, lineno, depth):
while re.search(r'BISONPRE_COPY', text):
match = re.match(
# 1 2 3 4 5
r'(.*)BISONPRE_COPY(_ONCE)?\((\S+)\s*,\s*\{([^}]*)}\s*\)(.*)',
text,
flags=re.DOTALL)
if not match:
sys.exit("%Error: " + Filename + ":" + str(lineno) +
": Bad form of BISONPRE_NOT: " + text)
text = match.group(1) + '{HERE}' + match.group(5)
once = match.group(2)
rule = match.group(3)
code = match.group(4)
if rule not in Rules:
sys.exit("%Error: " + Filename + ":" + str(lineno) +
": Can't find definition for rule: " + rule)
if depth > 0 and once:
# _ONCE means don't inherit
text = re.sub(r'\|[ \t]+{HERE}', '', text) # Don't OR in nothing
text = re.sub(r'{HERE}', '', text)
else:
# Push it all onto one line to avoid error messages changing
insert = Rules[rule]['rules_and_productions']
insert = re.sub(r'^\S+:', '', insert) # Strip rule name
# Recurse so BISONPRE under B
for op in code.split(';'):
if re.match(r'^\s*$', op):
continue
match = re.match(r'^\s*s/(.*?)/(.*?)/g\s*$', op)
if not match:
sys.exit("%Error: " + Filename + ":" + str(lineno) +
": Didn't understand replacement: " + op)
left = match.group(1)
right = match.group(2)
insert = re.sub(left, right, insert)
insert = re.sub(r'[ \t\n]+\n', "\n", insert)
insert = re.sub(r'\n', " ",
insert) # Optional - preserve line numbering
text = re.sub(r'{HERE}', insert, text)
depth += 1
return text
def _enaline(line):
return not re.search(r'//UN', line)
######################################################################
# main
# Command-line definition. The epilog documents the grammar extensions as
# they are actually implemented by clean_input() and _bisonpre_copy().
parser = argparse.ArgumentParser(
    allow_abbrev=False,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=
    """Bisonpre is a wrapper for the Bison YACC replacement. Input to Bison is
preprocessed with substitution as described below under EXTENSIONS. Output
from Bison is checked for additional errors, and corrected to work around
various compile warnings.""",
    epilog="""
BISON GRAMMAR EXTENSIONS
//BISONPRE_TYPES
This is expanded into %type declarations.
~[a-zA-Z0-9_]+~
Any text matching ~[a-zA-Z0-9_]+~ is removed. This allows optional text to
be used only when the rule containing the ~~ is used in a BISONPRE_COPY.
rule_label<type>:
This allows the label declaring a rule to also specify the type of the
rule. The type will be inserted where //BISONPRE_TYPES is
encountered.
BISONPRE_COPY(rule, {code})
Copy the rules and productions from the specified rule, filter through
the s/regexp/replacement/g substitutions provided in the {} (separated by
semicolons) and insert here into the output file.
BISONPRE_COPY_ONCE(rule, {code})
As with BISONPRE_COPY, but if called from underneath another
BISONPRE_COPY rule, ignore it.
BISONPRE_NOT(token[,token...])
Create a rule that matches every token except for those specified.
BISONPRE_VERSION(ver, [ver_max,] cmd)
If the bison version is >= the specified version (and <= ver_max, when
given), include the given command.
Copyright 2002-2021 by Wilson Snyder. This program is free software; you
can redistribute it and/or modify it under the terms of either the GNU
Lesser General Public License Version 3 or the Perl Artistic License
Version 2.0.
SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0""")

# Local options
parser.add_argument('--yacc',
                    action='store',
                    default='bison',
                    help='name of the bison executable, defaults to "bison"')

# Arguments passed through to bison
parser.add_argument('-b',
                    '--file-prefix',
                    action='store',
                    help='Passed to bison.')
parser.add_argument('-d',
                    '--definitions',
                    action='store_true',
                    help='Passed to bison.')
parser.add_argument('-k',
                    '--token-table',
                    action='store_true',
                    help='Passed to bison.')
parser.add_argument('-o',
                    '--output',
                    action='store',
                    required=True,
                    help='Passed to bison. Sets output file name')
parser.add_argument('-p',
                    '--name-prefix',
                    action='store',
                    help='Passed to bison.')
parser.add_argument('-t',
                    '--debug',
                    action='store_true',
                    help='Passed to bison.')
parser.add_argument('-v',
                    '--verbose',
                    action='store_true',
                    help='Passed to bison.')
parser.add_argument('input', help='Passed to bison. Input grammar file.')

# Args is read as a module-level global by the functions of this script
Args = parser.parse_args()

process()
######################################################################
# Local Variables:
# compile-command: "./bisonpre "
# End: