#!/usr/bin/env python3
# pylint: disable=C0103,C0114,C0115,C0116,R0912,R0914,R0915,R1702,W0125
######################################################################

import argparse
import os
import re
import subprocess
import sys
# from pprint import pprint, pformat

# Module-wide state, filled in as the run progresses
Args = None  # argparse.Namespace holding the command line options
Bison_Version = None  # float, set by bison_version_check()
Filename = None  # grammar file being cleaned, used in error messages
Rules = {}  # rule name -> info dict, rebuilt by clean_input()

######################################################################


def process():
    """Preprocess the grammar, run bison on it, and post-process the results.

    Reads the options from the global Args.  Exits the program with an
    error message if bison fails or its report contains warnings.
    """
    unlink_outputs()

    bison_version_check()
    supports_report = Bison_Version >= 2.3

    clean_input(Args.input, tmp_prefix() + ".y")

    # Run bison
    command = (
        Args.yacc
        + (" -t" if Args.debug else "")
        + (" -d" if Args.definitions else "")
        + (" -k" if Args.token_table else "")
        + (" -v" if Args.verbose else "")
        + (" --report=itemset --report=lookahead" if
           (Args.verbose and supports_report) else "")
        # -p required for GLR parsers; they write to -p basename, not -o
        + ((" -p " + Args.name_prefix) if Args.name_prefix else "")
        + " -b " + tmp_prefix()
        + " -o " + tmp_prefix() + ".c"
        + " " + tmp_prefix() + ".y")

    print(" " + command)
    status = subprocess.call(command, shell=True)
    if status != 0:
        unlink_outputs()
        # Bison_Version is a float; convert explicitly for the message
        sys.exit("bisonpre: %Error: " + Args.yacc + " version " +
                 str(Bison_Version) + " run failed due to errors\n")

    clean_output(tmp_prefix() + ".output", output_prefix() + ".output", True,
                 False)
    warning_check(output_prefix() + ".output")

    clean_output(tmp_prefix() + ".c", output_prefix() + ".c", False, True)
    clean_output(tmp_prefix() + ".h", output_prefix() + ".h", False, True)
    unlink_tmp()


def tmp_prefix():
    """Return the path prefix used for the temporary files given to bison."""
    return output_prefix() + "_pretmp"


def output_prefix():
    """Return the path prefix of the final output files.

    This is Args.output with its last extension removed, or, when
    Args.output is empty, Args.file_prefix with ".tab" appended.
    """
    if Args.output:
        return re.sub(r'\.[^.]*$', '', Args.output)
    return Args.file_prefix + ".tab"


def unlink_ok(filename):
    """Remove filename, ignoring any OSError (e.g. the file is missing)."""
    try:
        os.unlink(filename)
    except OSError:
        pass


def unlink_tmp():
    """Remove the temporary .c, .h and .output files."""
    unlink_ok(tmp_prefix() + ".c")
    unlink_ok(tmp_prefix() + ".h")
    unlink_ok(tmp_prefix() + ".output")


def unlink_outputs():
    """Remove the temporary files and the final .c and .h files."""
    unlink_tmp()
    unlink_ok(output_prefix() + ".c")
    unlink_ok(output_prefix() + ".h")
    # We don't remove .output file, as it's useful for debugging errors


def bison_version_check():
    """Run "<yacc> --version" and record the result in Bison_Version.

    Exits the program if no version number can be found in the output, or
    if the version is older than 1.875.
    """
    global Bison_Version  # pylint: disable=global-statement
    result = subprocess.run(Args.yacc + " --version",
                            shell=True,
                            stdout=subprocess.PIPE,
                            check=False)
    out = result.stdout.decode(errors="replace")
    match = re.search(r'([0-9]+\.[0-9]+)', out)
    if match:
        # NOTE: the version is compared as a float throughout this script
        v = float(match.group(1))
        if v < 1.875:
            sys.exit("bisonpre: %Error: '" + Args.yacc + "' is version " +
                     str(v) + "; version 1.875 or newer is required\n")
        Bison_Version = v
        return

    sys.exit("bisonpre: %Error: '" + Args.yacc +
             "' is not installed, or not working\n")


def _annotate_conflicts(lines):
    """Append the line number of "state N" to each "State N conflicts" line."""
    state_line = {}
    for lineno, line in enumerate(lines, 1):
        match = re.match(r'^state (\d+)\s*', line)
        if match:
            state_line[match.group(1)] = lineno

    out = []
    for line in lines:
        match = re.match(r'^State (\d+) (conflicts)', line)
        if match:
            line = line.rstrip()
            if match.group(1) in state_line:
                line += " // line " + str(state_line[match.group(1)])
            line += "\n"
        out.append(line)
    return out


def _collect_token_values(lines):
    """Return {token value (int): token name} from the yytokentype enum.

    Scanning starts on a line mentioning "enum yytokentype" and runs through
    the first line containing ";", so tokens listed one per line are found.
    """
    token_values = {}
    in_enum = False
    for line in lines:
        if not in_enum and re.search(r'enum\s+yytokentype', line):
            in_enum = True
        if not in_enum:
            continue
        match = re.search(r'\b(\S+) = (\d+)', line)
        if match:
            token_values[int(match.group(2))] = match.group(1)
        if re.search(r';', line):
            in_enum = False
    return token_values


def _insert_token_names(lines):
    """Add one "case N: return "NAME";" line after each BISONPRE_TOKEN_NAMES."""
    token_values = _collect_token_values(lines)
    out = []
    for line in lines:
        out.append(line)
        if _enaline(line) and re.search(r'BISONPRE_TOKEN_NAMES', line):
            for tv in sorted(token_values):
                out.append("\tcase %d: return \"%s\";\n" %
                           (tv, token_values[tv]))
    return out


def clean_output(filename, outname, is_output, is_c):
    """Copy a bison-generated file to outname, applying fix-ups on the way.

    filename  -- file written by bison (using the temporary prefix)
    outname   -- file to create
    is_output -- true for the ".output" report; conflict lines get annotated
    is_c      -- true for C sources/headers; BISONPRE_TOKEN_NAMES is expanded
    """
    print(" edit " + filename + " " + outname)

    with open(filename) as fh:
        lines = fh.readlines()

    # References to the temporary file name are rewritten to the input's name
    basename = re.sub(r'.*/', '', tmp_prefix() + ".")
    basename = re.escape(basename)
    newbase = re.sub(r'.*/', '', Args.input)
    newbase = re.sub(r'\.y', '.', newbase)

    if is_output:
        lines = _annotate_conflicts(lines)

    if is_c:
        lines = _insert_token_names(lines)

    with open(outname, "w") as fh:
        for line in lines:
            # Fix filename refs
            # (callable replacement so a backslash in the name stays literal)
            line = re.sub(basename, lambda _m: newbase, line)
            # Fix bison 2.3 and GCC 4.2.1
            line = re.sub(r'\(YY_\("', '(YY_((char*)"', line)
            # Fix bison 2.3 glr-parser warning about yyerrorloc.YYTYPE::yydummy uninit
            line = re.sub(r'(YYLTYPE yyerrloc;)',
                          r'\1 yyerrloc.yydummy=0;/*bisonpre*/', line)
            # Fix bison 3.6.1 unexpected nested-comment
            line = re.sub(r'/\* "/\*.*\*/" \*/', '', line)
            fh.write(line)


def warning_check(filename):
    """Exit with an error at the first conflict/warning/useless report line."""
    with open(filename) as fh:
        for linenum, line in enumerate(fh, 1):
            if re.search(r'(conflicts|warning:|^useless)',
                         line,
                         flags=re.IGNORECASE):
                sys.exit("%Error: " + filename + ":" + str(linenum) + ": " +
                         line + "\n")


######################################################################


def _parse_grammar(linesin, filename):
    """Strip "<type>" from rule labels and collect rule/type/token tables.

    Rebuilds the global Rules.  Returns (lines, types, tokens) where lines is
    the edited text, types maps type -> {rule name: 1}, and tokens maps
    token name -> type as declared by "%token<type> name".
    """
    global Rules  # pylint: disable=global-statement
    Rules = {}
    types = {}
    tokens = {}
    last_rule = None
    section = 1
    lines = []

    for lineno, line in enumerate(linesin, 1):
        where = "%Error: " + filename + ":" + str(lineno)
        # ^/ to prevent comments from matching
        if re.match(r'^[a-zA-Z0-9_<>]+:[^/]*[a-zA-Z]', line):
            sys.exit(where + ": Move text on rule line to next line: " +
                     line + "\n")

        matcha = re.match(r'^([a-zA-Z0-9_]+)<(\S*)>(.*)$',
                          line,
                          flags=re.DOTALL)
        matchb = re.match(r'^([a-zA-Z0-9_]+):', line)

        if re.match(r'^%%', line):
            section += 1
            if section == 2:
                last_rule = None
        elif matcha:
            # "rule<type>:" - remember the type, keep just "rule:"
            name = matcha.group(1)
            dtype = matcha.group(2)
            line = name + matcha.group(3)
            if name in Rules:
                sys.exit(where + ": Redeclaring '" + name + "': " + line)
            if dtype not in types:
                types[dtype] = {}
            types[dtype][name] = 1
            Rules[name] = {
                'name': name,
                'type': dtype,
                'rules_and_productions': "",
                'subrules': {}
            }
            if last_rule:
                sys.exit(where + ": Unterminated previous rule\n")
            last_rule = name
        elif matchb:
            name = matchb.group(1)
            if name not in ('public', 'private'):
                if name in Rules:
                    sys.exit(where + ": Redeclaring '" + name + "': " + line)
                Rules[name] = {
                    'name': name,
                    'type': "",
                    'rules_and_productions': "",
                    'subrules': {}
                }
                if last_rule:
                    sys.exit(where + ": Unterminated previous rule\n")
                last_rule = name

        lines.append(line)

        # Now clean the line and extract some more info
        cline = re.sub(r'//.*$', '\n', line)
        if re.match(r'^\s*;', cline):
            if not last_rule:
                sys.exit(where + ": Stray semicolon\n")
            last_rule = None
        elif last_rule:
            Rules[last_rule]['rules_and_productions'] += cline

        match = re.match(r'^%token\s*<(\S+)>\s*(\S+)', cline)
        if match:
            dtype = match.group(1)
            tok = match.group(2)
            if tok in tokens:
                sys.exit(where + ": Redeclaring '" + tok + "': " + line)
            tokens[tok] = dtype

        for tok in re.split(r'[^a-zA-Z0-9_]+', cline):
            if last_rule and re.match(r'^[a-zA-Z]', tok):
                # print("TT "+last_rule+" "+tok+"\n")
                Rules[last_rule]['subrules'][tok] = 1

    # pprint(Rules)
    return lines, types, tokens


def _expand_version(linesin, filename):
    """Replace BISONPRE_VERSION(ver[,ver_max],cmd) lines by cmd or a NOP."""
    lines = []
    for lineno, line in enumerate(linesin, 1):
        if _enaline(line) and re.search(r'BISONPRE_VERSION', line):
            #                        1            2 3            4
            match = re.search(
                r'BISONPRE_VERSION\((\S+)\s*,\s*((\S+)\s*,)?\s*([^\),]+)\)\s*$',
                line)
            if not match:
                sys.exit("%Error: " + filename + ":" + str(lineno) +
                         ": Bad form of BISONPRE_VERSION: " + line)
            ver = match.group(1)
            ver_max = match.group(3)
            cmd = match.group(4)
            if Bison_Version >= float(ver) and (
                    not ver_max or Bison_Version <= float(ver_max)):
                line = cmd + "\n"
            else:
                line = "//NOP: " + line
        lines.append(line)
    return lines


def _expand_not(linesin, filename, tokens):
    """Replace BISONPRE_NOT(tok,...) {action} by every other known token."""
    lines = []
    for lineno, line in enumerate(linesin, 1):
        if _enaline(line) and re.search(r'BISONPRE_NOT', line):
            match = re.search(
                r'(.*)BISONPRE_NOT\((\S+)\)\s*(\{[^}]+})\s*(.*)$',
                line,
                flags=re.DOTALL)
            if not match:
                sys.exit("%Error: " + filename + ":" + str(lineno) +
                         ": Bad form of BISONPRE_NOT: " + line)
            line = match.group(1) + match.group(4)
            endtok = match.group(2)
            action = match.group(3)
            endtoks = endtok.split(',')
            for etok in endtoks:
                if etok not in tokens:
                    sys.exit("%Error: " + filename + ":" + str(lineno) +
                             ": Can't find definition for token: " + etok +
                             "\n")
            # Push it all onto one line to avoid error messages changing
            pipe = ""
            for tok in sorted(tokens):
                if tok not in endtoks and endtok != tok:
                    line += "\t" + pipe + " " + tok + " " + action
                    pipe = "|"
            line += "\n"
        lines.append(line)
    return lines


def _expand_copy(linesin):
    """Expand BISONPRE_COPY / BISONPRE_COPY_ONCE on every enabled line."""
    lines = []
    for lineno, line in enumerate(linesin, 1):
        if _enaline(line) and re.search(r'BISONPRE_COPY', line):
            line = _bisonpre_copy(line, lineno, 0)
        lines.append(line)
    return lines


def _expand_types(linesin, filename, types):
    """Insert %type declarations after each //BISONPRE_TYPES marker.

    One following blank (or comment-only) line is consumed per inserted
    declaration; exits with an error if a non-blank line is found instead.
    """
    lines = []
    needmore = 0
    for lineno, line in enumerate(linesin, 1):
        if _enaline(line) and re.search(r'//BISONPRE_TYPES', line):
            lines.append(line)
            for typen in sorted(types):
                if not typen:
                    continue
                line = "%type<" + typen + ">\t"
                for rule in sorted(types[typen]):
                    line += " " + rule
                line += "\n"
                lines.append(line)
                needmore += 1
        elif needmore > 0:
            # Bison doesn't have a #line directive, so we need somewhere to insert into
            line = re.sub(r'^\s*//.*$', '', line)
            if not re.match(r'^\s*$', line):
                sys.exit("%Error: " + filename + ":" + str(lineno) +
                         ": Need " + str(needmore) +
                         " more blank lines to keep line numbers are constant\n")
            needmore -= 1
        else:
            lines.append(line)
    return lines


def clean_input(filename, outname):
    """Expand the bisonpre grammar extensions in filename into outname.

    Sets the globals Filename and Rules as a side effect.  Exits the program
    with a "%Error:" message on malformed input.
    """
    print(" edit " + filename + " " + outname)

    global Filename  # pylint: disable=global-statement
    Filename = filename

    with open(filename) as fh:
        lines = fh.readlines()

    # Find "%tokens<type>:"
    # Find "rule<type>:" and replace with just "rule:"
    lines, types, tokens = _parse_grammar(lines, filename)

    # Replace BISONPRE_VERSION(ver,<ver-max>,...) with expanded list
    lines = _expand_version(lines, filename)

    # Replace BISONPRE_NOT(type,...) with expanded list
    lines = _expand_not(lines, filename, tokens)

    # Replace BISONPRE_COPY(type,{code})
    lines = _expand_copy(lines)

    # Replace ~[x]~ - must be after BISONPRE_COPY expansion
    lines = [re.sub(r'~[a-zA-Z0-9_]+~', '', line) for line in lines]

    # Find "BISONPRE_TYPES"
    lines = _expand_types(lines, filename, types)

    with open(outname, "w") as fh:
        for line in lines:
            fh.write(line)


def _bisonpre_copy(text, lineno, depth):
    """Return text with its BISONPRE_COPY[_ONCE](rule, {code}) calls expanded.

    Each call is replaced by the named rule's productions (from the global
    Rules) after applying the "s/from/to/g" substitutions listed in code.
    A _ONCE call met when depth > 0 is removed instead of expanded.
    """
    while re.search(r'BISONPRE_COPY', text):
        match = re.match(
            #     1                   2      3            4          5
            r'(.*)BISONPRE_COPY(_ONCE)?\((\S+)\s*,\s*\{([^}]*)}\s*\)(.*)',
            text,
            flags=re.DOTALL)
        if not match:
            sys.exit("%Error: " + Filename + ":" + str(lineno) +
                     ": Bad form of BISONPRE_COPY: " + text)
        text = match.group(1) + '{HERE}' + match.group(5)
        once = match.group(2)
        rule = match.group(3)
        code = match.group(4)
        if rule not in Rules:
            sys.exit("%Error: " + Filename + ":" + str(lineno) +
                     ": Can't find definition for rule: " + rule)
        if depth > 0 and once:
            # _ONCE means don't inherit
            text = re.sub(r'\|[ \t]+{HERE}', '', text)  # Don't OR in nothing
            text = re.sub(r'{HERE}', '', text)
        else:
            # Push it all onto one line to avoid error messages changing
            insert = Rules[rule]['rules_and_productions']
            insert = re.sub(r'^\S+:', '', insert)  # Strip rule name
            # Recurse so BISONPRE under B
            for op in code.split(';'):
                if re.match(r'^\s*$', op):
                    continue
                match = re.match(r'^\s*s/(.*?)/(.*?)/g\s*$', op)
                if not match:
                    sys.exit("%Error: " + Filename + ":" + str(lineno) +
                             ": Didn't understand replacement: " + op)
                left = match.group(1)
                right = match.group(2)
                insert = re.sub(left, right, insert)

            insert = re.sub(r'[ \t\n]+\n', "\n", insert)
            # Optional - preserve line numbering
            insert = re.sub(r'\n', " ", insert)
            # Literal replacement: grammar actions may contain backslashes,
            # which must not be read as regexp replacement escapes
            text = text.replace('{HERE}', insert)
        depth += 1
    return text


def _enaline(line):
    """Return True unless the line contains the "//UN" disable marker."""
    return not re.search(r'//UN', line)


######################################################################
# main

parser = argparse.ArgumentParser(
    allow_abbrev=False,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="""Bisonpre is a wrapper for the Bison YACC replacement.
Input to Bison is preprocessed with substitution as described below under
EXTENSIONS.  Output from Bison is checked for additional errors, and
corrected to work around various compile warnings.""",
    epilog="""
BISON GRAMMAR EXTENSIONS

  //BISONPRE_TYPES
    This is expanded into %type declarations.

  ~[a-z]+~
    Any text matching ~[a-z]+~ is removed.  This allows optional text to be
    used only when the rule containing the ~~ is used in a BISONPRE_COPY.

  rule_label<type>:
    This allows the label declaring a rule to also specify the type of the
    rule.  The type will be inserted where //BISONPRE_TYPES is encountered.

  BISONPRE_COPY(rule, {code})
    Copy the rules and productions from the specified rule, filter through
    the s/from/to/g substitutions provided in the {} and insert here into
    the output file.

  BISONPRE_COPY_ONCE(rule, {code})
    As with BISONPRE_COPY, but if called from underneath another
    BISONPRE_COPY rule, ignore it.

  BISONPRE_NOT(token[, token...])
    Create a rule that matches every token except for those specified.

  BISONPRE_VERSION(ver, cmd)
    If the bison version is >= the specified version, include the given
    command.

Copyright 2002-2021 by Wilson Snyder. This program is free software; you
can redistribute it and/or modify it under the terms of either the GNU
Lesser General Public License Version 3 or the Perl Artistic License
Version 2.0.

SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0""")

# Local options
parser.add_argument('--yacc',
                    action='store',
                    default='bison',
                    help='name of the bison executable, defaults to "bison"')

# Arguments passed through to bison
parser.add_argument('-b',
                    '--file-prefix',
                    action='store',
                    help='Passed to bison.')
parser.add_argument('-d',
                    '--definitions',
                    action='store_true',
                    help='Passed to bison.')
parser.add_argument('-k',
                    '--token-table',
                    action='store_true',
                    help='Passed to bison.')
parser.add_argument('-o',
                    '--output',
                    action='store',
                    required=True,
                    help='Passed to bison. Sets output file name')
parser.add_argument('-p',
                    '--name-prefix',
                    action='store',
                    help='Passed to bison.')
parser.add_argument('-t',
                    '--debug',
                    action='store_true',
                    help='Passed to bison.')
parser.add_argument('-v',
                    '--verbose',
                    action='store_true',
                    help='Passed to bison.')
parser.add_argument('input', help='Passed to bison. Input grammar file.')

# Only parse arguments and run when executed as a script, so the functions
# above can be imported without side effects
if __name__ == "__main__":
    Args = parser.parse_args()
    process()

######################################################################
# Local Variables:
# compile-command: "./bisonpre "
# End: