Commit 9abb3549 authored by Chuck Lever's avatar Chuck Lever
Browse files

xdrgen: Improve parse error reporting



The current verbose Lark exception output makes it difficult to
quickly identify and fix syntax errors in XDR specifications. Users
must wade through hundreds of lines of cascading errors to find the
root cause.

Replace this with concise, compiler-style error messages showing
file, line, column, the unexpected token, and the source line with
a caret pointing to the error location.

Before:
  Unexpected token Token('__ANON_1', '+1') at line 14, column 35.
  Expected one of:
          * SEMICOLON
  Previous tokens: [Token('__ANON_0', 'LM_MAXSTRLEN')]
  [hundreds more cascading errors...]

After:
  file.x:14:35: parse error
  Unexpected number '+1'

      const LM_MAXNAMELEN = LM_MAXSTRLEN+1;
                                        ^

The error handler now raises XdrParseError on the first error,
preventing cascading messages that obscure the root cause.

Signed-off-by: default avatarChuck Lever <chuck.lever@oracle.com>
parent eb1f3b55
Loading
Loading
Loading
Loading
+8 −8
Original line number Diff line number Diff line
@@ -8,7 +8,6 @@ import logging

from argparse import Namespace
from lark import logger
from lark.exceptions import UnexpectedInput

from generators.constant import XdrConstantGenerator
from generators.enum import XdrEnumGenerator
@@ -24,6 +23,7 @@ from xdr_ast import transform_parse_tree, _RpcProgram, Specification
from xdr_ast import _XdrConstant, _XdrEnum, _XdrPointer
from xdr_ast import _XdrTypedef, _XdrStruct, _XdrUnion
from xdr_parse import xdr_parser, set_xdr_annotate
from xdr_parse import make_error_handler, XdrParseError

logger.setLevel(logging.INFO)

@@ -50,19 +50,19 @@ def emit_header_declarations(
        gen.emit_declaration(definition.value)


def handle_parse_error(e: UnexpectedInput) -> bool:
    """Simple parse error reporting, no recovery attempted"""
    print(e)
    return True


def subcmd(args: Namespace) -> int:
    """Generate definitions and declarations"""

    set_xdr_annotate(args.annotate)
    parser = xdr_parser()
    with open(args.filename, encoding="utf-8") as f:
        parse_tree = parser.parse(f.read(), on_error=handle_parse_error)
        source = f.read()
        try:
            parse_tree = parser.parse(
                source, on_error=make_error_handler(source, args.filename)
            )
        except XdrParseError:
            return 1
        ast = transform_parse_tree(parse_tree)

        gen = XdrHeaderTopGenerator(args.language, args.peer)
+8 −8
Original line number Diff line number Diff line
@@ -8,7 +8,6 @@ import logging

from argparse import Namespace
from lark import logger
from lark.exceptions import UnexpectedInput

from generators.constant import XdrConstantGenerator
from generators.enum import XdrEnumGenerator
@@ -24,6 +23,7 @@ from xdr_ast import transform_parse_tree, Specification
from xdr_ast import _RpcProgram, _XdrConstant, _XdrEnum, _XdrPointer
from xdr_ast import _XdrTypedef, _XdrStruct, _XdrUnion
from xdr_parse import xdr_parser, set_xdr_annotate
from xdr_parse import make_error_handler, XdrParseError

logger.setLevel(logging.INFO)

@@ -69,19 +69,19 @@ def emit_header_maxsize(root: Specification, language: str, peer: str) -> None:
        gen.emit_maxsize(definition.value)


def handle_parse_error(e: UnexpectedInput) -> bool:
    """Simple parse error reporting, no recovery attempted"""
    print(e)
    return True


def subcmd(args: Namespace) -> int:
    """Generate definitions"""

    set_xdr_annotate(args.annotate)
    parser = xdr_parser()
    with open(args.filename, encoding="utf-8") as f:
        parse_tree = parser.parse(f.read(), on_error=handle_parse_error)
        source = f.read()
        try:
            parse_tree = parser.parse(
                source, on_error=make_error_handler(source, args.filename)
            )
        except XdrParseError:
            return 1
        ast = transform_parse_tree(parse_tree)

        gen = XdrHeaderTopGenerator(args.language, args.peer)
+8 −9
Original line number Diff line number Diff line
@@ -8,26 +8,25 @@ import logging

from argparse import Namespace
from lark import logger
from lark.exceptions import UnexpectedInput

from xdr_parse import xdr_parser
from xdr_parse import xdr_parser, make_error_handler, XdrParseError
from xdr_ast import transform_parse_tree

logger.setLevel(logging.DEBUG)


def handle_parse_error(e: UnexpectedInput) -> bool:
    """Simple parse error reporting, no recovery attempted"""
    print(e)
    return True


def subcmd(args: Namespace) -> int:
    """Lexical and syntax check of an XDR specification"""

    parser = xdr_parser()
    with open(args.filename, encoding="utf-8") as f:
        parse_tree = parser.parse(f.read(), on_error=handle_parse_error)
        source = f.read()
        try:
            parse_tree = parser.parse(
                source, on_error=make_error_handler(source, args.filename)
            )
        except XdrParseError:
            return 1
        transform_parse_tree(parse_tree)

    return 0
+8 −8
Original line number Diff line number Diff line
@@ -8,7 +8,6 @@ import logging

from argparse import Namespace
from lark import logger
from lark.exceptions import UnexpectedInput

from generators.source_top import XdrSourceTopGenerator
from generators.enum import XdrEnumGenerator
@@ -23,6 +22,7 @@ from xdr_ast import _XdrAst, _XdrEnum, _XdrPointer
from xdr_ast import _XdrStruct, _XdrTypedef, _XdrUnion

from xdr_parse import xdr_parser, set_xdr_annotate
from xdr_parse import make_error_handler, XdrParseError

logger.setLevel(logging.INFO)

@@ -92,19 +92,19 @@ def generate_client_source(filename: str, root: Specification, language: str) ->
    # cel: todo: client needs PROC macros


def handle_parse_error(e: UnexpectedInput) -> bool:
    """Simple parse error reporting, no recovery attempted"""
    print(e)
    return True


def subcmd(args: Namespace) -> int:
    """Generate encoder and decoder functions"""

    set_xdr_annotate(args.annotate)
    parser = xdr_parser()
    with open(args.filename, encoding="utf-8") as f:
        parse_tree = parser.parse(f.read(), on_error=handle_parse_error)
        source = f.read()
        try:
            parse_tree = parser.parse(
                source, on_error=make_error_handler(source, args.filename)
            )
        except XdrParseError:
            return 1
        ast = transform_parse_tree(parse_tree)
        match args.peer:
            case "server":
+86 −0
Original line number Diff line number Diff line
@@ -3,12 +3,40 @@

"""Common parsing code for xdrgen"""

import sys
from typing import Callable

from lark import Lark
from lark.exceptions import UnexpectedInput, UnexpectedToken


# Set to True to emit annotation comments in generated source
annotate = False

# Map internal Lark token names to human-readable names
TOKEN_NAMES = {
    "__ANON_0": "identifier",
    "__ANON_1": "number",
    "SEMICOLON": "';'",
    "LBRACE": "'{'",
    "RBRACE": "'}'",
    "LPAR": "'('",
    "RPAR": "')'",
    "LSQB": "'['",
    "RSQB": "']'",
    "LESSTHAN": "'<'",
    "MORETHAN": "'>'",
    "EQUAL": "'='",
    "COLON": "':'",
    "COMMA": "','",
    "STAR": "'*'",
    "$END": "end of file",
}


class XdrParseError(Exception):
    """Raised when XDR parsing fails"""


def set_xdr_annotate(set_it: bool) -> None:
    """Set 'annotate' if --annotate was specified on the command line"""
@@ -21,6 +49,64 @@ def get_xdr_annotate() -> bool:
    return annotate


def make_error_handler(source: str, filename: str) -> Callable[[UnexpectedInput], bool]:
    """Create an error handler that reports the first parse error and aborts.

    Args:
        source: The XDR source text being parsed
        filename: The name of the file being parsed

    Returns:
        An error handler function for use with Lark's on_error parameter
    """
    lines = source.splitlines()

    def handle_parse_error(e: UnexpectedInput) -> bool:
        """Report a parse error with context and abort parsing"""
        line_num = e.line
        column = e.column
        line_text = lines[line_num - 1] if 0 < line_num <= len(lines) else ""

        # Build the error message
        msg_parts = [f"{filename}:{line_num}:{column}: parse error"]

        # Show what was found vs what was expected
        if isinstance(e, UnexpectedToken):
            token = e.token
            if token.type == "__ANON_0":
                found = f"identifier '{token.value}'"
            elif token.type == "__ANON_1":
                found = f"number '{token.value}'"
            else:
                found = f"'{token.value}'"
            msg_parts.append(f"Unexpected {found}")

            # Provide helpful expected tokens list
            expected = e.expected
            if expected:
                readable = [
                    TOKEN_NAMES.get(exp, exp.lower().replace("_", " "))
                    for exp in sorted(expected)
                ]
                if len(readable) == 1:
                    msg_parts.append(f"Expected {readable[0]}")
                elif len(readable) <= 4:
                    msg_parts.append(f"Expected one of: {', '.join(readable)}")
        else:
            msg_parts.append(str(e).split("\n")[0])

        # Show the offending line with a caret pointing to the error
        msg_parts.append("")
        msg_parts.append(f"    {line_text}")
        prefix = line_text[: column - 1].expandtabs()
        msg_parts.append(f"    {' ' * len(prefix)}^")

        sys.stderr.write("\n".join(msg_parts) + "\n")
        raise XdrParseError()

    return handle_parse_error


def xdr_parser() -> Lark:
    """Return a Lark parser instance configured with the XDR language grammar"""

Loading