Commit 4b132aac authored by Chuck Lever
Browse files

tools: Add xdrgen



Add a Python-based tool for translating XDR specifications into XDR
encoder and decoder functions written in the Linux kernel's C coding
style. The generator attempts to match the usual C coding style of
the Linux kernel's SunRPC consumers.

This approach is similar to the netlink code generator in
tools/net/ynl .

The maintainability benefits of machine-generated XDR code include:

- Stronger type checking
- Reduces the number of bugs introduced by human error
- Makes the XDR code easier to audit and analyze
- Enables rapid prototyping of new RPC-based protocols
- Hardens the layering between protocol logic and marshaling
- Makes it easier to add observability on demand
- Unit tests might be built for both the tool and (automatically)
  for the generated code

In addition, converting the XDR layer to use memory-safe languages
such as Rust will be easier if much of the code can be converted
automatically.

Tested-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
parent 45bb63ed
Loading
Loading
Loading
Loading
+243 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (c) 2024 Oracle and/or its affiliates.
 *
 * This header defines XDR data type primitives specified in
 * Section 4 of RFC 4506, used by RPC programs implemented
 * in the Linux kernel.
 */

#ifndef _SUNRPC_XDRGEN__BUILTINS_H_
#define _SUNRPC_XDRGEN__BUILTINS_H_

#include <linux/sunrpc/xdr.h>

/**
 * xdrgen_decode_void - Decode an XDR void
 * @xdr: XDR stream (not accessed)
 *
 * Nothing is consumed from @xdr.
 *
 * Return: always true.
 */
static inline bool
xdrgen_decode_void(struct xdr_stream *xdr)
{
	return true;
}

/**
 * xdrgen_encode_void - Encode an XDR void
 * @xdr: XDR stream (not accessed)
 *
 * Nothing is written to @xdr.
 *
 * Return: always true.
 */
static inline bool
xdrgen_encode_void(struct xdr_stream *xdr)
{
	return true;
}

/**
 * xdrgen_decode_bool - Decode an XDR boolean
 * @xdr: XDR stream to read from
 * @ptr: OUT: decoded value
 *
 * Requests one XDR_UNIT from @xdr. Any wire value other than xdr_zero
 * decodes as true.
 *
 * Return: true on success; false if xdr_inline_decode() returns NULL,
 * in which case @ptr is not written.
 */
static inline bool
xdrgen_decode_bool(struct xdr_stream *xdr, bool *ptr)
{
	__be32 *word;

	word = xdr_inline_decode(xdr, XDR_UNIT);
	if (unlikely(!word))
		return false;

	if (*word == xdr_zero)
		*ptr = false;
	else
		*ptr = true;
	return true;
}

/**
 * xdrgen_encode_bool - Encode an XDR boolean
 * @xdr: XDR stream to write to
 * @val: value to encode
 *
 * Reserves one XDR_UNIT in @xdr and stores xdr_one for true or
 * xdr_zero for false.
 *
 * Return: true on success; false if xdr_reserve_space() returns NULL.
 */
static inline bool
xdrgen_encode_bool(struct xdr_stream *xdr, bool val)
{
	__be32 *word;

	word = xdr_reserve_space(xdr, XDR_UNIT);
	if (unlikely(!word))
		return false;

	if (val)
		*word = xdr_one;
	else
		*word = xdr_zero;
	return true;
}

/**
 * xdrgen_decode_int - Decode an XDR signed integer
 * @xdr: XDR stream to read from
 * @ptr: OUT: decoded value, in host byte order
 *
 * Requests one XDR_UNIT from @xdr and converts it from big-endian.
 *
 * Return: true on success; false if xdr_inline_decode() returns NULL,
 * in which case @ptr is not written.
 */
static inline bool
xdrgen_decode_int(struct xdr_stream *xdr, s32 *ptr)
{
	__be32 *word;

	word = xdr_inline_decode(xdr, XDR_UNIT);
	if (unlikely(!word))
		return false;

	*ptr = be32_to_cpup(word);
	return true;
}

/**
 * xdrgen_encode_int - Encode an XDR signed integer
 * @xdr: XDR stream to write to
 * @val: value to encode, in host byte order
 *
 * Reserves one XDR_UNIT in @xdr and stores @val there as big-endian.
 *
 * Return: true on success; false if xdr_reserve_space() returns NULL.
 */
static inline bool
xdrgen_encode_int(struct xdr_stream *xdr, s32 val)
{
	__be32 *word;

	word = xdr_reserve_space(xdr, XDR_UNIT);
	if (unlikely(!word))
		return false;

	*word = cpu_to_be32(val);
	return true;
}

/**
 * xdrgen_decode_unsigned_int - Decode an XDR unsigned integer
 * @xdr: XDR stream to read from
 * @ptr: OUT: decoded value, in host byte order
 *
 * Requests one XDR_UNIT from @xdr and converts it from big-endian.
 *
 * Return: true on success; false if xdr_inline_decode() returns NULL,
 * in which case @ptr is not written.
 */
static inline bool
xdrgen_decode_unsigned_int(struct xdr_stream *xdr, u32 *ptr)
{
	__be32 *word;

	word = xdr_inline_decode(xdr, XDR_UNIT);
	if (unlikely(!word))
		return false;

	*ptr = be32_to_cpup(word);
	return true;
}

/**
 * xdrgen_encode_unsigned_int - Encode an XDR unsigned integer
 * @xdr: XDR stream to write to
 * @val: value to encode, in host byte order
 *
 * Reserves one XDR_UNIT in @xdr and stores @val there as big-endian.
 *
 * Return: true on success; false if xdr_reserve_space() returns NULL.
 */
static inline bool
xdrgen_encode_unsigned_int(struct xdr_stream *xdr, u32 val)
{
	__be32 *word;

	word = xdr_reserve_space(xdr, XDR_UNIT);
	if (unlikely(!word))
		return false;

	*word = cpu_to_be32(val);
	return true;
}

/**
 * xdrgen_decode_long - Decode an XDR "long"
 * @xdr: XDR stream to read from
 * @ptr: OUT: decoded value, in host byte order
 *
 * A "long" is handled exactly like an int here: one XDR_UNIT is
 * requested from @xdr and converted from big-endian into an s32.
 *
 * Return: true on success; false if xdr_inline_decode() returns NULL,
 * in which case @ptr is not written.
 */
static inline bool
xdrgen_decode_long(struct xdr_stream *xdr, s32 *ptr)
{
	__be32 *word;

	word = xdr_inline_decode(xdr, XDR_UNIT);
	if (unlikely(!word))
		return false;

	*ptr = be32_to_cpup(word);
	return true;
}

/**
 * xdrgen_encode_long - Encode an XDR "long"
 * @xdr: XDR stream to write to
 * @val: value to encode, in host byte order
 *
 * A "long" is handled exactly like an int here: one XDR_UNIT is
 * reserved in @xdr and @val is stored there as big-endian.
 *
 * Return: true on success; false if xdr_reserve_space() returns NULL.
 */
static inline bool
xdrgen_encode_long(struct xdr_stream *xdr, s32 val)
{
	__be32 *word;

	word = xdr_reserve_space(xdr, XDR_UNIT);
	if (unlikely(!word))
		return false;

	*word = cpu_to_be32(val);
	return true;
}

/**
 * xdrgen_decode_unsigned_long - Decode an XDR "unsigned long"
 * @xdr: XDR stream to read from
 * @ptr: OUT: decoded value, in host byte order
 *
 * An "unsigned long" is handled exactly like an unsigned int here: one
 * XDR_UNIT is requested from @xdr and converted from big-endian into
 * a u32.
 *
 * Return: true on success; false if xdr_inline_decode() returns NULL,
 * in which case @ptr is not written.
 */
static inline bool
xdrgen_decode_unsigned_long(struct xdr_stream *xdr, u32 *ptr)
{
	__be32 *word;

	word = xdr_inline_decode(xdr, XDR_UNIT);
	if (unlikely(!word))
		return false;

	*ptr = be32_to_cpup(word);
	return true;
}

/**
 * xdrgen_encode_unsigned_long - Encode an XDR "unsigned long"
 * @xdr: XDR stream to write to
 * @val: value to encode, in host byte order
 *
 * An "unsigned long" is handled exactly like an unsigned int here: one
 * XDR_UNIT is reserved in @xdr and @val is stored there as big-endian.
 *
 * Return: true on success; false if xdr_reserve_space() returns NULL.
 */
static inline bool
xdrgen_encode_unsigned_long(struct xdr_stream *xdr, u32 val)
{
	__be32 *word;

	word = xdr_reserve_space(xdr, XDR_UNIT);
	if (unlikely(!word))
		return false;

	*word = cpu_to_be32(val);
	return true;
}

/**
 * xdrgen_decode_hyper - Decode an XDR signed hyper integer
 * @xdr: XDR stream to read from
 * @ptr: OUT: decoded 64-bit value, in host byte order
 *
 * Requests two XDR_UNITs from @xdr and reads them as a single
 * big-endian 64-bit quantity, without assuming 64-bit alignment.
 *
 * Return: true on success; false if xdr_inline_decode() returns NULL,
 * in which case @ptr is not written.
 */
static inline bool
xdrgen_decode_hyper(struct xdr_stream *xdr, s64 *ptr)
{
	__be32 *words;

	words = xdr_inline_decode(xdr, 2 * XDR_UNIT);
	if (unlikely(!words))
		return false;

	*ptr = get_unaligned_be64(words);
	return true;
}

/**
 * xdrgen_encode_hyper - Encode an XDR signed hyper integer
 * @xdr: XDR stream to write to
 * @val: 64-bit value to encode, in host byte order
 *
 * Reserves two XDR_UNITs in @xdr and stores @val there as a single
 * big-endian 64-bit quantity, without assuming 64-bit alignment.
 *
 * Return: true on success; false if xdr_reserve_space() returns NULL.
 */
static inline bool
xdrgen_encode_hyper(struct xdr_stream *xdr, s64 val)
{
	__be32 *words;

	words = xdr_reserve_space(xdr, 2 * XDR_UNIT);
	if (unlikely(!words))
		return false;

	put_unaligned_be64(val, words);
	return true;
}

/**
 * xdrgen_decode_unsigned_hyper - Decode an XDR unsigned hyper integer
 * @xdr: XDR stream to read from
 * @ptr: OUT: decoded 64-bit value, in host byte order
 *
 * Requests two XDR_UNITs from @xdr and reads them as a single
 * big-endian 64-bit quantity, without assuming 64-bit alignment.
 *
 * Return: true on success; false if xdr_inline_decode() returns NULL,
 * in which case @ptr is not written.
 */
static inline bool
xdrgen_decode_unsigned_hyper(struct xdr_stream *xdr, u64 *ptr)
{
	__be32 *words;

	words = xdr_inline_decode(xdr, 2 * XDR_UNIT);
	if (unlikely(!words))
		return false;

	*ptr = get_unaligned_be64(words);
	return true;
}

/**
 * xdrgen_encode_unsigned_hyper - Encode an XDR unsigned hyper integer
 * @xdr: XDR stream to write to
 * @val: 64-bit value to encode, in host byte order
 *
 * Reserves two XDR_UNITs in @xdr and stores @val there as a single
 * big-endian 64-bit quantity, without assuming 64-bit alignment.
 *
 * Return: true on success; false if xdr_reserve_space() returns NULL.
 */
static inline bool
xdrgen_encode_unsigned_hyper(struct xdr_stream *xdr, u64 val)
{
	__be32 *words;

	words = xdr_reserve_space(xdr, 2 * XDR_UNIT);
	if (unlikely(!words))
		return false;

	put_unaligned_be64(val, words);
	return true;
}

/**
 * xdrgen_decode_string - Decode a variable-length XDR string
 * @xdr: XDR stream to read from
 * @ptr: OUT: length and location of the decoded string
 * @maxlen: largest acceptable length in bytes, or zero for no limit
 *
 * On success, @ptr->len holds the decoded length and @ptr->data is set
 * to the pointer returned by xdr_inline_decode(); the bytes are not
 * copied and no NUL terminator is added. When the decoded length is
 * zero, @ptr->data is left unmodified.
 *
 * Return: true on success; false if the length word or the string body
 * cannot be decoded, or if the length exceeds a non-zero @maxlen.
 */
static inline bool
xdrgen_decode_string(struct xdr_stream *xdr, string *ptr, u32 maxlen)
{
	__be32 *p;
	u32 len;

	/*
	 * xdr_stream_decode_u32() returns zero on success and a negative
	 * errno on failure; it never returns XDR_UNIT.
	 */
	if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0))
		return false;
	if (unlikely(maxlen && len > maxlen))
		return false;
	if (len != 0) {
		p = xdr_inline_decode(xdr, len);
		if (unlikely(!p))
			return false;
		ptr->data = (unsigned char *)p;
	}
	ptr->len = len;
	return true;
}

/**
 * xdrgen_encode_string - Encode a variable-length XDR string
 * @xdr: XDR stream to write to
 * @val: length and location of the string to encode
 * @maxlen: largest permitted length in bytes, or zero for no limit
 *
 * Reserves room for a length word plus @val.len bytes rounded up by
 * xdr_align_size(), then lets xdr_encode_opaque() fill it in.
 *
 * Return: true on success; false if @val.len exceeds a non-zero
 * @maxlen or if xdr_reserve_space() returns NULL.
 */
static inline bool
xdrgen_encode_string(struct xdr_stream *xdr, string val, u32 maxlen)
{
	__be32 *p;

	/* Enforce the same bound the decoder does; zero means unbounded */
	if (unlikely(maxlen && val.len > maxlen))
		return false;

	p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len));
	if (unlikely(!p))
		return false;
	xdr_encode_opaque(p, val.data, val.len);
	return true;
}

/**
 * xdrgen_decode_opaque - Decode variable-length XDR opaque data
 * @xdr: XDR stream to read from
 * @ptr: OUT: length and location of the decoded data
 * @maxlen: largest acceptable length in bytes, or zero for no limit
 *
 * On success, @ptr->len holds the decoded length and @ptr->data is set
 * to the pointer returned by xdr_inline_decode(); the bytes are not
 * copied. When the decoded length is zero, @ptr->data is left
 * unmodified.
 *
 * Return: true on success; false if the length word or the data body
 * cannot be decoded, or if the length exceeds a non-zero @maxlen.
 */
static inline bool
xdrgen_decode_opaque(struct xdr_stream *xdr, opaque *ptr, u32 maxlen)
{
	__be32 *p;
	u32 len;

	/*
	 * xdr_stream_decode_u32() returns zero on success and a negative
	 * errno on failure; it never returns XDR_UNIT.
	 */
	if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0))
		return false;
	if (unlikely(maxlen && len > maxlen))
		return false;
	if (len != 0) {
		p = xdr_inline_decode(xdr, len);
		if (unlikely(!p))
			return false;
		ptr->data = (u8 *)p;
	}
	ptr->len = len;
	return true;
}

/**
 * xdrgen_encode_opaque - Encode variable-length XDR opaque data
 * @xdr: XDR stream to write to
 * @val: length and location of the data to encode
 *
 * Reserves room for a length word plus @val.len bytes rounded up by
 * xdr_align_size(), then lets xdr_encode_opaque() fill it in.
 *
 * Return: true on success; false if xdr_reserve_space() returns NULL.
 */
static inline bool
xdrgen_encode_opaque(struct xdr_stream *xdr, opaque val)
{
	__be32 *dst;

	dst = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len));
	if (unlikely(!dst))
		return false;

	xdr_encode_opaque(dst, val.data, val.len);
	return true;
}

#endif /* _SUNRPC_XDRGEN__BUILTINS_H_ */
+26 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (c) 2024 Oracle and/or its affiliates.
 *
 * This header defines XDR data type primitives specified in
 * Section 4 of RFC 4506, used by RPC programs implemented
 * in the Linux kernel.
 */

#ifndef _SUNRPC_XDRGEN__DEFS_H_
#define _SUNRPC_XDRGEN__DEFS_H_

/*
 * Upper-case spellings of the C boolean constants.
 * NOTE(review): presumably provided so that TRUE/FALSE identifiers
 * carried over from an XDR specification compile unchanged -- confirm.
 */
#define TRUE	(true)
#define FALSE	(false)

/*
 * Counted representation of an XDR string: a byte count plus a pointer
 * to the bytes. Nothing in this type implies NUL termination.
 */
typedef struct {
	u32 len;		/* number of bytes at @data */
	unsigned char *data;	/* start of the string bytes */
} string;

/*
 * Counted representation of variable-length XDR opaque data: a byte
 * count plus a pointer to the bytes.
 */
typedef struct {
	u32 len;	/* number of bytes at @data */
	u8 *data;	/* start of the opaque bytes */
} opaque;

#endif /* _SUNRPC_XDRGEN__DEFS_H_ */
+2 −0
Original line number Diff line number Diff line
__pycache__
generators/__pycache__
+244 −0
Original line number Diff line number Diff line
xdrgen - Linux Kernel XDR code generator

Introduction
------------

SunRPC programs are typically specified using a language defined by
RFC 4506. In fact, all IETF-published NFS specifications provide a
description of the specified protocol using this language.

Since the 1990's, user space consumers of SunRPC have had access to
a tool that could read such XDR specifications and then generate C
code that implements the RPC portions of that protocol. This tool is
called rpcgen.

This RPC-level code is code that handles input directly from the
network, and thus a high degree of memory safety and sanity checking
is needed to help ensure proper levels of security. Bugs in this
code can have significant impact on security and performance.

However, it is code that is repetitive and tedious to write by hand.

The C code generated by rpcgen makes extensive use of the facilities
of the user space TI-RPC library and libc. Furthermore, the dialect
of the generated code is very traditional K&R C.

The Linux kernel's implementations of SunRPC-based protocols hand-roll
their XDR code. There are two main reasons for this:

1. libtirpc (and its predecessors) operate only in user space. The
   kernel's RPC implementation and its API are significantly
   different than libtirpc.

2. rpcgen-generated code is believed to be less efficient than code
   that is hand-written.

These days, gcc and its kin are capable of optimizing code better
than human authors. There are only a few instances where writing
XDR code by hand will make a measurable performance difference.

In addition, the current hand-written code in the Linux kernel is
difficult to audit and prove that it implements exactly what is in
the protocol specification.

In order to accrue the benefits of machine-generated XDR code in the
kernel, a tool is needed that will output C code that works against
the kernel's SunRPC implementation rather than libtirpc.

Enter xdrgen.


Dependencies
------------

These dependencies are typically packaged by Linux distributions:

- python3
- python3-lark
- python3-jinja2

These dependencies are available via PyPI:

- pip install 'lark[interegular]'


XDR Specifications
------------------

When adding a new protocol implementation to the kernel, the XDR
specification can be derived by feeding a .txt copy of the RFC to
the script located in tools/net/sunrpc/extract.sh.

   $ extract.sh < rfc0001.txt > new2.x


Operation
---------

Once a .x file is available, use xdrgen to generate source and
header files containing an implementation of XDR encoding and
decoding functions for the specified protocol.

   $ ./xdrgen definitions new2.x > include/linux/sunrpc/xdrgen/new2.h
   $ ./xdrgen declarations new2.x > new2xdr_gen.h

and

   $ ./xdrgen source new2.x > new2xdr_gen.c

The files are ready to use for a server-side protocol implementation,
or may be used as a guide for implementing these routines by hand.

By default, the only comments added to this code are kdoc comments
that appear directly in front of the public per-procedure APIs. For
deeper introspection, specifying the "--annotate" flag will insert
additional comments in the generated code to help readers match the
generated code to specific parts of the XDR specification.

Because the generated code is targeted for the Linux kernel, it
is tagged with a GPLv2-only license.

The xdrgen tool can also provide lexical and syntax checking of
an XDR specification:

   $ ./xdrgen lint xdr/new.x


How It Works
------------

xdrgen does not use machine learning to generate source code. The
translation is entirely deterministic.

RFC 4506 Section 6 contains a BNF grammar of the XDR specification
language. The grammar has been adapted for use by the Python Lark
module.

The xdr.ebnf file in this directory contains the grammar used to
parse XDR specifications. xdrgen configures Lark using the grammar
in xdr.ebnf. Lark parses the target XDR specification using this
grammar, creating a parse tree.

xdrgen then transforms the parse tree into an abstract syntax tree.
This tree is passed to a series of code generators.

The generators are implemented as Python classes residing in the
generators/ directory. Each generator emits code created from Jinja2
templates stored in the templates/ directory.

Source code for the data items is generated in the same order in which
they appear in the specification to ensure the generated code compiles.
This conforms with the behavior of rpcgen.

xdrgen assumes that the generated source code is further compiled by
a compiler that can optimize in a number of ways, including:

 - Unused functions are discarded (i.e., not added to the executable)

 - Aggressive function inlining removes unnecessary stack frames

 - Single-arm switch statements are replaced by a single conditional
   branch

And so on.


Pragmas
-------

Pragma directives specify exceptions to the normal generation of
encoding and decoding functions. Currently three directives are
implemented: "exclude", "header", and "public".

Pragma exclude
------ -------

  pragma exclude <RPC procedure> ;

In some cases, a procedure encoder or decoder function might need
special processing that cannot be automatically generated. The
automatically-generated functions might conflict or interfere with
the hand-rolled function. To avoid editing the generated source code
by hand, a pragma can specify that the procedure's encoder and
decoder functions are not included in the generated header and
source.

For example:

  pragma exclude NFSPROC3_READDIRPLUS;

Excludes the decoder function for the READDIRPLUS argument and the
encoder function for the READDIRPLUS result.

Note that because data item encoder and decoder functions are
defined "static __maybe_unused", subsequent compilation
automatically excludes data item encoder and decoder functions that
are used only by excluded procedures.

Pragma header
------ ------

  pragma header <string> ;

Provide a name to use for the header file. For example:

  pragma header nlm4;

Adds

  #include "nlm4xdr_gen.h"

to the generated source file.

Pragma public
------ ------

  pragma public <XDR data item> ;

Normally XDR encoder and decoder functions are "static". In case an
implementer wants to call these functions from other source code,
s/he can add a public pragma in the input .x file to indicate a set
of functions that should get a prototype in the generated header,
and the function definitions will not be declared static.

For example:

  pragma public nfsstat3;

Adds these prototypes in the generated header:

  bool xdrgen_decode_nfsstat3(struct xdr_stream *xdr, enum nfsstat3 *ptr);
  bool xdrgen_encode_nfsstat3(struct xdr_stream *xdr, enum nfsstat3 value);

And, in the generated source code, both of these functions appear
without the "static __maybe_unused" modifiers.


Future Work
-----------

Finish implementing XDR pointer and list types.

Generate client-side procedure functions

Expand the README into a user guide similar to rpcgen(1)

Add more pragma directives:

  * @pages -- use xdr_read/write_pages() for the specified opaque
    field
  * @skip -- do not decode, but rather skip, the specified argument
    field

Enable something like a #include to dynamically insert the content
of other specification files

Properly support line-by-line pass-through via the "%" decorator

Build a unit test suite for verifying translation of XDR language
into compilable code

Add a command-line option to insert trace_printk call sites in the
generated source code, for improved (temporary) observability

Generate kernel Rust code as well as C code
+2 −0
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0
# Just to make sphinx-apidoc document this directory
Loading