Commit a6404823, authored by Mark Zhang, committed by Leon Romanovsky
Browse files

RDMA/cma: Support IB service record resolution



Add a new UCMA command and the corresponding CMA implementation. Userspace
can send this command to request service resolution based on a service
name or ID.

On a successful resolution, one or more service records are returned;
the first one is used as the destination address by default.

Two new CM events are added and returned to caller accordingly:
  - RDMA_CM_EVENT_ADDRINFO_RESOLVED: Resolve succeeded;
  - RDMA_CM_EVENT_ADDRINFO_ERROR:  Resolve failed.

Internally two new CM states are added:
  - RDMA_CM_ADDRINFO_QUERY: CM is in the process of IB service
    resolution;
  - RDMA_CM_ADDRINFO_RESOLVED: CM has finished the resolve process.

With these new states, besides the existing state transitions, two new
flows are supported:
 1. The default address is used:
    RDMA_CM_ADDR_BOUND ->
      RDMA_CM_ADDRINFO_QUERY ->
        RDMA_CM_ADDRINFO_RESOLVED ->
          RDMA_CM_ROUTE_QUERY

 2. To use a different address:
    RDMA_CM_ADDR_BOUND ->
      RDMA_CM_ADDRINFO_QUERY ->
        RDMA_CM_ADDRINFO_RESOLVED ->
          RDMA_CM_ADDR_QUERY ->
            RDMA_CM_ADDR_RESOLVED ->
              RDMA_CM_ROUTE_QUERY

In the second case, resolve_addrinfo returns multiple records, and the user
can call rdma_resolve_addr() with a record other than the first one.

Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Reviewed-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
Link: https://patch.msgid.link/b6e82ad75522a13b5efe4ff86da0e465aab04cc2.1751279794.git.leonro@nvidia.com


Signed-off-by: Leon Romanovsky <leon@kernel.org>
parent a892a3e7
Loading
Loading
Loading
Loading
+134 −2
Original line number Diff line number Diff line
@@ -2076,6 +2076,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
	kfree(id_priv->id.route.path_rec);
	kfree(id_priv->id.route.path_rec_inbound);
	kfree(id_priv->id.route.path_rec_outbound);
	kfree(id_priv->id.route.service_recs);

	put_net(id_priv->id.route.addr.dev_addr.net);
	kfree(id_priv);
@@ -3382,13 +3383,18 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
{
	struct rdma_id_private *id_priv;
	enum rdma_cm_state state;
	int ret;

	if (!timeout_ms)
		return -EINVAL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
	state = id_priv->state;
	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
			   RDMA_CM_ROUTE_QUERY) &&
	    !cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_RESOLVED,
			   RDMA_CM_ROUTE_QUERY))
		return -EINVAL;

	cma_id_get(id_priv);
@@ -3409,7 +3415,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)

	return 0;
err:
	cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
	cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, state);
	cma_id_put(id_priv);
	return ret;
}
@@ -5506,3 +5512,129 @@ static void __exit cma_cleanup(void)

module_init(cma_init);
module_exit(cma_cleanup);

static void cma_query_ib_service_handler(int status,
					 struct sa_service_rec *recs,
					 unsigned int num_recs, void *context)
{
	struct cma_work *work = context;
	struct rdma_id_private *id_priv = work->id;
	struct sockaddr_ib *addr;

	if (status)
		goto fail;

	if (!num_recs) {
		status = -ENOENT;
		goto fail;
	}

	if (id_priv->id.route.service_recs) {
		status = -EALREADY;
		goto fail;
	}

	id_priv->id.route.service_recs =
		kmalloc_array(num_recs, sizeof(*recs), GFP_KERNEL);
	if (!id_priv->id.route.service_recs) {
		status = -ENOMEM;
		goto fail;
	}

	id_priv->id.route.num_service_recs = num_recs;
	memcpy(id_priv->id.route.service_recs, recs, sizeof(*recs) * num_recs);

	addr = (struct sockaddr_ib *)&id_priv->id.route.addr.dst_addr;
	addr->sib_family = AF_IB;
	addr->sib_addr = *(struct ib_addr *)&recs->gid;
	addr->sib_pkey = recs->pkey;
	addr->sib_sid = recs->id;
	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr,
			   (union ib_gid *)&addr->sib_addr);
	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr,
			 ntohs(addr->sib_pkey));

	queue_work(cma_wq, &work->work);
	return;

fail:
	work->old_state = RDMA_CM_ADDRINFO_QUERY;
	work->new_state = RDMA_CM_ADDR_BOUND;
	work->event.event = RDMA_CM_EVENT_ADDRINFO_ERROR;
	work->event.status = status;
	pr_debug_ratelimited(
		"RDMA CM: SERVICE_ERROR: failed to query service record. status %d\n",
		status);
	queue_work(cma_wq, &work->work);
}

/*
 * Start an SA service record query for @id_priv.
 *
 * The query key is built from @ibs: the service ID and/or the service name,
 * depending on which RDMA_USER_CM_IB_SERVICE_FLAG_* bits are set in
 * @ibs->flags. A work item is pre-filled for the success case
 * (RDMA_CM_ADDRINFO_QUERY -> RDMA_CM_ADDRINFO_RESOLVED, reporting
 * RDMA_CM_EVENT_ADDRINFO_RESOLVED) and handed to
 * cma_query_ib_service_handler() as the query context.
 *
 * Returns 0 if the query was issued, -ENOMEM if the work item could not be
 * allocated, or the negative value returned by ib_sa_service_rec_get().
 */
static int cma_resolve_ib_service(struct rdma_id_private *id_priv,
				  struct rdma_ucm_ib_service *ibs)
{
	struct sa_service_rec sr = {};
	ib_sa_comp_mask mask = 0;
	struct cma_work *work;

	/* Translate the user request into an SA record plus component mask. */
	if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_ID) {
		mask |= IB_SA_SERVICE_REC_SERVICE_ID;
		sr.id = cpu_to_be64(ibs->service_id);
	}
	if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_NAME) {
		mask |= IB_SA_SERVICE_REC_SERVICE_NAME;
		strscpy(sr.name, ibs->service_name, sizeof(sr.name));
	}

	work = kzalloc(sizeof(*work), GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	INIT_WORK(&work->work, cma_work_handler);
	work->id = id_priv;
	work->event.event = RDMA_CM_EVENT_ADDRINFO_RESOLVED;
	work->new_state = RDMA_CM_ADDRINFO_RESOLVED;
	work->old_state = RDMA_CM_ADDRINFO_QUERY;

	/*
	 * Reference held on behalf of the pending query; it is dropped below
	 * if the query cannot be started.
	 */
	cma_id_get(id_priv);

	/* NOTE(review): 2000 is presumably a timeout in milliseconds - confirm. */
	id_priv->query_id = ib_sa_service_rec_get(&sa_client,
						  id_priv->id.device,
						  id_priv->id.port_num,
						  &sr, mask,
						  2000, GFP_KERNEL,
						  cma_query_ib_service_handler,
						  work, &id_priv->query);
	if (id_priv->query_id >= 0)
		return 0;

	/* The query was never issued: undo the reference and the allocation. */
	cma_id_put(id_priv);
	kfree(work);
	return id_priv->query_id;
}

/*
 * Entry point for IB service record resolution on @id.
 *
 * The id must have a cma_dev and be in RDMA_CM_ADDR_BOUND; it is moved to
 * RDMA_CM_ADDRINFO_QUERY while the query is outstanding. If the query cannot
 * be started the state is moved back to RDMA_CM_ADDR_BOUND.
 *
 * Returns 0 when the query has been issued, -EINVAL when the id has no
 * cma_dev or is not in RDMA_CM_ADDR_BOUND, -EOPNOTSUPP when the port does
 * not have the IB SA capability, or the error from cma_resolve_ib_service().
 */
int rdma_resolve_ib_service(struct rdma_cm_id *id,
			    struct rdma_ucm_ib_service *ibs)
{
	struct rdma_id_private *id_priv =
		container_of(id, struct rdma_id_private, id);
	int ret;

	if (!id_priv->cma_dev)
		return -EINVAL;

	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDRINFO_QUERY))
		return -EINVAL;

	ret = rdma_cap_ib_sa(id->device, id->port_num) ?
		      cma_resolve_ib_service(id_priv, ibs) :
		      -EOPNOTSUPP;
	if (ret)
		/* Nothing is in flight: return to the state we came from. */
		cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_QUERY,
			      RDMA_CM_ADDR_BOUND);

	return ret;
}
EXPORT_SYMBOL(rdma_resolve_ib_service);
+3 −1
Original line number Diff line number Diff line
@@ -47,7 +47,9 @@ enum rdma_cm_state {
	RDMA_CM_ADDR_BOUND,
	RDMA_CM_LISTEN,
	RDMA_CM_DEVICE_REMOVAL,
	RDMA_CM_DESTROYING
	RDMA_CM_DESTROYING,
	RDMA_CM_ADDRINFO_QUERY,
	RDMA_CM_ADDRINFO_RESOLVED
};

struct rdma_id_private {
+29 −1
Original line number Diff line number Diff line
@@ -282,6 +282,10 @@ static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
	}
	uevent->resp.event = event->event;
	uevent->resp.status = event->status;

	if (event->event == RDMA_CM_EVENT_ADDRINFO_RESOLVED)
		goto out;

	if (ctx->cm_id->qp_type == IB_QPT_UD)
		ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
				   &event->param.ud);
@@ -289,6 +293,7 @@ static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
		ucma_copy_conn_event(&uevent->resp.param.conn,
				     &event->param.conn);

out:
	uevent->resp.ece.vendor_id = event->ece.vendor_id;
	uevent->resp.ece.attr_mod = event->ece.attr_mod;
	return uevent;
@@ -728,6 +733,28 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file,
	return ret;
}

/*
 * RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE handler.
 *
 * Copies a struct rdma_ucm_resolve_ib_service from userspace, looks up the
 * ucma context named by cmd.id and calls rdma_resolve_ib_service() on its
 * cm_id while holding the context mutex.
 *
 * Returns the result of rdma_resolve_ib_service() (0 on success), -EINVAL if
 * the caller supplied fewer input bytes than the command structure needs,
 * -EFAULT if the command cannot be copied in, or the error from
 * ucma_get_ctx().
 */
static ssize_t ucma_resolve_ib_service(struct ucma_file *file,
				       const char __user *inbuf, int in_len,
				       int out_len)
{
	struct rdma_ucm_resolve_ib_service cmd;
	struct ucma_context *ctx;
	int ret;

	/*
	 * Never read more from userspace than the caller declared it passed;
	 * a short command is malformed rather than a fault.
	 */
	if (in_len < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_ib_service(ctx->cm_id, &cmd.ibs);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_route(struct ucma_file *file,
				  const char __user *inbuf,
				  int in_len, int out_len)
@@ -1703,7 +1730,8 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
	[RDMA_USER_CM_CMD_QUERY]	 = ucma_query,
	[RDMA_USER_CM_CMD_BIND]		 = ucma_bind,
	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	 = ucma_resolve_addr,
	[RDMA_USER_CM_CMD_JOIN_MCAST]	 = ucma_join_multicast
	[RDMA_USER_CM_CMD_JOIN_MCAST]	 = ucma_join_multicast,
	[RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE] = ucma_resolve_ib_service
};

static ssize_t ucma_write(struct file *filp, const char __user *buf,
+17 −1
Original line number Diff line number Diff line
@@ -33,7 +33,9 @@ enum rdma_cm_event_type {
	RDMA_CM_EVENT_MULTICAST_JOIN,
	RDMA_CM_EVENT_MULTICAST_ERROR,
	RDMA_CM_EVENT_ADDR_CHANGE,
	RDMA_CM_EVENT_TIMEWAIT_EXIT
	RDMA_CM_EVENT_TIMEWAIT_EXIT,
	RDMA_CM_EVENT_ADDRINFO_RESOLVED,
	RDMA_CM_EVENT_ADDRINFO_ERROR
};

const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event);
@@ -63,6 +65,9 @@ struct rdma_route {
	 * 2 - Both primary and alternate path are available
	 */
	int num_pri_alt_paths;

	unsigned int num_service_recs;
	struct sa_service_rec *service_recs;
};

struct rdma_conn_param {
@@ -197,6 +202,17 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 */
int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms);

/**
 * rdma_resolve_ib_service - Resolve the IB service record of the
 *   service with the given service ID or name.
 * @id: RDMA identifier. It must be associated with a device and be in the
 *   address-bound state, otherwise -EINVAL is returned.
 * @ibs: Service to look up. The bits set in @ibs->flags select whether the
 *   service ID, the service name, or both are used as the query key.
 *
 * This function is optional in the rdma cm flow. It is called on the client
 * side of a connection, before calling rdma_resolve_route. The resolution
 * can be done once per rdma_cm_id.
 *
 * The query completes asynchronously: RDMA_CM_EVENT_ADDRINFO_RESOLVED is
 * reported when at least one service record was found, and
 * RDMA_CM_EVENT_ADDRINFO_ERROR is reported otherwise.
 *
 * Return: 0 if the query was started, -EINVAL if @id is not in a suitable
 * state, -EOPNOTSUPP if the port does not support IB SA queries, or another
 * negative errno if the query could not be issued.
 */
int rdma_resolve_ib_service(struct rdma_cm_id *id,
			    struct rdma_ucm_ib_service *ibs);

/**
 * rdma_create_qp - Allocate a QP and associate it with the specified RDMA
 * identifier.
+19 −1
Original line number Diff line number Diff line
@@ -67,7 +67,8 @@ enum {
	RDMA_USER_CM_CMD_QUERY,
	RDMA_USER_CM_CMD_BIND,
	RDMA_USER_CM_CMD_RESOLVE_ADDR,
	RDMA_USER_CM_CMD_JOIN_MCAST
	RDMA_USER_CM_CMD_JOIN_MCAST,
	RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE
};

/* See IBTA Annex A11, service ID bytes 4 & 5 */
@@ -338,4 +339,21 @@ struct rdma_ucm_migrate_resp {
	__u32 events_reported;
};

/*
 * Bits for rdma_ucm_ib_service.flags. Each bit selects one field of the
 * structure to be used as part of the service record query key; both may be
 * set at once.
 */
enum {
	/* Query by rdma_ucm_ib_service.service_id. */
	RDMA_USER_CM_IB_SERVICE_FLAG_ID = 1 << 0,
	/* Query by rdma_ucm_ib_service.service_name. */
	RDMA_USER_CM_IB_SERVICE_FLAG_NAME = 1 << 1,
};

/* Size in bytes of the service_name buffer in struct rdma_ucm_ib_service. */
#define RDMA_USER_CM_IB_SERVICE_NAME_SIZE 64
/*
 * Description of the IB service to resolve.
 *
 * service_id:   service ID; only used when RDMA_USER_CM_IB_SERVICE_FLAG_ID is
 *               set. The kernel converts it with cpu_to_be64(), so it is
 *               supplied in CPU byte order.
 * service_name: service name; only used when
 *               RDMA_USER_CM_IB_SERVICE_FLAG_NAME is set. The kernel copies
 *               it with strscpy(), i.e. it is treated as a NUL-terminated
 *               string.
 * flags:        combination of RDMA_USER_CM_IB_SERVICE_FLAG_* bits.
 * reserved:     NOTE(review): not read by the resolve path shown with this
 *               change - presumably should be zero; confirm.
 */
struct rdma_ucm_ib_service {
	__u64 service_id;
	__u8  service_name[RDMA_USER_CM_IB_SERVICE_NAME_SIZE];
	__u32 flags;
	__u32 reserved;
};

/*
 * Input of RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE.
 *
 * id:  identifier of the ucma context whose cm_id should be resolved.
 * ibs: the service to look up.
 *
 * NOTE(review): where __u64 is 8-byte aligned there are 4 bytes of implicit
 * padding between id and ibs; where it is 4-byte aligned there are none.
 * Confirm the layout is acceptable for 32-bit userspace on 64-bit kernels.
 */
struct rdma_ucm_resolve_ib_service {
	__u32 id;
	struct rdma_ucm_ib_service ibs;
};
#endif /* RDMA_USER_CM_H */