RDMA/sa_query: Support IB service records resolution

Add an SA query API ib_sa_service_rec_get() to support building and
sending SA query MADs that ask for service records with a specific
name or ID, and receiving and parsing responses from the SM.

Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Reviewed-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
Link: https://patch.msgid.link/9af6c82f3a3a9d975115a33235fb4ffc7c8edb21.1751279793.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
Mark Zhang 2025-06-30 13:52:32 +03:00 committed by Leon Romanovsky
parent ef5fcdb730
commit a892a3e74f
3 changed files with 276 additions and 0 deletions

View File

@ -152,6 +152,13 @@ struct ib_sa_mcmember_query {
struct ib_sa_query sa_query;
};
/*
 * Per-query state for a service record lookup: the caller's completion
 * callback and its opaque context, plus the embedded generic SA query.
 * The service record callback and release hooks use container_of() on
 * sa_query to get back to this wrapper.
 */
struct ib_sa_service_query {
void (*callback)(int status, struct sa_service_rec *rec,
unsigned int num_services, void *context);
void *context;
struct ib_sa_query sa_query;
};
static LIST_HEAD(ib_nl_request_list);
static DEFINE_SPINLOCK(ib_nl_request_lock);
static atomic_t ib_nl_sa_request_seq;
@ -686,6 +693,58 @@ static const struct ib_field guidinfo_rec_table[] = {
.size_bits = 512 },
};
/*
 * Fill in the struct-side half of an ib_field entry (byte offset, byte size
 * and a printable name) for one member of struct sa_service_rec.
 */
#define SERVICE_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct sa_service_rec, field), \
.struct_size_bytes = sizeof_field(struct sa_service_rec, field), \
.field_name = "sa_service_rec:" #field
/*
 * Wire layout of a service record, consumed by ib_sa_pack_service() and
 * ib_sa_unpack_service(). offset_words counts 32-bit words (the 64-bit id at
 * word 0 is followed by gid at word 2). The last field ends at word 44, so a
 * packed record occupies 176 bytes, which is what IB_SERVICE_REC_SZ encodes.
 */
static const struct ib_field service_rec_table[] = {
{ SERVICE_REC_FIELD(id),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 64 },
{ SERVICE_REC_FIELD(gid),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 128 },
{ SERVICE_REC_FIELD(pkey),
.offset_words = 6,
.offset_bits = 0,
.size_bits = 16 },
/* 16 bits of padding sharing word 6 with pkey */
{ RESERVED,
.offset_words = 6,
.offset_bits = 16,
.size_bits = 16 },
{ SERVICE_REC_FIELD(lease),
.offset_words = 7,
.offset_bits = 0,
.size_bits = 32 },
{ SERVICE_REC_FIELD(key),
.offset_words = 8,
.offset_bits = 0,
.size_bits = 128 },
{ SERVICE_REC_FIELD(name),
.offset_words = 12,
.offset_bits = 0,
.size_bits = 512 },
{ SERVICE_REC_FIELD(data_8),
.offset_words = 28,
.offset_bits = 0,
.size_bits = 128 },
{ SERVICE_REC_FIELD(data_16),
.offset_words = 32,
.offset_bits = 0,
.size_bits = 128 },
{ SERVICE_REC_FIELD(data_32),
.offset_words = 36,
.offset_bits = 0,
.size_bits = 128 },
{ SERVICE_REC_FIELD(data_64),
.offset_words = 40,
.offset_bits = 0,
.size_bits = 128 },
};
#define RDMA_PRIMARY_PATH_MAX_REC_NUM 3
static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
@ -1392,6 +1451,20 @@ void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
}
EXPORT_SYMBOL(ib_sa_pack_path);
/**
 * ib_sa_pack_service - Serialize a service record into MAD wire format
 * @rec: service record to convert
 * @attribute: destination buffer that receives the packed attribute
 *
 * The field layout is taken from service_rec_table.
 */
void ib_sa_pack_service(struct sa_service_rec *rec, void *attribute)
{
	ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
		rec, attribute);
}
EXPORT_SYMBOL(ib_sa_pack_service);
/**
 * ib_sa_unpack_service - Parse a wire-format service record
 * @attribute: packed attribute to read from
 * @rec: service record that receives the unpacked fields
 *
 * The field layout is taken from service_rec_table.
 */
void ib_sa_unpack_service(void *attribute, struct sa_service_rec *rec)
{
	ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
		  attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_service);
static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
struct ib_sa_device *sa_dev,
u32 port_num)
@ -1481,6 +1554,68 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
}
}
/*
 * IB_SA_DATA_OFFS: number of bytes subtracted from a response's mad_len to
 * obtain the length of the record payload.
 * IB_SERVICE_REC_SZ: size in bytes of one packed service record; matches the
 * extent described by service_rec_table.
 */
#define IB_SA_DATA_OFFS 56
#define IB_SERVICE_REC_SZ 176
/*
 * Walk every segment on the RMPP list of @mad_wc and unpack up to
 * @num_services service records from their data payloads into @rec.
 *
 * A packed record can straddle two segments, so bytes are staged in a bounce
 * buffer and only unpacked once a full IB_SERVICE_REC_SZ has been collected.
 */
static void ib_unpack_service_rmpp(struct sa_service_rec *rec,
				   struct ib_mad_recv_wc *mad_wc,
				   int num_services)
{
	unsigned int staged = 0, unpacked = 0;
	unsigned int seg_len, seg_pos, chunk;
	struct ib_mad_recv_buf *seg;
	u8 wire[IB_SERVICE_REC_SZ];
	u8 *payload;

	/* sizeof does not evaluate its operand, so @seg is not dereferenced */
	seg_len = sizeof(((struct ib_sa_mad *) seg->mad)->data);

	list_for_each_entry(seg, &mad_wc->rmpp_list, list) {
		payload = ((struct ib_sa_mad *) seg->mad)->data;

		for (seg_pos = 0;
		     seg_pos < seg_len && unpacked < num_services;
		     seg_pos += chunk) {
			/* Take what completes the record or what is left here */
			chunk = min(IB_SERVICE_REC_SZ - staged,
				    seg_len - seg_pos);
			memcpy(wire + staged, payload + seg_pos, chunk);
			staged += chunk;

			if (staged != IB_SERVICE_REC_SZ)
				continue;

			ib_sa_unpack_service(wire, rec + unpacked);
			staged = 0;
			unpacked++;
		}
	}
}
/*
 * RMPP completion handler for service record queries.
 *
 * Works out how many records the response carries, unpacks them into a
 * temporary array and hands that array to the caller's callback. The array
 * is freed as soon as the callback returns, so the callback must copy
 * anything it wants to keep.
 *
 * The callback is invoked with:
 *   - @status and no records when there is no receive buffer,
 *   - -ENODATA when the response holds no complete record,
 *   - -ENOMEM when the temporary array cannot be allocated,
 *   - @status and the unpacked records otherwise.
 */
static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, int status,
				       struct ib_mad_recv_wc *mad_wc)
{
	struct ib_sa_service_query *query =
		container_of(sa_query, struct ib_sa_service_query, sa_query);
	struct sa_service_rec *rec;
	int num_services = 0;

	if (!mad_wc || !mad_wc->recv_buf.mad) {
		query->callback(status, NULL, 0, query->context);
		return;
	}

	/*
	 * Only subtract the header offset when the MAD is longer than it;
	 * with an unsigned mad_len a shorter MAD would otherwise wrap around
	 * and yield a bogus record count.
	 */
	if (mad_wc->mad_len > IB_SA_DATA_OFFS)
		num_services = (mad_wc->mad_len - IB_SA_DATA_OFFS) /
			       IB_SERVICE_REC_SZ;
	if (num_services <= 0) {
		query->callback(-ENODATA, NULL, 0, query->context);
		return;
	}

	/*
	 * Zero the array: ib_unpack_service_rmpp() only fills records for
	 * which the RMPP list actually supplies data, and whatever it leaves
	 * untouched is still passed to the callback.
	 */
	rec = kcalloc(num_services, sizeof(*rec), GFP_KERNEL);
	if (!rec) {
		query->callback(-ENOMEM, NULL, 0, query->context);
		return;
	}

	ib_unpack_service_rmpp(rec, mad_wc, num_services);
	query->callback(status, rec, num_services, query->context);
	kfree(rec);
}
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
struct ib_sa_path_query *query =
@ -1490,6 +1625,14 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
kfree(query);
}
/* Release hook: free the service query wrapper that embeds @sa_query. */
static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
{
	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
}
/**
* ib_sa_path_rec_get - Start a Path get query
* @client:SA client
@ -1620,6 +1763,101 @@ err1:
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
/**
 * ib_sa_service_rec_get - Start a Service get query
 * @client: SA client
 * @device: device to send query on
 * @port_num: port number to send query on
 * @rec: Service Record to send in query
 * @comp_mask: component mask to send in query
 * @timeout_ms: time to wait for response
 * @gfp_mask: GFP mask to use for internal allocations
 * @callback: function called when query completes, times out or is
 * canceled
 * @context: opaque user context passed to callback
 * @sa_query: query context, used to cancel query
 *
 * Send a Service Record GetTable query to the SA to look up service
 * records matching @rec and @comp_mask. The callback function will be
 * called when the query completes (or fails); status is 0 for a
 * successful response, -EINTR if the query is canceled, -ETIMEDOUT if
 * the query timed out, -EIO if an error occurred sending the query,
 * -ENODATA if the response holds no complete record, or -ENOMEM if the
 * records could not be unpacked. The resp parameter of the callback
 * points to an array of num_services records; it is only valid if
 * status is 0 and is freed once the callback returns.
 *
 * If the return value of ib_sa_service_rec_get() is negative, it is an
 * error code (-ENODEV if @device has no SA client data, -ENOMEM if the
 * query cannot be allocated, or the error from building or sending the
 * MAD). Otherwise it is a query ID that can be used to cancel the query.
 */
int ib_sa_service_rec_get(struct ib_sa_client *client,
struct ib_device *device, u32 port_num,
struct sa_service_rec *rec,
ib_sa_comp_mask comp_mask,
unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct sa_service_rec *resp,
unsigned int num_services,
void *context),
void *context, struct ib_sa_query **sa_query)
{
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_service_query *query;
struct ib_mad_agent *agent;
struct ib_sa_port *port;
struct ib_sa_mad *mad;
int ret;
if (!sa_dev)
return -ENODEV;
/* NOTE(review): port_num is not range-checked before indexing; callers presumably pass a valid port */
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
goto err1;
/* Take a client reference for the query; dropped again at err2 if the send fails */
ib_sa_client_get(client);
query->sa_query.client = client;
query->callback = callback;
query->context = context;
mad = query->sa_query.mad_buf->mad;
init_mad(&query->sa_query, agent);
/* Without a caller callback there is nobody to deliver the records to */
query->sa_query.rmpp_callback = callback ? ib_sa_service_rec_callback :
NULL;
query->sa_query.release = ib_sa_service_rec_release;
/* GetTable: the response may carry several service records */
mad->mad_hdr.method = IB_MGMT_METHOD_GET_TABLE;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
mad->sa_hdr.comp_mask = comp_mask;
ib_sa_pack_service(rec, mad->data);
/* Publish the handle before sending; it is reset to NULL at err2 on failure */
*sa_query = &query->sa_query;
query->sa_query.mad_buf->context[1] = rec;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
goto err2;
return ret;
err2:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
err1:
kfree(query);
return ret;
}
EXPORT_SYMBOL(ib_sa_service_rec_get);
static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
int status, struct ib_sa_mad *mad)
{

View File

@ -48,6 +48,7 @@
#define IB_MGMT_METHOD_REPORT 0x06
#define IB_MGMT_METHOD_REPORT_RESP 0x86
#define IB_MGMT_METHOD_TRAP_REPRESS 0x07
#define IB_MGMT_METHOD_GET_TABLE 0x12 /* method used by SA service record queries */
#define IB_MGMT_METHOD_RESP 0x80
#define IB_BM_ATTR_MOD_RESP cpu_to_be32(1)

View File

@ -189,6 +189,20 @@ struct sa_path_rec {
u32 flags;
};
/*
 * Service record as exchanged with the SA. Converted to and from MAD wire
 * format by ib_sa_pack_service() / ib_sa_unpack_service(). Multi-byte
 * fields are declared big-endian (__be16/__be32/__be64).
 */
struct sa_service_rec {
__be64 id;
__u8 gid[16];
__be16 pkey;
__u8 reserved[2];
__be32 lease;
__u8 key[16];
__u8 name[64];
/* 16 bytes of data each, viewed at four different element widths */
__u8 data_8[16];
__be16 data_16[8];
__be32 data_32[4];
__be64 data_64[2];
};
static inline enum ib_gid_type
sa_conv_pathrec_to_gid_type(struct sa_path_rec *rec)
{
@ -417,6 +431,17 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device,
unsigned int num_prs, void *context),
void *context, struct ib_sa_query **query);
/*
 * Start an SA query for service records matching @rec / @comp_mask.
 * @callback receives the completion status, an array of @num_services
 * records in @resp, and the caller's @context. On success *@sa_query is set
 * to the query handle.
 */
int ib_sa_service_rec_get(struct ib_sa_client *client,
struct ib_device *device, u32 port_num,
struct sa_service_rec *rec,
ib_sa_comp_mask comp_mask,
unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct sa_service_rec *resp,
unsigned int num_services,
void *context),
void *context, struct ib_sa_query **sa_query);
struct ib_sa_multicast {
struct ib_sa_mcmember_rec rec;
ib_sa_comp_mask comp_mask;
@ -508,6 +533,18 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
*/
void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute);
/**
 * ib_sa_pack_service - Convert a service record from struct sa_service_rec
 * to IB MAD wire format.
 * @rec: service record to convert
 * @attribute: buffer that receives the wire-format record
 */
void ib_sa_pack_service(struct sa_service_rec *rec, void *attribute);
/**
 * ib_sa_unpack_service - Convert a service record from MAD format to struct
 * sa_service_rec.
 * @attribute: wire-format record to read
 * @rec: service record that receives the converted fields
 */
void ib_sa_unpack_service(void *attribute, struct sa_service_rec *rec);
/**
* ib_sa_unpack_path - Convert a path record from MAD format to struct
* ib_sa_path_rec.