Newer
Older
Greg Kroah-Hartman
committed
// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* Basic Transport Functions exploiting Infiniband API
*
* Copyright IBM Corp. 2016
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
*/
#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#define SMC_LGR_NUM_INCR 256
#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST (8 * HZ)
static struct smc_lgr_list smc_lgr_list = { /* established link groups */
.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
.list = LIST_HEAD_INIT(smc_lgr_list.list),
.num = 0,
};
static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
struct smc_ib_up_work {
struct work_struct work;
struct smc_link_group *lgr;
struct smc_ib_device *smcibdev;
u8 ibport;
};
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
static void smc_link_up_work(struct work_struct *work);
static void smc_link_down_work(struct work_struct *work);
/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
spinlock_t **lgr_lock)
{
if (lgr->is_smcd) {
*lgr_lock = &lgr->smcd->lgr_lock;
return &lgr->smcd->lgr_list;
}
*lgr_lock = &smc_lgr_list.lock;
return &smc_lgr_list.list;
}
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
/* client link group creation always follows the server link group
* creation. For client use a somewhat higher removal delay time,
* otherwise there is a risk of out-of-sync link groups.
*/
if (!lgr->freeing && !lgr->freefast) {
mod_delayed_work(system_wq, &lgr->free_work,
(!lgr->is_smcd && lgr->role == SMC_CLNT) ?
SMC_LGR_FREE_DELAY_CLNT :
SMC_LGR_FREE_DELAY_SERV);
}
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
if (!lgr->freeing && !lgr->freefast) {
lgr->freefast = 1;
mod_delayed_work(system_wq, &lgr->free_work,
SMC_LGR_FREE_DELAY_FAST);
}
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/* Register connection's alert token in our lookup structure.
* To use rbtrees we have to implement our own insert core.
* Requires @conns_lock
* @smc connection to register
* Returns 0 on success, != otherwise.
*/
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
struct rb_node **link, *parent = NULL;
u32 token = conn->alert_token_local;
link = &conn->lgr->conns_all.rb_node;
while (*link) {
struct smc_connection *cur = rb_entry(*link,
struct smc_connection, alert_node);
parent = *link;
if (cur->alert_token_local > token)
link = &parent->rb_left;
else
link = &parent->rb_right;
}
/* Put the new node there */
rb_link_node(&conn->alert_node, parent, link);
rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
/* Register connection in link group by assigning an alert token
* registered in a search tree.
* Requires @conns_lock
* Note that '0' is a reserved value and not assigned.
*/
static int smc_lgr_register_conn(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
static atomic_t nexttoken = ATOMIC_INIT(0);
/* find a new alert_token_local value not yet used by some connection
* in this link group
*/
sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
while (!conn->alert_token_local) {
conn->alert_token_local = atomic_inc_return(&nexttoken);
if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
conn->alert_token_local = 0;
}
smc_lgr_add_alert_token(conn);
/* assign the new connection to a link */
if (!conn->lgr->is_smcd) {
struct smc_link *lnk;
int i;
/* tbd - link balancing */
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
lnk = &conn->lgr->lnk[i];
if (lnk->state == SMC_LNK_ACTIVATING ||
lnk->state == SMC_LNK_ACTIVE)
conn->lnk = lnk;
}
if (!conn->lnk)
return SMC_CLC_DECL_NOACTLINK;
}
return 0;
}
/* Unregister connection and reset the alert token of the given connection<
*/
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
struct smc_link_group *lgr = conn->lgr;
rb_erase(&conn->alert_node, &lgr->conns_all);
lgr->conns_num--;
conn->alert_token_local = 0;
sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}
/* Unregister connection from lgr
*/
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
if (!lgr)
return;
write_lock_bh(&lgr->conns_lock);
if (conn->alert_token_local) {
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
void smc_lgr_cleanup_early(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
if (!lgr)
return;
smc_conn_free(conn);
smc_lgr_forget(lgr);
smc_lgr_schedule_free_work_fast(lgr);
}
static void smc_lgr_free(struct smc_link_group *lgr);
static void smc_lgr_free_work(struct work_struct *work)
{
struct smc_link_group *lgr = container_of(to_delayed_work(work),
struct smc_link_group,
free_work);
spinlock_t *lgr_lock;
int i;
smc_lgr_list_head(lgr, &lgr_lock);
spin_lock_bh(lgr_lock);
if (lgr->freeing) {
spin_unlock_bh(lgr_lock);
return;
}
read_lock_bh(&lgr->conns_lock);
conns = RB_EMPTY_ROOT(&lgr->conns_all);
read_unlock_bh(&lgr->conns_lock);
if (!conns) { /* number of lgr connections is no longer zero */
spin_unlock_bh(lgr_lock);
list_del_init(&lgr->list); /* remove from smc_lgr_list */
lgr->freeing = 1; /* this instance does the freeing, no new schedule */
spin_unlock_bh(lgr_lock);
cancel_delayed_work(&lgr->free_work);
if (lgr->is_smcd && !lgr->terminating)
if (!lgr->is_smcd) {
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
if (smc_link_usable(lnk))
wake_up_interruptible_all(&lgr->llc_waiter);
static void smc_lgr_terminate_work(struct work_struct *work)
{
struct smc_link_group *lgr = container_of(work, struct smc_link_group,
terminate_work);
__smc_lgr_terminate(lgr, true);
/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
u8 link_id;
int i;
while (1) {
link_id = ++lgr->next_link_id;
if (!link_id) /* skip zero as link_id */
link_id = ++lgr->next_link_id;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (smc_link_usable(&lgr->lnk[i]) &&
lgr->lnk[i].link_id == link_id)
continue;
}
break;
}
return link_id;
}
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini)
{
u8 rndvec[3];
int rc;
get_device(&ini->ib_dev->ibdev->dev);
atomic_inc(&ini->ib_dev->lnk_cnt);
lnk->state = SMC_LNK_ACTIVATING;
lnk->link_id = smcr_next_link_id(lgr);
lnk->link_idx = link_idx;
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
if (!ini->ib_dev->initialized) {
rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
if (rc)
goto out;
}
get_random_bytes(rndvec, sizeof(rndvec));
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
(rndvec[2] << 16);
rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
ini->vlan_id, lnk->gid, &lnk->sgid_index);
if (rc)
goto out;
rc = smc_llc_link_init(lnk);
if (rc)
goto out;
rc = smc_wr_alloc_link_mem(lnk);
if (rc)
goto clear_llc_lnk;
rc = smc_ib_create_protection_domain(lnk);
if (rc)
goto free_link_mem;
rc = smc_ib_create_queue_pair(lnk);
if (rc)
goto dealloc_pd;
rc = smc_wr_create_link(lnk);
if (rc)
goto destroy_qp;
return 0;
destroy_qp:
smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
smc_wr_free_link_mem(lnk);
clear_llc_lnk:
smc_llc_link_clear(lnk);
out:
put_device(&ini->ib_dev->ibdev->dev);
memset(lnk, 0, sizeof(struct smc_link));
lnk->state = SMC_LNK_UNUSED;
if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
wake_up(&ini->ib_dev->lnks_deleted);
return rc;
}
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_link_group *lgr;
struct list_head *lgr_list;
spinlock_t *lgr_lock;
if (ini->is_smcd && ini->vlan_id) {
if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
rc = SMC_CLC_DECL_ISMVLANERR;
lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
if (!lgr) {
lgr->terminating = 0;
lgr->freefast = 0;
lgr->freeing = 0;
mutex_init(&lgr->sndbufs_lock);
mutex_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
INIT_LIST_HEAD(&lgr->sndbufs[i]);
INIT_LIST_HEAD(&lgr->rmbs[i]);
}
smc_lgr_list.num += SMC_LGR_NUM_INCR;
memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
/* SMC-D specific settings */
get_device(&ini->ism_dev->dev);
lgr->peer_gid = ini->ism_gid;
lgr->smcd = ini->ism_dev;
lgr_list = &ini->ism_dev->lgr_list;
lgr_lock = &lgr->smcd->lgr_lock;
lgr->peer_shutdown = 0;
atomic_inc(&ini->ism_dev->lgr_cnt);
} else {
/* SMC-R specific settings */
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
SMC_SYSTEMID_LEN);
memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
SMC_MAX_PNETID_LEN);
smc_llc_lgr_init(lgr, smc);
link_idx = SMC_SINGLE_LINK;
lnk = &lgr->lnk[link_idx];
rc = smcr_link_init(lgr, lnk, link_idx, ini);
if (rc)
goto free_lgr;
lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock;
spin_lock_bh(lgr_lock);
list_add(&lgr->list, lgr_list);
spin_unlock_bh(lgr_lock);
return 0;
free_lgr:
kfree(lgr);
ism_put_vlan:
if (ini->is_smcd && ini->vlan_id)
smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
if (rc < 0) {
if (rc == -ENOMEM)
rc = SMC_CLC_DECL_MEM;
else
rc = SMC_CLC_DECL_INTERR;
}
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
struct smc_link_group *lgr)
int rc;
if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
/* unregister rmb with peer */
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (!rc) {
/* protect against smc_llc_cli_rkey_exchange() */
mutex_lock(&lgr->llc_conf_mutex);
smc_llc_do_delete_rkey(lgr, rmb_desc);
rmb_desc->is_conf_rkey = false;
mutex_unlock(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
if (rmb_desc->is_reg_err) {
/* buf registration failed, reuse not possible */
mutex_lock(&lgr->rmbs_lock);
list_del(&rmb_desc->list);
mutex_unlock(&lgr->rmbs_lock);
smc_buf_free(lgr, true, rmb_desc);
} else {
rmb_desc->used = 0;
}
}
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
if (conn->sndbuf_desc)
if (conn->rmb_desc && lgr->is_smcd)
conn->rmb_desc->used = 0;
else if (conn->rmb_desc)
smcr_buf_unuse(conn->rmb_desc, lgr);
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
if (!lgr)
if (!list_empty(&lgr->list))
smc_ism_unset_conn(conn);
tasklet_kill(&conn->rx_tsklet);
} else {
smc_cdc_tx_dismiss_slots(conn);
if (!list_empty(&lgr->list)) {
smc_lgr_unregister_conn(conn);
smc_buf_unuse(conn, lgr); /* allow buffer reuse */
}
if (!lgr->conns_num)
smc_lgr_schedule_free_work(lgr);
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link *lnk)
{
if (is_rmb)
buf_desc->is_reg_mr[lnk->link_idx] = false;
if (!buf_desc->is_map_ib[lnk->link_idx])
return;
if (is_rmb) {
if (buf_desc->mr_rx[lnk->link_idx]) {
smc_ib_put_memory_region(
buf_desc->mr_rx[lnk->link_idx]);
buf_desc->mr_rx[lnk->link_idx] = NULL;
}
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
} else {
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
}
sg_free_table(&buf_desc->sgt[lnk->link_idx]);
buf_desc->is_map_ib[lnk->link_idx] = false;
}
/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
struct smc_link_group *lgr = lnk->lgr;
struct smc_buf_desc *buf_desc, *bf;
int i;
for (i = 0; i < SMC_RMBE_SIZES; i++) {
mutex_lock(&lgr->rmbs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
smcr_buf_unmap_link(buf_desc, true, lnk);
mutex_unlock(&lgr->rmbs_lock);
mutex_lock(&lgr->sndbufs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
list)
smcr_buf_unmap_link(buf_desc, false, lnk);
mutex_unlock(&lgr->sndbufs_lock);
}
}
static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
struct smc_link_group *lgr = lnk->lgr;
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
lgr->rtokens[i][lnk->link_idx].rkey = 0;
lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
}
}
/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk)
struct smc_ib_device *smcibdev;
if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
return;
smcr_buf_unmap_lgr(lnk);
smcr_rtoken_clear_link(lnk);
smc_ib_modify_qp_reset(lnk);
smc_wr_free_link(lnk);
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
smc_wr_free_link_mem(lnk);
put_device(&lnk->smcibdev->ibdev->dev);
smcibdev = lnk->smcibdev;
memset(lnk, 0, sizeof(struct smc_link));
lnk->state = SMC_LNK_UNUSED;
if (!atomic_dec_return(&smcibdev->lnk_cnt))
wake_up(&smcibdev->lnks_deleted);
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc)
int i;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
if (buf_desc->pages)
__free_pages(buf_desc->pages, buf_desc->order);
static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
struct smc_buf_desc *buf_desc)
{
if (is_dmb) {
/* restore original buf len */
buf_desc->len += sizeof(struct smcd_cdc_msg);
smc_ism_unregister_dmb(lgr->smcd, buf_desc);
kfree(buf_desc->cpu_addr);
kfree(buf_desc);
}
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc)
{
if (lgr->is_smcd)
smcd_buf_free(lgr, is_rmb, buf_desc);
else
smcr_buf_free(lgr, is_rmb, buf_desc);
}
static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
struct smc_buf_desc *buf_desc, *bf_desc;
struct list_head *buf_list;
int i;
for (i = 0; i < SMC_RMBE_SIZES; i++) {
if (is_rmb)
buf_list = &lgr->rmbs[i];
else
buf_list = &lgr->sndbufs[i];
list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
list_del(&buf_desc->list);
smc_buf_free(lgr, is_rmb, buf_desc);
static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
/* free send buffers */
__smc_lgr_free_bufs(lgr, false);
/* free rmbs */
__smc_lgr_free_bufs(lgr, true);
}
static void smc_lgr_free(struct smc_link_group *lgr)
int i;
smc_lgr_free_bufs(lgr);
if (lgr->is_smcd) {
if (!lgr->terminating) {
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
}
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].state != SMC_LNK_UNUSED)
smcr_link_clear(&lgr->lnk[i]);
smc_llc_lgr_clear(lgr);
if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted);
void smc_lgr_forget(struct smc_link_group *lgr)
{
struct list_head *lgr_list;
spinlock_t *lgr_lock;
lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
spin_lock_bh(lgr_lock);
/* do not use this link group for new connections */
if (!list_empty(lgr_list))
list_del_init(lgr_list);
spin_unlock_bh(lgr_lock);
static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
int i;
for (i = 0; i < SMC_RMBE_SIZES; i++) {
struct smc_buf_desc *buf_desc;
list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
buf_desc->len += sizeof(struct smcd_cdc_msg);
smc_ism_unregister_dmb(lgr->smcd, buf_desc);
}
}
}
static void smc_sk_wake_ups(struct smc_sock *smc)
{
smc->sk.sk_write_space(&smc->sk);
smc->sk.sk_data_ready(&smc->sk);
smc->sk.sk_state_change(&smc->sk);
}
/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
else
smc_close_abort(conn);
smc->sk.sk_err = ECONNABORTED;
smc_sk_wake_ups(smc);
if (conn->lgr->is_smcd) {
smc_ism_unset_conn(conn);
if (soft)
tasklet_kill(&conn->rx_tsklet);
else
tasklet_unlock_wait(&conn->rx_tsklet);
} else {
smc_cdc_tx_dismiss_slots(conn);
smc_lgr_unregister_conn(conn);
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
int i;
if (lgr->is_smcd) {
smc_ism_signal_shutdown(lgr);
smcd_unregister_all_dmbs(lgr);
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
} else {
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
if (smc_link_usable(lnk))
wake_up_interruptible_all(&lgr->llc_waiter);
/* terminate link group
* @soft: true if link group shutdown can take its time
* false if immediate link group shutdown is required
*/
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
struct smc_connection *conn;
if (lgr->terminating)
return; /* lgr already terminating */
if (!soft)
cancel_delayed_work_sync(&lgr->free_work);
/* kill remaining link group connections */
read_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
while (node) {
read_unlock_bh(&lgr->conns_lock);
conn = rb_entry(node, struct smc_connection, alert_node);
smc = container_of(conn, struct smc_sock, conn);
sock_hold(&smc->sk); /* sock_put below */
smc_conn_kill(conn, soft);
sock_put(&smc->sk); /* sock_hold above */
read_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
}
read_unlock_bh(&lgr->conns_lock);
smc_lgr_cleanup(lgr);
if (soft)
smc_lgr_schedule_free_work_fast(lgr);
else
smc_lgr_free(lgr);
/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
spinlock_t *lgr_lock;
smc_lgr_list_head(lgr, &lgr_lock);
spin_lock_bh(lgr_lock);
if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
spin_unlock_bh(lgr_lock);
return; /* lgr already terminating */
}
list_del_init(&lgr->list);
spin_unlock_bh(lgr_lock);
schedule_work(&lgr->terminate_work);
/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
struct smc_link_group *lgr, *l;
LIST_HEAD(lgr_free_list);
/* run common cleanup function and build free list */
spin_lock_bh(&dev->lgr_lock);
list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
if ((!peer_gid || lgr->peer_gid == peer_gid) &&
(vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
if (peer_gid) /* peer triggered termination */
lgr->peer_shutdown = 1;
list_move(&lgr->list, &lgr_free_list);
}
}
spin_unlock_bh(&dev->lgr_lock);
/* cancel the regular free workers and actually free lgrs */
list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
list_del_init(&lgr->list);
schedule_work(&lgr->terminate_work);
/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_free_list);
spin_lock_bh(&smcd->lgr_lock);
list_splice_init(&smcd->lgr_list, &lgr_free_list);
list_for_each_entry(lgr, &lgr_free_list, list)
lgr->freeing = 1;
spin_unlock_bh(&smcd->lgr_lock);
list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
list_del_init(&lgr->list);
__smc_lgr_terminate(lgr, false);
}
if (atomic_read(&smcd->lgr_cnt))
wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
/* Called when an SMCR device is removed or the smc module is unloaded.
* If smcibdev is given, all SMCR link groups using this device are terminated.
* If smcibdev is NULL, all SMCR link groups are terminated.
*/
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_free_list);
int i;
spin_lock_bh(&smc_lgr_list.lock);
if (!smcibdev) {
list_splice_init(&smc_lgr_list.list, &lgr_free_list);
list_for_each_entry(lgr, &lgr_free_list, list)
lgr->freeing = 1;
} else {
list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].smcibdev == smcibdev)
smcr_link_down_cond_sched(&lgr->lnk[i]);
}
}
}
spin_unlock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
list_del_init(&lgr->list);
__smc_lgr_terminate(lgr, false);
}
if (smcibdev) {
if (atomic_read(&smcibdev->lnk_cnt))
wait_event(smcibdev->lnks_deleted,
!atomic_read(&smcibdev->lnk_cnt));
} else {
if (atomic_read(&lgr_cnt))
wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
}
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
/* link is up - establish alternate link if applicable */
static void smcr_link_up(struct smc_link_group *lgr,
struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link *link = NULL;
if (list_empty(&lgr->list) ||
lgr->type == SMC_LGR_SYMMETRIC ||
lgr->type == SMC_LGR_ASYMMETRIC_PEER)
return;
if (lgr->role == SMC_SERV) {
/* trigger local add link processing */
link = smc_llc_usable_link(lgr);
if (!link)
return;
/* tbd: call smc_llc_srv_add_link_local(link); */
} else {
/* invite server to start add link processing */
u8 gid[SMC_GID_SIZE];
if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
NULL))
return;
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
/* some other llc task is ongoing */
wait_event_interruptible_timeout(lgr->llc_waiter,
(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
SMC_LLC_WAIT_TIME);
}
if (list_empty(&lgr->list) ||
!smc_ib_port_active(smcibdev, ibport))
return; /* lgr or device no longer active */
link = smc_llc_usable_link(lgr);
if (!link)
return;
smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
NULL, SMC_LLC_REQ);
}
}
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_ib_up_work *ib_work;
struct smc_link_group *lgr, *n;
list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
SMC_MAX_PNETID_LEN) ||
lgr->type == SMC_LGR_SYMMETRIC ||
lgr->type == SMC_LGR_ASYMMETRIC_PEER)
continue;
ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
if (!ib_work)
continue;
INIT_WORK(&ib_work->work, smc_link_up_work);
ib_work->lgr = lgr;
ib_work->smcibdev = smcibdev;
ib_work->ibport = ibport;
schedule_work(&ib_work->work);
}
}
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* link is down - switch connections to alternate link,
* must be called under lgr->llc_conf_mutex lock
*/
static void smcr_link_down(struct smc_link *lnk)
{
struct smc_link_group *lgr = lnk->lgr;
struct smc_link *to_lnk;
int del_link_id;
if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
return;
smc_ib_modify_qp_reset(lnk);
to_lnk = NULL;
/* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */
if (!to_lnk) { /* no backup link available */
smcr_link_clear(lnk);
return;
}
lgr->type = SMC_LGR_SINGLE;
del_link_id = lnk->link_id;
if (lgr->role == SMC_SERV) {
/* trigger local delete link processing */
} else {
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
/* another llc task is ongoing */
mutex_unlock(&lgr->llc_conf_mutex);
wait_event_interruptible_timeout(lgr->llc_waiter,
(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
SMC_LLC_WAIT_TIME);
mutex_lock(&lgr->llc_conf_mutex);
}
smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
SMC_LLC_DEL_LOST_PATH);
}
}
/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
if (smc_link_downing(&lnk->state))
smcr_link_down(lnk);
}
/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
if (smc_link_downing(&lnk->state))
schedule_work(&lnk->link_down_wrk);
}
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link_group *lgr, *n;
int i;
list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
SMC_MAX_PNETID_LEN))
continue; /* lgr is not affected */
if (list_empty(&lgr->list))