threads.c 8.77 KB
Newer Older
Andy Grover's avatar
Andy Grover committed
1
/*
2
 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
Andy Grover's avatar
Andy Grover committed
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/random.h>
35
#include <linux/export.h>
Andy Grover's avatar
Andy Grover committed
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64

#include "rds.h"

/*
 * All of connection management is simplified by serializing it through
 * work queues that execute in a connection managing thread.
 *
 * TCP wants to send acks through sendpage() in response to data_ready(),
 * but it needs a process context to do so.
 *
 * The receive paths need to allocate but can't drop packets (!) so we have
 * a thread around to block allocating if the receive fast path sees an
 * allocation failure.
 */

/* Grand Unified Theory of connection life cycle:
 * At any point in time, the connection can be in one of these states:
 * DOWN, CONNECTING, UP, DISCONNECTING, ERROR
 *
 * The following transitions are possible:
 *  ANY		  -> ERROR
 *  UP		  -> DISCONNECTING
 *  ERROR	  -> DISCONNECTING
 *  DISCONNECTING -> DOWN
 *  DOWN	  -> CONNECTING
 *  CONNECTING	  -> UP
 *
 * Transition to state DISCONNECTING/DOWN:
 *  -	Inside the shutdown worker; synchronizes with xmit path
65
 *	through RDS_IN_XMIT, and with connection management callbacks
Andy Grover's avatar
Andy Grover committed
66 67 68 69 70 71
 *	via c_cm_lock.
 *
 *	For receive callbacks, we rely on the underlying transport
 *	(TCP, IB/RDMA) to provide the necessary synchronisation.
 */
struct workqueue_struct *rds_wq;
72
EXPORT_SYMBOL_GPL(rds_wq);
Andy Grover's avatar
Andy Grover committed
73

74
void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
Andy Grover's avatar
Andy Grover committed
75
{
76
	if (!rds_conn_path_transition(cp, curr, RDS_CONN_UP)) {
Andy Grover's avatar
Andy Grover committed
77 78 79
		printk(KERN_WARNING "%s: Cannot transition to state UP, "
				"current state is %d\n",
				__func__,
80
				atomic_read(&cp->cp_state));
81
		rds_conn_path_drop(cp, false);
Andy Grover's avatar
Andy Grover committed
82 83 84
		return;
	}

85 86
	rdsdebug("conn %p for %pI6c to %pI6c complete\n",
		 cp->cp_conn, &cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr);
Andy Grover's avatar
Andy Grover committed
87

88 89
	cp->cp_reconnect_jiffies = 0;
	set_bit(0, &cp->cp_conn->c_map_queued);
90
	rcu_read_lock();
91
	if (!rds_destroy_pending(cp->cp_conn)) {
92 93 94 95
		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
	}
	rcu_read_unlock();
Andy Grover's avatar
Andy Grover committed
96
}
97 98 99 100
EXPORT_SYMBOL_GPL(rds_connect_path_complete);

void rds_connect_complete(struct rds_connection *conn)
{
101
	rds_connect_path_complete(&conn->c_path[0], RDS_CONN_CONNECTING);
102
}
103
EXPORT_SYMBOL_GPL(rds_connect_complete);
Andy Grover's avatar
Andy Grover committed
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122

/*
 * This random exponential backoff is relied on to eventually resolve racing
 * connects.
 *
 * If connect attempts race then both parties drop both connections and come
 * here to wait for a random amount of time before trying again.  Eventually
 * the backoff range will be so much greater than the time it takes to
 * establish a connection that one of the pair will establish the connection
 * before the other's random delay fires.
 *
 * Connection attempts that arrive while a connection is already established
 * are also considered to be racing connects.  This lets a connection from
 * a rebooted machine replace an existing stale connection before the transport
 * notices that the connection has failed.
 *
 * We should *always* start with a random backoff; otherwise a broken connection
 * will always take several iterations to be re-established.
 */
123
/* Schedule a reconnect attempt for @cp using the random exponential
 * backoff described in the comment block above.
 *
 * First attempt after a drop (cp_reconnect_jiffies == 0) retries
 * immediately; subsequent attempts wait a random delay below the
 * current backoff ceiling, which doubles each call up to the sysctl
 * maximum.
 */
void rds_queue_reconnect(struct rds_conn_path *cp)
{
	unsigned long rand;
	struct rds_connection *conn = cp->cp_conn;

	rdsdebug("conn %p for %pI6c to %pI6c reconnect jiffies %lu\n",
		 conn, &conn->c_laddr, &conn->c_faddr,
		 cp->cp_reconnect_jiffies);

	/* let peer with smaller addr initiate reconnect, to avoid duels */
	if (conn->c_trans->t_type == RDS_TRANS_TCP &&
	    rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) >= 0)
		return;

	set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
	if (cp->cp_reconnect_jiffies == 0) {
		/* First retry is immediate; arm the minimum backoff for
		 * the next round.
		 */
		cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
		rcu_read_lock();
		if (!rds_destroy_pending(cp->cp_conn))
			queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
		rcu_read_unlock();
		return;
	}

	get_random_bytes(&rand, sizeof(rand));
	rdsdebug("%lu delay %lu ceil conn %p for %pI6c -> %pI6c\n",
		 rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
		 conn, &conn->c_laddr, &conn->c_faddr);
	/* Skip queueing if this connection is already being destroyed. */
	rcu_read_lock();
	if (!rds_destroy_pending(cp->cp_conn))
		queue_delayed_work(rds_wq, &cp->cp_conn_w,
				   rand % cp->cp_reconnect_jiffies);
	rcu_read_unlock();

	/* Double the backoff ceiling, capped at the sysctl maximum. */
	cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
					rds_sysctl_reconnect_max_jiffies);
}

void rds_connect_worker(struct work_struct *work)
{
163 164 165 166
	struct rds_conn_path *cp = container_of(work,
						struct rds_conn_path,
						cp_conn_w.work);
	struct rds_connection *conn = cp->cp_conn;
Andy Grover's avatar
Andy Grover committed
167 168
	int ret;

169
	if (cp->cp_index > 0 &&
170
	    rds_addr_cmp(&cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr) >= 0)
171
		return;
172
	clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
173 174 175
	ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
	if (ret) {
		ret = conn->c_trans->conn_path_connect(cp);
176 177
		rdsdebug("conn %p for %pI6c to %pI6c dispatched, ret %d\n",
			 conn, &conn->c_laddr, &conn->c_faddr, ret);
Andy Grover's avatar
Andy Grover committed
178 179

		if (ret) {
180 181 182 183
			if (rds_conn_path_transition(cp,
						     RDS_CONN_CONNECTING,
						     RDS_CONN_DOWN))
				rds_queue_reconnect(cp);
Andy Grover's avatar
Andy Grover committed
184
			else
185
				rds_conn_path_error(cp, "connect failed\n");
Andy Grover's avatar
Andy Grover committed
186 187 188 189 190 191
		}
	}
}

void rds_send_worker(struct work_struct *work)
{
192 193 194
	struct rds_conn_path *cp = container_of(work,
						struct rds_conn_path,
						cp_send_w.work);
Andy Grover's avatar
Andy Grover committed
195 196
	int ret;

197 198
	if (rds_conn_path_state(cp) == RDS_CONN_UP) {
		clear_bit(RDS_LL_SEND_FULL, &cp->cp_flags);
199
		ret = rds_send_xmit(cp);
200
		cond_resched();
201
		rdsdebug("conn %p ret %d\n", cp->cp_conn, ret);
Andy Grover's avatar
Andy Grover committed
202 203 204
		switch (ret) {
		case -EAGAIN:
			rds_stats_inc(s_send_immediate_retry);
205
			queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
Andy Grover's avatar
Andy Grover committed
206 207 208
			break;
		case -ENOMEM:
			rds_stats_inc(s_send_delayed_retry);
209
			queue_delayed_work(rds_wq, &cp->cp_send_w, 2);
Andy Grover's avatar
Andy Grover committed
210 211 212 213 214 215 216 217
		default:
			break;
		}
	}
}

void rds_recv_worker(struct work_struct *work)
{
218 219 220
	struct rds_conn_path *cp = container_of(work,
						struct rds_conn_path,
						cp_recv_w.work);
Andy Grover's avatar
Andy Grover committed
221 222
	int ret;

223
	if (rds_conn_path_state(cp) == RDS_CONN_UP) {
224
		ret = cp->cp_conn->c_trans->recv_path(cp);
225
		rdsdebug("conn %p ret %d\n", cp->cp_conn, ret);
Andy Grover's avatar
Andy Grover committed
226 227 228
		switch (ret) {
		case -EAGAIN:
			rds_stats_inc(s_recv_immediate_retry);
229
			queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
Andy Grover's avatar
Andy Grover committed
230 231 232
			break;
		case -ENOMEM:
			rds_stats_inc(s_recv_delayed_retry);
233
			queue_delayed_work(rds_wq, &cp->cp_recv_w, 2);
Andy Grover's avatar
Andy Grover committed
234 235 236 237 238 239
		default:
			break;
		}
	}
}

240 241
void rds_shutdown_worker(struct work_struct *work)
{
242 243 244
	struct rds_conn_path *cp = container_of(work,
						struct rds_conn_path,
						cp_down_w);
245

246
	rds_conn_shutdown(cp);
247 248
}

Andy Grover's avatar
Andy Grover committed
249 250 251 252 253
/* Destroy the krdsd workqueue created by rds_threads_init(). */
void rds_threads_exit(void)
{
	destroy_workqueue(rds_wq);
}

254
int rds_threads_init(void)
Andy Grover's avatar
Andy Grover committed
255
{
256
	rds_wq = create_singlethread_workqueue("krdsd");
257
	if (!rds_wq)
Andy Grover's avatar
Andy Grover committed
258 259 260 261
		return -ENOMEM;

	return 0;
}
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308

/* Compare two IPv6 addresses.  Return 0 if the two addresses are equal.
 * Return 1 if the first is greater.  Return -1 if the second is greater.
 */
int rds_addr_cmp(const struct in6_addr *addr1,
		 const struct in6_addr *addr2)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
	const __be64 *qw1 = (const __be64 *)addr1;
	const __be64 *qw2 = (const __be64 *)addr2;
	int i;

	/* Walk the address as two big-endian 64-bit words; only convert
	 * byte order when a word pair actually differs.
	 */
	for (i = 0; i < 2; i++) {
		if (qw1[i] != qw2[i])
			return be64_to_cpu(qw1[i]) < be64_to_cpu(qw2[i]) ?
			       -1 : 1;
	}
	return 0;
#else
	int i;

	/* Word-at-a-time compare; raw equality needs no byte swapping. */
	for (i = 0; i < 4; i++) {
		if (addr1->s6_addr32[i] != addr2->s6_addr32[i]) {
			u32 h1 = ntohl(addr1->s6_addr32[i]);
			u32 h2 = ntohl(addr2->s6_addr32[i]);

			return h1 < h2 ? -1 : 1;
		}
	}
	return 0;
#endif
}
EXPORT_SYMBOL_GPL(rds_addr_cmp);