netdevice.h 136 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6 7 8 9
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the Interfaces handler.
 *
 * Version:	@(#)dev.h	1.0.10	08/12/93
 *
10
 * Authors:	Ross Biro
Linus Torvalds's avatar
Linus Torvalds committed
11 12 13
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Donald J. Becker, <becker@cesdis.gsfc.nasa.gov>
14
 *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds's avatar
Linus Torvalds committed
15 16 17 18 19 20 21 22 23 24 25 26 27
 *		Bjorn Ekwall. <bj0rn@blox.se>
 *              Pekka Riikonen <priikone@poseidon.pspt.fi>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *		Moved to /usr/include/linux for NET3
 */
#ifndef _LINUX_NETDEVICE_H
#define _LINUX_NETDEVICE_H

Al Viro's avatar
Al Viro committed
28
#include <linux/timer.h>
29
#include <linux/bug.h>
30
#include <linux/delay.h>
Arun Sharma's avatar
Arun Sharma committed
31
#include <linux/atomic.h>
32
#include <linux/prefetch.h>
Linus Torvalds's avatar
Linus Torvalds committed
33 34 35 36
#include <asm/cache.h>
#include <asm/byteorder.h>

#include <linux/percpu.h>
37
#include <linux/rculist.h>
38
#include <linux/dmaengine.h>
39
#include <linux/workqueue.h>
Tom Herbert's avatar
Tom Herbert committed
40
#include <linux/dynamic_queue_limits.h>
Linus Torvalds's avatar
Linus Torvalds committed
41

42
#include <linux/ethtool.h>
43
#include <net/net_namespace.h>
Jeff Kirsher's avatar
Jeff Kirsher committed
44
#ifdef CONFIG_DCB
45 46
#include <net/dcbnl.h>
#endif
47
#include <net/netprio_cgroup.h>
48

49
#include <linux/netdev_features.h>
50
#include <linux/neighbour.h>
51
#include <uapi/linux/netdevice.h>
52
#include <uapi/linux/if_bonding.h>
53
#include <uapi/linux/pkt_cls.h>
54
#include <linux/hashtable.h>
55

56
struct netpoll_info;
57
struct device;
58
struct phy_device;
59 60
struct dsa_switch_tree;

61 62
/* 802.11 specific */
struct wireless_dev;
63 64
/* 802.15.4 specific */
struct wpan_dev;
65
struct mpls_dev;
66 67
/* UDP Tunnel offloads */
struct udp_tunnel_info;
68
struct bpf_prog;
Linus Torvalds's avatar
Linus Torvalds committed
69

70 71
void netdev_set_default_ethtool_ops(struct net_device *dev,
				    const struct ethtool_ops *ops);
72

73 74 75 76
/* Backlog congestion levels */
#define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
#define NET_RX_DROP		1	/* packet dropped */

77 78 79 80 81 82 83 84 85 86 87 88
/*
 * Transmit return codes: transmit return codes originate from three different
 * namespaces:
 *
 * - qdisc return codes
 * - driver transmit return codes
 * - errno values
 *
 * Drivers are allowed to return any one of those in their hard_start_xmit()
 * function. Real network devices commonly used with qdiscs should only return
 * the driver transmit return codes though - when qdiscs are used, the actual
 * transmission happens asynchronously, so the value is not propagated to
89 90
 * higher layers. Virtual network devices transmit synchronously; in this case
 * the driver transmit return codes are consumed by dev_queue_xmit(), and all
91 92 93 94 95
 * others are propagated to higher layers.
 */

/* qdisc ->enqueue() return codes. */
#define NET_XMIT_SUCCESS	0x00
#define NET_XMIT_DROP		0x01	/* skb dropped			*/
#define NET_XMIT_CN		0x02	/* congestion notification	*/
#define NET_XMIT_MASK		0x0f	/* qdisc flags in net/sch_generic.h */
Linus Torvalds's avatar
Linus Torvalds committed
99

100 101 102
/* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It
 * indicates that the device will soon be dropping packets, or already drops
 * some packets of the same priority; prompting us to send less aggressively. */
#define net_xmit_eval(e)	((e) == NET_XMIT_CN ? 0 : (e))
#define net_xmit_errno(e)	((e) != NET_XMIT_CN ? -ENOBUFS : 0)

106
/* Driver transmit return codes */
#define NETDEV_TX_MASK		0xf0
108

109
/* Values a driver's ndo_start_xmit() may return. */
enum netdev_tx {
	__NETDEV_TX_MIN	 = INT_MIN,	/* make sure enum is signed */
	NETDEV_TX_OK	 = 0x00,	/* driver took care of packet */
	NETDEV_TX_BUSY	 = 0x10,	/* driver tx path was busy*/
};
typedef enum netdev_tx netdev_tx_t;

116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
/*
 * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant;
 * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed.
 */
static inline bool dev_xmit_complete(int rc)
{
	/*
	 * The skb was consumed by the driver when:
	 * - transmission succeeded (rc == NETDEV_TX_OK)
	 * - a transmit error occurred (rc < 0)
	 * - queueing to a different device failed (rc & NET_XMIT_MASK)
	 */
	return likely(rc < NET_XMIT_MASK);
}

Linus Torvalds's avatar
Linus Torvalds committed
134
/*
 *	Compute the worst-case header length according to the protocols
 *	used.
 */

#if defined(CONFIG_HYPERV_NET)
# define LL_MAX_HEADER 128
#elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25)
# if defined(CONFIG_MAC80211_MESH)
#  define LL_MAX_HEADER 128
# else
#  define LL_MAX_HEADER 96
# endif
#else
# define LL_MAX_HEADER 32
#endif

/* Tunnels (IPIP, GRE, SIT, IPv6 tunnels) may add up to 48 bytes of
 * encapsulation headroom on top of the link-layer maximum.
 */
#if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \
    !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL)
#define MAX_HEADER LL_MAX_HEADER
#else
#define MAX_HEADER (LL_MAX_HEADER + 48)
#endif

/*
 *	Old network device statistics. Fields are native words
 *	(unsigned long) so they can be read and written atomically.
 */

struct net_device_stats {
	unsigned long	rx_packets;
	unsigned long	tx_packets;
	unsigned long	rx_bytes;
	unsigned long	tx_bytes;
	unsigned long	rx_errors;
	unsigned long	tx_errors;
	unsigned long	rx_dropped;
	unsigned long	tx_dropped;
	unsigned long	multicast;
	unsigned long	collisions;
	unsigned long	rx_length_errors;
	unsigned long	rx_over_errors;
	unsigned long	rx_crc_errors;
	unsigned long	rx_frame_errors;
	unsigned long	rx_fifo_errors;
	unsigned long	rx_missed_errors;
	unsigned long	tx_aborted_errors;
	unsigned long	tx_carrier_errors;
	unsigned long	tx_fifo_errors;
	unsigned long	tx_heartbeat_errors;
	unsigned long	tx_window_errors;
	unsigned long	rx_compressed;
	unsigned long	tx_compressed;
};


#include <linux/cache.h>
#include <linux/skbuff.h>

193
#ifdef CONFIG_RPS
194 195
#include <linux/static_key.h>
extern struct static_key rps_needed;
Eric Dumazet's avatar
Eric Dumazet committed
196
extern struct static_key rfs_needed;
197 198
#endif

Linus Torvalds's avatar
Linus Torvalds committed
199 200 201 202
struct neighbour;
struct neigh_parms;
struct sk_buff;

203 204 205 206
struct netdev_hw_addr {
	struct list_head	list;
	unsigned char		addr[MAX_ADDR_LEN];
	unsigned char		type;
Jiri Pirko's avatar
Jiri Pirko committed
207 208 209 210
#define NETDEV_HW_ADDR_T_LAN		1
#define NETDEV_HW_ADDR_T_SAN		2
#define NETDEV_HW_ADDR_T_SLAVE		3
#define NETDEV_HW_ADDR_T_UNICAST	4
211 212
#define NETDEV_HW_ADDR_T_MULTICAST	5
	bool			global_use;
213
	int			sync_cnt;
214
	int			refcount;
215
	int			synced;
216 217 218
	struct rcu_head		rcu_head;
};

219 220 221 222 223
/* A counted list of struct netdev_hw_addr entries. */
struct netdev_hw_addr_list {
	struct list_head	list;	/* list of netdev_hw_addr, linked via ->list */
	int			count;	/* number of entries on the list */
};

224 225 226 227
/* Accessors and iterator for a struct netdev_hw_addr_list. */
#define netdev_hw_addr_list_count(l) ((l)->count)
#define netdev_hw_addr_list_empty(l) (netdev_hw_addr_list_count(l) == 0)
#define netdev_hw_addr_list_for_each(ha, l) \
	list_for_each_entry(ha, &(l)->list, list)
228

229 230 231 232
/* Convenience wrappers over the device's unicast (uc) and multicast (mc)
 * address lists.
 */
#define netdev_uc_count(dev) netdev_hw_addr_list_count(&(dev)->uc)
#define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc)
#define netdev_for_each_uc_addr(ha, dev) \
	netdev_hw_addr_list_for_each(ha, &(dev)->uc)

#define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc)
#define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc)
#define netdev_for_each_mc_addr(ha, dev) \
	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
238

Eric Dumazet's avatar
Eric Dumazet committed
239
struct hh_cache {
240
	unsigned int	hh_len;
241
	seqlock_t	hh_lock;
Linus Torvalds's avatar
Linus Torvalds committed
242 243 244 245

	/* cached hardware header; allow for machine alignment needs.        */
#define HH_DATA_MOD	16
#define HH_DATA_OFF(__len) \
Jiri Benc's avatar
Jiri Benc committed
246
	(HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1))
Linus Torvalds's avatar
Linus Torvalds committed
247 248 249 250 251
#define HH_DATA_ALIGN(__len) \
	(((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1))
	unsigned long	hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
};

252
/* Reserve HH_DATA_MOD byte-aligned hard_header_len, but at least that much.
 * Alternative is:
 *   dev->hard_header_len ? (dev->hard_header_len +
 *                           (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0
 *
 * We could use other alignment values, but we must maintain the
 * relationship HH alignment <= LL alignment.
 */
#define LL_RESERVED_SPACE(dev) \
	((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
#define LL_RESERVED_SPACE_EXTRA(dev,extra) \
	((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
Linus Torvalds's avatar
Linus Torvalds committed
264

265 266 267
struct header_ops {
	int	(*create) (struct sk_buff *skb, struct net_device *dev,
			   unsigned short type, const void *daddr,
268
			   const void *saddr, unsigned int len);
269
	int	(*parse)(const struct sk_buff *skb, unsigned char *haddr);
270
	int	(*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type);
271 272 273
	void	(*cache_update)(struct hh_cache *hh,
				const struct net_device *dev,
				const unsigned char *haddr);
274
	bool	(*validate)(const char *ll_header, unsigned int len);
275 276
};

Linus Torvalds's avatar
Linus Torvalds committed
277
/* These flag bits are private to the generic network queueing
278
 * layer; they may not be explicitly referenced by any other
Linus Torvalds's avatar
Linus Torvalds committed
279 280 281
 * code.
 */

Eric Dumazet's avatar
Eric Dumazet committed
282
/* Bit numbers used in net_device ->state. */
enum netdev_state_t {
	__LINK_STATE_START,
	__LINK_STATE_PRESENT,
	__LINK_STATE_NOCARRIER,
	__LINK_STATE_LINKWATCH_PENDING,
	__LINK_STATE_DORMANT,
};


/*
 * This structure holds boot-time configured netdevice settings. They
 * are then used in the device probing.
 */
struct netdev_boot_setup {
	char name[IFNAMSIZ];	/* interface name */
	struct ifmap map;	/* boot-time I/O, IRQ, memory settings */
};
#define NETDEV_BOOT_SETUP_MAX 8

301
int __init netdev_boot_setup(char *str);
Linus Torvalds's avatar
Linus Torvalds committed
302

303 304 305 306 307 308 309
/*
 * Structure for NAPI scheduling similar to tasklet but with weighting
 */
struct napi_struct {
	/* The poll_list must only be managed by the entity which
	 * changes the state of the NAPI_STATE_SCHED bit.  This means
	 * whoever atomically sets that bit can add this napi_struct
310
	 * to the per-CPU poll_list, and whoever clears that bit
311 312 313 314 315 316
	 * can remove from the list right before clearing the bit.
	 */
	struct list_head	poll_list;

	unsigned long		state;
	int			weight;
317
	unsigned int		gro_count;
318 319 320 321
	int			(*poll)(struct napi_struct *, int);
#ifdef CONFIG_NETPOLL
	int			poll_owner;
#endif
Herbert Xu's avatar
Herbert Xu committed
322
	struct net_device	*dev;
323
	struct sk_buff		*gro_list;
Herbert Xu's avatar
Herbert Xu committed
324
	struct sk_buff		*skb;
325
	struct hrtimer		timer;
326
	struct list_head	dev_list;
Eliezer Tamir's avatar
Eliezer Tamir committed
327 328
	struct hlist_node	napi_hash_node;
	unsigned int		napi_id;
329 330
};

Eric Dumazet's avatar
Eric Dumazet committed
331
/* Bit numbers used in napi_struct ->state. */
enum {
	NAPI_STATE_SCHED,	/* Poll is scheduled */
	NAPI_STATE_MISSED,	/* reschedule a napi */
	NAPI_STATE_DISABLE,	/* Disable pending */
	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
	NAPI_STATE_HASHED,	/* In NAPI hash (busy polling possible) */
	NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
	NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
};

enum {
Eric Dumazet's avatar
Eric Dumazet committed
342 343 344 345 346 347 348
	NAPIF_STATE_SCHED	 = BIT(NAPI_STATE_SCHED),
	NAPIF_STATE_MISSED	 = BIT(NAPI_STATE_MISSED),
	NAPIF_STATE_DISABLE	 = BIT(NAPI_STATE_DISABLE),
	NAPIF_STATE_NPSVC	 = BIT(NAPI_STATE_NPSVC),
	NAPIF_STATE_HASHED	 = BIT(NAPI_STATE_HASHED),
	NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
	NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
349 350
};

351
/* Outcomes of GRO (generic receive offload) processing for one skb. */
enum gro_result {
	GRO_MERGED,
	GRO_MERGED_FREE,
	GRO_HELD,
	GRO_NORMAL,
	GRO_DROP,
	GRO_CONSUMED,
};
typedef enum gro_result gro_result_t;
360

361 362 363 364 365 366 367
/*
 * enum rx_handler_result - Possible return values for rx_handlers.
 * @RX_HANDLER_CONSUMED: skb was consumed by rx_handler, do not process it
 * further.
 * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in
 * case skb->dev was changed by rx_handler.
 * @RX_HANDLER_EXACT: Force exact delivery, no wildcard.
 * @RX_HANDLER_PASS: Do nothing, pass the skb as if no rx_handler was called.
 *
 * rx_handlers are functions called from inside __netif_receive_skb(), to do
 * special processing of the skb, prior to delivery to protocol handlers.
 *
 * Currently, a net_device can only have a single rx_handler registered. Trying
 * to register a second rx_handler will return -EBUSY.
 *
 * To register a rx_handler on a net_device, use netdev_rx_handler_register().
 * To unregister a rx_handler on a net_device, use
 * netdev_rx_handler_unregister().
 *
 * Upon return, rx_handler is expected to tell __netif_receive_skb() what to
 * do with the skb.
 *
 * If the rx_handler consumed the skb in some way, it should return
 * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for
 * the skb to be delivered in some other way.
 *
 * If the rx_handler changed skb->dev, to divert the skb to another
 * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the
 * new device will be called if it exists.
 *
 * If the rx_handler decides the skb should be ignored, it should return
 * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that
 * are registered on exact device (ptype->dev == skb->dev).
 *
 * If the rx_handler didn't change skb->dev, but wants the skb to be normally
 * delivered, it should return RX_HANDLER_PASS.
 *
 * A device without a registered rx_handler will behave as if rx_handler
 * returned RX_HANDLER_PASS.
 */

enum rx_handler_result {
	RX_HANDLER_CONSUMED,
	RX_HANDLER_ANOTHER,
	RX_HANDLER_EXACT,
	RX_HANDLER_PASS,
};
typedef enum rx_handler_result rx_handler_result_t;
typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
410

411
void __napi_schedule(struct napi_struct *n);
412
void __napi_schedule_irqoff(struct napi_struct *n);
413

414
static inline bool napi_disable_pending(struct napi_struct *n)
415 416 417 418
{
	return test_bit(NAPI_STATE_DISABLE, &n->state);
}

Eric Dumazet's avatar
Eric Dumazet committed
419
bool napi_schedule_prep(struct napi_struct *n);
420 421 422

/**
 *	napi_schedule - schedule NAPI poll
 *	@n: NAPI context
 *
 * Schedule NAPI poll routine to be called if it is not already
 * running.
 */
static inline void napi_schedule(struct napi_struct *n)
{
	if (napi_schedule_prep(n))
		__napi_schedule(n);
}

434 435
/**
 *	napi_schedule_irqoff - schedule NAPI poll
 *	@n: NAPI context
 *
 * Variant of napi_schedule(), assuming hard irqs are masked.
 */
static inline void napi_schedule_irqoff(struct napi_struct *n)
{
	if (napi_schedule_prep(n))
		__napi_schedule_irqoff(n);
}

446
/* Try to reschedule poll. Called by dev->poll() after napi_complete().
 * Returns true if the poll was successfully rescheduled.
 */
static inline bool napi_reschedule(struct napi_struct *napi)
{
	if (napi_schedule_prep(napi)) {
		__napi_schedule(napi);
		return true;
	}
	return false;
}

456
bool napi_complete_done(struct napi_struct *n, int work_done);
457 458
/**
 *	napi_complete - NAPI processing complete
 *	@n: NAPI context
 *
 * Mark NAPI processing as complete.
 * Consider using napi_complete_done() instead.
 * Return false if device should avoid rearming interrupts.
 */
static inline bool napi_complete(struct napi_struct *n)
{
	return napi_complete_done(n, 0);
}
469

Eliezer Tamir's avatar
Eliezer Tamir committed
470 471
/**
 *	napi_hash_del - remove a NAPI from global table
 *	@napi: NAPI context
 *
 * Warning: caller must observe RCU grace period
 * before freeing memory containing @napi, if
 * this function returns true.
 * Note: core networking stack automatically calls it
 * from netif_napi_del().
 * Drivers might want to call this helper to combine all
 * the needed RCU grace periods into a single one.
 */
bool napi_hash_del(struct napi_struct *napi);
Eliezer Tamir's avatar
Eliezer Tamir committed
483

484 485
/**
 *	napi_disable - prevent NAPI from scheduling
 *	@n: NAPI context
 *
 * Stop NAPI from being scheduled on this context.
 * Waits till any outstanding processing completes.
 */
void napi_disable(struct napi_struct *n);
492 493 494

/**
 *	napi_enable - enable NAPI scheduling
495
 *	@n: NAPI context
496 497 498 499 500 501 502
 *
 * Resume NAPI from being scheduled on this context.
 * Must be paired with napi_disable.
 */
static inline void napi_enable(struct napi_struct *n)
{
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
503
	smp_mb__before_atomic();
504
	clear_bit(NAPI_STATE_SCHED, &n->state);
505
	clear_bit(NAPI_STATE_NPSVC, &n->state);
506 507
}

508 509
/**
 *	napi_synchronize - wait until NAPI is not running
510
 *	@n: NAPI context
511 512 513 514 515 516 517
 *
 * Wait until NAPI is done being scheduled on this context.
 * Waits till any outstanding processing completes but
 * does not disable future activations.
 */
static inline void napi_synchronize(const struct napi_struct *n)
{
518 519 520 521 522
	if (IS_ENABLED(CONFIG_SMP))
		while (test_bit(NAPI_STATE_SCHED, &n->state))
			msleep(1);
	else
		barrier();
523 524
}

Eric Dumazet's avatar
Eric Dumazet committed
525
/* Bit numbers used in netdev_queue ->state. */
enum netdev_queue_state_t {
	__QUEUE_STATE_DRV_XOFF,
	__QUEUE_STATE_STACK_XOFF,
	__QUEUE_STATE_FROZEN,
};

#define QUEUE_STATE_DRV_XOFF	(1 << __QUEUE_STATE_DRV_XOFF)
#define QUEUE_STATE_STACK_XOFF	(1 << __QUEUE_STATE_STACK_XOFF)
#define QUEUE_STATE_FROZEN	(1 << __QUEUE_STATE_FROZEN)

#define QUEUE_STATE_ANY_XOFF	(QUEUE_STATE_DRV_XOFF | QUEUE_STATE_STACK_XOFF)
#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \
					QUEUE_STATE_FROZEN)
#define QUEUE_STATE_DRV_XOFF_OR_FROZEN (QUEUE_STATE_DRV_XOFF | \
					QUEUE_STATE_FROZEN)

541 542 543 544 545 546 547 548 549
/*
 * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue.  The
 * netif_tx_* functions below are used to manipulate this flag.  The
 * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit
 * queue independently.  The netif_xmit_*stopped functions below are called
 * to check if the queue has been stopped by the driver or stack (either
 * of the XOFF bits are set in the state).  Drivers should not need to call
 * netif_xmit*stopped functions, they should only be using netif_tx_*.
 */
550

551
struct netdev_queue {
552
/*
553
 * read-mostly part
554
 */
555
	struct net_device	*dev;
556
	struct Qdisc __rcu	*qdisc;
557
	struct Qdisc		*qdisc_sleeping;
558
#ifdef CONFIG_SYSFS
Tom Herbert's avatar
Tom Herbert committed
559 560
	struct kobject		kobj;
#endif
561 562 563
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
	int			numa_node;
#endif
564 565 566 567 568 569
	unsigned long		tx_maxrate;
	/*
	 * Number of TX timeouts for this queue
	 * (/sys/class/net/DEV/Q/trans_timeout)
	 */
	unsigned long		trans_timeout;
570
/*
571
 * write-mostly part
572 573 574
 */
	spinlock_t		_xmit_lock ____cacheline_aligned_in_smp;
	int			xmit_lock_owner;
575
	/*
576
	 * Time (in jiffies) of last Tx
577 578
	 */
	unsigned long		trans_start;
579

Tom Herbert's avatar
Tom Herbert committed
580 581 582 583 584
	unsigned long		state;

#ifdef CONFIG_BQL
	struct dql		dql;
#endif
585
} ____cacheline_aligned_in_smp;
586

587 588 589 590 591
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
{
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
	return q->numa_node;
#else
592
	return NUMA_NO_NODE;
593 594 595 596 597 598 599 600 601 602
#endif
}

/* Record the queue's NUMA node; no-op when XPS+NUMA is compiled out. */
static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node)
{
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
	q->numa_node = node;
#endif
}

Eric Dumazet's avatar
Eric Dumazet committed
603
#ifdef CONFIG_RPS
Tom Herbert's avatar
Tom Herbert committed
604 605 606 607 608 609 610 611 612
/*
 * This structure holds an RPS map which can be of variable length.  The
 * map is an array of CPUs.
 */
struct rps_map {
	unsigned int len;
	struct rcu_head rcu;
	u16 cpus[0];
};
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
Tom Herbert's avatar
Tom Herbert committed
614

Tom Herbert's avatar
Tom Herbert committed
615
/*
 * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
 * tail pointer for that CPU's input queue at the time of last enqueue, and
 * a hardware filter index.
 */
struct rps_dev_flow {
	u16 cpu;
	u16 filter;
	unsigned int last_qtail;
};
#define RPS_NO_FILTER 0xffff
Tom Herbert's avatar
Tom Herbert committed
626 627 628 629 630 631 632 633 634 635

/*
 * The rps_dev_flow_table structure contains a table of flow mappings.
 */
struct rps_dev_flow_table {
	unsigned int mask;
	struct rcu_head rcu;
	struct rps_dev_flow flows[0];
};
#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
    ((_num) * sizeof(struct rps_dev_flow)))
Tom Herbert's avatar
Tom Herbert committed
637 638 639 640

/*
 * The rps_sock_flow_table contains mappings of flows to the last CPU
 * on which they were processed by the application (set in recvmsg).
641 642
 * Each entry is a 32bit value. Upper part is the high-order bits
 * of flow hash, lower part is CPU number.
643
 * rps_cpu_mask is used to partition the space, depending on number of
644 645
 * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
 * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
646
 * meaning we use 32-6=26 bits for the hash.
Tom Herbert's avatar
Tom Herbert committed
647 648
 */
struct rps_sock_flow_table {
649
	u32	mask;
650 651

	u32	ents[0] ____cacheline_aligned_in_smp;
Tom Herbert's avatar
Tom Herbert committed
652
};
653
#define	RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
Tom Herbert's avatar
Tom Herbert committed
654 655 656

#define RPS_NO_CPU 0xffff

657 658 659
extern u32 rps_cpu_mask;
extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;

Tom Herbert's avatar
Tom Herbert committed
660 661 662 663
/* Record in @table that flow @hash was last handled on the current CPU.
 * No-op if @table is NULL or @hash is zero.
 */
static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
					u32 hash)
{
	if (table && hash) {
		unsigned int index = hash & table->mask;
		u32 val = hash & ~rps_cpu_mask;

		/* We only give a hint, preemption can change CPU under us */
		val |= raw_smp_processor_id();

		/* Avoid dirtying the cache line if the value is unchanged. */
		if (table->ents[index] != val)
			table->ents[index] = val;
	}
}

675
#ifdef CONFIG_RFS_ACCEL
676 677
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
			 u16 filter_id);
678
#endif
679
#endif /* CONFIG_RPS */
680

Tom Herbert's avatar
Tom Herbert committed
681 682
/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
683
#ifdef CONFIG_RPS
Eric Dumazet's avatar
Eric Dumazet committed
684 685
	struct rps_map __rcu		*rps_map;
	struct rps_dev_flow_table __rcu	*rps_flow_table;
686
#endif
Eric Dumazet's avatar
Eric Dumazet committed
687
	struct kobject			kobj;
688
	struct net_device		*dev;
Tom Herbert's avatar
Tom Herbert committed
689
} ____cacheline_aligned_in_smp;
690 691 692 693 694 695 696 697 698 699 700

/*
 * RX queue sysfs structures and functions.
 */
struct rx_queue_attribute {
	struct attribute attr;	/* underlying sysfs attribute */
	/* read handler: format the value into buf, return bytes written */
	ssize_t (*show)(struct netdev_rx_queue *queue,
	    struct rx_queue_attribute *attr, char *buf);
	/* write handler: parse buf of length len, return bytes consumed */
	ssize_t (*store)(struct netdev_rx_queue *queue,
	    struct rx_queue_attribute *attr, const char *buf, size_t len);
};
701

Tom Herbert's avatar
Tom Herbert committed
702 703 704 705 706 707 708 709 710 711 712
#ifdef CONFIG_XPS
/*
 * This structure holds an XPS map which can be of variable length.  The
 * map is an array of queues.
 */
struct xps_map {
	unsigned int len;
	unsigned int alloc_len;
	struct rcu_head rcu;
	u16 queues[0];
};
#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16)))
#define XPS_MIN_MAP_ALLOC ((L1_CACHE_ALIGN(offsetof(struct xps_map, queues[1])) \
       - sizeof(struct xps_map)) / sizeof(u16))
Tom Herbert's avatar
Tom Herbert committed
716 717 718 719 720 721

/*
 * This structure holds all XPS maps for device.  Maps are indexed by CPU.
 */
struct xps_dev_maps {
	struct rcu_head rcu;
Eric Dumazet's avatar
Eric Dumazet committed
722
	struct xps_map __rcu *cpu_map[0];
Tom Herbert's avatar
Tom Herbert committed
723
};
724 725
#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +		\
	(nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
Tom Herbert's avatar
Tom Herbert committed
726 727
#endif /* CONFIG_XPS */

728 729 730 731 732 733 734 735
#define TC_MAX_QUEUE	16	/* max traffic classes per device */
#define TC_BITMASK	15
/* HW offloaded queuing disciplines txq count and offset maps */
struct netdev_tc_txq {
	u16 count;	/* number of queues in this traffic class */
	u16 offset;	/* first txq index of this traffic class */
};

736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752
#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
/*
 * This structure is to hold information about the device
 * configured to run FCoE protocol stack.
 * All fields are fixed-size NUL-terminated strings filled in by the driver.
 */
struct netdev_fcoe_hbainfo {
	char	manufacturer[64];
	char	serial_number[64];
	char	hardware_version[64];
	char	driver_version[64];
	char	optionrom_version[64];
	char	firmware_version[64];
	char	model[256];
	char	model_description[256];
};
#endif

753
#define MAX_PHYS_ITEM_ID_LEN 32

/* This structure holds a unique identifier to identify some
 * physical item (port for example) used by a netdevice.
 */
struct netdev_phys_item_id {
	unsigned char id[MAX_PHYS_ITEM_ID_LEN];	/* opaque identifier bytes */
	unsigned char id_len;			/* number of valid bytes in id */
};

/* Two item IDs match iff they have the same length and identical bytes. */
static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
					    struct netdev_phys_item_id *b)
{
	return a->id_len == b->id_len &&
	       memcmp(a->id, b->id, a->id_len) == 0;
}

770 771 772
typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
				       struct sk_buff *skb);

773
/* These structures hold the attributes of qdisc and classifiers
 * that are being passed to the netdevice through the setup_tc op.
 */
enum {
	TC_SETUP_MQPRIO,
	TC_SETUP_CLSU32,
	TC_SETUP_CLSFLOWER,
	TC_SETUP_MATCHALL,
	TC_SETUP_CLSBPF,
};

784 785
struct tc_cls_u32_offload;

786 787 788
/* Argument passed to the ndo setup_tc op; the union member in use is
 * selected by @type (one of the TC_SETUP_* values above).
 */
struct tc_to_netdev {
	unsigned int type;
	union {
		struct tc_cls_u32_offload *cls_u32;
		struct tc_cls_flower_offload *cls_flower;
		struct tc_cls_matchall_offload *cls_mall;
		struct tc_cls_bpf_offload *cls_bpf;
		struct tc_mqprio_qopt *mqprio;
	};
	bool egress_dev;
};

798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815
/* These structures hold the attributes of xdp state that are being passed
 * to the netdevice through the xdp op (see struct netdev_xdp below).
 */
enum xdp_netdev_command {
	/* Set or clear a bpf program used in the earliest stages of packet
	 * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee
	 * is responsible for calling bpf_prog_put on any old progs that are
	 * stored. In case of error, the callee need not release the new prog
	 * reference, but on success it takes ownership and must bpf_prog_put
	 * when it is no longer used.
	 */
	XDP_SETUP_PROG,
	/* Check if a bpf program is set on the device.  The callee should
	 * return true if a program is currently attached and running.
	 */
	XDP_QUERY_PROG,
};

816 817
struct netlink_ext_ack;

818 819 820 821
/* Argument for the ndo xdp op; the union member in use is selected
 * by @command.
 */
struct netdev_xdp {
	enum xdp_netdev_command command;
	union {
		/* XDP_SETUP_PROG */
		struct {
			struct bpf_prog *prog;
			struct netlink_ext_ack *extack;
		};
		/* XDP_QUERY_PROG */
		bool prog_attached;
	};
};
830

831 832 833 834 835 836 837 838 839 840
#ifdef CONFIG_XFRM_OFFLOAD
/* Driver callbacks for XFRM (IPsec) state offload.  Semantics follow the
 * callback names; see the xfrm offload documentation for the full contract.
 */
struct xfrmdev_ops {
	int	(*xdo_dev_state_add) (struct xfrm_state *x);
	void	(*xdo_dev_state_delete) (struct xfrm_state *x);
	void	(*xdo_dev_state_free) (struct xfrm_state *x);
	/* return true if @skb/@x can be handled by the offload path */
	bool	(*xdo_dev_offload_ok) (struct sk_buff *skb,
				       struct xfrm_state *x);
};
#endif

/*
 * This structure defines the management hooks for network devices.
 * The following hooks can be defined; unless noted otherwise, they are
 * optional and can be filled with a null pointer.
 *
 * int (*ndo_init)(struct net_device *dev);
 *     This function is called once when a network device is registered.
 *     The network device can use this for any late stage initialization
 *     or semantic validation. It can fail with an error code which will
 *     be propagated back to register_netdev.
 *
 * void (*ndo_uninit)(struct net_device *dev);
 *     This function is called when device is unregistered or when registration
 *     fails. It is not called if init fails.
 *
 * int (*ndo_open)(struct net_device *dev);
 *     This function is called when a network device transitions to the up
 *     state.
 *
 * int (*ndo_stop)(struct net_device *dev);
 *     This function is called when a network device transitions to the down
 *     state.
 *
 * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb,
 *                               struct net_device *dev);
 *	Called when a packet needs to be transmitted.
 *	Returns NETDEV_TX_OK.  Can return NETDEV_TX_BUSY, but you should stop
 *	the queue before that can happen; it's for obsolete devices and weird
 *	corner cases, but the stack really does a non-trivial amount
 *	of useless work if you return NETDEV_TX_BUSY.
 *	Required; cannot be NULL.
 *
 * netdev_features_t (*ndo_features_check)(struct sk_buff *skb,
 *					   struct net_device *dev
 *					   netdev_features_t features);
 *	Called by core transmit path to determine if device is capable of
 *	performing offload operations on a given packet. This is to give
 *	the device an opportunity to implement any restrictions that cannot
 *	be otherwise expressed by feature flags. The check is called with
 *	the set of features that the stack has calculated and it returns
 *	those the driver believes to be appropriate.
 *
 * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
 *                         void *accel_priv, select_queue_fallback_t fallback);
 *	Called to decide which queue to use when device supports multiple
 *	transmit queues.
 *
 * void (*ndo_change_rx_flags)(struct net_device *dev, int flags);
 *	This function is called to allow device receiver to make
 *	changes to configuration when multicast or promiscuous is enabled.
 *
 * void (*ndo_set_rx_mode)(struct net_device *dev);
 *	This function is called when the device changes address list filtering.
 *	If driver handles unicast address filtering, it should set
 *	IFF_UNICAST_FLT in its priv_flags.
 *
 * int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
 *	This function is called when the Media Access Control address
 *	needs to be changed. If this interface is not defined, the
 *	MAC address can not be changed.
 *
 * int (*ndo_validate_addr)(struct net_device *dev);
 *	Test if Media Access Control address is valid for the device.
 *
 * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
 *	Called when a user requests an ioctl which can't be handled by
 *	the generic interface code. If not defined ioctls return
 *	not supported error code.
 *
 * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map);
 *	Used to set network devices bus interface parameters. This interface
 *	is retained for legacy reasons; new devices should use the bus
 *	interface (PCI) for low level management.
 *
 * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);
 *	Called when a user wants to change the Maximum Transfer Unit
 *	of a device. If not defined, any request to change MTU will
 *	return an error.
 *
 * void (*ndo_tx_timeout)(struct net_device *dev);
 *	Callback used when the transmitter has not made any progress
 *	for dev->watchdog ticks.
 *
 * void (*ndo_get_stats64)(struct net_device *dev,
 *                         struct rtnl_link_stats64 *storage);
 * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 *	Called when a user wants to get the network device usage
 *	statistics. Drivers must do one of the following:
 *	1. Define @ndo_get_stats64 to fill in a zero-initialised
 *	   rtnl_link_stats64 structure passed by the caller.
 *	2. Define @ndo_get_stats to update a net_device_stats structure
 *	   (which should normally be dev->stats) and return a pointer to
 *	   it. The structure may be changed asynchronously only if each
 *	   field is written atomically.
 *	3. Update dev->stats asynchronously and atomically, and define
 *	   neither operation.
 *
 * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id)
 *	Return true if this device supports offload stats of this attr_id.
 *
 * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev,
 *	void *attr_data)
 *	Get statistics for offload operations by attr_id. Write it into the
 *	attr_data pointer.
 *
 * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid);
 *	If device supports VLAN filtering this function is called when a
 *	VLAN id is registered.
 *
 * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid);
 *	If device supports VLAN filtering this function is called when a
 *	VLAN id is unregistered.
 *
 * void (*ndo_poll_controller)(struct net_device *dev);
 *
 *	SR-IOV management functions.
 * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
 * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan,
 *			  u8 qos, __be16 proto);
 * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
 *			  int max_tx_rate);
 * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
 * int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting);
 * int (*ndo_get_vf_config)(struct net_device *dev,
 *			    int vf, struct ifla_vf_info *ivf);
 * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state);
 * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
 *			  struct nlattr *port[]);
 *
 *      Enable or disable the VF ability to query its RSS Redirection Table and
 *      Hash Key. This is needed since on some devices VF share this information
 *      with PF and querying it may introduce a theoretical security risk.
 * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
 * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
 * int (*ndo_setup_tc)(struct net_device *dev, u32 handle,
 *		       __be16 protocol, struct tc_to_netdev *tc);
 *	Called to setup any 'tc' scheduler, classifier or action on @dev.
 *	This is always called from the stack with the rtnl lock held and netif
 *	tx queues stopped. This allows the netdevice to perform queue
 *	management safely.
 *
 *	Fiber Channel over Ethernet (FCoE) offload functions.
 * int (*ndo_fcoe_enable)(struct net_device *dev);
 *	Called when the FCoE protocol stack wants to start using LLD for FCoE
 *	so the underlying device can perform whatever needed configuration or
 *	initialization to support acceleration of FCoE traffic.
 *
 * int (*ndo_fcoe_disable)(struct net_device *dev);
 *	Called when the FCoE protocol stack wants to stop using LLD for FCoE
 *	so the underlying device can perform whatever needed clean-ups to
 *	stop supporting acceleration of FCoE traffic.
 *
 * int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid,
 *			     struct scatterlist *sgl, unsigned int sgc);
 *	Called when the FCoE Initiator wants to initialize an I/O that
 *	is a possible candidate for Direct Data Placement (DDP). The LLD can
 *	perform necessary setup and returns 1 to indicate the device is set up
 *	successfully to perform DDP on this I/O, otherwise this returns 0.
 *
 * int (*ndo_fcoe_ddp_done)(struct net_device *dev,  u16 xid);
 *	Called when the FCoE Initiator/Target is done with the DDPed I/O as
 *	indicated by the FC exchange id 'xid', so the underlying device can
 *	clean up and reuse resources for later DDP requests.
 *
 * int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid,
 *			      struct scatterlist *sgl, unsigned int sgc);
 *	Called when the FCoE Target wants to initialize an I/O that
 *	is a possible candidate for Direct Data Placement (DDP). The LLD can
 *	perform necessary setup and returns 1 to indicate the device is set up
 *	successfully to perform DDP on this I/O, otherwise this returns 0.
 *
 * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev,
 *			       struct netdev_fcoe_hbainfo *hbainfo);
 *	Called when the FCoE Protocol stack wants information on the underlying
 *	device. This information is utilized by the FCoE protocol stack to
 *	register attributes with Fiber Channel management service as per the
 *	FC-GS Fabric Device Management Information(FDMI) specification.
 *
 * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type);
 *	Called when the underlying device wants to override default World Wide
 *	Name (WWN) generation mechanism in FCoE protocol stack to pass its own
 *	World Wide Port Name (WWPN) or World Wide Node Name (WWNN) to the FCoE
 *	protocol stack to use.
 *
 *	RFS acceleration.
 * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb,
 *			    u16 rxq_index, u32 flow_id);
 *	Set hardware filter for RFS.  rxq_index is the target queue index;
 *	flow_id is a flow ID to be passed to rps_may_expire_flow() later.
 *	Return the filter ID on success, or a negative error code.
 *
 *	Slave management functions (for bridge, bonding, etc).
 * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev);
 *	Called to make another netdev an underling.
 *
 * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev);
 *	Called to release previously enslaved netdev.
 *
 *      Feature/offload setting functions.
 * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
 *		netdev_features_t features);
 *	Adjusts the requested feature flags according to device-specific
 *	constraints, and returns the resulting flags. Must not modify
 *	the device state.
 *
 * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features);
 *	Called to update device configuration to new features. Passed
 *	feature set might be less than what was returned by ndo_fix_features()).
 *	Must return >0 or -errno if it changed dev->features itself.
 *
 * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[],
 *		      struct net_device *dev,
 *		      const unsigned char *addr, u16 vid, u16 flags)
 *	Adds an FDB entry to dev for addr.
 * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[],
 *		      struct net_device *dev,
 *		      const unsigned char *addr, u16 vid)
 *	Deletes the FDB entry from dev corresponding to addr.
 * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
 *		       struct net_device *dev, struct net_device *filter_dev,
 *		       int *idx)
 *	Used to add FDB entries to dump requests. Implementers should add
 *	entries to skb and update idx with the number of entries.
 *
 * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh,
 *			     u16 flags)
 * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq,
 *			     struct net_device *dev, u32 filter_mask,
 *			     int nlflags)
 * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh,
 *			     u16 flags);
 *
 * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
 *	Called to change device carrier. Soft-devices (like dummy, team, etc)
 *	which do not represent real hardware may define this to allow their
 *	userspace components to manage their virtual carrier state. Devices
 *	that determine carrier state from physical hardware properties (eg
 *	network cables) or protocol-dependent mechanisms (eg
 *	USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function.
 *
 * int (*ndo_get_phys_port_id)(struct net_device *dev,
 *			       struct netdev_phys_item_id *ppid);
 *	Called to get ID of physical port of this device. If driver does
 *	not implement this, it is assumed that the hw is not able to have
 *	multiple net devices on single physical port.
 *
 * void (*ndo_udp_tunnel_add)(struct net_device *dev,
 *			      struct udp_tunnel_info *ti);
 *	Called by UDP tunnel to notify a driver about the UDP port and socket
 *	address family that a UDP tunnel is listening to. It is called only
 *	when a new port starts listening. The operation is protected by the
 *	RTNL.
 *
 * void (*ndo_udp_tunnel_del)(struct net_device *dev,
 *			      struct udp_tunnel_info *ti);
 *	Called by UDP tunnel to notify the driver about a UDP port and socket
 *	address family that the UDP tunnel is not listening to anymore. The
 *	operation is protected by the RTNL.
 *
 * void* (*ndo_dfwd_add_station)(struct net_device *pdev,
 *				 struct net_device *dev)
 *	Called by upper layer devices to accelerate switching or other
 *	station functionality into hardware. 'pdev' is the lowerdev
 *	to use for the offload and 'dev' is the net device that will
 *	back the offload. Returns a pointer to the private structure
 *	the upper layer will maintain.
 * void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv)
 *	Called by upper layer device to delete the station created
 *	by 'ndo_dfwd_add_station'. 'pdev' is the net device backing
 *	the station and priv is the structure returned by the add
 *	operation.
 * netdev_tx_t (*ndo_dfwd_start_xmit)(struct sk_buff *skb,
 *				      struct net_device *dev,
 *				      void *priv);
 *	Callback to use for xmit over the accelerated station. This
 *	is used in place of ndo_start_xmit on accelerated net
 *	devices.
 * int (*ndo_set_tx_maxrate)(struct net_device *dev,
 *			     int queue_index, u32 maxrate);
 *	Called when a user wants to set a max-rate limitation of specific
 *	TX queue.
 * int (*ndo_get_iflink)(const struct net_device *dev);
 *	Called to get the iflink value of this device.
 * void (*ndo_change_proto_down)(struct net_device *dev,
 *				 bool proto_down);
 *	This function is used to pass protocol port error state information
 *	to the switch driver. The switch driver can react to the proto_down
 *      by doing a phys down on the associated switch port.
 * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb);
 *	This function is used to get egress tunnel information for given skb.
 *	This is useful for retrieving outer tunnel header parameters while
 *	sampling packet.
 * void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom);
 *	This function is used to specify the headroom that the skb must
 *	consider when allocation skb during packet reception. Setting
 *	appropriate rx headroom value allows avoiding skb head copy on
 *	forward. Setting a negative value resets the rx headroom to the
 *	default value.
 * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp);
 *	This function is used to set or query state related to XDP on the
 *	netdevice. See definition of enum xdp_netdev_command for details.
 *
 */
struct net_device_ops {
	int			(*ndo_init)(struct net_device *dev);
	void			(*ndo_uninit)(struct net_device *dev);
	int			(*ndo_open)(struct net_device *dev);
	int			(*ndo_stop)(struct net_device *dev);
1149 1150 1151 1152 1153
	netdev_tx_t		(*ndo_start_xmit)(struct sk_buff *skb,
						  struct net_device *dev);
	netdev_features_t	(*ndo_features_check)(struct sk_buff *skb,
						      struct net_device *dev,
						      netdev_features_t features);
1154
	u16			(*ndo_select_queue)(struct net_device *dev,
1155
						    struct sk_buff *skb,
1156 1157
						    void *accel_priv,
						    select_queue_fallback_t fallback);
1158 1159 1160 1161 1162 1163 1164 1165 1166 1167
	void			(*ndo_change_rx_flags)(struct net_device *dev,
						       int flags);
	void			(*ndo_set_rx_mode)(struct net_device *dev);
	int			(*ndo_set_mac_address)(struct net_device *dev,
						       void *addr);
	int			(*ndo_validate_addr)(struct net_device *dev);
	int			(*ndo_do_ioctl)(struct net_device *dev,
					        struct ifreq *ifr, int cmd);
	int			(*ndo_set_config)(struct net_device *dev,
					          struct ifmap *map);
1168 1169 1170 1171
	int			(*ndo_change_mtu)(struct net_device *dev,
						  int new_mtu);
	int			(*ndo_neigh_setup)(struct net_device *dev,
						   struct neigh_parms *);
1172 1173
	void			(*ndo_tx_timeout) (struct net_device *dev);

1174 1175
	void			(*ndo_get_stats64)(struct net_device *dev,
						   struct rtnl_link_stats64 *storage);
1176
	bool			(*ndo_has_offload_stats)(const struct net_device *dev, int attr_id);
1177 1178 1179
	int			(*ndo_get_offload_stats)(int attr_id,
							 const struct net_device *dev,
							 void *attr_data);
1180 1181
	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);

1182
	int			(*ndo_vlan_rx_add_vid)(struct net_device *dev,
1183
						       __be16 proto, u16 vid);
1184
	int			(*ndo_vlan_rx_kill_vid)(struct net_device *dev,
1185
						        __be16 proto, u16 vid);
1186 1187
#ifdef CONFIG_NET_POLL_CONTROLLER
	void                    (*ndo_poll_controller)(struct net_device *dev);