/*
 * net/tipc/socket.c: TIPC socket API
 *
 * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
 * All rights reserved.
 *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <linux/rhashtable.h>
#include "core.h"
#include "name_table.h"
#include "node.h"
#include "link.h"
#include "name_distr.h"
#include "socket.h"
#include "bcast.h"
#include "netlink.h"
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */
#define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff
#define TIPC_MIN_PORT 1
enum {
TIPC_LISTEN = TCP_LISTEN,
TIPC_ESTABLISHED = TCP_ESTABLISHED,
TIPC_OPEN = TCP_CLOSE,
TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
	TIPC_CONNECTING = TCP_SYN_SENT,
};
/**
* struct tipc_sock - TIPC socket structure
* @sk: socket - interacts with 'port' and with user via the socket API
* @conn_type: TIPC type used when connection was established
* @conn_instance: TIPC instance used when connection was established
* @published: non-zero if port has one or more associated names
* @max_pkt: maximum packet size "hint" used when building messages sent by port
* @portid: unique port identity in TIPC socket hash table
* @phdr: preformatted message header used when sending messages
* @publications: list of publications for port
* @pub_count: total # of publications port has made during its lifetime
 * @probe_unacked: probe has not received ack yet
* @conn_timeout: the time we can wait for an unresponded setup request
* @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
* @link_cong: non-zero if owner must sleep because of link congestion
 * @snt_unacked: # messages sent by socket, and not yet acked by peer
* @rcv_unacked: # messages read by user, but not yet acked back to peer
* @peer: 'connected' peer for dgram/rdm
* @node: hash table node
* @rcu: rcu struct for tipc_sock
*/
struct tipc_sock {
struct sock sk;
u32 conn_type;
u32 conn_instance;
int published;
	u32 max_pkt;
	u32 portid;
struct tipc_msg phdr;
struct list_head sock_list;
struct list_head publications;
u32 pub_count;
uint conn_timeout;
atomic_t dupl_rcvcnt;
	bool probe_unacked;
	bool link_cong;
	u16 snt_unacked;
	u16 snd_win;
	u16 peer_caps;
	u16 rcv_unacked;
	u16 rcv_win;
struct sockaddr_tipc peer;
struct rhash_head node;
	struct rcu_head rcu;
};
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
static void tipc_sk_timeout(unsigned long data);
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq *seq);
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq *seq);
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
static int __tipc_send_stream(struct socket *sock, struct msghdr *m,
size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
static const struct rhashtable_params tsk_rht_params;
static u32 tsk_own_node(struct tipc_sock *tsk)
{
return msg_prevnode(&tsk->phdr);
}
static u32 tsk_peer_node(struct tipc_sock *tsk)
{
	return msg_destnode(&tsk->phdr);
}

static u32 tsk_peer_port(struct tipc_sock *tsk)
{
	return msg_destport(&tsk->phdr);
}
static bool tsk_unreliable(struct tipc_sock *tsk)
{
	return msg_src_droppable(&tsk->phdr) != 0;
}

static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
{
	msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0);
}

static bool tsk_unreturnable(struct tipc_sock *tsk)
{
	return msg_dest_droppable(&tsk->phdr) != 0;
}

static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
{
	msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0);
}

static int tsk_importance(struct tipc_sock *tsk)
{
	return msg_importance(&tsk->phdr);
}
static int tsk_set_importance(struct tipc_sock *tsk, int imp)
{
if (imp > TIPC_CRITICAL_IMPORTANCE)
return -EINVAL;
	msg_set_importance(&tsk->phdr, (u32)imp);
	return 0;
}
static struct tipc_sock *tipc_sk(const struct sock *sk)
{
return container_of(sk, struct tipc_sock, sk);
}
static bool tsk_conn_cong(struct tipc_sock *tsk)
{
	return tsk->snt_unacked > tsk->snd_win;
}
/* tsk_adv_blocks(): translate a buffer size in bytes to number of
* advertisable blocks, taking into account the ratio truesize(len)/len
* We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
*/
static u16 tsk_adv_blocks(int len)
{
return len / FLOWCTL_BLK_SZ / 4;
}
/* tsk_inc(): increment counter for sent or received data
* - If block based flow control is not supported by peer we
* fall back to message based ditto, incrementing the counter
*/
static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
{
if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
return ((msglen / FLOWCTL_BLK_SZ) + 1);
	return 1;
}
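
/* Worked example (illustrative): assuming FLOWCTL_BLK_SZ == 1024, a socket
 * with a 2 MiB receive buffer advertises tsk_adv_blocks(2097152) =
 * 2097152 / 1024 / 4 = 512 blocks, while a received 5000 byte message
 * consumes tsk_inc() = (5000 / 1024) + 1 = 5 of them. If the peer lacks
 * TIPC_BLOCK_FLOWCTL, every message counts as exactly one unit instead.
 */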
/**
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
 *
 * Caller must hold socket lock
 */
static void tsk_advance_rx_queue(struct sock *sk)
{
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
}
/* tipc_sk_respond() : send response message back to sender
*/
static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
{
u32 selector;
u32 dnode;
u32 onode = tipc_own_addr(sock_net(sk));
if (!tipc_msg_reverse(onode, &skb, err))
return;
dnode = msg_destnode(buf_msg(skb));
selector = msg_origport(buf_msg(skb));
tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
}
/**
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
 *
 * Caller must hold socket lock
 */
static void tsk_rej_rx_queue(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
		tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
}
static bool tipc_sk_connected(struct sock *sk)
{
	return sk->sk_state == TIPC_ESTABLISHED;
}

/* tipc_sk_type_connectionless - check if the socket is datagram socket
* @sk: socket
*
 * Returns true if connectionless, false otherwise
*/
static bool tipc_sk_type_connectionless(struct sock *sk)
{
return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM;
}
/* tsk_peer_msg - verify if message was sent by connected port's peer
*
* Handles cases where the node's network address has changed from
* the default of <0.0.0> to its configured setting.
*/
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
	u32 peer_port = tsk_peer_port(tsk);
	u32 orig_node;
	u32 peer_node;
if (unlikely(!tipc_sk_connected(sk)))
return false;
if (unlikely(msg_origport(msg) != peer_port))
return false;
orig_node = msg_orignode(msg);
peer_node = tsk_peer_node(tsk);
if (likely(orig_node == peer_node))
return true;
	if (!orig_node && (peer_node == tn->own_addr))
		return true;
	if (!peer_node && (orig_node == tn->own_addr))
return true;
return false;
}
/* tipc_set_sk_state - set the sk_state of the socket
* @sk: socket
*
* Caller must hold socket lock
*
* Returns 0 on success, errno otherwise
*/
static int tipc_set_sk_state(struct sock *sk, int state)
{
int oldsk_state = sk->sk_state;
int res = -EINVAL;
switch (state) {
case TIPC_OPEN:
res = 0;
break;
case TIPC_CONNECTING:
if (oldsk_state == TIPC_OPEN)
res = 0;
break;
case TIPC_ESTABLISHED:
if (oldsk_state == TIPC_CONNECTING ||
oldsk_state == TIPC_OPEN)
res = 0;
break;
case TIPC_DISCONNECTING:
if (oldsk_state == TIPC_CONNECTING ||
oldsk_state == TIPC_ESTABLISHED)
res = 0;
break;
}
if (!res)
sk->sk_state = state;
return res;
}
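
/* Summary of the transitions permitted above: any state -> TIPC_OPEN;
 * TIPC_OPEN -> TIPC_CONNECTING; {TIPC_OPEN, TIPC_CONNECTING} ->
 * TIPC_ESTABLISHED; {TIPC_CONNECTING, TIPC_ESTABLISHED} ->
 * TIPC_DISCONNECTING. All other transitions are rejected with -EINVAL.
 */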
static int tipc_sk_sock_err(struct socket *sock, long *timeout)
{
struct sock *sk = sock->sk;
int err = sock_error(sk);
int typ = sock->type;
if (err)
return err;
if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) {
if (sk->sk_state == TIPC_DISCONNECTING)
return -EPIPE;
else if (!tipc_sk_connected(sk))
return -ENOTCONN;
}
if (!*timeout)
return -EAGAIN;
if (signal_pending(current))
return sock_intr_errno(*timeout);
return 0;
}
#define tipc_wait_for_cond(sock_, timeout_, condition_) \
({ \
int rc_ = 0; \
int done_ = 0; \
\
while (!(condition_) && !done_) { \
		struct sock *sk_ = sock_->sk;				\
DEFINE_WAIT_FUNC(wait_, woken_wake_function); \
\
rc_ = tipc_sk_sock_err(sock_, timeout_); \
if (rc_) \
break; \
prepare_to_wait(sk_sleep(sk_), &wait_, \
TASK_INTERRUPTIBLE); \
done_ = sk_wait_event(sk_, timeout_, \
(condition_), &wait_); \
remove_wait_queue(sk_sleep(sk_), &wait_); \
} \
rc_; \
})
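
/* Usage sketch: callers wrap a blocking condition in the macro and let it
 * handle waiting, wakeup and error propagation, e.g. as done on link
 * congestion in tipc_sendmcast() and __tipc_sendmsg() below:
 *
 *	rc = tipc_wait_for_cond(sock, &timeo, !tsk->link_cong);
 */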
/**
 * tipc_sk_create - create a TIPC socket
 * @net: network namespace (must be default network)
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
 * @kern: caused by kernel or by userspace?
 *
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
{
	struct tipc_net *tn;
	const struct proto_ops *ops;
	struct sock *sk;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;

	/* Validate arguments */
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_STREAM:
		ops = &stream_ops;
		break;
	case SOCK_SEQPACKET:
		ops = &packet_ops;
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
		ops = &msg_ops;
		break;
	default:
		return -EPROTOTYPE;
	}

	/* Allocate socket's protocol area */
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
	if (sk == NULL)
		return -ENOMEM;

	tsk = tipc_sk(sk);
tsk->max_pkt = MAX_PKT_DEFAULT;
INIT_LIST_HEAD(&tsk->publications);
msg = &tsk->phdr;
tn = net_generic(sock_net(sk), tipc_net_id);
tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
NAMED_H_SIZE, 0);
/* Finish initializing socket data structures */
sock->ops = ops;
sock_init_data(sock, sk);
tipc_set_sk_state(sk, TIPC_OPEN);
if (tipc_sk_insert(tsk)) {
pr_warn("Socket create failed; port number exhausted\n");
return -EINVAL;
}
msg_set_origport(msg, tsk->portid);
setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
sk->sk_backlog_rcv = tipc_backlog_rcv;
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
sk->sk_destruct = tipc_sock_destruct;
tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
atomic_set(&tsk->dupl_rcvcnt, 0);
/* Start out with safe limits until we receive an advertised window */
tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
tsk->rcv_win = tsk->snd_win;
	if (tipc_sk_type_connectionless(sk)) {
		tsk_set_unreturnable(tsk, true);
		if (sock->type == SOCK_DGRAM)
			tsk_set_unreliable(tsk, true);
	}

	return 0;
}
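
/* User-space sketch (illustrative, not part of this file): the socket
 * types mapped onto the three proto_ops tables above, using the standard
 * AF_TIPC API from <linux/tipc.h>:
 *
 *	int rdm = socket(AF_TIPC, SOCK_RDM, 0);        -> msg_ops
 *	int pkt = socket(AF_TIPC, SOCK_SEQPACKET, 0);  -> packet_ops
 *	int str = socket(AF_TIPC, SOCK_STREAM, 0);     -> stream_ops
 */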
static void tipc_sk_callback(struct rcu_head *head)
{
struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);
sock_put(&tsk->sk);
}
/* Caller should hold socket lock for the socket. */
static void __tipc_shutdown(struct socket *sock, int error)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
struct net *net = sock_net(sk);
u32 dnode = tsk_peer_node(tsk);
struct sk_buff *skb;
/* Reject all unreceived messages, except on an active connection
* (which disconnects locally & sends a 'FIN+' to peer).
*/
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (TIPC_SKB_CB(skb)->bytes_read) {
			kfree_skb(skb);
			continue;
		}
		if (!tipc_sk_type_connectionless(sk) &&
sk->sk_state != TIPC_DISCONNECTING) {
tipc_set_sk_state(sk, TIPC_DISCONNECTING);
tipc_node_remove_conn(net, dnode, tsk->portid);
}
		tipc_sk_respond(sk, skb, error);
	}

	if (tipc_sk_type_connectionless(sk))
		return;
if (sk->sk_state != TIPC_DISCONNECTING) {
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
tsk_own_node(tsk), tsk_peer_port(tsk),
tsk->portid, error);
		if (skb)
			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
		tipc_node_remove_conn(net, dnode, tsk->portid);
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
	}
}
/**
 * tipc_release - destroy a TIPC socket
* @sock: socket to destroy
*
* This routine cleans up any messages that are still queued on the socket.
* For DGRAM and RDM socket types, all queued messages are rejected.
* For SEQPACKET and STREAM socket types, the first message is rejected
* and any others are discarded. (If the first message on a STREAM socket
* is partially-read, it is discarded and the next one is rejected instead.)
* NOTE: Rejected messages are not necessarily returned to the sender! They
* are returned or discarded according to the "destination droppable" setting
* specified for the message by the sender.
*
* Returns 0 on success, errno otherwise
*/
static int tipc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk;

	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
		return 0;

	tsk = tipc_sk(sk);
	lock_sock(sk);
__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
sk->sk_shutdown = SHUTDOWN_MASK;
tipc_sk_withdraw(tsk, 0, NULL);
sk_stop_timer(sk, &sk->sk_timer);
tipc_sk_remove(tsk);
/* Reject any messages that accumulated in backlog queue */
	release_sock(sk);

	call_rcu(&tsk->rcu, tipc_sk_callback);
	sock->sk = NULL;

	return 0;
}
/**
 * tipc_bind - associate or disassociate TIPC name(s) with a socket
* @sock: socket structure
* @uaddr: socket address describing name(s) and desired operation
* @uaddr_len: size of socket address data structure
 *
 * Name and name sequence binding is indicated using a positive scope value;
* a negative scope value unbinds the specified name. Specifying no name
* (i.e. a socket address length of 0) unbinds all names from the socket.
*
* NOTE: This routine doesn't need to take the socket lock since it doesn't
 * access any non-constant socket information.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
	struct tipc_sock *tsk = tipc_sk(sk);
	int res = -EINVAL;

	lock_sock(sk);
	if (unlikely(!uaddr_len)) {
		res = tipc_sk_withdraw(tsk, 0, NULL);
		goto exit;
	}
if (uaddr_len < sizeof(struct sockaddr_tipc)) {
res = -EINVAL;
goto exit;
}
if (addr->family != AF_TIPC) {
res = -EAFNOSUPPORT;
goto exit;
}
if (addr->addrtype == TIPC_ADDR_NAME)
addr->addr.nameseq.upper = addr->addr.nameseq.lower;
else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
res = -EAFNOSUPPORT;
goto exit;
}
if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
(addr->addr.nameseq.type != TIPC_TOP_SRV) &&
(addr->addr.nameseq.type != TIPC_CFG_SRV)) {
res = -EACCES;
goto exit;
}
	res = (addr->scope > 0) ?
		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
release_sock(sk);
	return res;
}
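
/* User-space sketch (illustrative, type value chosen arbitrarily):
 * publishing a name sequence with a positive scope, then withdrawing it
 * by binding again with the scope negated, as described above:
 *
 *	struct sockaddr_tipc name = {
 *		.family = AF_TIPC,
 *		.addrtype = TIPC_ADDR_NAMESEQ,
 *		.scope = TIPC_CLUSTER_SCOPE,
 *		.addr.nameseq = { .type = 1000, .lower = 0, .upper = 99 },
 *	};
 *	bind(sd, (struct sockaddr *)&name, sizeof(name));	(publish)
 *	name.scope = -TIPC_CLUSTER_SCOPE;
 *	bind(sd, (struct sockaddr *)&name, sizeof(name));	(withdraw)
 */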
/**
 * tipc_getname - get port ID of socket or peer socket
* @sock: socket structure
* @uaddr: area for returned socket address
* @uaddr_len: area for returned length of socket address
* @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
* NOTE: This routine doesn't need to take the socket lock since it only
* accesses socket information that is unchanging (or which changes in
 * a completely predictable manner).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
int *uaddr_len, int peer)
{
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
	memset(addr, 0, sizeof(*addr));
	if (peer) {
		if ((!tipc_sk_connected(sk)) &&
		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
			return -ENOTCONN;
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
	} else {
		addr->addr.id.ref = tsk->portid;
		addr->addr.id.node = tn->own_addr;
	}
*uaddr_len = sizeof(*addr);
addr->addrtype = TIPC_ADDR_ID;
addr->family = AF_TIPC;
addr->scope = 0;
	addr->addr.name.domain = 0;

	return 0;
}
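
/* User-space sketch (illustrative): reading the socket's own port ID;
 * getpeername() returns the peer's ID in the same TIPC_ADDR_ID format:
 *
 *	struct sockaddr_tipc id;
 *	socklen_t len = sizeof(id);
 *	getsockname(sd, (struct sockaddr *)&id, &len);
 *	(id.addr.id.ref and id.addr.id.node now hold the port identity)
 */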
/**
 * tipc_poll - read and possibly block on pollmask
* @file: file structure associated with the socket
* @sock: socket for which to calculate the poll bits
* @wait: ???
*
* Returns pollmask value
*
* COMMENTARY:
* It appears that the usual socket locking mechanisms are not useful here
* since the pollmask info is potentially out-of-date the moment this routine
* exits. TCP and other protocols seem to rely on higher level poll routines
* to handle any preventable race conditions, so TIPC will do the same ...
*
* IMPORTANT: The fact that a read or write operation is indicated does NOT
* imply that the operation will succeed, merely that it should be performed
 * and will not block.
 */
static unsigned int tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
u32 mask = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
switch (sk->sk_state) {
case TIPC_ESTABLISHED:
if (!tsk->link_cong && !tsk_conn_cong(tsk))
mask |= POLLOUT;
/* fall thru' */
case TIPC_LISTEN:
case TIPC_CONNECTING:
if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= (POLLIN | POLLRDNORM);
break;
case TIPC_OPEN:
if (!tsk->link_cong)
mask |= POLLOUT;
if (tipc_sk_type_connectionless(sk) &&
(!skb_queue_empty(&sk->sk_receive_queue)))
mask |= (POLLIN | POLLRDNORM);
break;
case TIPC_DISCONNECTING:
mask = (POLLIN | POLLRDNORM | POLLHUP);
		break;
	}

	return mask;
}
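
/* User-space sketch (illustrative): waiting for readability; POLLHUP and
 * POLLRDHUP are reported on shutdown exactly as computed above:
 *
 *	struct pollfd pfd = { .fd = sd, .events = POLLIN };
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		recv(sd, buf, sizeof(buf), 0);
 */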
/**
* tipc_sendmcast - send multicast message
* @sock: socket structure
* @seq: destination address
* @dsz: total length of message data
* @timeo: timeout to wait for wakeup
*
* Called from function tipc_sendmsg(), which has done all sanity checks
* Returns the number of bytes sent on success, or errno
*/
static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
struct msghdr *msg, size_t dsz, long timeo)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
struct net *net = sock_net(sk);
	struct tipc_msg *mhdr = &tsk->phdr;
	struct sk_buff_head pktchain;
	struct iov_iter save = msg->msg_iter;
	uint mtu;
	int rc;
if (!timeo && tsk->link_cong)
return -ELINKCONG;
msg_set_type(mhdr, TIPC_MCAST_MSG);
msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE);
msg_set_destport(mhdr, 0);
msg_set_destnode(mhdr, 0);
msg_set_nametype(mhdr, seq->type);
msg_set_namelower(mhdr, seq->lower);
msg_set_nameupper(mhdr, seq->upper);
msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
	skb_queue_head_init(&pktchain);

new_mtu:
	mtu = tipc_bcast_get_mtu(net);
rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
if (unlikely(rc < 0))
return rc;
do {
rc = tipc_bcast_xmit(net, &pktchain);
if (likely(!rc))
return dsz;
if (rc == -ELINKCONG) {
tsk->link_cong = 1;
rc = tipc_wait_for_cond(sock, &timeo, !tsk->link_cong);
if (!rc)
continue;
		}
		__skb_queue_purge(&pktchain);
		if (rc == -EMSGSIZE) {
			msg->msg_iter = save;
			goto new_mtu;
		}
		break;
	} while (1);

	return rc;
}
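
/* User-space sketch (illustrative, type value chosen arbitrarily): a
 * multicast send reaching every socket bound within the name sequence:
 *
 *	struct sockaddr_tipc maddr = {
 *		.family = AF_TIPC,
 *		.addrtype = TIPC_ADDR_MCAST,
 *		.addr.nameseq = { .type = 1000, .lower = 0, .upper = 99 },
 *	};
 *	sendto(sd, buf, len, 0, (struct sockaddr *)&maddr, sizeof(maddr));
 */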
/**
* tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
* @arrvq: queue with arriving messages, to be cloned after destination lookup
* @inputq: queue with cloned messages, delivered to socket after dest lookup
*
 * Multi-threaded: parallel calls with reference to same queues may occur
 */
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
{
	struct tipc_msg *msg;
	struct list_head dports;
	u32 portid;
	u32 scope = TIPC_CLUSTER_SCOPE;
	struct sk_buff_head tmpq;
	uint hsz;
	struct sk_buff *skb, *_skb;
__skb_queue_head_init(&tmpq);
INIT_LIST_HEAD(&dports);
skb = tipc_skb_peek(arrvq, &inputq->lock);
for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
msg = buf_msg(skb);
hsz = skb_headroom(skb) + msg_hdr_sz(msg);
if (in_own_node(net, msg_orignode(msg)))
scope = TIPC_NODE_SCOPE;
/* Create destination port list and message clones: */
tipc_nametbl_mc_translate(net,
msg_nametype(msg), msg_namelower(msg),
msg_nameupper(msg), scope, &dports);
portid = u32_pop(&dports);
for (; portid; portid = u32_pop(&dports)) {
_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
if (_skb) {
msg_set_destport(buf_msg(_skb), portid);
__skb_queue_tail(&tmpq, _skb);
continue;
}
pr_warn("Failed to clone mcast rcv buffer\n");
/* Append to inputq if not already done by other thread */
spin_lock_bh(&inputq->lock);
if (skb_peek(arrvq) == skb) {
skb_queue_splice_tail_init(&tmpq, inputq);
kfree_skb(__skb_dequeue(arrvq));
}
spin_unlock_bh(&inputq->lock);
__skb_queue_purge(&tmpq);
		kfree_skb(skb);
	}
	tipc_sk_rcv(net, inputq);
}
/**
* tipc_sk_proto_rcv - receive a connection mng protocol message
* @tsk: receiving socket
 * @skb: pointer to message buffer.
 */
static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
			      struct sk_buff_head *xmitq)
{
	struct sock *sk = &tsk->sk;
	u32 onode = tsk_own_node(tsk);
	struct tipc_msg *hdr = buf_msg(skb);
	int mtyp = msg_type(hdr);
	bool conn_cong;

	/* Ignore if connection cannot be validated: */
	if (!tsk_peer_msg(tsk, hdr))
		goto exit;

	tsk->probe_unacked = false;
if (mtyp == CONN_PROBE) {
msg_set_type(hdr, CONN_PROBE_REPLY);
if (tipc_msg_reverse(onode, &skb, TIPC_OK))
__skb_queue_tail(xmitq, skb);
return;
} else if (mtyp == CONN_ACK) {
conn_cong = tsk_conn_cong(tsk);
tsk->snt_unacked -= msg_conn_ack(hdr);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
tsk->snd_win = msg_adv_win(hdr);
		if (conn_cong)
			sk->sk_write_space(sk);
	} else if (mtyp != CONN_PROBE_REPLY) {
		pr_warn("Received unknown CONN_PROTO msg\n");
	}
exit:
	kfree_skb(skb);
}
/**
 * tipc_sendmsg - send message in connectionless manner
 * @sock: socket structure
 * @m: message to send
 * @dsz: amount of user data to be sent
 *
 * Message must have a destination specified explicitly.
 * Used for SOCK_RDM and SOCK_DGRAM messages,
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
 *
* Returns the number of bytes sent on success, or errno otherwise
*/
static int tipc_sendmsg(struct socket *sock,
struct msghdr *m, size_t dsz)
{
struct sock *sk = sock->sk;
int ret;
lock_sock(sk);
ret = __tipc_sendmsg(sock, m, dsz);
release_sock(sk);
return ret;
}
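
/* User-space sketch (illustrative, type/instance chosen arbitrarily): a
 * connectionless send to a TIPC name; __tipc_sendmsg() below resolves the
 * name to a port via the name table:
 *
 *	struct sockaddr_tipc daddr = {
 *		.family = AF_TIPC,
 *		.addrtype = TIPC_ADDR_NAME,
 *		.addr.name.name = { .type = 1000, .instance = 42 },
 *		.addr.name.domain = 0,	(0 = lookup in entire zone)
 *	};
 *	sendto(sd, buf, len, 0, (struct sockaddr *)&daddr, sizeof(daddr));
 */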
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
{
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	struct tipc_msg *mhdr = &tsk->phdr;
	u32 dnode, dport;
	struct sk_buff_head pktchain;
	bool is_connectionless = tipc_sk_type_connectionless(sk);
	struct sk_buff *skb;
	struct tipc_name_seq *seq;
	struct iov_iter save;
	u32 mtu;
	long timeo;
	int rc;
if (dsz > TIPC_MAX_USER_MSG_SIZE)
return -EMSGSIZE;
if (unlikely(!dest)) {
if (is_connectionless && tsk->peer.family == AF_TIPC)
dest = &tsk->peer;
else
return -EDESTADDRREQ;
} else if (unlikely(m->msg_namelen < sizeof(*dest)) ||
dest->family != AF_TIPC) {
return -EINVAL;
}
	if (!is_connectionless) {
		if (sk->sk_state == TIPC_LISTEN)
			return -EPIPE;
		if (sk->sk_state != TIPC_OPEN)
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
		if (dest->addrtype == TIPC_ADDR_NAME) {
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
		}
	}
seq = &dest->addr.nameseq;
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
if (dest->addrtype == TIPC_ADDR_MCAST) {
return tipc_sendmcast(sock, seq, m, dsz, timeo);
} else if (dest->addrtype == TIPC_ADDR_NAME) {
u32 type = dest->addr.name.name.type;
u32 inst = dest->addr.name.name.instance;
u32 domain = dest->addr.name.domain;
dnode = domain;
msg_set_type(mhdr, TIPC_NAMED_MSG);
msg_set_hdr_sz(mhdr, NAMED_H_SIZE);
msg_set_nametype(mhdr, type);
msg_set_nameinst(mhdr, inst);
msg_set_lookup_scope(mhdr, tipc_addr_scope(domain));
dport = tipc_nametbl_translate(net, type, inst, &dnode);
msg_set_destnode(mhdr, dnode);
msg_set_destport(mhdr, dport);
if (unlikely(!dport && !dnode))
return -EHOSTUNREACH;
} else if (dest->addrtype == TIPC_ADDR_ID) {
dnode = dest->addr.id.node;
msg_set_type(mhdr, TIPC_DIRECT_MSG);
msg_set_lookup_scope(mhdr, 0);
msg_set_destnode(mhdr, dnode);
msg_set_destport(mhdr, dest->addr.id.ref);
msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
}
	skb_queue_head_init(&pktchain);

	save = m->msg_iter;
new_mtu:
mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
	if (rc < 0)
		return rc;
do {
skb = skb_peek(&pktchain);
TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
		rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
		if (likely(!rc)) {
			if (!is_connectionless)
				tipc_set_sk_state(sk, TIPC_CONNECTING);
			return dsz;
		}
if (rc == -ELINKCONG) {
tsk->link_cong = 1;
rc = tipc_wait_for_cond(sock, &timeo, !tsk->link_cong);
if (!rc)
continue;
}
		__skb_queue_purge(&pktchain);
		if (rc == -EMSGSIZE) {
			m->msg_iter = save;
			goto new_mtu;
		}
		break;
	} while (1);