diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 10c39801ad5f460c13d313533434525a9b4683da..23486f382b912e1d85f034028e51fcb74df534e9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3963,6 +3963,9 @@ static void bnxt_free_hwrm_resources(struct bnxt *bp)
 				  bp->hwrm_cmd_resp_dma_addr);
 		bp->hwrm_cmd_resp_addr = NULL;
 	}
+
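+	/* dma_pool_destroy() is a no-op on a NULL pool */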
+	dma_pool_destroy(bp->hwrm_dma_pool);
+	bp->hwrm_dma_pool = NULL;
 }
 
 static int bnxt_alloc_hwrm_resources(struct bnxt *bp)
@@ -3975,33 +3978,10 @@ static int bnxt_alloc_hwrm_resources(struct bnxt *bp)
 	if (!bp->hwrm_cmd_resp_addr)
 		return -ENOMEM;
 
-	return 0;
-}
-
-static void bnxt_free_hwrm_short_cmd_req(struct bnxt *bp)
-{
-	if (bp->hwrm_short_cmd_req_addr) {
-		struct pci_dev *pdev = bp->pdev;
-
-		dma_free_coherent(&pdev->dev, bp->hwrm_max_ext_req_len,
-				  bp->hwrm_short_cmd_req_addr,
-				  bp->hwrm_short_cmd_req_dma_addr);
-		bp->hwrm_short_cmd_req_addr = NULL;
-	}
-}
-
-static int bnxt_alloc_hwrm_short_cmd_req(struct bnxt *bp)
-{
-	struct pci_dev *pdev = bp->pdev;
-
-	if (bp->hwrm_short_cmd_req_addr)
-		return 0;
-
-	bp->hwrm_short_cmd_req_addr =
-		dma_alloc_coherent(&pdev->dev, bp->hwrm_max_ext_req_len,
-				   &bp->hwrm_short_cmd_req_dma_addr,
-				   GFP_KERNEL);
-	if (!bp->hwrm_short_cmd_req_addr)
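+	/* per-request buffers (request, context and response) for the new
+	 * message API are carved from this pool, replacing the dedicated
+	 * short command bounce buffer
+	 */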
+	bp->hwrm_dma_pool = dma_pool_create("bnxt_hwrm", &pdev->dev,
+					    BNXT_HWRM_DMA_SIZE,
+					    BNXT_HWRM_DMA_ALIGN, 0);
+	if (!bp->hwrm_dma_pool)
 		return -ENOMEM;
 
 	return 0;
@@ -11654,12 +11634,6 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp)
 			return rc;
 	}
 
-	if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
-	    bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
-		rc = bnxt_alloc_hwrm_short_cmd_req(bp);
-		if (rc)
-			return rc;
-	}
 	bnxt_nvm_cfg_ver_get(bp);
 
 	rc = bnxt_hwrm_func_reset(bp);
@@ -12588,7 +12562,6 @@ static void bnxt_remove_one(struct pci_dev *pdev)
 	bnxt_clear_int_mode(bp);
 	bnxt_hwrm_func_drv_unrgtr(bp);
 	bnxt_free_hwrm_resources(bp);
-	bnxt_free_hwrm_short_cmd_req(bp);
 	bnxt_ethtool_free(bp);
 	bnxt_dcb_free(bp);
 	kfree(bp->edev);
@@ -13188,7 +13161,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 init_err_pci_clean:
 	bnxt_hwrm_func_drv_unrgtr(bp);
-	bnxt_free_hwrm_short_cmd_req(bp);
 	bnxt_free_hwrm_resources(bp);
 	bnxt_ethtool_free(bp);
 	bnxt_ptp_clear(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 5ff71eeffdd8a77085c32a3c6a97e750cbf1eb4e..79a78a7468f33eebbdbc7cad221059928a6fcde4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1881,10 +1881,9 @@ struct bnxt {
 	u16			hwrm_cmd_seq;
 	u16                     hwrm_cmd_kong_seq;
 	u16			hwrm_intr_seq_id;
-	void			*hwrm_short_cmd_req_addr;
-	dma_addr_t		hwrm_short_cmd_req_dma_addr;
 	void			*hwrm_cmd_resp_addr;
 	dma_addr_t		hwrm_cmd_resp_dma_addr;
+	struct dma_pool		*hwrm_dma_pool;
 
 	struct rtnl_link_stats64	net_stats_prev;
 	struct bnxt_stats_mem	port_stats;
@@ -1984,7 +1983,7 @@ struct bnxt {
 	struct mutex		sriov_lock;
 #endif
 
-#if BITS_PER_LONG == 32
+#ifndef writeq
 	/* ensure atomic 64-bit doorbell writes on 32-bit systems. */
 	spinlock_t		db_lock;
 #endif
@@ -2113,7 +2112,7 @@ static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
 		((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
 }
 
-#if BITS_PER_LONG == 32
+#ifndef writeq
 #define writeq(val64, db)			\
 do {						\
 	spin_lock(&bp->db_lock);		\
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
index b2a211b6cdd096fcf65298f5e8046dbef70d6321..a45f2a61908613003ded99877609d66047986ebd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
@@ -35,7 +35,220 @@ void bnxt_hwrm_cmd_hdr_init(struct bnxt *bp, void *request, u16 req_type,
 	req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);
 }
 
-static int bnxt_hwrm_to_stderr(u32 hwrm_err)
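+/* Mix the context address and request type into the sentinel so that a
+ * stale or miscast request pointer is overwhelmingly unlikely to match.
+ */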
+static u64 hwrm_calc_sentinel(struct bnxt_hwrm_ctx *ctx, u16 req_type)
+{
+	return (((uintptr_t)ctx) + req_type) ^ BNXT_HWRM_SENTINEL;
+}
+
+/**
+ * __hwrm_req_init() - Initialize an HWRM request.
+ * @bp: The driver context.
+ * @req: A pointer to the request pointer to initialize.
+ * @req_type: The request type. This will be converted to little endian
+ *	before being written to the req_type field of the returned request.
+ * @req_len: The length of the request to be allocated.
+ *
+ * Allocate DMA resources and initialize a new HWRM request object of the
+ * given type. The response address field in the request is configured with
+ * the DMA bus address that has been mapped for the response and the passed
+ * request pointer is set to point at kernel virtual memory mapped for the
+ * request (such that short_input indirection can be accomplished without
+ * copying). The request's target and completion ring are initialized to
+ * default values and can be overridden by writing to the returned request
+ * object directly.
+ *
+ * The initialized request can be further customized by writing to its fields
+ * directly, taking care to convert such fields to little endian. The request
+ * object will be consumed (and all its associated resources released) upon
+ * passing it to hwrm_req_send() unless ownership of the request has been
+ * claimed by the caller via a call to hwrm_req_hold(). If the request is not
+ * consumed, either because it is never sent or because ownership has been
+ * claimed, then it must be released by a call to hwrm_req_drop().
+ *
+ * Return: zero on success, negative error code otherwise:
+ *	E2BIG: the request is too large to fit in the per-command DMA buffer.
+ *	ENOMEM: an allocation failure occurred.
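+ *
+ * A minimal usage sketch via the hwrm_req_init() wrapper macro (the request
+ * type shown is just an example):
+ *
+ *	struct hwrm_func_reset_input *req;
+ *	int rc;
+ *
+ *	rc = hwrm_req_init(bp, req, HWRM_FUNC_RESET);
+ *	if (rc)
+ *		return rc;
+ *	rc = hwrm_req_send(bp, req); // consumes req unless held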
+ */
+int __hwrm_req_init(struct bnxt *bp, void **req, u16 req_type, u32 req_len)
+{
+	struct bnxt_hwrm_ctx *ctx;
+	dma_addr_t dma_handle;
+	u8 *req_addr;
+
+	if (req_len > BNXT_HWRM_CTX_OFFSET)
+		return -E2BIG;
+
+	req_addr = dma_pool_alloc(bp->hwrm_dma_pool, GFP_KERNEL | __GFP_ZERO,
+				  &dma_handle);
+	if (!req_addr)
+		return -ENOMEM;
+
+	ctx = (struct bnxt_hwrm_ctx *)(req_addr + BNXT_HWRM_CTX_OFFSET);
+	/* safety first, sentinel used to check for invalid requests */
+	ctx->sentinel = hwrm_calc_sentinel(ctx, req_type);
+	ctx->req_len = req_len;
+	ctx->req = (struct input *)req_addr;
+	ctx->resp = (struct output *)(req_addr + BNXT_HWRM_RESP_OFFSET);
+	ctx->dma_handle = dma_handle;
+	ctx->flags = 0; /* __GFP_ZERO, but be explicit regarding ownership */
+	ctx->timeout = bp->hwrm_cmd_timeout ?: DFLT_HWRM_CMD_TIMEOUT;
+
+	/* initialize common request fields */
+	ctx->req->req_type = cpu_to_le16(req_type);
+	ctx->req->resp_addr = cpu_to_le64(dma_handle + BNXT_HWRM_RESP_OFFSET);
+	ctx->req->cmpl_ring = cpu_to_le16(BNXT_HWRM_NO_CMPL_RING);
+	ctx->req->target_id = cpu_to_le16(BNXT_HWRM_TARGET);
+	*req = ctx->req;
+
+	return 0;
+}
+
+static struct bnxt_hwrm_ctx *__hwrm_ctx(struct bnxt *bp, u8 *req_addr)
+{
+	void *ctx_addr = req_addr + BNXT_HWRM_CTX_OFFSET;
+	struct input *req = (struct input *)req_addr;
+	struct bnxt_hwrm_ctx *ctx = ctx_addr;
+	u64 sentinel;
+
+	if (!req) {
+		/* can only be due to software bug, be loud */
+		netdev_err(bp->dev, "null HWRM request\n");
+		dump_stack();
+		return NULL;
+	}
+
+	/* HWRM API has no type safety, verify sentinel to validate address */
+	sentinel = hwrm_calc_sentinel(ctx, le16_to_cpu(req->req_type));
+	if (ctx->sentinel != sentinel) {
+		/* can only be due to software bug, be loud */
+		netdev_err(bp->dev, "HWRM sentinel mismatch, req_type = %u\n",
+			   (u32)le16_to_cpu(req->req_type));
+		dump_stack();
+		return NULL;
+	}
+
+	return ctx;
+}
+
+/**
+ * hwrm_req_timeout() - Set the completion timeout for the request.
+ * @bp: The driver context.
+ * @req: The request for which to set the timeout.
+ * @timeout: The timeout in milliseconds.
+ *
+ * Set the timeout associated with the request for subsequent calls to
+ * hwrm_req_send(). Some requests are long-running and require a different
+ * timeout than the default.
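+ *
+ * For example, a long-running command might use one of the extended
+ * timeouts defined in this module:
+ *
+ *	hwrm_req_timeout(bp, req, HWRM_COREDUMP_TIMEOUT);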
+ */
+void hwrm_req_timeout(struct bnxt *bp, void *req, unsigned int timeout)
+{
+	struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+	if (ctx)
+		ctx->timeout = timeout;
+}
+
+/**
+ * hwrm_req_flags() - Set non-internal flags of the request context.
+ * @bp: The driver context.
+ * @req: The request containing the HWRM command.
+ * @flags: Context flags that are not internal to the HWRM module (see
+ *	HWRM_API_FLAGS).
+ *
+ * Context flags can be used by callers to control how a subsequent
+ * hwrm_req_send() behaves. For example, BNXT_HWRM_CTX_SILENT suppresses
+ * kernel logging of firmware errors and BNXT_HWRM_FULL_WAIT forces
+ * hwrm_req_send() to wait for the full timeout even if the firmware is not
+ * responding. This generic function can be used to set any flag that is not
+ * internal to the HWRM module.
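+ *
+ * An illustrative call setting both public flags:
+ *
+ *	hwrm_req_flags(bp, req, BNXT_HWRM_CTX_SILENT | BNXT_HWRM_FULL_WAIT);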
+ */
+void hwrm_req_flags(struct bnxt *bp, void *req, enum bnxt_hwrm_ctx_flags flags)
+{
+	struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+	if (ctx)
+		ctx->flags |= (flags & HWRM_API_FLAGS);
+}
+
+/**
+ * hwrm_req_hold() - Claim ownership of the request's resources.
+ * @bp: The driver context.
+ * @req: A pointer to the request to own. The request will no longer be
+ *	consumed by calls to hwrm_req_send().
+ *
+ * Take ownership of the request. Ownership places responsibility on the
+ * caller to free the resources associated with the request via a call to
+ * hwrm_req_drop(). The caller taking ownership implies that a subsequent
+ * call to hwrm_req_send() will not consume the request (i.e. sending will
+ * not free the associated resources if the request is owned by the caller).
+ * Taking ownership returns a reference to the response. Retaining and
+ * accessing the response data is the most common reason to take ownership
+ * of the request. Ownership can also be acquired in order to reuse the same
+ * request object across multiple invocations of hwrm_req_send().
+ *
+ * Return: A pointer to the response object.
+ *
+ * The resources associated with the response will remain available to the
+ * caller until ownership of the request is relinquished via a call to
+ * hwrm_req_drop(). It is not possible for hwrm_req_hold() to return NULL if
+ * a valid request is provided. A returned NULL value would imply a driver
+ * bug and the implementation will complain loudly in the logs to aid in
+ * detection. It should not be necessary to check the result for NULL.
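+ *
+ * A typical hold/drop pattern (the response type and field shown are just
+ * an example):
+ *
+ *	struct hwrm_func_qcaps_output *resp;
+ *
+ *	resp = hwrm_req_hold(bp, req);
+ *	rc = hwrm_req_send(bp, req);
+ *	if (!rc)
+ *		flags = le32_to_cpu(resp->flags); // resp still valid here
+ *	hwrm_req_drop(bp, req);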
+ */
+void *hwrm_req_hold(struct bnxt *bp, void *req)
+{
+	struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+	struct input *input = (struct input *)req;
+
+	if (!ctx)
+		return NULL;
+
+	if (ctx->flags & BNXT_HWRM_INTERNAL_CTX_OWNED) {
+		/* can only be due to software bug, be loud */
+		netdev_err(bp->dev, "HWRM context already owned, req_type = %u\n",
+			   (u32)le16_to_cpu(input->req_type));
+		dump_stack();
+		return NULL;
+	}
+
+	ctx->flags |= BNXT_HWRM_INTERNAL_CTX_OWNED;
+	return ((u8 *)req) + BNXT_HWRM_RESP_OFFSET;
+}
+
+static void __hwrm_ctx_drop(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx)
+{
+	void *addr = ((u8 *)ctx) - BNXT_HWRM_CTX_OFFSET;
+	dma_addr_t dma_handle = ctx->dma_handle; /* save before invalidate */
+
+	/* invalidate, ensure ownership, sentinel and dma_handle are cleared */
+	memset(ctx, 0, sizeof(struct bnxt_hwrm_ctx));
+
+	/* return the buffer to the DMA pool */
+	if (dma_handle)
+		dma_pool_free(bp->hwrm_dma_pool, addr, dma_handle);
+}
+
+/**
+ * hwrm_req_drop() - Release all resources associated with the request.
+ * @bp: The driver context.
+ * @req: The request to consume, releasing the associated resources. The
+ *	request object and its associated response are no longer valid.
+ *
+ * It is legal to call hwrm_req_drop() on an unowned request, provided it
+ * has not already been consumed by hwrm_req_send() (for example, to release
+ * an aborted request). A given request should not be dropped more than once,
+ * nor should it be dropped after having been consumed by hwrm_req_send(). To
+ * do so is an error (the context will not be found and a stack trace will be
+ * rendered in the kernel log).
+ */
+void hwrm_req_drop(struct bnxt *bp, void *req)
+{
+	struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+	if (ctx)
+		__hwrm_ctx_drop(bp, ctx);
+}
+
+static int __hwrm_to_stderr(u32 hwrm_err)
 {
 	switch (hwrm_err) {
 	case HWRM_ERR_CODE_SUCCESS:
@@ -64,78 +277,71 @@ static int bnxt_hwrm_to_stderr(u32 hwrm_err)
 	}
 }
 
-static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
-				 int timeout, bool silent)
+static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx)
 {
-	int i, intr_process, rc, tmo_count;
-	struct input *req = msg;
-	u32 *data = msg;
-	u8 *valid;
-	u16 cp_ring_id, len = 0;
-	struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr;
-	u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN;
-	struct hwrm_short_input short_input = {0};
 	u32 doorbell_offset = BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER;
 	u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM;
+	struct hwrm_short_input short_input = {0};
+	u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN;
+	unsigned int i, timeout, tmo_count;
 	u16 dst = BNXT_HWRM_CHNL_CHIMP;
+	int intr_process, rc = -EBUSY;
+	u32 *data = (u32 *)ctx->req;
+	u32 msg_len = ctx->req_len;
+	u16 cp_ring_id, len = 0;
+	u32 req_type;
+	u8 *valid;
 
-	if (BNXT_NO_FW_ACCESS(bp) &&
-	    le16_to_cpu(req->req_type) != HWRM_FUNC_RESET)
-		return -EBUSY;
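+	/* zero the response if it may hold stale data from a previous send */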
+	if (ctx->flags & BNXT_HWRM_INTERNAL_RESP_DIRTY)
+		memset(ctx->resp, 0, PAGE_SIZE);
 
-	if (msg_len > BNXT_HWRM_MAX_REQ_LEN) {
-		if (msg_len > bp->hwrm_max_ext_req_len ||
-		    !bp->hwrm_short_cmd_req_addr)
-			return -EINVAL;
+	req_type = le16_to_cpu(ctx->req->req_type);
+	if (BNXT_NO_FW_ACCESS(bp) && req_type != HWRM_FUNC_RESET)
+		goto exit;
+
+	if (msg_len > BNXT_HWRM_MAX_REQ_LEN &&
+	    msg_len > bp->hwrm_max_ext_req_len) {
+		rc = -E2BIG;
+		goto exit;
 	}
 
-	if (bnxt_kong_hwrm_message(bp, req)) {
+	if (bnxt_kong_hwrm_message(bp, ctx->req)) {
 		dst = BNXT_HWRM_CHNL_KONG;
 		bar_offset = BNXT_GRCPF_REG_KONG_COMM;
 		doorbell_offset = BNXT_GRCPF_REG_KONG_COMM_TRIGGER;
+		if (le16_to_cpu(ctx->req->cmpl_ring) != INVALID_HW_RING_ID) {
+			netdev_err(bp->dev, "Ring completions not supported for KONG commands, req_type = %u\n",
+				   req_type);
+			rc = -EINVAL;
+			goto exit;
+		}
 	}
 
-	memset(resp, 0, PAGE_SIZE);
-	cp_ring_id = le16_to_cpu(req->cmpl_ring);
+	cp_ring_id = le16_to_cpu(ctx->req->cmpl_ring);
 	intr_process = (cp_ring_id == INVALID_HW_RING_ID) ? 0 : 1;
 
-	req->seq_id = cpu_to_le16(bnxt_get_hwrm_seq_id(bp, dst));
+	ctx->req->seq_id = cpu_to_le16(bnxt_get_hwrm_seq_id(bp, dst));
 	/* currently supports only one outstanding message */
 	if (intr_process)
-		bp->hwrm_intr_seq_id = le16_to_cpu(req->seq_id);
+		bp->hwrm_intr_seq_id = le16_to_cpu(ctx->req->seq_id);
 
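+	/* The request already resides in DMA-mapped memory, so the short
+	 * command format can point at it directly instead of copying it into
+	 * a separate bounce buffer as the old API did.
+	 */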
 	if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
 	    msg_len > BNXT_HWRM_MAX_REQ_LEN) {
-		void *short_cmd_req = bp->hwrm_short_cmd_req_addr;
-		u16 max_msg_len;
-
-		/* Set boundary for maximum extended request length for short
-		 * cmd format. If passed up from device use the max supported
-		 * internal req length.
-		 */
-		max_msg_len = bp->hwrm_max_ext_req_len;
-
-		memcpy(short_cmd_req, req, msg_len);
-		if (msg_len < max_msg_len)
-			memset(short_cmd_req + msg_len, 0,
-			       max_msg_len - msg_len);
-
-		short_input.req_type = req->req_type;
+		short_input.req_type = ctx->req->req_type;
 		short_input.signature =
 				cpu_to_le16(SHORT_REQ_SIGNATURE_SHORT_CMD);
 		short_input.size = cpu_to_le16(msg_len);
-		short_input.req_addr =
-			cpu_to_le64(bp->hwrm_short_cmd_req_dma_addr);
+		short_input.req_addr = cpu_to_le64(ctx->dma_handle);
 
 		data = (u32 *)&short_input;
 		msg_len = sizeof(short_input);
 
-		/* Sync memory write before updating doorbell */
-		wmb();
-
 		max_req_len = BNXT_HWRM_SHORT_REQ_LEN;
 	}
 
+	/* Ensure any associated DMA buffers are written before doorbell */
+	wmb();
+
 	/* Write request msg to hwrm channel */
 	__iowrite32_copy(bp->bar0 + bar_offset, data, msg_len / 4);
 
@@ -145,13 +351,13 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 	/* Ring channel doorbell */
 	writel(1, bp->bar0 + doorbell_offset);
 
-	if (!pci_is_enabled(bp->pdev))
-		return -ENODEV;
+	if (!pci_is_enabled(bp->pdev)) {
+		rc = -ENODEV;
+		goto exit;
+	}
 
-	if (!timeout)
-		timeout = DFLT_HWRM_CMD_TIMEOUT;
 	/* Limit timeout to an upper limit */
-	timeout = min(timeout, HWRM_CMD_MAX_TIMEOUT);
+	timeout = min_t(uint, ctx->timeout, HWRM_CMD_MAX_TIMEOUT);
 	/* convert timeout to usec */
 	timeout *= 1000;
 
@@ -174,13 +380,13 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 			 * check has failed.
 			 */
 			if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
-				return -EBUSY;
+				goto exit;
 			/* on first few passes, just barely sleep */
 			if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
 				usleep_range(HWRM_SHORT_MIN_TIMEOUT,
 					     HWRM_SHORT_MAX_TIMEOUT);
 			} else {
-				if (HWRM_WAIT_MUST_ABORT(bp, req))
+				if (HWRM_WAIT_MUST_ABORT(bp, ctx))
 					break;
 				usleep_range(HWRM_MIN_TIMEOUT,
 					     HWRM_MAX_TIMEOUT);
@@ -188,13 +394,13 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 		}
 
 		if (bp->hwrm_intr_seq_id != (u16)~seq_id) {
-			if (!silent)
+			if (!(ctx->flags & BNXT_HWRM_CTX_SILENT))
 				netdev_err(bp->dev, "Resp cmpl intr err msg: 0x%x\n",
-					   le16_to_cpu(req->req_type));
-			return -EBUSY;
+					   le16_to_cpu(ctx->req->req_type));
+			goto exit;
 		}
-		len = le16_to_cpu(resp->resp_len);
-		valid = ((u8 *)resp) + len - 1;
+		len = le16_to_cpu(ctx->resp->resp_len);
+		valid = ((u8 *)ctx->resp) + len - 1;
 	} else {
 		int j;
 
@@ -204,8 +410,8 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 			 * check has failed.
 			 */
 			if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
-				return -EBUSY;
-			len = le16_to_cpu(resp->resp_len);
+				goto exit;
+			len = le16_to_cpu(ctx->resp->resp_len);
 			if (len)
 				break;
 			/* on first few passes, just barely sleep */
@@ -213,7 +419,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 				usleep_range(HWRM_SHORT_MIN_TIMEOUT,
 					     HWRM_SHORT_MAX_TIMEOUT);
 			} else {
-				if (HWRM_WAIT_MUST_ABORT(bp, req))
+				if (HWRM_WAIT_MUST_ABORT(bp, ctx))
 					goto timeout_abort;
 				usleep_range(HWRM_MIN_TIMEOUT,
 					     HWRM_MAX_TIMEOUT);
@@ -222,16 +428,16 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 
 		if (i >= tmo_count) {
 timeout_abort:
-			if (!silent)
-				netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d\n",
-					   HWRM_TOTAL_TIMEOUT(i),
-					   le16_to_cpu(req->req_type),
-					   le16_to_cpu(req->seq_id), len);
-			return -EBUSY;
+			if (!(ctx->flags & BNXT_HWRM_CTX_SILENT))
+				netdev_err(bp->dev, "Error (timeout: %u) msg {0x%x 0x%x} len:%d\n",
+					   hwrm_total_timeout(i),
+					   le16_to_cpu(ctx->req->req_type),
+					   le16_to_cpu(ctx->req->seq_id), len);
+			goto exit;
 		}
 
 		/* Last byte of resp contains valid bit */
-		valid = ((u8 *)resp) + len - 1;
+		valid = ((u8 *)ctx->resp) + len - 1;
 		for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) {
 			/* make sure we read from updated DMA memory */
 			dma_rmb();
@@ -241,13 +447,13 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 		}
 
 		if (j >= HWRM_VALID_BIT_DELAY_USEC) {
-			if (!silent)
-				netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d v:%d\n",
-					   HWRM_TOTAL_TIMEOUT(i),
-					   le16_to_cpu(req->req_type),
-					   le16_to_cpu(req->seq_id), len,
+			if (!(ctx->flags & BNXT_HWRM_CTX_SILENT))
+				netdev_err(bp->dev, "Error (timeout: %u) msg {0x%x 0x%x} len:%d v:%d\n",
+					   hwrm_total_timeout(i),
+					   le16_to_cpu(ctx->req->req_type),
+					   le16_to_cpu(ctx->req->seq_id), len,
 					   *valid);
-			return -EBUSY;
+			goto exit;
 		}
 	}
 
@@ -256,12 +462,53 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 	 * a new field not implemented by old spec will read zero.
 	 */
 	*valid = 0;
-	rc = le16_to_cpu(resp->error_code);
-	if (rc && !silent)
+	rc = le16_to_cpu(ctx->resp->error_code);
+	if (rc && !(ctx->flags & BNXT_HWRM_CTX_SILENT)) {
 		netdev_err(bp->dev, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n",
-			   le16_to_cpu(resp->req_type),
-			   le16_to_cpu(resp->seq_id), rc);
-	return bnxt_hwrm_to_stderr(rc);
+			   le16_to_cpu(ctx->resp->req_type),
+			   le16_to_cpu(ctx->resp->seq_id), rc);
+	}
+	rc = __hwrm_to_stderr(rc);
+exit:
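+	/* an owned request is retained (its response marked dirty for reuse),
+	 * an unowned request is always consumed here, even on error
+	 */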
+	if (ctx->flags & BNXT_HWRM_INTERNAL_CTX_OWNED)
+		ctx->flags |= BNXT_HWRM_INTERNAL_RESP_DIRTY;
+	else
+		__hwrm_ctx_drop(bp, ctx);
+	return rc;
+}
+
+static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
+				 int timeout, bool silent)
+{
+	struct bnxt_hwrm_ctx default_ctx = {0};
+	struct bnxt_hwrm_ctx *ctx = &default_ctx;
+	struct input *req = msg;
+	int rc;
+
+	if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
+	    msg_len > BNXT_HWRM_MAX_REQ_LEN) {
+		rc = __hwrm_req_init(bp, (void **)&req,
+				     le16_to_cpu(req->req_type), msg_len);
+		if (rc)
+			return rc;
+		memcpy(req, msg, msg_len); /* also copies resp_addr */
+		ctx = __hwrm_ctx(bp, (u8 *)req);
+		/* belt and braces: a NULL ctx shouldn't be possible here */
+		if (!ctx)
+			return -ENOMEM;
+	}
+
+	ctx->req = req;
+	ctx->req_len = msg_len;
+	ctx->resp = bp->hwrm_cmd_resp_addr;
+	/* the shared legacy response buffer is not zeroed between requests */
+	ctx->flags = BNXT_HWRM_INTERNAL_RESP_DIRTY;
+	ctx->timeout = timeout ?: DFLT_HWRM_CMD_TIMEOUT;
+	if (silent)
+		ctx->flags |= BNXT_HWRM_CTX_SILENT;
+
+	/* will consume req if allocated with __hwrm_req_init() */
+	return __hwrm_send(bp, ctx);
 }
 
 int _hwrm_send_message(struct bnxt *bp, void *msg, u32 msg_len, int timeout)
@@ -295,3 +542,64 @@ int hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 msg_len,
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
 }
+
+/**
+ * hwrm_req_send() - Execute an HWRM command.
+ * @bp: The driver context.
+ * @req: A pointer to the request to send. The DMA resources associated with
+ *	the request will be released (i.e. the request will be consumed) unless
+ *	ownership of the request has been assumed by the caller via a call to
+ *	hwrm_req_hold().
+ *
+ * Send an HWRM request to the device and wait for a response. The request is
+ * consumed if it is not owned by the caller. This function will block until
+ * the request has either completed or timed out due to an error.
+ *
+ * Return: A result code.
+ *
+ * The result is zero on success, otherwise the negative error code indicates
+ * one of the following errors:
+ *	E2BIG: The request was too large.
+ *	EBUSY: The firmware is in a fatal state or the request timed out.
+ *	EACCES: HWRM access denied.
+ *	ENOSPC: HWRM resource allocation error.
+ *	EINVAL: Request parameters are invalid.
+ *	ENOMEM: HWRM has no buffers.
+ *	EAGAIN: HWRM busy or reset in progress.
+ *	EOPNOTSUPP: Invalid request type.
+ *	EIO: Any other error.
+ * Error handling is orthogonal to request ownership: an unowned request is
+ * consumed on error just as on success, while the caller of a held request
+ * remains responsible for releasing its resources via hwrm_req_drop().
+ */
+int hwrm_req_send(struct bnxt *bp, void *req)
+{
+	struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+	int rc;
+
+	if (!ctx)
+		return -EINVAL;
+
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = __hwrm_send(bp, ctx);
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
+/**
+ * hwrm_req_send_silent() - A silent version of hwrm_req_send().
+ * @bp: The driver context.
+ * @req: The request to send without logging.
+ *
+ * The same as hwrm_req_send(), except that BNXT_HWRM_CTX_SILENT is set on
+ * the request via hwrm_req_flags() prior to the call. This version of the
+ * function is provided solely to preserve the legacy API's flavor for this
+ * functionality.
+ *
+ * Return: A result code, see hwrm_req_send().
+ */
+int hwrm_req_send_silent(struct bnxt *bp, void *req)
+{
+	hwrm_req_flags(bp, req, BNXT_HWRM_CTX_SILENT);
+	return hwrm_req_send(bp, req);
+}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
index 940c792b54c7806acf34c7508eecc7bc7b4b71bc..199c646f5e71293b685d65e1590decf87c78dca5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
@@ -12,6 +12,26 @@
 
 #include "bnxt_hsi.h"
 
+enum bnxt_hwrm_ctx_flags {
+	/* Keep HWRM_API_FLAGS below in sync with any new non-internal bits added here */
+	BNXT_HWRM_INTERNAL_CTX_OWNED	= BIT(0), /* caller owns the context */
+	BNXT_HWRM_INTERNAL_RESP_DIRTY	= BIT(1), /* response contains data */
+	BNXT_HWRM_CTX_SILENT		= BIT(2), /* squelch firmware errors */
+	BNXT_HWRM_FULL_WAIT		= BIT(3), /* wait for full timeout of HWRM command */
+};
+
+#define HWRM_API_FLAGS (BNXT_HWRM_CTX_SILENT | BNXT_HWRM_FULL_WAIT)
+
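+/* Embedded in each DMA-mapped request buffer, between the request and
+ * response regions. The sentinel guards against stale or miscast request
+ * pointers being passed back into the API.
+ */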
+struct bnxt_hwrm_ctx {
+	u64 sentinel;
+	dma_addr_t dma_handle;
+	struct output *resp;
+	struct input *req;
+	u32 req_len;
+	enum bnxt_hwrm_ctx_flags flags;
+	unsigned int timeout;
+};
+
 #define BNXT_HWRM_MAX_REQ_LEN		(bp->hwrm_max_req_len)
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 #define HWRM_CMD_MAX_TIMEOUT		40000
@@ -19,7 +39,17 @@
 #define HWRM_CMD_TIMEOUT		(bp->hwrm_cmd_timeout)
 #define HWRM_RESET_TIMEOUT		((HWRM_CMD_TIMEOUT) * 4)
 #define HWRM_COREDUMP_TIMEOUT		((HWRM_CMD_TIMEOUT) * 12)
+#define BNXT_HWRM_TARGET		0xffff
+#define BNXT_HWRM_NO_CMPL_RING		-1
 #define BNXT_HWRM_REQ_MAX_SIZE		128
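+/* Layout of each buffer allocated from the bnxt_hwrm dma_pool
+ * (BNXT_HWRM_DMA_SIZE bytes in total):
+ *
+ *   0           BNXT_HWRM_CTX_OFFSET    BNXT_HWRM_RESP_OFFSET
+ *   | request ... | struct bnxt_hwrm_ctx | response (one page) |
+ */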
+#define BNXT_HWRM_DMA_SIZE		(2 * PAGE_SIZE) /* space for req+resp */
+#define BNXT_HWRM_RESP_RESERVED		PAGE_SIZE
+#define BNXT_HWRM_RESP_OFFSET		(BNXT_HWRM_DMA_SIZE -		\
+					 BNXT_HWRM_RESP_RESERVED)
+#define BNXT_HWRM_CTX_OFFSET		(BNXT_HWRM_RESP_OFFSET -	\
+					 sizeof(struct bnxt_hwrm_ctx))
+#define BNXT_HWRM_DMA_ALIGN		16
+#define BNXT_HWRM_SENTINEL		0xb6e1f68a12e9a7eb /* arbitrary value */
 #define BNXT_HWRM_REQS_PER_PAGE		(BNXT_PAGE_SIZE /	\
 					 BNXT_HWRM_REQ_MAX_SIZE)
 #define HWRM_SHORT_MIN_TIMEOUT		3
@@ -29,14 +59,17 @@
 #define HWRM_MIN_TIMEOUT		25
 #define HWRM_MAX_TIMEOUT		40
 
-#define HWRM_WAIT_MUST_ABORT(bp, req)					\
-	(le16_to_cpu((req)->req_type) != HWRM_VER_GET &&		\
+#define HWRM_WAIT_MUST_ABORT(bp, ctx)					\
+	(le16_to_cpu((ctx)->req->req_type) != HWRM_VER_GET &&		\
 	 !bnxt_is_fw_healthy(bp))
 
-#define HWRM_TOTAL_TIMEOUT(n)	(((n) <= HWRM_SHORT_TIMEOUT_COUNTER) ?	\
-	((n) * HWRM_SHORT_MIN_TIMEOUT) :				\
-	(HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT +		\
-	 ((n) - HWRM_SHORT_TIMEOUT_COUNTER) * HWRM_MIN_TIMEOUT))
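+/* Total wait in usec after n polling passes: the first few passes use the
+ * short interval, the remainder the longer one. For instance, assuming
+ * HWRM_SHORT_TIMEOUT_COUNTER is 5, hwrm_total_timeout(20) would be
+ * 5 * 3 + 15 * 25 = 390 usec.
+ */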
+static inline unsigned int hwrm_total_timeout(unsigned int n)
+{
+	return n <= HWRM_SHORT_TIMEOUT_COUNTER ? n * HWRM_SHORT_MIN_TIMEOUT :
+		HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT +
+		(n - HWRM_SHORT_TIMEOUT_COUNTER) * HWRM_MIN_TIMEOUT;
+}
 
 #define HWRM_VALID_BIT_DELAY_USEC	150
 
@@ -97,4 +130,13 @@ int _hwrm_send_message(struct bnxt *bp, void *msg, u32 len, int timeout);
 int _hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 len, int timeout);
 int hwrm_send_message(struct bnxt *bp, void *msg, u32 len, int timeout);
 int hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 len, int timeout);
+int __hwrm_req_init(struct bnxt *bp, void **req, u16 req_type, u32 req_len);
+#define hwrm_req_init(bp, req, req_type) \
+	__hwrm_req_init((bp), (void **)&(req), (req_type), sizeof(*(req)))
+void *hwrm_req_hold(struct bnxt *bp, void *req);
+void hwrm_req_drop(struct bnxt *bp, void *req);
+void hwrm_req_flags(struct bnxt *bp, void *req, enum bnxt_hwrm_ctx_flags flags);
+void hwrm_req_timeout(struct bnxt *bp, void *req, unsigned int timeout);
+int hwrm_req_send(struct bnxt *bp, void *req);
+int hwrm_req_send_silent(struct bnxt *bp, void *req);
 #endif