diff --git a/iscsi-scst/Makefile b/iscsi-scst/Makefile index 81a7e79e..b9f16e35 100644 --- a/iscsi-scst/Makefile +++ b/iscsi-scst/Makefile @@ -24,6 +24,7 @@ RCDIR := /etc/rc.d MANDIR ?= $(PREFIX)/man KMOD := $(shell pwd)/kernel INCDIR := $(shell pwd)/include +ISERTMOD := $(KMOD)/isert-scst ifeq ($(KVER),) ifeq ($(KDIR),) @@ -50,8 +51,58 @@ INSTALL_DIR := $(INSTALL_MOD_PATH)/lib/modules/$(KVER)/extra all: include/iscsi_scst_itf_ver.h progs mods +ISER_SYMVERS:=$(KMOD)/Module.symvers +OFED_CFLAGS:= + +OFED_FLAVOR=$(shell if [ -e /usr/bin/ofed_info ]; then /usr/bin/ofed_info 2>/dev/null | head -n1 | sed -n 's/^\(MLNX_OFED\|OFED-internal\).*/MOFED/p;s/^OFED-.*/OFED/p'; else echo in-tree; fi) + +ifeq ($(OFED_FLAVOR),MOFED) + # Whether MLNX_OFED for ubuntu has been installed + MLNX_OFED_IB_UBUNTU_INSTALLED:=$(shell if dpkg -s mlnx-ofed-kernel-dkms >/dev/null 2>/dev/null; then echo true; else echo false; fi) + + # Whether MLNX_OFED for RedHat has been installed + MLNX_OFED_IB_RH_INSTALLED:=$(shell if rpm -q mlnx-ofa_kernel-devel >&/dev/null; then echo true; else echo false; fi) + + # Check if we have custom compiled kernel modules + ifeq ($(MLNX_OFED_IB_RH_INSTALLED),false) + MLNX_OFED_IB_RH_INSTALLED:=$(shell if rpm -q kernel-ib-devel >&/dev/null; then echo true; else echo false; fi) + endif + + ifeq ($(MLNX_OFED_IB_UBUNTU_INSTALLED),true) + OFED_VERS=$(shell dpkg -s mlnx-ofed-kernel-dkms | awk -F\- '/Version/ {print $$1}' | awk '{print $$2}') + OFED_CFLAGS:=-I/var/lib/dkms/mlnx-ofed-kernel/$(OFED_VERS)/build/include -include /var/lib/dkms/mlnx-ofed-kernel/$(OFED_VERS)/build/include/linux/compat-2.6.h + ISER_SYMVERS:="$(ISER_SYMVERS) /var/lib/dkms/mlnx-ofed-kernel/$(OFED_VERS)/build/Module.symvers" + endif + + ifeq ($(MLNX_OFED_IB_RH_INSTALLED),true) + OFED_CFLAGS:=-I/usr/src/ofa_kernel/default/include -include /usr/src/ofa_kernel/default/include/linux/compat-2.6.h + ISER_SYMVERS:="$(ISER_SYMVERS) /usr/src/ofa_kernel/default/Module.symvers" + endif +else + # Whether or not the OFED kernel-ib-devel RPM has been installed. + OFED_KERNEL_IB_DEVEL_RPM_INSTALLED:=$(shell if rpm -q kernel-ib-devel 2>/dev/null | grep -q $$(uname -r | sed 's/-/_/g'); then echo true; else echo false; fi) + + # Whether or not the OFED compat-rdma-devel RPM has been installed. + OFED_COMPAT_RDMA_DEVEL_RPM_INSTALLED:=$(shell if rpm -q compat-rdma-devel 2>/dev/null | grep -q $$(uname -r | sed 's/-/_/g'); then echo true; else echo false; fi) + + ifeq ($(OFED_KERNEL_IB_DEVEL_RPM_INSTALLED),true) + # Read OFED's config.mk, which contains the definition of the variable + # BACKPORT_INCLUDES. + include /usr/src/ofa_kernel/config.mk + OFED_CFLAGS:=$(shell echo $(BACKPORT_INCLUDES) -I/usr/src/ofa_kernel/include) + ISER_SYMVERS:="$(ISER_SYMVERS) /usr/src/ofa_kernel/Module.symvers" + endif + + ifeq ($(OFED_COMPAT_RDMA_DEVEL_RPM_INSTALLED),true) + OFED_CFLAGS:=-I/usr/src/compat-rdma/include -include /usr/src/compat-rdma/include/linux/compat-2.6.h + ISER_SYMVERS:="$(ISER_SYMVERS) /usr/src/compat-rdma/Module.symvers" + endif +endif + mods: Modules.symvers Module.symvers + echo " Building against $(OFED_FLAVOR) InfiniBand kernel headers." $(MAKE) -C $(KDIR) SCST_INC_DIR=$(SCST_INC_DIR) SUBDIRS=$(KMOD) modules + $(MAKE) -C $(KDIR) SCST_INC_DIR=$(SCST_INC_DIR) SUBDIRS=$(ISERTMOD) PRE_CFLAGS="$(OFED_CFLAGS) -DOFED_FLAVOR=$(OFED_FLAVOR)" KBUILD_EXTRA_SYMBOLS=$(ISER_SYMVERS) modules progs: $(MAKE) -C usr SCST_INC_DIR=$(SCST_INC_DIR) @@ -72,6 +123,9 @@ install: all $(MAKE) -C $(KDIR) SCST_INC_DIR=$(SCST_INC_DIR) SUBDIRS=$(KMOD) \ $$([ -n "$(DESTDIR)$(INSTALL_MOD_PATH)" ] && echo DEPMOD=true) \ modules_install + $(MAKE) -C $(KDIR) SCST_INC_DIR=$(SCST_INC_DIR) SUBDIRS=$(ISERTMOD) \ + $$([ -n "$(DESTDIR)$(INSTALL_MOD_PATH)" ] && echo DEPMOD=true) \ + modules_install uninstall: rm -f $(DESTDIR)$(SBINDIR)/iscsi-scstd \ @@ -79,7 +133,8 @@ uninstall: $(DESTDIR)$(MANDIR)/man8/iscsi-scstd.8 \ $(DESTDIR)$(SBINDIR)/iscsi-scst-adm \ $(DESTDIR)$(MANDIR)/man8/iscsi-scst-adm.8 \ - $(INSTALL_DIR)/iscsi-scst.ko + $(INSTALL_DIR)/iscsi-scst.ko \ + $(INSTALL_DIR)/isert-scst.ko -/sbin/depmod -b $(INSTALL_MOD_PATH)/ -a $(KVER) SCST_MOD_VERS := $(shell ls $(SCST_DIR)/Modules.symvers 2>/dev/null) @@ -87,6 +142,7 @@ ifneq ($(SCST_MOD_VERS),) Modules.symvers: $(SCST_DIR)/Modules.symvers echo $(SCST_MOD_VERS) cp $(SCST_DIR)/Modules.symvers kernel/ + cp $(SCST_DIR)/Modules.symvers kernel/isert-scst else .PHONY: Modules.symvers endif @@ -96,6 +152,7 @@ SCST_MOD_VERS := $(shell ls $(SCST_DIR)/Module.symvers 2>/dev/null) ifneq ($(SCST_MOD_VERS),) Module.symvers: $(SCST_DIR)/Module.symvers cp $(SCST_DIR)/Module.symvers kernel/ + cp $(SCST_DIR)/Module.symvers kernel/isert-scst else .PHONY: Module.symvers endif @@ -103,17 +160,24 @@ endif clean: $(MAKE) -C usr $@ $(MAKE) -C $(KDIR) SUBDIRS=$(KMOD) $@ + $(MAKE) -C $(KDIR) SUBDIRS=$(ISERTMOD) $@ rm -f kernel/Modules.symvers kernel/Module.symvers \ kernel/Module.markers kernel/modules.order \ + kernel/isert-scst/Modules.symvers kernel/isert-scst/Module.symvers \ + kernel/isert-scst/Module.markers kernel/isert-scst/modules.order \ include/iscsi_scst_itf_ver.h extraclean: $(MAKE) -C usr $@ $(MAKE) -C $(KDIR) SUBDIRS=$(KMOD) clean + $(MAKE) -C $(KDIR) SUBDIRS=$(ISERTMOD) clean rm -f kernel/Modules.symvers kernel/Module.symvers \ kernel/Module.markers kernel/modules.order \ + kernel/isert-scst/Modules.symvers kernel/isert-scst/Module.symvers \ + kernel/isert-scst/Module.markers kernel/isert-scst/modules.order \ include/iscsi_scst_itf_ver.h \ - kernel/*.orig kernel/*.rej + kernel/*.orig kernel/*.rej \ + kernel/isert-scst/*.orig kernel/isert-scst/*.rej 2release: sed -i.aa s/"^E\?XTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/"#EXTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/ $(KMOD)/Makefile @@ -123,6 +187,13 @@ extraclean: sed -i.aa s/"^E\?XTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/"#EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/ $(KMOD)/Makefile grep "^#EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions" $(KMOD)/Makefile >/dev/null rm $(KMOD)/Makefile.aa + sed -i.aa s/"^E\?XTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/"#EXTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/ $(ISERTMOD)/Makefile + grep "^#EXTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS" $(ISERTMOD)/Makefile >/dev/null + sed -i.aa s/"^#\?EXTRA_CFLAGS += \-DCONFIG_SCST_TRACING"/"EXTRA_CFLAGS += \-DCONFIG_SCST_TRACING"/ $(ISERTMOD)/Makefile + grep "^EXTRA_CFLAGS += \-DCONFIG_SCST_TRACING" $(ISERTMOD)/Makefile >/dev/null + sed -i.aa s/"^E\?XTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/"#EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/ $(ISERTMOD)/Makefile + grep "^#EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions" $(ISERTMOD)/Makefile >/dev/null + rm $(ISERTMOD)/Makefile.aa 2debug: sed -i.aa s/"^#\?EXTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/"EXTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/ $(KMOD)/Makefile @@ -132,6 +203,13 @@ extraclean: sed -i.aa s/"^#\?EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/"EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/ $(KMOD)/Makefile grep "^EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions" $(KMOD)/Makefile >/dev/null rm $(KMOD)/Makefile.aa + sed -i.aa s/"^#\?EXTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/"EXTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/ $(ISERTMOD)/Makefile + grep "^EXTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS" $(ISERTMOD)/Makefile >/dev/null + sed -i.aa s/"^E\?XTRA_CFLAGS += \-DCONFIG_SCST_TRACING"/"#EXTRA_CFLAGS += \-DCONFIG_SCST_TRACING"/ $(ISERTMOD)/Makefile + grep "^#EXTRA_CFLAGS += \-DCONFIG_SCST_TRACING" $(ISERTMOD)/Makefile >/dev/null + sed -i.aa s/"^#\?EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/"EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/ $(ISERTMOD)/Makefile + grep "^EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions" $(ISERTMOD)/Makefile >/dev/null + rm $(ISERTMOD)/Makefile.aa 2perf: sed -i.aa s/"^E\?XTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/"#EXTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/ $(KMOD)/Makefile @@ -141,6 +219,13 @@ extraclean: sed -i.aa s/"^E\?XTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/"#EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/ $(KMOD)/Makefile grep "^#EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions" $(KMOD)/Makefile >/dev/null rm $(KMOD)/Makefile.aa + sed -i.aa s/"^E\?XTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/"#EXTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS"/ $(ISERTMOD)/Makefile + grep "^#EXTRA_CFLAGS += \-DCONFIG_SCST_EXTRACHECKS" $(ISERTMOD)/Makefile >/dev/null + sed -i.aa s/"^E\?XTRA_CFLAGS += \-DCONFIG_SCST_TRACING"/"#EXTRA_CFLAGS += \-DCONFIG_SCST_TRACING"/ $(ISERTMOD)/Makefile + grep "^#EXTRA_CFLAGS += \-DCONFIG_SCST_TRACING" $(ISERTMOD)/Makefile >/dev/null + sed -i.aa s/"^E\?XTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/"#EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions"/ $(ISERTMOD)/Makefile + grep "^#EXTRA_CFLAGS += \-DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions" $(ISERTMOD)/Makefile >/dev/null + rm $(ISERTMOD)/Makefile.aa disable_proc: sed -i.aa s/"^#\?define CONFIG_SCST_PROC"/"\/* #define CONFIG_SCST_PROC *\/"/ $(INCDIR)/iscsi_scst_ver.h diff --git a/iscsi-scst/README.iser b/iscsi-scst/README.iser new file mode 100644 index 00000000..a52d1a8b --- /dev/null +++ b/iscsi-scst/README.iser @@ -0,0 +1,123 @@ +iSCSI extensions for RDMA driver +================================ + +Installation & Configuration: +--------------------------- +For installation and configuration, see iscsi README. +There are no specific configuration options for iSER. +See below for performance optimizations as well as troubleshooting. +There is also a HOWTO on http://community.mellanox.com/docs/DOC-1479 + +Performance considerations: +--------------------------- + +In order to achieve better performance, it is recommended to specify +"QueuedCommands 128" parameter per iSER target, since the transport +is very fast and you usually want to connect it to fast backstorage. + +For performance tuning of initiator and target machines, see +http://community.mellanox.com/docs/DOC-1483 + +Note that if you have an SSD controller that is close to a particular +NUMA node, you want the HCA to be close to the same node. + +Limitations: +------------- +* Bidirectional commands are not supported +* Block size over 512KB is not supported +* Maximum number of concurent login requests that can be handled is 127 by default. + Note that there may be more connections, but only up to 127 login requests + can be handled at the same time. If you wish to increase this, load isert_scst with + module parameter isert_nr_devs set to the number of login requests you need to handle. + + +Troubleshooting: +----------------- +* Initiator fails to connect to target. The following message is seen in dmesg: + Failed to accept conn request, err: -22 + The cause of this is often compilation issues if you have OFED or MLNX_OFED installed: + If you are compiling for OFED/MLNX_OFED, make sure OFED is installed for + the kernel you are running. Also, make sure you followed ALL steps described + in README.iser_ofed. + If you are compiling for non-OFED kernel, make sure you don't have + OFED/MLNX_OFED installed. + + +* Discovery of iSER targets takes a long time or login to all discovered targets fails. + iSCSI discovery does not have a way to determine between iSCSI and iSER + enabled portals. Thus, initiator tries to connect to all interfaces it + discovered (by default discovery is done over iSCSI TCP). + In order to prevent this behaviour, you should specify + "allowed_portal " parameter for each target you want + to export through specific RDMA capable adapters. + + +* Initiator keeps connecting and disconnecting from target in a loop + with constant interval after target reboot. + The problem may be that connection requests from initiator are received + on wrong port/HCA. This can be one due to one (or both) of the following issues: + 1) net.ipv4.conf.all.arp_ignore sysclt is not set to 2 + rdma-cm relies on ARP responses being received on the same interface + that sent the request. Linux default does not do that. + In order to make Linux behave good for rdma-cm, you _MUST_ add + "net.ipv4.conf.all.arp_ignore = 2" to /etc/sysctl.conf + 2) You have more than 1 HCA and PCI mappings to netdev devices is not + persistent between reboots. Possible solution is to have udev rules + for mapping the ibX devices in persistent way. + See below for udev scripts example: + +/lib/udev/net.sh +------------------- +#!/bin/sh + +. /etc/sysconfig/net.conf + +type_fd="/sys/${DEVPATH}/type" +if [ ! -f $type_fd ]; then + exit +fi +type=`cat /sys/${DEVPATH}/type` + +if [ "$type" = "32" ]; then # IPoIB interface + i=0 + CONFDEV="DEV${i}" + CONFPCI=${!CONFDEV} + PCI=`basename $PHYSDEVPATH` + while [ -n "$CONFPCI" ]; do + if [ "$CONFPCI" = "$PCI" ]; then + devid=$(printf "%d\n" `cat /sys/$DEVPATH/dev_id`) + let id=$i*2+$devid + DEV="ib$id" + echo "$DEV" + exit + fi + let i=i+1 + CONFDEV="DEV$i" + CONFPCI=${!CONFDEV} + done +fi + +/etc/sysconfig/net.conf +----------------------- +DEV0="0000:01:00.0" +DEV1="0000:02:00.0" + +/etc/udev/rules.d/90-network.rules +------------------------------------- +ACTION=="add", SUBSYSTEM=="net", PROGRAM="/lib/udev/net.sh", RESULT=="?*", NAME="$result" + + +* Login to all targets from initiator sometimes times out. + It may be a network problem (try running tools like ibdiagnet + and rping between target and initiator hosts). The description of those tools + is beyond the scope of this readme. + Another issue may be that you failed to set net.ipv4.conf.all.arp_ignore sysctl + to the value of 2 (see above problem for more detailed explanation). + + +* When running IO, latency is getting higher and higher all the time. + If you have enabled intel_iommu either in kernel command line or in + kernel config (it may be enabled by default), you should specify + iommu=pt on kernel command line to avoid the latency issue. + + diff --git a/iscsi-scst/README.iser_ofed b/iscsi-scst/README.iser_ofed new file mode 100644 index 00000000..7ff2a930 --- /dev/null +++ b/iscsi-scst/README.iser_ofed @@ -0,0 +1,115 @@ +iSCSI Extensins for RDMA (iSER) Target driver for Linux +======================================================= + +Introduction +------------ + +The iSER target driver has been designed to work on top of the Linux +InfiniBand kernel drivers. While all recent Linux distributions +include recent versions of the InfiniBand drivers, the only way to +obtain the latest available InfiniBand drivers is by installing the +OFED or MLNX_OFED (for Mellanox drivers) software stack. + +The OFED stack is distributed by the OpenFabrics Alliance (OFA). The +mission of the OpenFabrics Alliance is to is to develop, distribute +and promote a unified, transport-independent, open-source software +stack for RDMA-capable fabrics and networks, including InfiniBand and +Ethernet. + +The MLNX_OFED is distributed by Mellanox and can be obtained from +http://www.mellanox.com/page/products_dyn?product_family=26 + +Note: because during OFED installation the distro-provided InfiniBand +kernel drivers are replaced, doing so voids the support contract +offered by your Linux distributor. + +Please follow the instructions below carefully. Skipping a step may +result in kernel modules that fail to load, a kernel oops or even a +system that does no longer boot. + + +Verifying the kernel version +---------------------------- + +Before installing the OFED distribution, it is very important to check +the OFED release notes. Each OFED distribution has been tested +carefully, but only against the kernel versions specified in +docs/OFED_release_notes.txt (you can find this document in the OFED +distribution). Make sure that you are using a supported kernel / OFED +combination. As an example, if you want to use OFED 1.5.1 on an Ubuntu +system, you will have to start with replacing the Ubuntu kernel by a +kernel from kernel.org since OFED 1.5.1 has not been tested on any +Ubuntu kernel. + + +Compiling iSER against OFED +-------------------------- + +Make sure that all necessary packages needed for kernel compilation +have been installed (kernel headers, gcc, binutils, ...). + +Unload any loaded InfiniBand drivers: + + /etc/init.d/opensmd stop + /etc/init.d/openibd stop + +Remove any distro-provided InfiniBand drivers: + + rm -rf /lib/modules/$(uname -r)/kernel/drivers/infiniband + rm -rf /lib/modules/$(uname -r)/kernel/drivers/net/mlx4 + +Next, download and install an OFED pacakge. + +For MLNX_OFED, just run the mlnxofedinstall script inside the MLNX_OFED directory. + +NOTE TO ADVANCED USERS: +------------------------ +If you are installing MLNX_OFED by manually selecting which RPMs/DEBs to install, +make sure ofed_scripts package is one of them, since it is required for correct OFED +version detection by iscsi-scst makefile. + + +For the OFED package.Make sure to enable +at least the kernel-ib and kernel-ib-devel packages (compat-rdma and compat-rdma-devel for OFED 3.5 and above). +An example: + + wget http://www.openfabrics.org/downloads/OFED/ofed-1.5.1/OFED-1.5.1.tgz + tar xzf OFED-1.5.1.tgz + cd OFED-1.5.1 + cat <ofed.conf + libibverbs=y + libibverbs-utils=y + libmthca=y + libmlx4=y + libcxgb3=y + libnes=y + libipathverbs=y + librdmacm=y + librdmacm-utils=y + mstflint=y + ofed-docs=y + ofed-scripts=y + kernel-ib=y + kernel-ib-devel=y + ibvexdmtools=y + qlgc_vnic_daemon=y + core=y + mthca=y + mlx4=y + mlx4_en=y + cxgb3=y + nes=y + ipath=y + ipoib=y + opensm=y + opensm-libs=y + srpt=n + srptools=y + perftest=y + EOF + ./install.pl -c ofed.conf + +Now continue with the installation instructions you can find in the +ISCSI-SCST README file. The Makefile included with ISCSI-SCST detects +whether OFED has been installed, and if so, compiles ISCSIS-SCST with +the OFED kernel headers instead of with the regular kernel headers. diff --git a/iscsi-scst/include/iscsi_scst.h b/iscsi-scst/include/iscsi_scst.h index 19ae2c2b..f64d5009 100644 --- a/iscsi-scst/include/iscsi_scst.h +++ b/iscsi-scst/include/iscsi_scst.h @@ -61,6 +61,13 @@ enum { key_ifmarker, key_ofmarkint, key_ifmarkint, + key_rdma_extensions, + key_target_recv_data_length, + key_initiator_recv_data_length, + key_max_ahs_length, + key_tagged_buffer_for_solicited_data_only, + key_iser_hello_required, + key_max_outstanding_unexpected_pdus, session_key_last, }; diff --git a/iscsi-scst/include/iscsit_transport.h b/iscsi-scst/include/iscsit_transport.h new file mode 100644 index 00000000..64eade89 --- /dev/null +++ b/iscsi-scst/include/iscsit_transport.h @@ -0,0 +1,71 @@ + +#ifndef __ISCSI_TRANSPORT_H__ +#define __ISCSI_TRANSPORT_H__ + +#include +#include + +#ifdef INSIDE_KERNEL_TREE +#include +#else +#include +#endif + +/* Forward declarations */ +struct iscsi_session; +struct iscsi_kern_conn_info; +struct iscsi_conn; + +enum iscsit_transport_type { + ISCSI_TCP, + ISCSI_RDMA, +}; + +struct iscsit_transport { + struct iscsi_cmnd* (*iscsit_alloc_cmd)(struct iscsi_conn *conn, + struct iscsi_cmnd *parent); + void (*iscsit_preprocessing_done)(struct iscsi_cmnd *cmnd); + void (*iscsit_send_data_rsp)(struct iscsi_cmnd *req, u8 *sense, + int sense_len, u8 status, + int send_status); + int (*iscsit_send_locally)(struct iscsi_cmnd *cmnd, + unsigned int cmd_count); + void (*iscsit_set_sense_data)(struct iscsi_cmnd *rsp, + const u8 *sense_buf, int sense_len); + int (*iscsit_receive_cmnd_data)(struct iscsi_cmnd *cmnd); + void (*iscsit_make_conn_wr_active)(struct iscsi_conn *conn); + void (*iscsit_free_cmd)(struct iscsi_cmnd *cmnd); + + void (*iscsit_set_req_data)(struct iscsi_cmnd *req, + struct iscsi_cmnd *rsp); + + int (*iscsit_conn_alloc)(struct iscsi_session *session, + struct iscsi_kern_conn_info *info, + struct iscsi_conn **new_conn, + struct iscsit_transport *transport); + int (*iscsit_conn_activate)(struct iscsi_conn *conn); + void (*iscsit_conn_free)(struct iscsi_conn *conn); + void (*iscsit_conn_close)(struct iscsi_conn *conn, int flags); + void (*iscsit_mark_conn_closed)(struct iscsi_conn *conn, int flags); + + ssize_t (*iscsit_get_initiator_ip)(struct iscsi_conn *conn, char *buf, + int size); + + void (*iscsit_close_all_portals)(void); + +#if !defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) + unsigned int need_alloc_write_buf:1; +#endif + + struct module *owner; + const char name[SCST_MAX_NAME]; + enum iscsit_transport_type transport_type; + struct list_head transport_list_entry; +} ____cacheline_aligned; + +extern int iscsit_reg_transport(struct iscsit_transport *t); +extern void iscsit_unreg_transport(struct iscsit_transport *t); +extern struct iscsit_transport *iscsit_get_transport(enum iscsit_transport_type type); + +#endif /* __ISCSI_TRANSPORT_H__ */ + diff --git a/iscsi-scst/include/isert_scst.h b/iscsi-scst/include/isert_scst.h new file mode 100644 index 00000000..1477cfdf --- /dev/null +++ b/iscsi-scst/include/isert_scst.h @@ -0,0 +1,24 @@ +#ifndef _ISERT_SCST_U_H +#define _ISERT_SCST_U_H + +#ifdef __KERNEL__ +#include +#include +#else +#include +#include +#endif + +struct isert_addr_info { + struct sockaddr_storage addr; + size_t addr_len; +}; + +#define ISERT_MAX_PORTALS 32 + +#define SET_LISTEN_ADDR _IOW('y', 0, struct isert_addr_info) +#define RDMA_CORK _IOW('y', 1, int) +#define GET_PORTAL_ADDR _IOW('y', 2, struct isert_addr_info) +#define DISCOVERY_SESSION _IOW('y', 3, int) + +#endif diff --git a/iscsi-scst/kernel/Makefile b/iscsi-scst/kernel/Makefile index 9ee51b4e..39c837a2 100644 --- a/iscsi-scst/kernel/Makefile +++ b/iscsi-scst/kernel/Makefile @@ -37,5 +37,6 @@ EXTRA_CFLAGS += -DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions obj-m += iscsi-scst.o iscsi-scst-objs := iscsi.o nthread.o config.o digest.o \ - conn.o session.o target.o event.o param.o + conn.o session.o target.o event.o param.o \ + iscsit_transport.o diff --git a/iscsi-scst/kernel/conn.c b/iscsi-scst/kernel/conn.c index 2a0c3159..309e2d4a 100644 --- a/iscsi-scst/kernel/conn.c +++ b/iscsi-scst/kernel/conn.c @@ -20,6 +20,7 @@ #include "iscsi.h" #include "digest.h" +#include "iscsit_transport.h" #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) #if defined(CONFIG_LOCKDEP) && !defined(CONFIG_SCST_PROC) @@ -157,45 +158,7 @@ struct kobj_type iscsi_conn_ktype = { static ssize_t iscsi_get_initiator_ip(struct iscsi_conn *conn, char *buf, int size) { - int pos; - struct sock *sk; - - TRACE_ENTRY(); - - sk = conn->sock->sk; - switch (sk->sk_family) { - case AF_INET: - pos = scnprintf(buf, size, -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) - "%u.%u.%u.%u", NIPQUAD(inet_sk(sk)->daddr)); -#else - "%pI4", &inet_sk(sk)->inet_daddr); -#endif - break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) - pos = scnprintf(buf, size, - "[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]", - NIP6(inet6_sk(sk)->daddr)); -#else -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) && \ - (!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 7) - pos = scnprintf(buf, size, "[%p6]", &inet6_sk(sk)->daddr); -#else - pos = scnprintf(buf, size, "[%p6]", &sk->sk_v6_daddr); -#endif -#endif - break; -#endif - default: - pos = scnprintf(buf, size, "Unknown family %d", - sk->sk_family); - break; - } - - TRACE_EXIT_RES(pos); - return pos; + return conn->transport->iscsit_get_initiator_ip(conn, buf, size); } static ssize_t iscsi_conn_ip_show(struct kobject *kobj, @@ -273,7 +236,7 @@ static void conn_sysfs_del(struct iscsi_conn *conn) return; } -static int conn_sysfs_add(struct iscsi_conn *conn) +int conn_sysfs_add(struct iscsi_conn *conn) { int res; struct iscsi_session *session = conn->session; @@ -345,6 +308,7 @@ static int conn_sysfs_add(struct iscsi_conn *conn) conn_sysfs_del(conn); goto out; } +EXPORT_SYMBOL(conn_sysfs_add); #endif /* CONFIG_SCST_PROC */ @@ -429,7 +393,7 @@ void iscsi_make_conn_wr_active(struct iscsi_conn *conn) return; } -void __mark_conn_closed(struct iscsi_conn *conn, int flags) +void iscsi_tcp_mark_conn_closed(struct iscsi_conn *conn, int flags) { spin_lock_bh(&conn->conn_thr_pool->rd_lock); conn->closing = 1; @@ -442,10 +406,16 @@ void __mark_conn_closed(struct iscsi_conn *conn, int flags) iscsi_make_conn_rd_active(conn); } +void __mark_conn_closed(struct iscsi_conn *conn, int flags) +{ + conn->transport->iscsit_mark_conn_closed(conn, flags); +} + void mark_conn_closed(struct iscsi_conn *conn) { __mark_conn_closed(conn, ISCSI_CONN_ACTIVE_CLOSE); } +EXPORT_SYMBOL(mark_conn_closed); static void __iscsi_state_change(struct sock *sk) { @@ -752,7 +722,7 @@ void conn_reinst_finished(struct iscsi_conn *conn) return; } -static void conn_activate(struct iscsi_conn *conn) +int conn_activate(struct iscsi_conn *conn) { TRACE_MGMT_DBG("Enabling conn %p", conn); @@ -778,7 +748,7 @@ static void conn_activate(struct iscsi_conn *conn) */ __iscsi_state_change(conn->sock->sk); - return; + return 0; } /* @@ -820,8 +790,19 @@ static int conn_setup_sock(struct iscsi_conn *conn) return res; } +void iscsi_tcp_conn_free(struct iscsi_conn *conn) +{ + fput(conn->file); + conn->file = NULL; + conn->sock = NULL; + + free_page((unsigned long)conn->read_iov); + + kmem_cache_free(iscsi_conn_cache, conn); +} + /* target_mutex supposed to be locked */ -int conn_free(struct iscsi_conn *conn) +void conn_free(struct iscsi_conn *conn) { struct iscsi_session *session = conn->session; @@ -860,46 +841,19 @@ int conn_free(struct iscsi_conn *conn) list_del(&conn->conn_list_entry); - fput(conn->file); - conn->file = NULL; - conn->sock = NULL; - - free_page((unsigned long)conn->read_iov); - - kmem_cache_free(iscsi_conn_cache, conn); + conn->transport->iscsit_conn_free(conn); if (list_empty(&session->conn_list)) { sBUG_ON(session->sess_reinst_successor != NULL); session_free(session, true); } - - return 0; } -/* target_mutex supposed to be locked */ -static int iscsi_conn_alloc(struct iscsi_session *session, - struct iscsi_kern_conn_info *info, struct iscsi_conn **new_conn) +int iscsi_init_conn(struct iscsi_session *session, + struct iscsi_kern_conn_info *info, + struct iscsi_conn *conn) { - struct iscsi_conn *conn; - int res = 0; - - lockdep_assert_held(&session->target->target_mutex); - - conn = kmem_cache_zalloc(iscsi_conn_cache, GFP_KERNEL); - if (!conn) { - res = -ENOMEM; - goto out_err; - } - - TRACE_MGMT_DBG("Creating connection %p for sid %#Lx, cid %u", conn, - (unsigned long long int)session->sid, info->cid); - - /* Changing it, change ISCSI_CONN_IOV_MAX as well !! */ - conn->read_iov = (void *)get_zeroed_page(GFP_KERNEL); - if (conn->read_iov == NULL) { - res = -ENOMEM; - goto out_err_free_conn; - } + int res; atomic_set(&conn->conn_ref_cnt, 0); conn->session = session; @@ -915,7 +869,7 @@ static int iscsi_conn_alloc(struct iscsi_session *session, conn->ddigest_type = session->sess_params.data_digest; res = digest_init(conn); if (res != 0) - goto out_free_iov; + return res; conn->target = session->target; spin_lock_init(&conn->cmd_list_lock); @@ -950,6 +904,42 @@ static int iscsi_conn_alloc(struct iscsi_session *session, conn->nop_in_interval + ISCSI_ADD_SCHED_TIME); } + return 0; +} +EXPORT_SYMBOL(iscsi_init_conn); + +/* target_mutex supposed to be locked */ +int iscsi_conn_alloc(struct iscsi_session *session, + struct iscsi_kern_conn_info *info, struct iscsi_conn **new_conn, + struct iscsit_transport *t) +{ + struct iscsi_conn *conn; + int res = 0; + + lockdep_assert_held(&session->target->target_mutex); + + conn = kmem_cache_zalloc(iscsi_conn_cache, GFP_KERNEL); + if (!conn) { + res = -ENOMEM; + goto out_err; + } + + TRACE_MGMT_DBG("Creating connection %p for sid %#Lx, cid %u", conn, + (unsigned long long int)session->sid, info->cid); + + conn->transport = t; + + /* Changing it, change ISCSI_CONN_IOV_MAX as well !! */ + conn->read_iov = (struct iovec *)get_zeroed_page(GFP_KERNEL); + if (conn->read_iov == NULL) { + res = -ENOMEM; + goto out_err_free_conn; + } + + res = iscsi_init_conn(session, info, conn); + if (res != 0) + goto out_free_iov; + conn->file = fget(info->fd); res = conn_setup_sock(conn); @@ -988,6 +978,7 @@ int __add_conn(struct iscsi_session *session, struct iscsi_kern_conn_info *info) struct iscsi_conn *conn, *new_conn = NULL; int err; bool reinstatement = false; + struct iscsit_transport *t; lockdep_assert_held(&session->target->target_mutex); @@ -1001,7 +992,16 @@ int __add_conn(struct iscsi_session *session, struct iscsi_kern_conn_info *info) goto out; } - err = iscsi_conn_alloc(session, info, &new_conn); + if (session->sess_params.rdma_extensions) + t = iscsit_get_transport(ISCSI_RDMA); + else + t = iscsit_get_transport(ISCSI_TCP); + if (!t) { + err = -ENOENT; + goto out; + } + + err = t->iscsit_conn_alloc(session, info, &new_conn, t); if (err != 0) goto out; @@ -1013,7 +1013,7 @@ int __add_conn(struct iscsi_session *session, struct iscsi_kern_conn_info *info) __mark_conn_closed(conn, 0); } - conn_activate(new_conn); + err = t->iscsit_conn_activate(new_conn); out: return err; diff --git a/iscsi-scst/kernel/iscsi.c b/iscsi-scst/kernel/iscsi.c index 512ab9aa..cdd7c831 100644 --- a/iscsi-scst/kernel/iscsi.c +++ b/iscsi-scst/kernel/iscsi.c @@ -26,6 +26,7 @@ #include "iscsi.h" #include "digest.h" +#include "iscsit_transport.h" #ifndef GENERATING_UPSTREAM_PATCH #if !defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) @@ -68,7 +69,6 @@ static void req_cmnd_release(struct iscsi_cmnd *req); static int cmnd_insert_data_wait_hash(struct iscsi_cmnd *cmnd); static void iscsi_cmnd_init_write(struct iscsi_cmnd *rsp, int flags); static void iscsi_set_resid_no_scst_cmd(struct iscsi_cmnd *rsp); -static void iscsi_set_resid(struct iscsi_cmnd *rsp); static void iscsi_set_not_received_data_len(struct iscsi_cmnd *req, unsigned int not_received) @@ -242,7 +242,7 @@ static struct iscsi_cmnd *iscsi_create_tm_clone(struct iscsi_cmnd *cmnd) TRACE_ENTRY(); - tm_clone = cmnd_alloc(cmnd->conn, NULL); + tm_clone = cmnd->conn->transport->iscsit_alloc_cmd(cmnd->conn, NULL); if (tm_clone != NULL) { set_bit(ISCSI_CMD_ABORTED, &tm_clone->prelim_compl_flags); tm_clone->pdu = cmnd->pdu; @@ -309,14 +309,9 @@ void iscsi_fail_data_waiting_cmnd(struct iscsi_cmnd *cmnd) return; } -struct iscsi_cmnd *cmnd_alloc(struct iscsi_conn *conn, - struct iscsi_cmnd *parent) +void iscsi_cmnd_init(struct iscsi_conn *conn, struct iscsi_cmnd *cmnd, + struct iscsi_cmnd *parent) { - struct iscsi_cmnd *cmnd; - - /* ToDo: __GFP_NOFAIL?? */ - cmnd = kmem_cache_zalloc(iscsi_cmnd_cache, GFP_KERNEL|__GFP_NOFAIL); - atomic_set(&cmnd->ref_cnt, 1); cmnd->scst_state = ISCSI_CMD_STATE_NEW; cmnd->conn = conn; @@ -325,9 +320,6 @@ struct iscsi_cmnd *cmnd_alloc(struct iscsi_conn *conn, if (parent == NULL) { conn_get(conn); -#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) - atomic_set(&cmnd->net_ref_cnt, 0); -#endif INIT_LIST_HEAD(&cmnd->rsp_cmd_list); INIT_LIST_HEAD(&cmnd->rx_ddigest_cmd_list); cmnd->target_task_tag = ISCSI_RESERVED_TAG_CPU32; @@ -336,6 +328,24 @@ struct iscsi_cmnd *cmnd_alloc(struct iscsi_conn *conn, list_add_tail(&cmnd->cmd_list_entry, &conn->cmd_list); spin_unlock_bh(&conn->cmd_list_lock); } +} +EXPORT_SYMBOL(iscsi_cmnd_init); + +struct iscsi_cmnd *cmnd_alloc(struct iscsi_conn *conn, + struct iscsi_cmnd *parent) +{ + struct iscsi_cmnd *cmnd; + + /* ToDo: __GFP_NOFAIL?? */ + cmnd = kmem_cache_zalloc(iscsi_cmnd_cache, GFP_KERNEL|__GFP_NOFAIL); + + iscsi_cmnd_init(conn, cmnd, parent); + + if (parent == NULL) { +#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) + atomic_set(&cmnd->net_ref_cnt, 0); +#endif + } TRACE_DBG("conn %p, parent %p, cmnd %p", conn, parent, cmnd); return cmnd; @@ -496,10 +506,10 @@ void cmnd_done(struct iscsi_cmnd *cmnd) list_for_each_entry_safe(rsp, t, &cmnd->rsp_cmd_list, rsp_cmd_list_entry) { - cmnd_free(rsp); + cmnd->conn->transport->iscsit_free_cmd(rsp); } - cmnd_free(cmnd); + cmnd->conn->transport->iscsit_free_cmd(cmnd); } else { struct iscsi_cmnd *parent = cmnd->parent_req; @@ -532,6 +542,7 @@ void cmnd_done(struct iscsi_cmnd *cmnd) TRACE_EXIT(); return; } +EXPORT_SYMBOL(cmnd_done); /* * Corresponding conn may also get destroyed after this function, except only @@ -633,8 +644,9 @@ void req_cmnd_release_force(struct iscsi_cmnd *req) TRACE_EXIT(); return; } +EXPORT_SYMBOL(req_cmnd_release_force); -static void req_cmnd_pre_release(struct iscsi_cmnd *req) +void req_cmnd_pre_release(struct iscsi_cmnd *req) { struct iscsi_cmnd *c, *t; @@ -694,6 +706,7 @@ static void req_cmnd_pre_release(struct iscsi_cmnd *req) TRACE_EXIT(); return; } +EXPORT_SYMBOL(req_cmnd_pre_release); /* * Corresponding conn may also get destroyed after this function, except only @@ -728,6 +741,7 @@ void rsp_cmnd_release(struct iscsi_cmnd *cmnd) cmnd_put(cmnd); return; } +EXPORT_SYMBOL(rsp_cmnd_release); static struct iscsi_cmnd *iscsi_alloc_rsp(struct iscsi_cmnd *parent) { @@ -735,7 +749,7 @@ static struct iscsi_cmnd *iscsi_alloc_rsp(struct iscsi_cmnd *parent) TRACE_ENTRY(); - rsp = cmnd_alloc(parent->conn, parent); + rsp = parent->conn->transport->iscsit_alloc_cmd(parent->conn, parent); TRACE_DBG("Adding rsp %p to parent %p", rsp, parent); list_add_tail(&rsp->rsp_cmd_list_entry, &parent->rsp_cmd_list); @@ -797,7 +811,7 @@ static void iscsi_cmnds_init_write(struct list_head *send, int flags) spin_unlock_bh(&conn->write_list_lock); if (flags & ISCSI_INIT_WRITE_WAKE) - iscsi_make_conn_wr_active(conn); + conn->transport->iscsit_make_conn_wr_active(conn); return; } @@ -876,7 +890,7 @@ static void iscsi_set_resid_no_scst_cmd(struct iscsi_cmnd *rsp) return; } -static void iscsi_set_resid(struct iscsi_cmnd *rsp) +void iscsi_set_resid(struct iscsi_cmnd *rsp) { struct iscsi_cmnd *req = rsp->parent_req; struct scst_cmd *scst_cmd = req->scst_cmd; @@ -930,6 +944,7 @@ static void iscsi_set_resid(struct iscsi_cmnd *rsp) TRACE_EXIT(); return; } +EXPORT_SYMBOL(iscsi_set_resid); static void send_data_rsp(struct iscsi_cmnd *req, u8 status, int send_status) { @@ -992,12 +1007,25 @@ static void send_data_rsp(struct iscsi_cmnd *req, u8 status, int send_status) return; } +static void iscsi_tcp_set_sense_data(struct iscsi_cmnd *rsp, + const u8 *sense_buf, int sense_len) +{ + struct scatterlist *sg; + + sg = rsp->sg = rsp->rsp_sg; + rsp->sg_cnt = 2; + rsp->own_sg = 1; + + sg_init_table(sg, 2); + sg_set_buf(&sg[0], &rsp->sense_hdr, sizeof(rsp->sense_hdr)); + sg_set_buf(&sg[1], (u8 *)sense_buf, sense_len); +} + static void iscsi_init_status_rsp(struct iscsi_cmnd *rsp, int status, const u8 *sense_buf, int sense_len) { struct iscsi_cmnd *req = rsp->parent_req; struct iscsi_scsi_rsp_hdr *rsp_hdr; - struct scatterlist *sg; TRACE_ENTRY(); @@ -1011,16 +1039,11 @@ static void iscsi_init_status_rsp(struct iscsi_cmnd *rsp, if (scst_sense_valid(sense_buf)) { TRACE_DBG("%s", "SENSE VALID"); - sg = rsp->sg = rsp->rsp_sg; - rsp->sg_cnt = 2; - rsp->own_sg = 1; - - sg_init_table(sg, 2); - sg_set_buf(&sg[0], &rsp->sense_hdr, sizeof(rsp->sense_hdr)); - sg_set_buf(&sg[1], (u8 *)sense_buf, sense_len); - rsp->sense_hdr.length = cpu_to_be16(sense_len); + rsp->conn->transport->iscsit_set_sense_data(rsp, sense_buf, + sense_len); + rsp->pdu.datasize = sizeof(rsp->sense_hdr) + sense_len; rsp->bufflen = rsp->pdu.datasize; } else { @@ -1032,7 +1055,7 @@ static void iscsi_init_status_rsp(struct iscsi_cmnd *rsp, return; } -static inline struct iscsi_cmnd *create_status_rsp(struct iscsi_cmnd *req, +struct iscsi_cmnd *create_status_rsp(struct iscsi_cmnd *req, int status, const u8 *sense_buf, int sense_len) { struct iscsi_cmnd *rsp; @@ -1048,6 +1071,26 @@ static inline struct iscsi_cmnd *create_status_rsp(struct iscsi_cmnd *req, TRACE_EXIT_HRES((unsigned long)rsp); return rsp; } +EXPORT_SYMBOL(create_status_rsp); + +static void iscsi_tcp_send_data_rsp(struct iscsi_cmnd *req, u8 *sense, + int sense_len, u8 status, + int is_send_status) +{ + if ((status != SAM_STAT_CHECK_CONDITION) && + ((cmnd_hdr(req)->flags & (ISCSI_CMD_WRITE|ISCSI_CMD_READ)) != + (ISCSI_CMD_WRITE|ISCSI_CMD_READ))) { + send_data_rsp(req, status, is_send_status); + } else { + struct iscsi_cmnd *rsp; + send_data_rsp(req, 0, 0); + if (is_send_status) { + rsp = create_status_rsp(req, status, sense, + sense_len); + iscsi_cmnd_init_write(rsp, 0); + } + } +} /* * Initializes data receive fields. Can be called only when they have not been @@ -1224,7 +1267,7 @@ static inline int iscsi_get_allowed_cmds(struct iscsi_session *sess) return res; } -static __be32 cmnd_set_sn(struct iscsi_cmnd *cmnd, int set_stat_sn) +__be32 cmnd_set_sn(struct iscsi_cmnd *cmnd, int set_stat_sn) { struct iscsi_conn *conn = cmnd->conn; struct iscsi_session *sess = conn->session; @@ -1243,6 +1286,7 @@ static __be32 cmnd_set_sn(struct iscsi_cmnd *cmnd, int set_stat_sn) spin_unlock(&sess->sn_lock); return res; } +EXPORT_SYMBOL(cmnd_set_sn); /* Called under sn_lock */ static void update_stat_sn(struct iscsi_cmnd *cmnd) @@ -1750,7 +1794,7 @@ static int nop_out_start(struct iscsi_cmnd *cmnd) size = cmnd->pdu.datasize; - if (size) { + if (size && !conn->session->sess_params.rdma_extensions) { if (cmnd->pdu.bhs.itt != ISCSI_RESERVED_TAG) { struct scatterlist *sg; @@ -1810,16 +1854,87 @@ static int nop_out_start(struct iscsi_cmnd *cmnd) return err; } -int cmnd_rx_continue(struct iscsi_cmnd *req) +int iscsi_cmnd_set_write_buf(struct iscsi_cmnd *req) { struct iscsi_conn *conn = req->conn; struct iscsi_session *session = conn->session; struct iscsi_scsi_cmd_hdr *req_hdr = cmnd_hdr(req); struct scst_cmd *scst_cmd = req->scst_cmd; - scst_data_direction dir; bool unsolicited_data_expected = false; int res = 0; + req->bufflen = scst_cmd_get_write_fields(scst_cmd, &req->sg, + &req->sg_cnt); + unsolicited_data_expected = !(req_hdr->flags & ISCSI_CMD_FINAL); + + if (unlikely(session->sess_params.initial_r2t && + unsolicited_data_expected)) { + PRINT_ERROR("Initiator %s violated negotiated " + "parameters: initial R2T is required (ITT %x, " + "op %x)", session->initiator_name, + req->pdu.bhs.itt, req_hdr->scb[0]); + res = -EINVAL; + goto out_close; + } + + if (unlikely(!session->sess_params.immediate_data && + req->pdu.datasize)) { + PRINT_ERROR("Initiator %s violated negotiated " + "parameters: forbidden immediate data sent " + "(ITT %x, op %x)", session->initiator_name, + req->pdu.bhs.itt, req_hdr->scb[0]); + res = -EINVAL; + goto out_close; + } + + if (unlikely(session->sess_params.first_burst_length < req->pdu.datasize)) { + PRINT_ERROR("Initiator %s violated negotiated " + "parameters: immediate data len (%d) > " + "first_burst_length (%d) (ITT %x, op %x)", + session->initiator_name, + req->pdu.datasize, + session->sess_params.first_burst_length, + req->pdu.bhs.itt, req_hdr->scb[0]); + res = -EINVAL; + goto out_close; + } + + req->r2t_len_to_receive = be32_to_cpu(req_hdr->data_length) - + req->pdu.datasize; + + /* + * In case of residual overflow req->r2t_len_to_receive and + * req->pdu.datasize might be > req->bufflen + */ + + res = cmnd_insert_data_wait_hash(req); + + if (unsolicited_data_expected) { + req->outstanding_r2t = 1; + req->r2t_len_to_send = req->r2t_len_to_receive - + min_t(unsigned int, + session->sess_params.first_burst_length - + req->pdu.datasize, + req->r2t_len_to_receive); + } else + req->r2t_len_to_send = req->r2t_len_to_receive; + + if (likely(res == 0)) + req_add_to_write_timeout_list(req); + +out_close: + return res; +} +EXPORT_SYMBOL(iscsi_cmnd_set_write_buf); + +int cmnd_rx_continue(struct iscsi_cmnd *req) +{ + struct iscsi_conn *conn = req->conn; + struct iscsi_scsi_cmd_hdr *req_hdr = cmnd_hdr(req); + struct scst_cmd *scst_cmd = req->scst_cmd; + scst_data_direction dir; + int res = 0; + TRACE_ENTRY(); TRACE_DBG("scsi command: %x", req_hdr->scb[0]); @@ -1845,48 +1960,7 @@ int cmnd_rx_continue(struct iscsi_cmnd *req) /* For prelim completed commands sg & K can be already set! */ if (dir & SCST_DATA_WRITE) { - req->bufflen = scst_cmd_get_write_fields(scst_cmd, &req->sg, - &req->sg_cnt); - unsolicited_data_expected = !(req_hdr->flags & ISCSI_CMD_FINAL); - - if (unlikely(session->sess_params.initial_r2t && - unsolicited_data_expected)) { - PRINT_ERROR("Initiator %s violated negotiated " - "parameters: initial R2T is required (ITT %x, " - "op %x)", session->initiator_name, - req->pdu.bhs.itt, req_hdr->scb[0]); - goto out_close; - } - - if (unlikely(!session->sess_params.immediate_data && - req->pdu.datasize)) { - PRINT_ERROR("Initiator %s violated negotiated " - "parameters: forbidden immediate data sent " - "(ITT %x, op %x)", session->initiator_name, - req->pdu.bhs.itt, req_hdr->scb[0]); - goto out_close; - } - - if (unlikely(session->sess_params.first_burst_length < req->pdu.datasize)) { - PRINT_ERROR("Initiator %s violated negotiated " - "parameters: immediate data len (%d) > " - "first_burst_length (%d) (ITT %x, op %x)", - session->initiator_name, - req->pdu.datasize, - session->sess_params.first_burst_length, - req->pdu.bhs.itt, req_hdr->scb[0]); - goto out_close; - } - - req->r2t_len_to_receive = be32_to_cpu(req_hdr->data_length) - - req->pdu.datasize; - - /* - * In case of residual overflow req->r2t_len_to_receive and - * req->pdu.datasize might be > req->bufflen - */ - - res = cmnd_insert_data_wait_hash(req); + res = iscsi_cmnd_set_write_buf(req); if (unlikely(res != 0)) { /* * We have to close connection, because otherwise a data @@ -1897,18 +1971,6 @@ int cmnd_rx_continue(struct iscsi_cmnd *req) goto out_close; } - if (unsolicited_data_expected) { - req->outstanding_r2t = 1; - req->r2t_len_to_send = req->r2t_len_to_receive - - min_t(unsigned int, - session->sess_params.first_burst_length - - req->pdu.datasize, - req->r2t_len_to_receive); - } else - req->r2t_len_to_send = req->r2t_len_to_receive; - - req_add_to_write_timeout_list(req); - if (req->pdu.datasize) { res = cmnd_prepare_recv_pdu(conn, req, 0, req->pdu.datasize); /* For performance better to send R2Ts ASAP */ @@ -1930,11 +1992,9 @@ int cmnd_rx_continue(struct iscsi_cmnd *req) } trace: - TRACE_DBG("req=%p, dir=%d, unsolicited_data_expected=%d, " - "r2t_len_to_receive=%d, r2t_len_to_send=%d, bufflen=%d, " - "own_sg %d", req, dir, unsolicited_data_expected, - req->r2t_len_to_receive, req->r2t_len_to_send, req->bufflen, - req->own_sg); + TRACE_DBG("req=%p, dir=%d, r2t_len_to_receive=%d, r2t_len_to_send=%d, " + "bufflen=%d, own_sg %d", req, dir, req->r2t_len_to_receive, + req->r2t_len_to_send, req->bufflen, req->own_sg); out: TRACE_EXIT_RES(res); @@ -1994,7 +2054,8 @@ static int scsi_cmnd_start(struct iscsi_cmnd *req) scst_cmd_set_expected_out_transfer_len(scst_cmd, be32_to_cpu(req_hdr->data_length)); #if !defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) - scst_cmd_set_tgt_need_alloc_data_buf(scst_cmd); + if (conn->transport->need_alloc_write_buf) + scst_cmd_set_tgt_need_alloc_data_buf(scst_cmd); #endif } } else if (req_hdr->flags & ISCSI_CMD_READ) { @@ -2002,7 +2063,8 @@ static int scsi_cmnd_start(struct iscsi_cmnd *req) scst_cmd_set_expected(scst_cmd, dir, be32_to_cpu(req_hdr->data_length)); #if !defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) - scst_cmd_set_tgt_need_alloc_data_buf(scst_cmd); + if (conn->transport->need_alloc_write_buf) + scst_cmd_set_tgt_need_alloc_data_buf(scst_cmd); #endif } else if (req_hdr->flags & ISCSI_CMD_WRITE) { dir = SCST_DATA_WRITE; @@ -2069,7 +2131,7 @@ static int scsi_cmnd_start(struct iscsi_cmnd *req) scst_cmd_init_stage1_done(scst_cmd, SCST_CONTEXT_DIRECT, 0); if (req->scst_state != ISCSI_CMD_STATE_RX_CMD) - res = cmnd_rx_continue(req); + res = req->conn->transport->iscsit_receive_cmnd_data(req); else { TRACE_DBG("Delaying req %p post processing (scst_state %d)", req, req->scst_state); @@ -2680,6 +2742,14 @@ static void execute_task_management(struct iscsi_cmnd *req) return; } +static void iscsi_tcp_set_req_data(struct iscsi_cmnd *req, + struct iscsi_cmnd *rsp) +{ + rsp->sg = req->sg; + rsp->sg_cnt = req->sg_cnt; + rsp->bufflen = req->bufflen; +} + static void nop_out_exec(struct iscsi_cmnd *req) { struct iscsi_cmnd *rsp; @@ -2703,11 +2773,8 @@ static void nop_out_exec(struct iscsi_cmnd *req) else sBUG_ON(req->sg != NULL); - if (req->sg) { - rsp->sg = req->sg; - rsp->sg_cnt = req->sg_cnt; - rsp->bufflen = req->bufflen; - } + if (req->bufflen) + req->conn->transport->iscsit_set_req_data(req, rsp); /* We already checked it in check_segment_length() */ sBUG_ON(get_pgcnt(req->pdu.datasize, 0) > ISCSI_CONN_IOV_MAX); @@ -3160,6 +3227,7 @@ int cmnd_rx_start(struct iscsi_cmnd *cmnd) TRACE_EXIT_RES(res); return res; } +EXPORT_SYMBOL(cmnd_rx_start); void cmnd_rx_end(struct iscsi_cmnd *cmnd) { @@ -3191,6 +3259,51 @@ void cmnd_rx_end(struct iscsi_cmnd *cmnd) TRACE_EXIT(); return; } +EXPORT_SYMBOL(cmnd_rx_end); + +static ssize_t iscsi_tcp_get_initiator_ip(struct iscsi_conn *conn, + char *buf, int size) +{ + int pos; + struct sock *sk; + + TRACE_ENTRY(); + + sk = conn->sock->sk; + switch (sk->sk_family) { + case AF_INET: + pos = scnprintf(buf, size, +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) + "%u.%u.%u.%u", NIPQUAD(inet_sk(sk)->daddr)); +#else + "%pI4", &inet_sk(sk)->inet_daddr); +#endif + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) + pos = scnprintf(buf, size, + "[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]", + NIP6(inet6_sk(sk)->daddr)); +#else +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) && \ + (!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 7) + pos = scnprintf(buf, size, "[%p6]", &inet6_sk(sk)->daddr); +#else + pos = scnprintf(buf, size, "[%p6]", &sk->sk_v6_daddr); +#endif +#endif + break; +#endif + default: + pos = scnprintf(buf, size, "Unknown family %d", + sk->sk_family); + break; + } + + TRACE_EXIT_RES(pos); + return pos; +} #if !defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) static int iscsi_alloc_data_buf(struct scst_cmd *cmd) @@ -3208,11 +3321,8 @@ static int iscsi_alloc_data_buf(struct scst_cmd *cmd) } #endif -static void iscsi_preprocessing_done(struct scst_cmd *scst_cmd) +static void iscsi_tcp_preprocessing_done(struct iscsi_cmnd *req) { - struct iscsi_cmnd *req = (struct iscsi_cmnd *) - scst_cmd_get_tgt_priv(scst_cmd); - TRACE_DBG("req %p", req); if (req->conn->rx_task == current) @@ -3239,8 +3349,14 @@ static void iscsi_preprocessing_done(struct scst_cmd *scst_cmd) } cmnd_put(req); } +} - return; +static void iscsi_preprocessing_done(struct scst_cmd *scst_cmd) +{ + struct iscsi_cmnd *req = (struct iscsi_cmnd *) + scst_cmd_get_tgt_priv(scst_cmd); + + req->conn->transport->iscsit_preprocessing_done(req); } /* No locks */ @@ -3301,6 +3417,52 @@ static void iscsi_try_local_processing(struct iscsi_cmnd *req) return; } +static int iscsi_tcp_send_locally(struct iscsi_cmnd *req, + unsigned int cmd_count) +{ + struct iscsi_conn *conn = req->conn; + struct iscsi_cmnd *wr_rsp, *our_rsp; + int ret = 0; + + /* + * There's no need for protection, since we are not going to + * dereference them. + */ + wr_rsp = list_first_entry(&conn->write_list, struct iscsi_cmnd, + write_list_entry); + our_rsp = list_first_entry(&req->rsp_cmd_list, struct iscsi_cmnd, + rsp_cmd_list_entry); + if (wr_rsp == our_rsp) { + /* + * This is our rsp, so let's try to process it locally to + * decrease latency. We need to call pre_release before + * processing to handle some error recovery cases. + */ + if (cmd_count <= 2) { + req_cmnd_pre_release(req); + iscsi_try_local_processing(req); + cmnd_put(req); + } else { + /* + * There's too much backend activity, so it could be + * better to push it to the write thread. + */ + ret = 1; + } + } else + ret = 1; + + return ret; +} + +static void iscsi_tcp_conn_close(struct iscsi_conn *conn, int flags) +{ + if (!flags) + conn->sock->sk->sk_prot->disconnect(conn->sock->sk, 0); + else + conn->sock->ops->shutdown(conn->sock, flags); +} + static int iscsi_xmit_response(struct scst_cmd *scst_cmd) { int is_send_status = scst_cmd_get_is_send_status(scst_cmd); @@ -3310,7 +3472,6 @@ static int iscsi_xmit_response(struct scst_cmd *scst_cmd) int status = scst_cmd_get_status(scst_cmd); u8 *sense = scst_cmd_get_sense_buffer(scst_cmd); int sense_len = scst_cmd_get_sense_buffer_len(scst_cmd); - struct iscsi_cmnd *wr_rsp, *our_rsp; EXTRACHECKS_BUG_ON(scst_cmd_atomic(scst_cmd)); @@ -3385,19 +3546,9 @@ static int iscsi_xmit_response(struct scst_cmd *scst_cmd) * so status is valid here, but in future that could change. * ToDo */ - if ((status != SAM_STAT_CHECK_CONDITION) && - ((cmnd_hdr(req)->flags & (ISCSI_CMD_WRITE|ISCSI_CMD_READ)) != - (ISCSI_CMD_WRITE|ISCSI_CMD_READ))) { - send_data_rsp(req, status, is_send_status); - } else { - struct iscsi_cmnd *rsp; - send_data_rsp(req, 0, 0); - if (is_send_status) { - rsp = create_status_rsp(req, status, sense, - sense_len); - iscsi_cmnd_init_write(rsp, 0); - } - } + req->conn->transport->iscsit_send_data_rsp(req, sense, + sense_len, status, + is_send_status); } else if (is_send_status) { struct iscsi_cmnd *rsp; rsp = create_status_rsp(req, status, sense, sense_len); @@ -3408,32 +3559,7 @@ static int iscsi_xmit_response(struct scst_cmd *scst_cmd) sBUG(); #endif - /* - * There's no need for protection, since we are not going to - * dereference them. - */ - wr_rsp = list_first_entry(&conn->write_list, struct iscsi_cmnd, - write_list_entry); - our_rsp = list_first_entry(&req->rsp_cmd_list, struct iscsi_cmnd, - rsp_cmd_list_entry); - if (wr_rsp == our_rsp) { - /* - * This is our rsp, so let's try to process it locally to - * decrease latency. We need to call pre_release before - * processing to handle some error recovery cases. - */ - if (scst_get_active_cmd_count(scst_cmd) <= 2) { - req_cmnd_pre_release(req); - iscsi_try_local_processing(req); - cmnd_put(req); - } else { - /* - * There's too much backend activity, so it could be - * better to push it to the write thread. - */ - goto out_push_to_wr_thread; - } - } else + if (conn->transport->iscsit_send_locally(req, scst_get_active_cmd_count(scst_cmd))) goto out_push_to_wr_thread; out: @@ -3442,7 +3568,7 @@ static int iscsi_xmit_response(struct scst_cmd *scst_cmd) out_push_to_wr_thread: TRACE_DBG("Waking up write thread (conn %p)", conn); req_cmnd_release(req); - iscsi_make_conn_wr_active(conn); + conn->transport->iscsit_make_conn_wr_active(conn); goto out; } @@ -3645,7 +3771,6 @@ static int iscsi_scsi_aen(struct scst_aen *aen) bool found; struct iscsi_cmnd *fake_req, *rsp; struct iscsi_async_msg_hdr *rsp_hdr; - struct scatterlist *sg; TRACE_ENTRY(); @@ -3668,7 +3793,7 @@ static int iscsi_scsi_aen(struct scst_aen *aen) } /* Create a fake request */ - fake_req = cmnd_alloc(conn, NULL); + fake_req = conn->transport->iscsit_alloc_cmd(conn, NULL); if (fake_req == NULL) { PRINT_ERROR("%s", "Unable to alloc fake AEN request"); goto out_err_unlock; @@ -3693,15 +3818,10 @@ static int iscsi_scsi_aen(struct scst_aen *aen) rsp_hdr->ffffffff = cpu_to_be32(0xffffffff); rsp_hdr->async_event = ISCSI_ASYNC_SCSI; - sg = rsp->sg = rsp->rsp_sg; - rsp->sg_cnt = 2; - rsp->own_sg = 1; + rsp->sense_hdr.length = cpu_to_be16(sense_len); - sg_init_table(sg, 2); - sg_set_buf(&sg[0], &rsp->sense_hdr, sizeof(rsp->sense_hdr)); - sg_set_buf(&sg[1], (u8 *)sense, sense_len); + rsp->conn->transport->iscsit_set_sense_data(rsp, sense, sense_len); - rsp->sense_hdr.length = cpu_to_be16(sense_len); rsp->pdu.datasize = sizeof(rsp->sense_hdr) + sense_len; rsp->bufflen = rsp->pdu.datasize; @@ -3823,7 +3943,7 @@ void iscsi_send_nop_in(struct iscsi_conn *conn) TRACE_ENTRY(); - req = cmnd_alloc(conn, NULL); + req = conn->transport->iscsit_alloc_cmd(conn, NULL); if (req == NULL) { PRINT_ERROR("%s", "Unable to alloc fake Nop-In request"); goto out_err; @@ -3958,6 +4078,30 @@ struct scst_tgt_template iscsi_template = { .get_scsi_transport_version = iscsi_get_scsi_transport_version, }; +static struct iscsit_transport iscsi_tcp_transport = { + .owner = THIS_MODULE, + .name = "iSCSI-TCP", + .transport_type = ISCSI_TCP, +#if !defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) + .need_alloc_write_buf = 1, +#endif + .iscsit_conn_alloc = iscsi_conn_alloc, + .iscsit_conn_activate = conn_activate, + .iscsit_conn_free = iscsi_tcp_conn_free, + .iscsit_alloc_cmd = cmnd_alloc, + .iscsit_free_cmd = cmnd_free, + .iscsit_preprocessing_done = iscsi_tcp_preprocessing_done, + .iscsit_send_data_rsp = iscsi_tcp_send_data_rsp, + .iscsit_make_conn_wr_active = iscsi_make_conn_wr_active, + .iscsit_mark_conn_closed = iscsi_tcp_mark_conn_closed, + .iscsit_conn_close = iscsi_tcp_conn_close, + .iscsit_get_initiator_ip = iscsi_tcp_get_initiator_ip, + .iscsit_send_locally = iscsi_tcp_send_locally, + .iscsit_set_sense_data = iscsi_tcp_set_sense_data, + .iscsit_set_req_data = iscsi_tcp_set_req_data, + .iscsit_receive_cmnd_data = cmnd_rx_continue, +}; + static void __iscsi_threads_pool_put(struct iscsi_thread_pool *p) { struct iscsi_thread *t, *tt; @@ -4119,6 +4263,10 @@ static int __init iscsi_init(void) PRINT_INFO("iSCSI SCST Target - version %s", ISCSI_VERSION_STRING); + err = iscsit_reg_transport(&iscsi_tcp_transport); + if (err) + goto out; + dummy_page = alloc_pages(GFP_KERNEL, 0); if (dummy_page == NULL) { PRINT_ERROR("%s", "Dummy page allocation failed"); @@ -4269,6 +4417,8 @@ static void __exit iscsi_exit(void) scst_unregister_target_template(&iscsi_template); + iscsit_unreg_transport(&iscsi_tcp_transport); + #if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) net_set_get_put_page_callbacks(NULL, NULL); #endif diff --git a/iscsi-scst/kernel/iscsi.h b/iscsi-scst/kernel/iscsi.h index 7f29fb8c..84aefeb2 100644 --- a/iscsi-scst/kernel/iscsi.h +++ b/iscsi-scst/kernel/iscsi.h @@ -32,6 +32,7 @@ #endif #include "iscsi_hdr.h" #include "iscsi_dbg.h" +#include "iscsit_transport.h" #define iscsi_sense_crc_error ABORTED_COMMAND, 0x47, 0x05 #define iscsi_sense_unexpected_unsolicited_data ABORTED_COMMAND, 0x0C, 0x0C @@ -57,6 +58,9 @@ struct iscsi_sess_params { int ifmarker; int ofmarkint; int ifmarkint; + int rdma_extensions; + int target_recv_data_length; + int initiator_recv_data_length; }; struct iscsi_tgt_params { @@ -192,6 +196,8 @@ struct iscsi_session { #define ISCSI_CONN_WR_STATE_PROCESSING 3 struct iscsi_conn { + struct iscsit_transport *transport; + struct iscsi_session *session; /* owning session */ /* Both protected by session->sn_lock */ @@ -314,6 +320,7 @@ struct iscsi_conn { #else struct work_struct nop_in_delayed_work; #endif + struct work_struct close_work; unsigned int nop_in_interval; /* in jiffies */ unsigned int nop_in_timeout; /* in jiffies */ struct list_head nop_req_list; @@ -561,7 +568,7 @@ extern struct iscsi_conn *conn_lookup(struct iscsi_session *, u16); extern void conn_reinst_finished(struct iscsi_conn *); extern int __add_conn(struct iscsi_session *, struct iscsi_kern_conn_info *); extern int __del_conn(struct iscsi_session *, struct iscsi_kern_conn_info *); -extern int conn_free(struct iscsi_conn *); +extern void conn_free(struct iscsi_conn *); extern void iscsi_make_conn_rd_active(struct iscsi_conn *conn); #define ISCSI_CONN_ACTIVE_CLOSE 1 #define ISCSI_CONN_DELETING 2 @@ -613,6 +620,7 @@ extern void target_del_all(void); extern int iscsi_procfs_init(void); extern void iscsi_procfs_exit(void); #else +extern int conn_sysfs_add(struct iscsi_conn *conn); extern const struct attribute *iscsi_attrs[]; extern int iscsi_add_attr(struct iscsi_target *target, const struct iscsi_kern_attr *user_info); @@ -835,4 +843,23 @@ static inline void iscsi_extracheck_is_rd_thread(struct iscsi_conn *conn) {} static inline void iscsi_extracheck_is_wr_thread(struct iscsi_conn *conn) {} #endif +extern int iscsi_conn_alloc(struct iscsi_session *session, + struct iscsi_kern_conn_info *info, struct iscsi_conn **new_conn, + struct iscsit_transport *t); + +extern int conn_activate(struct iscsi_conn *conn); +extern void iscsi_tcp_mark_conn_closed(struct iscsi_conn *conn, int flags); +extern void iscsi_tcp_conn_free(struct iscsi_conn *conn); +extern void iscsi_cmnd_init(struct iscsi_conn *conn, struct iscsi_cmnd *cmnd, + struct iscsi_cmnd *parent); +extern struct iscsi_cmnd *iscsi_get_send_cmnd(struct iscsi_conn *conn); +extern void start_close_conn(struct iscsi_conn *conn); +extern __be32 cmnd_set_sn(struct iscsi_cmnd *cmnd, int set_stat_sn); +extern void iscsi_set_resid(struct iscsi_cmnd *rsp); +extern int iscsi_init_conn(struct iscsi_session *session, + struct iscsi_kern_conn_info *info, struct iscsi_conn *conn); +extern void req_cmnd_pre_release(struct iscsi_cmnd *req); +extern struct iscsi_cmnd *create_status_rsp(struct iscsi_cmnd *req, + int status, const u8 *sense_buf, int sense_len); +extern int iscsi_cmnd_set_write_buf(struct iscsi_cmnd *req); #endif /* __ISCSI_H__ */ diff --git a/iscsi-scst/kernel/iscsi_dbg.h b/iscsi-scst/kernel/iscsi_dbg.h index 493720d6..6f6e247b 100644 --- a/iscsi-scst/kernel/iscsi_dbg.h +++ b/iscsi-scst/kernel/iscsi_dbg.h @@ -16,6 +16,10 @@ #ifndef ISCSI_DBG_H #define ISCSI_DBG_H +#ifdef LOG_PREFIX +#undef LOG_PREFIX +#endif + #define LOG_PREFIX "iscsi-scst" #ifdef INSIDE_KERNEL_TREE @@ -53,8 +57,10 @@ extern unsigned long iscsi_get_flow_ctrl_or_mgmt_dbg_log_flag( #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) extern unsigned long iscsi_trace_flag; +#ifndef trace_flag #define trace_flag iscsi_trace_flag #endif +#endif #define TRACE_CONN_CLOSE(args...) TRACE_DBG_FLAG(TRACE_DEBUG|TRACE_CONN_OC, args) #define TRACE_CONN_CLOSE_DBG(args...) TRACE(TRACE_CONN_OC_DBG, args) diff --git a/iscsi-scst/kernel/iscsit_transport.c b/iscsi-scst/kernel/iscsit_transport.c new file mode 100644 index 00000000..5e3c44ca --- /dev/null +++ b/iscsi-scst/kernel/iscsit_transport.c @@ -0,0 +1,64 @@ + +#include +#include "iscsit_transport.h" +#include "iscsi.h" + +static LIST_HEAD(transport_list); +static DEFINE_MUTEX(transport_mutex); + +static struct iscsit_transport *__iscsit_get_transport(enum iscsit_transport_type type) +{ + struct iscsit_transport *t; + + list_for_each_entry(t, &transport_list, transport_list_entry) { + if (t->transport_type == type) + return t; + } + + return NULL; +} + +struct iscsit_transport *iscsit_get_transport(enum iscsit_transport_type type) +{ + struct iscsit_transport *t; + + mutex_lock(&transport_mutex); + t = __iscsit_get_transport(type); + mutex_unlock(&transport_mutex); + + return t; +} + +int iscsit_reg_transport(struct iscsit_transport *t) +{ + struct iscsit_transport *tmp; + int ret = 0; + + INIT_LIST_HEAD(&t->transport_list_entry); + + mutex_lock(&transport_mutex); + tmp = __iscsit_get_transport(t->transport_type); + if (tmp) { + PRINT_ERROR("Unable to register transport type %d - Already registered\n", + t->transport_type); + ret = -EEXIST; + } else { + list_add_tail(&t->transport_list_entry, &transport_list); + PRINT_INFO("Registered iSCSI transport: %s\n", t->name); + } + mutex_unlock(&transport_mutex); + + return ret; +} +EXPORT_SYMBOL(iscsit_reg_transport); + +void iscsit_unreg_transport(struct iscsit_transport *t) +{ + mutex_lock(&transport_mutex); + list_del(&t->transport_list_entry); + mutex_unlock(&transport_mutex); + + PRINT_INFO("Unregistered iSCSI transport: %s\n", t->name); +} +EXPORT_SYMBOL(iscsit_unreg_transport); + diff --git a/iscsi-scst/kernel/isert-scst/Kconfig b/iscsi-scst/kernel/isert-scst/Kconfig new file mode 100644 index 00000000..99ff7a97 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/Kconfig @@ -0,0 +1,8 @@ +config SCST_ISER + tristate "ISCSI Target" + depends on SCST && SCST_ISCSI + default SCST + help + ISER target driver for SCST framework. The iSCSI iSER extension + has been defined in RFC 5046. + diff --git a/iscsi-scst/kernel/isert-scst/Makefile b/iscsi-scst/kernel/isert-scst/Makefile new file mode 100644 index 00000000..b2a08e8c --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/Makefile @@ -0,0 +1,40 @@ +# +# Makefile for the kernel part of iSER-SCST. +# +# Copyright (C) 2007 - 2014 Vladislav Bolkhovitin +# Copyright (C) 2007 - 2014 Fusion-io, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, version 2 +# of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Note! Dependencies are done automatically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile. + +cc-option = $(shell if $(CC) $(CFLAGS) $(1) -S -o /dev/null -xc /dev/null \ + > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi ;) +enable-Wextra = $(shell uname_r="$$(uname -r)"; if [ "$${uname_r%.el5}" = "$${uname_r}" ]; then echo "$(1)"; fi) + +LINUXINCLUDE := $(PRE_CFLAGS) $(LINUXINCLUDE) +EXTRA_CFLAGS += -I$(src)/../../include -I$(src)/../ -I$(SCST_INC_DIR) +EXTRA_CFLAGS += $(call enable-Wextra,-Wextra \ + $(call cc-option,-Wno-old-style-declaration) \ + -Wno-unused-parameter -Wno-missing-field-initializers) + +EXTRA_CFLAGS += -DCONFIG_SCST_EXTRACHECKS +#EXTRA_CFLAGS += -DCONFIG_SCST_TRACING +EXTRA_CFLAGS += -DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions + +obj-m += isert-scst.o +isert-scst-objs := isert.o isert_login.o \ + iser_datamover.o iser_rdma.o iser_buf.o iser_pdu.o iser_global.o + diff --git a/iscsi-scst/kernel/isert-scst/Makefile.in-kernel b/iscsi-scst/kernel/isert-scst/Makefile.in-kernel new file mode 100644 index 00000000..e6597007 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/Makefile.in-kernel @@ -0,0 +1,4 @@ +isert-scst-y := isert.o isert_login.o \ + iser_datamover.o iser_rdma.o iser_buf.o iser_pdu.o iser_global.o + +obj-$(CONFIG_SCST_ISER) += isert-scst.o diff --git a/iscsi-scst/kernel/isert-scst/TODO b/iscsi-scst/kernel/isert-scst/TODO new file mode 100644 index 00000000..479c6b76 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/TODO @@ -0,0 +1,9 @@ +* Add suppport for immediate data in iSER +* Add suppport for data-out in iSER +* Look into allocating wr and sg entries dynamically from kmem_cache instead of embedding them into iser_cmnd +* Look into seperating between RX pdu and TX pdu +* Do not signal every "response sent" notification +* Make the code NUMA aware +* Add support for AHS +* Add support for bidi commands + diff --git a/iscsi-scst/kernel/isert-scst/iser.h b/iscsi-scst/kernel/isert-scst/iser.h new file mode 100644 index 00000000..c01e6af6 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser.h @@ -0,0 +1,360 @@ +/* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#ifndef __ISER_H__ +#define __ISER_H__ + +#include +#include +#include +#include +#include + +#include "iser_hdr.h" + +enum isert_portal_state { + ISERT_PORTAL_ACTIVE, + ISERT_PORTAL_INACTIVE +}; + +struct isert_portal { + struct rdma_cm_id *cm_id; + struct sockaddr_storage addr; + struct list_head list_node; /* in portals list */ + /* protected by dev_list_mutex */ + struct list_head conn_list; /* head of conns list */ + enum isert_portal_state state; +}; + +struct isert_buf { + int sg_cnt ____cacheline_aligned; + struct scatterlist *sg; + u8 *addr; + size_t size; + enum dma_data_direction dma_dir; + unsigned int is_alloced:1; + unsigned int is_pgalloced:1; + unsigned int is_malloced:1; +}; + +enum isert_wr_op { + ISER_WR_RECV, + ISER_WR_SEND, + ISER_WR_RDMA_WRITE, + ISER_WR_RDMA_READ, +}; + +struct isert_device; +struct isert_connection; + +struct isert_wr { + enum isert_wr_op wr_op; + struct isert_buf *buf; + + struct isert_connection *conn; + struct isert_cmnd *pdu; + + struct isert_device *isert_dev; + + struct ib_sge *sge_list; + union { + struct ib_recv_wr recv_wr; + struct ib_send_wr send_wr; + }; +} ____cacheline_aligned; + +#define ISER_MAX_SGE 128 +#define ISER_MAX_RDMAS 5 + +#define ISER_SQ_SIZE 128 +#define ISER_MAX_WCE 2048 + +#define ISER_MIN_SQ_SIZE 16 + +struct isert_cmnd { + struct iscsi_cmnd iscsi ____cacheline_aligned; + + struct isert_buf buf; + struct isert_buf rdma_buf; + struct isert_wr wr[ISER_MAX_RDMAS]; + struct ib_sge sg_pool[ISER_MAX_SGE]; + + struct isert_hdr *isert_hdr ____cacheline_aligned; + struct iscsi_hdr *bhs; + void *ahs; + void *data; + + u8 isert_opcode; + u8 iscsi_opcode; + u8 is_rstag_valid; + u8 is_wstag_valid; + + u32 rem_write_stag; /* write rkey */ + u64 rem_write_va; + u32 rem_read_stag; /* read rkey */ + u64 rem_read_va; + + int is_fake_rx; + struct list_head pool_node; /* pool list */ +}; + +enum isert_conn_state { + ISER_CONN_INIT = 0, + ISER_CONN_HANDSHAKE, + ISER_CONN_ACTIVE, + ISER_CONN_CLOSING, +}; + +struct isert_cq { + struct ib_cq *cq ____cacheline_aligned; + struct ib_wc wc[ISER_SQ_SIZE]; + struct isert_device *dev; + struct workqueue_struct *cq_workqueue; + struct work_struct cq_comp_work; + int idx; +}; + +#define ISERT_CONNECTION_ABORTED 0 +#define ISERT_DRAIN_POSTED 1 +#define ISERT_DRAIN_FAILED 2 + +struct isert_connection { + struct iscsi_conn iscsi ____cacheline_aligned; + + int repost_threshold ____cacheline_aligned; + /* access to the following 3 fields is guarded by post_recv_lock */ + int to_post_recv; + struct isert_wr *post_recv_first; + struct isert_wr *post_recv_curr; + + spinlock_t post_recv_lock; + + + spinlock_t tx_lock ____cacheline_aligned; + + /* Following two protected by tx_lock */ + struct list_head tx_free_list; + struct list_head tx_busy_list; + + struct rdma_cm_id *cm_id; + struct isert_device *isert_dev; + struct ib_qp *qp; + struct isert_cq *cq_desc; + + enum isert_conn_state state; + + u32 responder_resources; + u32 initiator_depth; + u32 max_sge; + + /* + * Unprotected. Accessed only before login response is sent and when + * freeing connection + */ + struct list_head rx_buf_list; + + struct isert_cmnd *login_req_pdu; + struct isert_cmnd *login_rsp_pdu; + struct isert_wr *saved_wr; + + int queue_depth; + int immediate_data; + unsigned int target_recv_data_length; + int initiator_recv_data_length; + int initial_r2t; + unsigned int first_burst_length; + struct sockaddr_storage peer_addr; + size_t peer_addrsz; + struct sockaddr_storage self_addr; + + struct list_head portal_node; + + unsigned long flags; + struct work_struct close_work; + struct work_struct drain_work; + struct isert_wr drain_wr; + struct kref kref; + + struct isert_portal *portal; + void *priv_data; /* for connection tracking */ +}; + +struct isert_device { + struct ib_device *ib_dev; + struct ib_pd *pd; + struct ib_mr *mr; + + struct list_head devs_node; + /* conn_list and refcnt protected by dev_list_mutex */ + struct list_head conn_list; + int refcnt; + struct ib_device_attr device_attr; + + int num_cqs; + int *cq_qps; + struct isert_cq *cq_desc; +}; + +struct isert_global { + spinlock_t portal_lock; + /* protected by portal_lock */ + struct list_head portal_list; + /* protected by dev_list_mutex */ + struct list_head dev_list; + struct workqueue_struct *conn_wq; +}; + +#define _ptr_to_u64(p) (u64)(unsigned long)(p) +#define _u64_to_ptr(v) (void *)(unsigned long)(v) + +/* global iser scope */ +int isert_global_init(void); +int isert_datamover_cleanup(void); + +void isert_portal_list_add(struct isert_portal *portal); +void isert_portal_list_remove(struct isert_portal *portal); + +void isert_dev_list_add(struct isert_device *isert_dev); +void isert_dev_list_remove(struct isert_device *isert_dev); +struct isert_device *isert_device_find(struct ib_device *ib_dev); + +void isert_conn_queue_work(struct work_struct *w); + +extern struct kmem_cache *isert_cmnd_cache; +extern struct kmem_cache *isert_conn_cache; + +/* iser portal */ +struct isert_portal *isert_portal_create(void); +int isert_portal_listen(struct isert_portal *portal, + struct sockaddr *sa, + size_t addr_len); +void isert_portal_release(struct isert_portal *portal); +void isert_portal_list_release_all(void); +struct isert_portal *isert_portal_start(struct sockaddr *sa, size_t addr_len); + +/* iser connection */ +int isert_post_recv(struct isert_connection *isert_conn, + struct isert_wr *first_wr, int num_wr); +int isert_post_send(struct isert_connection *isert_conn, + struct isert_wr *first_wr, int num_wr); + +int isert_alloc_conn_resources(struct isert_connection *isert_conn); +void isert_free_conn_resources(struct isert_connection *isert_conn); +void isert_conn_free(struct isert_connection *isert_conn); +void isert_conn_disconnect(struct isert_connection *isert_conn); + +static inline struct isert_connection *isert_conn_alloc(void) +{ + return kmem_cache_zalloc(isert_conn_cache, GFP_KERNEL); +} + +static inline void isert_conn_kfree(struct isert_connection *isert_conn) +{ + kmem_cache_free(isert_conn_cache, isert_conn); +} + +/* iser buf */ +int isert_buf_alloc_data_buf(struct ib_device *ib_dev, + struct isert_buf *isert_buf, size_t size, + enum dma_data_direction dma_dir); +void isert_wr_set_fields(struct isert_wr *wr, + struct isert_connection *isert_conn, + struct isert_cmnd *pdu); +int isert_wr_init(struct isert_wr *wr, + enum isert_wr_op wr_op, + struct isert_buf *isert_buf, + struct isert_connection *isert_conn, + struct isert_cmnd *pdu, + struct ib_sge *sge, + int sg_offset, + int sg_cnt, + int buff_offset); +void isert_wr_release(struct isert_wr *wr); + +void isert_buf_release(struct isert_buf *isert_buf); + +static inline void isert_buf_init_sg(struct isert_buf *isert_buf, + struct scatterlist *sg, + int sg_cnt, size_t size) +{ + isert_buf->sg_cnt = sg_cnt; + isert_buf->sg = sg; + isert_buf->size = size; +} + +/* iser pdu */ +static inline struct isert_cmnd *isert_pdu_alloc(void) +{ + return kmem_cache_zalloc(isert_cmnd_cache, GFP_KERNEL); +} + +static inline void isert_pdu_kfree(struct isert_cmnd *cmnd) +{ + kmem_cache_free(isert_cmnd_cache, cmnd); +} + +struct isert_cmnd *isert_rx_pdu_alloc(struct isert_connection *isert_conn, + size_t size); +struct isert_cmnd *isert_tx_pdu_alloc(struct isert_connection *isert_conn, + size_t size); +void isert_tx_pdu_init(struct isert_cmnd *isert_pdu, + struct isert_connection *isert_conn); +int isert_pdu_send(struct isert_connection *isert_conn, + struct isert_cmnd *tx_pdu); + +int isert_prepare_rdma(struct isert_cmnd *isert_pdu, + struct isert_connection *isert_conn, + enum isert_wr_op op); +int isert_pdu_post_rdma_write(struct isert_connection *isert_conn, + struct isert_cmnd *isert_cmd, + struct isert_cmnd *isert_rsp, + int wr_cnt); +int isert_pdu_post_rdma_read(struct isert_connection *isert_conn, + struct isert_cmnd *isert_cmd, + int wr_cnt); + +void isert_pdu_free(struct isert_cmnd *pdu); +int isert_rx_pdu_done(struct isert_cmnd *pdu); + +void isert_tx_pdu_convert_from_iscsi(struct isert_cmnd *isert_cmnd, + struct iscsi_cmnd *iscsi_cmnd); + +void isert_tx_pdu_init_iscsi(struct isert_cmnd *isert_pdu); + +/* global */ +void isert_global_cleanup(void); +int isert_get_addr_size(struct sockaddr *sa, size_t *size); + +#endif diff --git a/iscsi-scst/kernel/isert-scst/iser_buf.c b/iscsi-scst/kernel/isert-scst/iser_buf.c new file mode 100644 index 00000000..39aaff73 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_buf.c @@ -0,0 +1,306 @@ +/* +* isert_buf.c +* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#include + +#include "iser.h" + +static int isert_buf_alloc_pg(struct ib_device *ib_dev, + struct isert_buf *isert_buf, size_t size, + enum dma_data_direction dma_dir) +{ + int res = 0; + int i; + struct page *page; + + isert_buf->sg_cnt = DIV_ROUND_UP(size, PAGE_SIZE); + isert_buf->sg = kmalloc(sizeof(*isert_buf->sg) * isert_buf->sg_cnt, + GFP_KERNEL); + if (unlikely(!isert_buf->sg)) { + pr_err("Failed to allocate buffer SG\n"); + res = -ENOMEM; + goto out; + } + + sg_init_table(isert_buf->sg, isert_buf->sg_cnt); + for (i = 0; i < isert_buf->sg_cnt; ++i) { + size_t page_len = min_t(size_t, size, PAGE_SIZE); + + page = alloc_page(GFP_KERNEL); + if (unlikely(!page)) { + pr_err("Failed to allocate page\n"); + res = -ENOMEM; + goto out_map_failed; + } + sg_set_page(&isert_buf->sg[i], page, page_len, 0); + size -= page_len; + } + + res = ib_dma_map_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, dma_dir); + if (unlikely(!res)) { + --i; /* do not overrun isert_buf->sg */ + pr_err("Failed to DMA map iser sg:%p len:%d\n", + isert_buf->sg, isert_buf->sg_cnt); + res = -ENOMEM; + goto out_map_failed; + } + + isert_buf->addr = sg_virt(&isert_buf->sg[0]); + + res = 0; + goto out; + +out_map_failed: + for (; i >= 0; --i) + __free_page(sg_page(&isert_buf->sg[i])); + kfree(isert_buf->sg); + isert_buf->sg = NULL; +out: + return res; +} + +static void isert_buf_release_pg(struct isert_buf *isert_buf) +{ + int i; + + for (i = 0; i < isert_buf->sg_cnt; ++i) + __free_page(sg_page(&isert_buf->sg[i])); +} + +static int isert_buf_malloc(struct ib_device *ib_dev, + struct isert_buf *isert_buf, size_t size, + enum dma_data_direction dma_dir) +{ + int res = 0; + + isert_buf->sg_cnt = 1; + isert_buf->sg = kmalloc(sizeof(isert_buf->sg[0]), GFP_KERNEL); + if (unlikely(!isert_buf->sg)) { + pr_err("Failed to allocate buffer SG\n"); + res = -ENOMEM; + goto out; + } + + isert_buf->addr = kmalloc(size, GFP_KERNEL); + if (unlikely(!isert_buf->addr)) { + pr_err("Failed to allocate data buffer\n"); + res = -ENOMEM; + goto data_malloc_failed; + } + + sg_init_one(&isert_buf->sg[0], isert_buf->addr, size); + + res = ib_dma_map_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, dma_dir); + if (unlikely(!res)) { + pr_err("Failed to DMA map iser sg:%p len:%d\n", + isert_buf->sg, isert_buf->sg_cnt); + res = -ENOMEM; + goto out_map_failed; + } + + res = 0; + goto out; + +out_map_failed: + kfree(isert_buf->addr); + isert_buf->addr = NULL; +data_malloc_failed: + kfree(isert_buf->addr); + isert_buf->addr = NULL; +out: + return res; +} + +static void isert_buf_release_kmalloc(struct isert_buf *isert_buf) +{ + kfree(isert_buf->addr); + isert_buf->addr = NULL; +} + +int isert_buf_alloc_data_buf(struct ib_device *ib_dev, + struct isert_buf *isert_buf, size_t size, + enum dma_data_direction dma_dir) +{ + int res = 0; + + isert_buf->is_alloced = 0; + if (size >= PAGE_SIZE) { + res = isert_buf_alloc_pg(ib_dev, isert_buf, size, dma_dir); + if (unlikely(res)) + goto out; + isert_buf->is_pgalloced = 1; + isert_buf->is_malloced = 0; + isert_buf->is_alloced = 1; + } else if (size) { + res = isert_buf_malloc(ib_dev, isert_buf, size, dma_dir); + if (unlikely(res)) + goto out; + isert_buf->is_pgalloced = 0; + isert_buf->is_malloced = 1; + isert_buf->is_alloced = 1; + } + + isert_buf->size = size; + isert_buf->dma_dir = dma_dir; +out: + return res; +} + +void isert_buf_release(struct isert_buf *isert_buf) +{ + if (isert_buf->is_alloced) { + if (isert_buf->is_pgalloced) + isert_buf_release_pg(isert_buf); + + if (isert_buf->is_malloced) + isert_buf_release_kmalloc(isert_buf); + + isert_buf->is_alloced = 0; + kfree(isert_buf->sg); + isert_buf->sg = NULL; + } +} + +void isert_wr_set_fields(struct isert_wr *wr, + struct isert_connection *isert_conn, + struct isert_cmnd *pdu) +{ + struct isert_device *isert_dev = isert_conn->isert_dev; + + wr->conn = isert_conn; + wr->pdu = pdu; + wr->isert_dev = isert_dev; +} + +int isert_wr_init(struct isert_wr *wr, + enum isert_wr_op wr_op, + struct isert_buf *isert_buf, + struct isert_connection *isert_conn, + struct isert_cmnd *pdu, + struct ib_sge *sge, + int sg_offset, + int sg_cnt, + int buff_offset) +{ + enum ib_wr_opcode send_wr_op = IB_WR_SEND; + struct scatterlist *sg_tmp; + int i; + + TRACE_ENTRY(); + + switch (wr_op) { + case ISER_WR_RECV: + case ISER_WR_SEND: + break; + case ISER_WR_RDMA_READ: + send_wr_op = IB_WR_RDMA_READ; + if (unlikely(!pdu->is_wstag_valid)) { + pr_err("No write tag/va specified for RDMA op\n"); + isert_buf_release(isert_buf); + buff_offset = -EFAULT; + goto out; + } + wr->send_wr.wr.rdma.remote_addr = pdu->rem_write_va + + buff_offset; + wr->send_wr.wr.rdma.rkey = pdu->rem_write_stag; + break; + case ISER_WR_RDMA_WRITE: + send_wr_op = IB_WR_RDMA_WRITE; + if (unlikely(!pdu->is_rstag_valid)) { + pr_err("No read tag/va specified for RDMA op\n"); + isert_buf_release(isert_buf); + buff_offset = -EFAULT; + goto out; + } + wr->send_wr.wr.rdma.remote_addr = pdu->rem_read_va + + buff_offset; + wr->send_wr.wr.rdma.rkey = pdu->rem_read_stag; + break; + default: + BUG(); + } + + EXTRACHECKS_BUG_ON(isert_buf->sg_cnt == 0); + + wr->wr_op = wr_op; + wr->buf = isert_buf; + + wr->sge_list = sge + sg_offset; + + sg_tmp = &isert_buf->sg[sg_offset]; + for (i = 0; i < sg_cnt; i++, sg_tmp++) { + wr->sge_list[i].addr = sg_dma_address(sg_tmp); + wr->sge_list[i].length = sg_dma_len(sg_tmp); + buff_offset += wr->sge_list[i].length; + } + + if (wr_op == ISER_WR_RECV) { + wr->recv_wr.next = NULL; + wr->recv_wr.wr_id = _ptr_to_u64(wr); + wr->recv_wr.sg_list = wr->sge_list; + wr->recv_wr.num_sge = sg_cnt; + } else { + wr->send_wr.next = NULL; + wr->send_wr.wr_id = _ptr_to_u64(wr); + wr->send_wr.sg_list = wr->sge_list; + wr->send_wr.num_sge = sg_cnt; + wr->send_wr.opcode = send_wr_op; + wr->send_wr.send_flags = IB_SEND_SIGNALED; + } + +out: + TRACE_EXIT_RES(buff_offset); + return buff_offset; +} + +void isert_wr_release(struct isert_wr *wr) +{ + struct isert_buf *isert_buf = wr->buf; + if (isert_buf && isert_buf->is_alloced) { + struct isert_device *isert_dev = wr->isert_dev; + struct ib_device *ib_dev; + + if (isert_buf->sg_cnt) { + ib_dev = isert_dev->ib_dev; + ib_dma_unmap_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, + isert_buf->dma_dir); + } + isert_buf_release(isert_buf); + } + memset(wr, 0, sizeof(*wr)); +} + diff --git a/iscsi-scst/kernel/isert-scst/iser_datamover.c b/iscsi-scst/kernel/isert-scst/iser_datamover.c new file mode 100644 index 00000000..7c6809b6 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_datamover.c @@ -0,0 +1,282 @@ +/* +* isert_datamover.c +* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#include + +#include "iser.h" +#include "iser_datamover.h" + +int isert_datamover_init(void) +{ + int err; + + err = isert_global_init(); + if (unlikely(err)) { + pr_err("iser datamover init failed, err:%d\n", err); + return err; + } + return 0; +} + +int isert_datamover_cleanup(void) +{ + isert_global_cleanup(); + return 0; +} + +int isert_get_peer_addr(struct iscsi_conn *iscsi_conn, struct sockaddr *sa, + size_t *addr_len) +{ + int ret; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + struct sockaddr *peer_sa = (struct sockaddr *)&isert_conn->peer_addr; + + ret = isert_get_addr_size(peer_sa, addr_len); + if (unlikely(ret)) + goto out; + + memcpy(sa, peer_sa, *addr_len); +out: + return ret; +} + +int isert_get_target_addr(struct iscsi_conn *iscsi_conn, struct sockaddr *sa, + size_t *addr_len) +{ + int ret; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + struct sockaddr *self_sa = (struct sockaddr *)&isert_conn->self_addr; + + ret = isert_get_addr_size(self_sa, addr_len); + if (unlikely(ret)) + goto out; + + memcpy(sa, self_sa, *addr_len); +out: + return ret; +} + +void *isert_portal_add(struct sockaddr *saddr, size_t addr_len) +{ + return isert_portal_start(saddr, addr_len); +} + +int isert_portal_remove(void *portal_h) +{ + struct isert_portal *portal = portal_h; + + isert_portal_release(portal); + return 0; +} + +void isert_free_connection(struct iscsi_conn *iscsi_conn) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + isert_conn_free(isert_conn); +} + +struct iscsi_cmnd *isert_alloc_login_rsp_pdu(struct iscsi_conn *iscsi_conn) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + struct isert_cmnd *isert_pdu = isert_conn->login_rsp_pdu; + + isert_tx_pdu_init(isert_pdu, isert_conn); + return &isert_pdu->iscsi; +} + +static struct iscsi_cmnd *isert_alloc_scsi_pdu(struct iscsi_conn *iscsi_conn, + int fake) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + struct isert_cmnd *isert_pdu; + + spin_lock(&isert_conn->tx_lock); + isert_pdu = list_first_entry(&isert_conn->tx_free_list, + struct isert_cmnd, pool_node); + list_move(&isert_pdu->pool_node, &isert_conn->tx_busy_list); + spin_unlock(&isert_conn->tx_lock); + + isert_pdu->is_fake_rx = fake; + return &isert_pdu->iscsi; +} + +struct iscsi_cmnd *isert_alloc_scsi_rsp_pdu(struct iscsi_conn *iscsi_conn) +{ + return isert_alloc_scsi_pdu(iscsi_conn, 0); +} + +struct iscsi_cmnd *isert_alloc_scsi_fake_pdu(struct iscsi_conn *iscsi_conn) +{ + return isert_alloc_scsi_pdu(iscsi_conn, 1); +} + +void isert_release_tx_pdu(struct iscsi_cmnd *iscsi_pdu) +{ + struct isert_cmnd *isert_pdu = (struct isert_cmnd *)iscsi_pdu; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_pdu->conn; + + isert_tx_pdu_init_iscsi(isert_pdu); + + spin_lock(&isert_conn->tx_lock); + list_move(&isert_pdu->pool_node, &isert_conn->tx_free_list); + spin_unlock(&isert_conn->tx_lock); +} + +void isert_release_rx_pdu(struct iscsi_cmnd *iscsi_pdu) +{ + struct isert_cmnd *isert_pdu = (struct isert_cmnd *)iscsi_pdu; + + if (likely(!isert_pdu->is_fake_rx)) + isert_rx_pdu_done(isert_pdu); +} + +/* if last transition into FF (Fully Featured) state */ +int isert_login_rsp_tx(struct iscsi_cmnd *login_rsp, int last, int discovery) +{ + struct isert_connection *isert_conn = (struct isert_connection *)login_rsp->conn; + int err; + + if (last && !discovery) { + err = isert_alloc_conn_resources(isert_conn); + if (unlikely(err)) { + pr_err("Failed to init conn resources\n"); + return err; + } + isert_pdu_free(isert_conn->login_req_pdu); + isert_conn->login_req_pdu = NULL; + } else { + err = isert_post_recv(isert_conn, + &isert_conn->login_req_pdu->wr[0], + 1); + if (unlikely(err)) { + pr_err("Failed to post recv login req rx buf, err:%d\n", err); + return err; + } + } + + return isert_pdu_tx(login_rsp); +} + +int isert_set_session_params(struct iscsi_conn *iscsi_conn, + struct iscsi_sess_params *sess_params, + struct iscsi_tgt_params *tgt_params) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + + isert_conn->queue_depth = tgt_params->queued_cmnds; + + isert_conn->immediate_data = sess_params->immediate_data; + isert_conn->target_recv_data_length = sess_params->target_recv_data_length; + isert_conn->initial_r2t = sess_params->initial_r2t; + isert_conn->first_burst_length = sess_params->first_burst_length; + isert_conn->initiator_recv_data_length = sess_params->initiator_recv_data_length; + + return 0; +} + +int isert_pdu_tx(struct iscsi_cmnd *iscsi_cmnd) +{ + struct isert_cmnd *isert_cmnd = (struct isert_cmnd *)iscsi_cmnd; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_cmnd->conn; + int err; + + isert_tx_pdu_convert_from_iscsi(isert_cmnd, iscsi_cmnd); + err = isert_pdu_send(isert_conn, isert_cmnd); + + return err; +} + +int isert_request_data_out(struct iscsi_cmnd *iscsi_cmnd) +{ + struct isert_cmnd *isert_cmnd = (struct isert_cmnd *)iscsi_cmnd; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_cmnd->conn; + int ret; + + ret = isert_prepare_rdma(isert_cmnd, isert_conn, ISER_WR_RDMA_READ); + if (unlikely(ret < 0)) + return ret; + + ret = isert_pdu_post_rdma_read(isert_conn, isert_cmnd, ret); + + return ret; +} + +int isert_send_data_in(struct iscsi_cmnd *iscsi_cmnd, + struct iscsi_cmnd *iscsi_rsp) +{ + struct isert_cmnd *isert_cmnd = (struct isert_cmnd *)iscsi_cmnd; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_cmnd->conn; + struct isert_cmnd *isert_rsp = (struct isert_cmnd *)iscsi_rsp; + int ret; + + ret = isert_prepare_rdma(isert_cmnd, isert_conn, ISER_WR_RDMA_WRITE); + if (unlikely(ret < 0)) + return ret; + + isert_tx_pdu_convert_from_iscsi(isert_rsp, iscsi_rsp); + ret = isert_pdu_post_rdma_write(isert_conn, isert_cmnd, isert_rsp, ret); + + return ret; +} + +int isert_close_connection(struct iscsi_conn *iscsi_conn) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + + isert_conn_disconnect(isert_conn); + + return 0; +} + +int isert_task_abort(struct iscsi_cmnd *cmnd) +{ + return 0; +} + +void *isert_get_priv(struct iscsi_conn *iscsi_conn) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + + return isert_conn->priv_data; +} + +void isert_set_priv(struct iscsi_conn *iscsi_conn, void *priv) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + + isert_conn->priv_data = priv; +} diff --git a/iscsi-scst/kernel/isert-scst/iser_datamover.h b/iscsi-scst/kernel/isert-scst/iser_datamover.h new file mode 100644 index 00000000..8beda2cd --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_datamover.h @@ -0,0 +1,94 @@ +/* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#ifndef __ISER_DATAMOVER_H__ +#define __ISER_DATAMOVER_H__ + +#include "iscsi.h" + +/* iscsi layer calling iser */ +int isert_datamover_init(void); +int isert_datamover_cleanup(void); + +void *isert_portal_add(struct sockaddr *sa, size_t addr_len); +int isert_portal_remove(void *portal_h); + +struct iscsi_cmnd *isert_alloc_login_rsp_pdu(struct iscsi_conn *iscsi_conn); + +int isert_get_peer_addr(struct iscsi_conn *iscsi_conn, struct sockaddr *sa, + size_t *addr_len); + +int isert_get_target_addr(struct iscsi_conn *iscsi_conn, struct sockaddr *sa, + size_t *addr_len); + + /* last: if last transition into FF (Fully Featured) state */ +int isert_login_rsp_tx(struct iscsi_cmnd *login_rsp, + int last, int discovery); +int isert_set_session_params(struct iscsi_conn *iscsi_conn, + struct iscsi_sess_params *sess_params, + struct iscsi_tgt_params *tgt_params); + +struct iscsi_cmnd *isert_alloc_scsi_rsp_pdu(struct iscsi_conn *iscsi_conn); +struct iscsi_cmnd *isert_alloc_scsi_fake_pdu(struct iscsi_conn *iscsi_conn); + +int isert_pdu_tx(struct iscsi_cmnd *pdu); + +int isert_request_data_out(struct iscsi_cmnd *cmd); +int isert_send_data_in(struct iscsi_cmnd *cmd, struct iscsi_cmnd *rsp); +int isert_send_status(struct iscsi_cmnd *rsp); + +int isert_close_connection(struct iscsi_conn *iscsi_conn); +int isert_task_abort(struct iscsi_cmnd *cmnd); +void isert_free_connection(struct iscsi_conn *iscsi_conn); + +void isert_release_tx_pdu(struct iscsi_cmnd *iscsi_pdu); +void isert_release_rx_pdu(struct iscsi_cmnd *cmnd); + +/* iser calling iscsi layer */ +int isert_conn_established(struct iscsi_conn *iscsi_conn, + struct sockaddr *from_addr, int addr_len); +int isert_login_req_rx(struct iscsi_cmnd *login_req); +int isert_pdu_rx(struct iscsi_cmnd *pdu); +int isert_data_out_ready(struct iscsi_cmnd *cmd); +int isert_data_in_sent(struct iscsi_cmnd *cmd); +int isert_pdu_sent(struct iscsi_cmnd *pdu); +void isert_pdu_err(struct iscsi_cmnd *pdu); + +int isert_connection_closed(struct iscsi_conn *iscsi_conn); + +void *isert_get_priv(struct iscsi_conn *iscsi_conn); +void isert_set_priv(struct iscsi_conn *iscsi_conn, void *priv); + +#endif diff --git a/iscsi-scst/kernel/isert-scst/iser_global.c b/iscsi-scst/kernel/isert-scst/iser_global.c new file mode 100644 index 00000000..71f85cd6 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_global.c @@ -0,0 +1,161 @@ +/* +* isert_global.c +* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#include + +#include "iser.h" + +static struct isert_global isert_glob; + +struct kmem_cache *isert_cmnd_cache; +struct kmem_cache *isert_conn_cache; + +void isert_portal_list_add(struct isert_portal *portal) +{ + spin_lock(&isert_glob.portal_lock); + list_add_tail(&portal->list_node, &isert_glob.portal_list); + spin_unlock(&isert_glob.portal_lock); +} + +void isert_portal_list_remove(struct isert_portal *portal) +{ + spin_lock(&isert_glob.portal_lock); + list_del_init(&portal->list_node); + spin_unlock(&isert_glob.portal_lock); +} + +void isert_dev_list_add(struct isert_device *isert_dev) +{ + list_add_tail(&isert_dev->devs_node, &isert_glob.dev_list); +} + +void isert_dev_list_remove(struct isert_device *isert_dev) +{ + list_del_init(&isert_dev->devs_node); +} + +struct isert_device *isert_device_find(struct ib_device *ib_dev) +{ + struct isert_device *isert_dev; + struct isert_device *res = NULL; + + list_for_each_entry(isert_dev, &isert_glob.dev_list, devs_node) { + if (isert_dev->ib_dev == ib_dev) { + res = isert_dev; + break; + } + } + + return res; +} + +void isert_portal_list_release_all(void) +{ + struct isert_portal *portal, *n; + + list_for_each_entry_safe(portal, n, &isert_glob.portal_list, list_node) + isert_portal_release(portal); +} + +void isert_conn_queue_work(struct work_struct *w) +{ + queue_work(isert_glob.conn_wq, w); +} + +int isert_global_init(void) +{ + INIT_LIST_HEAD(&isert_glob.portal_list); + INIT_LIST_HEAD(&isert_glob.dev_list); + + spin_lock_init(&isert_glob.portal_lock); + + isert_glob.conn_wq = create_workqueue("isert_conn_wq"); + if (!isert_glob.conn_wq) { + pr_err("Failed to alloc iser conn work queue\n"); + return -ENOMEM; + } + + isert_cmnd_cache = KMEM_CACHE(isert_cmnd, + SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN); + if (!isert_cmnd_cache) { + destroy_workqueue(isert_glob.conn_wq); + pr_err("Failed to alloc iser command cache\n"); + return -ENOMEM; + } + + isert_conn_cache = KMEM_CACHE(isert_connection, + SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN); + if (!isert_conn_cache) { + destroy_workqueue(isert_glob.conn_wq); + kmem_cache_destroy(isert_cmnd_cache); + pr_err("Failed to alloc iser connection cache\n"); + return -ENOMEM; + } + + return 0; +} + +void isert_global_cleanup(void) +{ + isert_portal_list_release_all(); + if (isert_glob.conn_wq) + destroy_workqueue(isert_glob.conn_wq); + if (isert_cmnd_cache) + kmem_cache_destroy(isert_cmnd_cache); + if (isert_conn_cache) + kmem_cache_destroy(isert_conn_cache); +} + +int isert_get_addr_size(struct sockaddr *sa, size_t *addr_len) +{ + int ret = 0; + + switch (sa->sa_family) { + case AF_INET: + *addr_len = sizeof(struct sockaddr_in); + break; + case AF_INET6: + *addr_len = sizeof(struct sockaddr_in6); + break; + default: + pr_err("Unknown address family\n"); + ret = -EINVAL; + goto out; + } +out: + return ret; +} diff --git a/iscsi-scst/kernel/isert-scst/iser_hdr.h b/iscsi-scst/kernel/isert-scst/iser_hdr.h new file mode 100644 index 00000000..4cfd74a9 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_hdr.h @@ -0,0 +1,72 @@ +/* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#ifndef __ISER_HDR_H__ +#define __ISER_HDR_H__ + +#include "iscsi.h" + +#define ISCSI_LOGIN_MAX_RDSL (8 * 1024) + +struct isert_hdr { + u8 flags; + u8 rsvd[3]; + __be32 write_stag; /* write rkey */ + __be64 write_va; + __be32 read_stag; /* read rkey */ + __be64 read_va; +} __packed; + +#define ISER_WSV 0x08 +#define ISER_RSV 0x04 + +#define ISER_ISCSI_CTRL 0x10 +#define ISER_HELLO 0x20 +#define ISER_HELLORPLY 0x30 + +#define ISER_HDRS_SZ (sizeof(struct isert_hdr) + sizeof(struct iscsi_hdr)) + +#define ISER_MAX_LOGIN_RDSL (ISCSI_LOGIN_MAX_RDSL + ISER_HDRS_SZ) + +#define ISER_ZBVA_NOT_SUPPORTED 0x80 +#define ISER_SEND_W_INV_NOT_SUPPORTED 0x40 + +struct isert_cm_hdr { + u8 flags; + u8 rsvd[3]; +} __packed; + +#endif + diff --git a/iscsi-scst/kernel/isert-scst/iser_pdu.c b/iscsi-scst/kernel/isert-scst/iser_pdu.c new file mode 100644 index 00000000..ef4c60e2 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_pdu.c @@ -0,0 +1,572 @@ +/* +* isert_pdu.c +* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#include + +#include "iser.h" +#include "iscsi.h" +#include "iser_datamover.h" + +static inline int isert_pdu_rx_buf_init(struct isert_cmnd *isert_pdu, + struct isert_connection *isert_conn) +{ + struct isert_buf *isert_buf = &isert_pdu->buf; + + return isert_wr_init(&isert_pdu->wr[0], ISER_WR_RECV, isert_buf, + isert_conn, isert_pdu, isert_pdu->sg_pool, + 0, isert_buf->sg_cnt, 0); +} + +static inline int isert_pdu_tx_buf_init(struct isert_cmnd *isert_pdu, + struct isert_connection *isert_conn) +{ + struct isert_buf *isert_buf = &isert_pdu->buf; + + return isert_wr_init(&isert_pdu->wr[0], ISER_WR_SEND, isert_buf, + isert_conn, isert_pdu, isert_pdu->sg_pool, + 0, isert_buf->sg_cnt, 0); +} + +static inline void isert_pdu_set_hdr_plain(struct isert_cmnd *isert_pdu) +{ + struct isert_hdr *isert_hdr = isert_pdu->isert_hdr; + + isert_hdr->flags = ISER_ISCSI_CTRL; + isert_hdr->write_stag = 0; + isert_hdr->write_va = 0; + isert_hdr->read_stag = 0; + isert_hdr->read_va = 0; +} + +/* rx pdu should be initialized to get the posted buffer and + * the associated pointers right; after a pdu is received + * it should be parsed to setup isert_cmnd + iscsi_cmnd in full + */ +static int isert_rx_pdu_init(struct isert_cmnd *isert_pdu, + struct isert_connection *isert_conn) +{ + struct iscsi_cmnd *iscsi_cmnd = &isert_pdu->iscsi; + int err = isert_pdu_rx_buf_init(isert_pdu, isert_conn); + if (unlikely(err < 0)) + return err; + iscsi_cmnd->conn = &isert_conn->iscsi; + return 0; +} + +void isert_tx_pdu_init_iscsi(struct isert_cmnd *isert_pdu) +{ + struct iscsi_cmnd *iscsi_cmnd = &isert_pdu->iscsi; + struct isert_buf *isert_buf = &isert_pdu->buf; + + memset(iscsi_cmnd, 0, sizeof(*iscsi_cmnd)); + + iscsi_cmnd->sg_cnt = isert_buf->sg_cnt; + iscsi_cmnd->sg = isert_buf->sg; + iscsi_cmnd->bufflen = isert_buf->size; +} + +/* tx pdu should set most of the pointers to enable filling out + * of the iscsi pdu struct + */ +void isert_tx_pdu_init(struct isert_cmnd *isert_pdu, + struct isert_connection *isert_conn) +{ + struct iscsi_cmnd *iscsi_cmnd = &isert_pdu->iscsi; + struct isert_buf *isert_buf = &isert_pdu->buf; + void *addr = isert_buf->addr; + struct iscsi_hdr *bhs = (struct iscsi_hdr *)(addr + sizeof(struct isert_hdr)); + + isert_pdu->isert_hdr = (struct isert_hdr *)addr; + isert_pdu->bhs = bhs; + isert_pdu->ahs = NULL; + + isert_tx_pdu_init_iscsi(isert_pdu); + iscsi_cmnd->conn = &isert_conn->iscsi; +} + +void isert_tx_pdu_convert_from_iscsi(struct isert_cmnd *isert_cmnd, + struct iscsi_cmnd *iscsi_cmnd) +{ + struct iscsi_pdu *iscsi_pdu = &iscsi_cmnd->pdu; + + TRACE_ENTRY(); + + memcpy(isert_cmnd->bhs, &iscsi_pdu->bhs, sizeof(*isert_cmnd->bhs)); + if (unlikely(iscsi_pdu->ahssize)) { + isert_cmnd->ahs = isert_cmnd->bhs + 1; + memcpy(isert_cmnd->ahs, iscsi_pdu->ahs, iscsi_pdu->ahssize); + } + +#ifdef CONFIG_SCST_EXTRACHECKS + if (iscsi_cmnd->bufflen) + EXTRACHECKS_BUG_ON(!iscsi_cmnd->sg); +#endif + + TRACE_EXIT(); + return; +} + +static inline int isert_pdu_prepare_send(struct isert_connection *isert_conn, + struct isert_cmnd *tx_pdu) +{ + struct isert_device *isert_dev = isert_conn->isert_dev; + struct ib_sge *sge = tx_pdu->wr[0].sge_list; + size_t to_sync, size; + int sg_cnt = 0; + + size = ISER_HDRS_SZ + tx_pdu->iscsi.pdu.ahssize + + tx_pdu->iscsi.pdu.datasize; + while (size) { + to_sync = size > PAGE_SIZE ? PAGE_SIZE : size; + ib_dma_sync_single_for_device(isert_dev->ib_dev, sge->addr, + to_sync, + DMA_TO_DEVICE); + + sge->length = to_sync; + size -= to_sync; + ++sge; + ++sg_cnt; + } + + return sg_cnt; +} + +static inline void isert_link_send_wrs(struct isert_wr *from_wr, + struct isert_wr *to_wr) +{ + from_wr->send_wr.next = &to_wr->send_wr; + from_wr->send_wr.send_flags = 0; /* not signaled */ + + to_wr->send_wr.next = NULL; + to_wr->send_wr.send_flags = IB_SEND_SIGNALED; +} + +static inline void isert_link_send_pdu_wrs(struct isert_cmnd *from_pdu, + struct isert_cmnd *to_pdu, + int wr_cnt) +{ + isert_link_send_wrs(&from_pdu->wr[wr_cnt - 1], &to_pdu->wr[0]); +} + +int isert_prepare_rdma(struct isert_cmnd *isert_pdu, + struct isert_connection *isert_conn, + enum isert_wr_op op) +{ + struct isert_buf *isert_buf = &isert_pdu->rdma_buf; + struct isert_device *isert_dev = isert_conn->isert_dev; + struct ib_device *ib_dev = isert_dev->ib_dev; + int err; + int buff_offset; + int sg_offset, sg_cnt; + int wr_cnt, i; + + isert_buf_init_sg(isert_buf, isert_pdu->iscsi.sg, + isert_pdu->iscsi.sg_cnt, + isert_pdu->iscsi.bufflen); + + if (op == ISER_WR_RDMA_WRITE) + isert_buf->dma_dir = DMA_TO_DEVICE; + else + isert_buf->dma_dir = DMA_FROM_DEVICE; + + if (unlikely(isert_buf->sg_cnt > ISER_MAX_SGE)) { + pr_err("Scatterlist too large: %d\n", isert_buf->sg_cnt); + wr_cnt = -EOPNOTSUPP; + goto out; + } + + err = ib_dma_map_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, + isert_buf->dma_dir); + if (unlikely(!err)) { + pr_err("Failed to DMA map iser sg:%p len:%d\n", + isert_buf->sg, isert_buf->sg_cnt); + wr_cnt = -EFAULT; + goto out; + } + + buff_offset = 0; + sg_cnt = 0; + for (wr_cnt = 0, sg_offset = 0; sg_offset < isert_buf->sg_cnt; ++wr_cnt) { + sg_cnt = min((int)isert_conn->max_sge, + isert_buf->sg_cnt - sg_offset); + err = isert_wr_init(&isert_pdu->wr[wr_cnt], op, isert_buf, + isert_conn, isert_pdu, isert_pdu->sg_pool, + sg_offset, sg_cnt, buff_offset); + if (unlikely(err < 0)) { + wr_cnt = err; + goto out; + } + buff_offset = err; + sg_offset += sg_cnt; + } + + for (i = 1; i < wr_cnt; ++i) + isert_link_send_wrs(&isert_pdu->wr[i - 1], &isert_pdu->wr[i]); + +out: + TRACE_EXIT_RES(wr_cnt); + return wr_cnt; +} + +void isert_pdu_free(struct isert_cmnd *pdu) +{ + unsigned int i; + + list_del(&pdu->pool_node); + for (i = 0; i < ARRAY_SIZE(pdu->wr); ++i) + isert_wr_release(&pdu->wr[i]); + + isert_pdu_kfree(pdu); +} + +struct isert_cmnd *isert_rx_pdu_alloc(struct isert_connection *isert_conn, + size_t size) +{ + struct isert_cmnd *pdu = NULL; + int err; + unsigned int i; + + TRACE_ENTRY(); + + pdu = isert_pdu_alloc(); + if (unlikely(!pdu)) { + pr_err("Failed to alloc pdu\n"); + goto out; + } + + err = isert_buf_alloc_data_buf(isert_conn->isert_dev->ib_dev, + &pdu->buf, size, DMA_FROM_DEVICE); + if (unlikely(err)) { + pr_err("Failed to alloc rx pdu buf sz:%zd\n", size); + goto buf_alloc_failed; + } + + err = isert_rx_pdu_init(pdu, isert_conn); + if (unlikely(err)) { + pr_err("Failed to init rx pdu wr:%p size:%zd err:%d\n", + &pdu->wr, size, err); + goto pdu_init_failed; + } + + for (i = 0; i < ARRAY_SIZE(pdu->wr); ++i) + isert_wr_set_fields(&pdu->wr[i], isert_conn, pdu); + + for (i = 0; i < ARRAY_SIZE(pdu->sg_pool); ++i) + pdu->sg_pool[i].lkey = isert_conn->isert_dev->mr->lkey; + + list_add_tail(&pdu->pool_node, &isert_conn->rx_buf_list); + + goto out; + +pdu_init_failed: + isert_buf_release(&pdu->buf); +buf_alloc_failed: + isert_pdu_kfree(pdu); + pdu = NULL; +out: + TRACE_EXIT(); + return pdu; +} + +struct isert_cmnd *isert_tx_pdu_alloc(struct isert_connection *isert_conn, + size_t size) +{ + struct isert_cmnd *pdu = NULL; + int err; + unsigned int i; + + TRACE_ENTRY(); + + pdu = isert_pdu_alloc(); + if (unlikely(!pdu)) { + pr_err("Failed to alloc pdu\n"); + goto out; + } + + err = isert_buf_alloc_data_buf(isert_conn->isert_dev->ib_dev, + &pdu->buf, size, DMA_TO_DEVICE); + if (unlikely(err)) { + pr_err("Failed to alloc tx pdu buf sz:%zd\n", size); + goto buf_alloc_failed; + } + + err = isert_pdu_tx_buf_init(pdu, isert_conn); + if (unlikely(err < 0)) { + pr_err("Failed to init tx pdu wr:%p size:%zd err:%d\n", + &pdu->wr, size, err); + goto buf_init_failed; + } + isert_tx_pdu_init(pdu, isert_conn); + + for (i = 0; i < ARRAY_SIZE(pdu->wr); ++i) + isert_wr_set_fields(&pdu->wr[i], isert_conn, pdu); + + for (i = 0; i < ARRAY_SIZE(pdu->sg_pool); ++i) + pdu->sg_pool[i].lkey = isert_conn->isert_dev->mr->lkey; + + isert_pdu_set_hdr_plain(pdu); + + list_add_tail(&pdu->pool_node, &isert_conn->tx_free_list); + + goto out; + +buf_init_failed: + isert_buf_release(&pdu->buf); +buf_alloc_failed: + isert_pdu_kfree(pdu); + pdu = NULL; +out: + TRACE_EXIT(); + return pdu; +} + +static inline void isert_link_recv_wrs(struct isert_wr *from_wr, + struct isert_wr *to_wr) +{ + from_wr->recv_wr.next = &to_wr->recv_wr; + + to_wr->recv_wr.next = NULL; +} + +static inline void isert_link_recv_pdu_wrs(struct isert_cmnd *from_pdu, + struct isert_cmnd *to_pdu) +{ + isert_link_recv_wrs(&from_pdu->wr[0], &to_pdu->wr[0]); +} + +int isert_alloc_conn_resources(struct isert_connection *isert_conn) +{ + struct isert_cmnd *pdu, *prev_pdu = NULL, *first_pdu = NULL; + int t_datasz = 512; /* RFC states that minimum receive data size is 512 */ + int i_datasz = ISER_HDRS_SZ + SCST_SENSE_BUFFERSIZE; + int i, err = 0; + int to_alloc; + + TRACE_ENTRY(); + + isert_conn->repost_threshold = 32; + to_alloc = isert_conn->queue_depth * 2 + isert_conn->repost_threshold; + + if (unlikely(to_alloc > ISER_MAX_WCE)) { + pr_err("QueuedCommands larger than %d not supported\n", + (ISER_MAX_WCE - isert_conn->repost_threshold) / 2); + err = -EINVAL; + goto out; + } + + for (i = 0; i < to_alloc; i++) { + pdu = isert_rx_pdu_alloc(isert_conn, t_datasz); + if (unlikely(!pdu)) { + err = -ENOMEM; + goto clean_pdus; + } + + if (unlikely(first_pdu == NULL)) + first_pdu = pdu; + else + isert_link_recv_pdu_wrs(prev_pdu, pdu); + + prev_pdu = pdu; + + pdu = isert_tx_pdu_alloc(isert_conn, i_datasz); + if (unlikely(!pdu)) { + err = -ENOMEM; + goto clean_pdus; + } + } + + err = isert_post_recv(isert_conn, &first_pdu->wr[0], to_alloc); + if (unlikely(err)) { + pr_err("Failed to post recv err:%d\n", err); + goto clean_pdus; + } + +out: + TRACE_EXIT_RES(err); + return err; + +clean_pdus: + isert_free_conn_resources(isert_conn); + goto out; +} + +static int isert_reinit_rx_pdu(struct isert_cmnd *pdu) +{ + struct isert_connection *isert_conn = (struct isert_connection *)pdu->iscsi.conn; + + pdu->is_rstag_valid = 0; + pdu->is_wstag_valid = 0; + + memset(&pdu->iscsi, 0, sizeof(pdu->iscsi)); + + return isert_rx_pdu_init(pdu, isert_conn); +} + +int isert_rx_pdu_done(struct isert_cmnd *pdu) +{ + int err; + struct isert_connection *isert_conn = (struct isert_connection *)pdu->iscsi.conn; + + TRACE_ENTRY(); + + err = isert_reinit_rx_pdu(pdu); + if (unlikely(err)) + goto out; + + spin_lock(&isert_conn->post_recv_lock); + if (unlikely(isert_conn->to_post_recv == 0)) + isert_conn->post_recv_first = &pdu->wr[0]; + else + isert_link_recv_wrs(isert_conn->post_recv_curr, &pdu->wr[0]); + + isert_conn->post_recv_curr = &pdu->wr[0]; + + if (++isert_conn->to_post_recv > isert_conn->repost_threshold) { + err = isert_post_recv(isert_conn, isert_conn->post_recv_first, + isert_conn->to_post_recv); + isert_conn->to_post_recv = 0; + } + spin_unlock(&isert_conn->post_recv_lock); + +out: + TRACE_EXIT_RES(err); + return err; +} + +void isert_free_conn_resources(struct isert_connection *isert_conn) +{ + struct isert_cmnd *pdu; + + TRACE_ENTRY(); + + if (isert_conn->login_rsp_pdu) { + isert_pdu_free(isert_conn->login_rsp_pdu); + isert_conn->login_rsp_pdu = NULL; + } + if (isert_conn->login_req_pdu) { + isert_pdu_free(isert_conn->login_req_pdu); + isert_conn->login_req_pdu = NULL; + } + + while (!list_empty(&isert_conn->rx_buf_list)) { + pdu = list_first_entry(&isert_conn->rx_buf_list, + struct isert_cmnd, pool_node); + isert_pdu_free(pdu); /* releases buffer as well */ + } + + spin_lock(&isert_conn->tx_lock); + while (!list_empty(&isert_conn->tx_free_list)) { + pdu = list_first_entry(&isert_conn->tx_free_list, + struct isert_cmnd, pool_node); + isert_pdu_free(pdu); /* releases buffer as well */ + } + + while (!list_empty(&isert_conn->tx_busy_list)) { + pdu = list_first_entry(&isert_conn->tx_busy_list, + struct isert_cmnd, pool_node); + isert_pdu_free(pdu); /* releases buffer as well */ + } + spin_unlock(&isert_conn->tx_lock); + + TRACE_EXIT(); +} + +int isert_pdu_send(struct isert_connection *isert_conn, + struct isert_cmnd *tx_pdu) +{ + int err; + struct isert_wr *wr; + + TRACE_ENTRY(); + +#ifdef CONFIG_SCST_EXTRACHECKS + EXTRACHECKS_BUG_ON(!isert_conn); + EXTRACHECKS_BUG_ON(!tx_pdu); +#endif + + wr = &tx_pdu->wr[0]; + wr->send_wr.num_sge = isert_pdu_prepare_send(isert_conn, tx_pdu); + + err = isert_post_send(isert_conn, wr, 1); + if (unlikely(err)) { + pr_err("Failed to send pdu conn:%p pdu:%p err:%d\n", + isert_conn, tx_pdu, err); + } + + TRACE_EXIT_RES(err); + return err; +} + +int isert_pdu_post_rdma_write(struct isert_connection *isert_conn, + struct isert_cmnd *isert_cmd, + struct isert_cmnd *isert_rsp, + int wr_cnt) +{ + int err; + + TRACE_ENTRY(); + + isert_rsp->wr[0].send_wr.num_sge = isert_pdu_prepare_send(isert_conn, + isert_rsp); + isert_link_send_pdu_wrs(isert_cmd, isert_rsp, wr_cnt); + err = isert_post_send(isert_conn, &isert_cmd->wr[0], wr_cnt + 1); + if (unlikely(err)) { + pr_err("Failed to send pdu conn:%p pdu:%p err:%d\n", + isert_conn, isert_cmd, err); + } + + TRACE_EXIT_RES(err); + return err; +} + +int isert_pdu_post_rdma_read(struct isert_connection *isert_conn, + struct isert_cmnd *isert_cmd, int wr_cnt) +{ + int err; + + TRACE_ENTRY(); + + err = isert_post_send(isert_conn, &isert_cmd->wr[0], wr_cnt); + if (unlikely(err)) { + pr_err("Failed to send pdu conn:%p pdu:%p err:%d\n", + isert_conn, isert_cmd, err); + } + + TRACE_EXIT_RES(err); + return err; +} + diff --git a/iscsi-scst/kernel/isert-scst/iser_rdma.c b/iscsi-scst/kernel/isert-scst/iser_rdma.c new file mode 100644 index 00000000..ad0f402b --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_rdma.c @@ -0,0 +1,1708 @@ +/* +* isert_rdma.c +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#include +#include + +#include "iser.h" +#include "iser_datamover.h" + +#define ISER_CQ_ENTRIES (128 * 1024) +#define ISER_LISTEN_BACKLOG 8 + +static DEFINE_MUTEX(dev_list_mutex); + +void isert_portal_free(struct isert_portal *portal); + +static int isert_num_recv_posted_on_err(struct ib_recv_wr *first_ib_wr, + struct ib_recv_wr *bad_wr) +{ + struct ib_recv_wr *wr; + int num_posted = 0; + + for (wr = first_ib_wr; wr != NULL && wr != bad_wr; wr = wr->next) + num_posted++; + + return num_posted; +} + +int isert_post_recv(struct isert_connection *isert_conn, + struct isert_wr *first_wr, + int num_wr) +{ + struct ib_recv_wr *first_ib_wr = &first_wr->recv_wr; + struct ib_recv_wr *bad_wr; + int num_posted; + int err; + + TRACE_ENTRY(); + + err = ib_post_recv(isert_conn->qp, first_ib_wr, &bad_wr); + if (unlikely(err)) { + num_posted = isert_num_recv_posted_on_err(first_ib_wr, bad_wr); + + pr_err("conn:%p recv posted:%d/%d 1st wr_id:0x%llx sz:%d err:%d\n", + isert_conn, num_posted, num_wr, first_ib_wr->wr_id, + first_ib_wr->sg_list->length, err); + } + + TRACE_EXIT_RES(err); + return err; +} + +static int isert_num_send_posted_on_err(struct ib_send_wr *first_ib_wr, + struct ib_send_wr *bad_wr) +{ + struct ib_send_wr *wr; + int num_posted = 0; + + for (wr = first_ib_wr; wr != NULL && wr != bad_wr; wr = wr->next) + num_posted++; + + return num_posted; +} + +int isert_post_send(struct isert_connection *isert_conn, + struct isert_wr *first_wr, + int num_wr) +{ + struct ib_send_wr *first_ib_wr = &first_wr->send_wr; + struct ib_send_wr *bad_wr; + int num_posted; + int err; + + TRACE_ENTRY(); + + err = ib_post_send(isert_conn->qp, first_ib_wr, &bad_wr); + if (unlikely(err)) { + num_posted = isert_num_send_posted_on_err(first_ib_wr, bad_wr); + + pr_err("conn:%p send posted:%d/%d bad wr_id:0x%llx sz:%d num_sge: %d err:%d\n", + isert_conn, num_posted, num_wr, bad_wr->wr_id, + bad_wr->sg_list->length, bad_wr->num_sge, err); + } + + TRACE_EXIT_RES(err); + return err; +} + +void isert_conn_disconnect(struct isert_connection *isert_conn) +{ + struct ib_send_wr *bad_wr; + int err = rdma_disconnect(isert_conn->cm_id); + if (unlikely(err)) + pr_err("Failed to rdma disconnect, err:%d\n", err); + + if (!test_and_set_bit(ISERT_DRAIN_POSTED, &isert_conn->flags)) { + isert_wr_set_fields(&isert_conn->drain_wr, isert_conn, NULL); + isert_conn->drain_wr.wr_op = ISER_WR_SEND; + isert_conn->drain_wr.send_wr.wr_id = _ptr_to_u64(&isert_conn->drain_wr); + isert_conn->drain_wr.send_wr.opcode = IB_WR_SEND; + err = ib_post_send(isert_conn->qp, &isert_conn->drain_wr.send_wr, &bad_wr); + if (unlikely(err)) { + pr_err("Failed to post drain wr, err:%d\n", err); + /* We need to decrement iser_conn->kref in order to be able to cleanup + * the connection */ + set_bit(ISERT_DRAIN_FAILED, &isert_conn->flags); + isert_conn_free(isert_conn); + } + } +} + +static int isert_pdu_handle_hello_req(struct isert_cmnd *pdu) +{ + pr_info("iSER Hello not supported\n"); + return -EINVAL; /* meanwhile disconnect immediately */ +} + +static int isert_pdu_handle_login_req(struct isert_cmnd *isert_pdu) +{ + return isert_login_req_rx(&isert_pdu->iscsi); +} + +static int isert_pdu_handle_text(struct isert_cmnd *pdu) +{ + struct iscsi_cmnd *iscsi_cmnd = &pdu->iscsi; + + iscsi_cmnd->sg_cnt = pdu->buf.sg_cnt; + iscsi_cmnd->sg = pdu->buf.sg; + return isert_login_req_rx(iscsi_cmnd); +} + +static int isert_pdu_handle_nop_out(struct isert_cmnd *pdu) +{ + return isert_pdu_rx(&pdu->iscsi); +} + +static int isert_pdu_handle_scsi_cmd(struct isert_cmnd *pdu) +{ + return isert_pdu_rx(&pdu->iscsi); +} + +static int isert_pdu_handle_tm_func(struct isert_cmnd *pdu) +{ + return isert_pdu_rx(&pdu->iscsi); +} + +static int isert_pdu_handle_data_out(struct isert_cmnd *pdu) +{ + pr_info("iser iscsi data out not supported\n"); + return -EINVAL; /* meanwhile disconnect immediately */ +} + +static int isert_pdu_handle_logout(struct isert_cmnd *pdu) +{ + return isert_pdu_rx(&pdu->iscsi); +} + +static int isert_pdu_handle_snack(struct isert_cmnd *pdu) +{ + pr_info("iser iscsi SNACK not supported\n"); + return -EINVAL; /* meanwhile disconnect immediately */ +} + +static void isert_rx_pdu_parse_headers(struct isert_cmnd *isert_pdu) +{ + struct iscsi_cmnd *iscsi_cmnd = &isert_pdu->iscsi; + struct isert_buf *isert_buf = &isert_pdu->buf; + u8 *addr = isert_buf->addr; + struct isert_hdr *isert_hdr = (struct isert_hdr *)addr; + struct iscsi_hdr *bhs = (struct iscsi_hdr *)(addr + sizeof(*isert_hdr)); + unsigned int data_offset = ISER_HDRS_SZ; + unsigned int ahssize; + + TRACE_ENTRY(); + + isert_pdu->isert_hdr = isert_hdr; + isert_pdu->isert_opcode = isert_hdr->flags & 0xf0; + isert_pdu->is_rstag_valid = isert_hdr->flags & ISER_RSV ? 1 : 0; + isert_pdu->is_wstag_valid = isert_hdr->flags & ISER_WSV ? 1 : 0; + + if (isert_pdu->is_rstag_valid) { + isert_pdu->rem_read_stag = be32_to_cpu(isert_hdr->read_stag); + isert_pdu->rem_read_va = be64_to_cpu(isert_hdr->read_va); + } + + if (isert_pdu->is_wstag_valid) { + isert_pdu->rem_write_stag = be32_to_cpu(isert_hdr->write_stag); + isert_pdu->rem_write_va = be64_to_cpu(isert_hdr->write_va); + } + + isert_pdu->bhs = bhs; + isert_pdu->iscsi_opcode = bhs->opcode & ISCSI_OPCODE_MASK; + + memcpy(&iscsi_cmnd->pdu.bhs, bhs, sizeof(iscsi_cmnd->pdu.bhs)); + iscsi_cmnd_get_length(&iscsi_cmnd->pdu); /* get ahssize and datasize */ + + ahssize = isert_pdu->iscsi.pdu.ahssize; + if (likely(!ahssize)) { + isert_pdu->ahs = NULL; + } else { + isert_pdu->ahs = addr + ISER_HDRS_SZ; + data_offset += ahssize; + } + iscsi_cmnd->pdu.ahs = isert_pdu->ahs; + + iscsi_cmnd->bufflen = iscsi_cmnd->pdu.datasize; + iscsi_cmnd->bufflen = (iscsi_cmnd->bufflen + 3) & ~3; + if (iscsi_cmnd->bufflen) { + iscsi_cmnd->sg_cnt = isert_pdu->buf.sg_cnt; + iscsi_cmnd->sg = isert_pdu->buf.sg; + } else { + iscsi_cmnd->sg = NULL; + } + + TRACE_EXIT(); +} + +static void isert_dma_sync_data_for_cpu(struct ib_device *ib_dev, + struct ib_sge *sge, size_t size) +{ + size_t to_sync = size > (PAGE_SIZE - ISER_HDRS_SZ) ? + (PAGE_SIZE - ISER_HDRS_SZ) : size; + ib_dma_sync_single_for_cpu(ib_dev, sge->addr + ISER_HDRS_SZ, + to_sync, + DMA_FROM_DEVICE); + + size -= to_sync; + while (size) { + ++sge; + to_sync = size > PAGE_SIZE ? PAGE_SIZE : size; + ib_dma_sync_single_for_cpu(ib_dev, sge->addr, + to_sync, + DMA_FROM_DEVICE); + + size -= to_sync; + } +} + +static void isert_recv_completion_handler(struct isert_wr *wr) +{ + struct isert_cmnd *pdu = wr->pdu; + struct ib_sge *sge = wr->sge_list; + struct ib_device *ib_dev = wr->isert_dev->ib_dev; + int err; + + TRACE_ENTRY(); + + ib_dma_sync_single_for_cpu(ib_dev, sge->addr, + ISER_HDRS_SZ, + DMA_FROM_DEVICE); + isert_rx_pdu_parse_headers(pdu); + isert_dma_sync_data_for_cpu(ib_dev, sge, + pdu->iscsi.pdu.datasize + pdu->iscsi.pdu.ahssize); + + switch (pdu->isert_opcode) { + case ISER_ISCSI_CTRL: + switch (pdu->iscsi_opcode) { + case ISCSI_OP_NOP_OUT: + err = isert_pdu_handle_nop_out(pdu); + break; + case ISCSI_OP_SCSI_CMD: + err = isert_pdu_handle_scsi_cmd(pdu); + break; + case ISCSI_OP_SCSI_TASK_MGT_MSG: + err = isert_pdu_handle_tm_func(pdu); + break; + case ISCSI_OP_LOGIN_CMD: + err = isert_pdu_handle_login_req(pdu); + break; + case ISCSI_OP_TEXT_CMD: + err = isert_pdu_handle_text(pdu); + break; + case ISCSI_OP_SCSI_DATA_OUT: + err = isert_pdu_handle_data_out(pdu); + break; + case ISCSI_OP_LOGOUT_CMD: + err = isert_pdu_handle_logout(pdu); + break; + case ISCSI_OP_SNACK_CMD: + err = isert_pdu_handle_snack(pdu); + break; + default: + pr_err("Unexpected iscsi opcode:0x%x\n", + pdu->iscsi_opcode); + err = -EINVAL; + break; + } + break; + case ISER_HELLO: + err = isert_pdu_handle_hello_req(pdu); + break; + default: + pr_err("malformed isert_hdr, iser op:%x flags 0x%02x\n", + pdu->isert_opcode, pdu->isert_hdr->flags); + err = -EINVAL; + break; + } + + if (unlikely(err)) { + pr_err("err:%d while handling iser pdu\n", err); + isert_conn_disconnect(wr->conn); + } + + TRACE_EXIT(); +} + +static void isert_send_completion_handler(struct isert_wr *wr) +{ + struct isert_cmnd *isert_pdu = wr->pdu; + struct iscsi_cmnd *iscsi_pdu = &isert_pdu->iscsi; + struct iscsi_cmnd *iscsi_req_pdu = iscsi_pdu->parent_req; + struct isert_cmnd *isert_req_pdu = (struct isert_cmnd *)iscsi_req_pdu; + + TRACE_ENTRY(); + + if (iscsi_req_pdu && iscsi_req_pdu->bufflen && + isert_req_pdu->is_rstag_valid) + isert_data_in_sent(iscsi_req_pdu); + + isert_pdu_sent(iscsi_pdu); + + TRACE_EXIT(); +} + +static void isert_rdma_rd_completion_handler(struct isert_wr *wr) +{ + struct isert_buf *isert_buf = wr->buf; + struct isert_device *isert_dev = wr->isert_dev; + struct ib_device *ib_dev = isert_dev->ib_dev; + + ib_dma_unmap_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, + isert_buf->dma_dir); + isert_buf->sg_cnt = 0; + + isert_data_out_ready(&wr->pdu->iscsi); +} + +static void isert_rdma_wr_completion_handler(struct isert_wr *wr) +{ + struct isert_buf *isert_buf = wr->buf; + struct isert_device *isert_dev = wr->isert_dev; + struct ib_device *ib_dev = isert_dev->ib_dev; + + ib_dma_unmap_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, + isert_buf->dma_dir); + isert_buf->sg_cnt = 0; + + isert_data_in_sent(&wr->pdu->iscsi); +} + +static void isert_handle_wc(struct ib_wc *wc) +{ + struct isert_wr *wr = _u64_to_ptr(wc->wr_id); + struct isert_connection *isert_conn; + + TRACE_ENTRY(); + + switch (wr->wr_op) { + case ISER_WR_RECV: + isert_conn = wr->conn; + if (unlikely(isert_conn->state == ISER_CONN_HANDSHAKE)) { + isert_conn->state = ISER_CONN_ACTIVE; + isert_conn->saved_wr = wr; + pr_info("iser rx pdu before conn established, pdu saved\n"); + break; + } + isert_recv_completion_handler(wr); + break; + case ISER_WR_SEND: + isert_send_completion_handler(wr); + break; + case ISER_WR_RDMA_WRITE: + isert_rdma_wr_completion_handler(wr); + break; + case ISER_WR_RDMA_READ: + isert_rdma_rd_completion_handler(wr); + break; + default: + isert_conn = wr->conn; + pr_err("unexpected work req op:%d, wc op:%d, wc:%p wr_id:%p conn:%p\n", + wr->wr_op, wc->opcode, wc, wr, isert_conn); + if (isert_conn) + isert_conn_disconnect(isert_conn); + break; + } + + TRACE_EXIT(); +} + +static const char *wr_status_str(enum ib_wc_status status) +{ + switch (status) { + case IB_WC_SUCCESS: + return "WC_SUCCESS"; + + case IB_WC_LOC_LEN_ERR: + return "WC_LOC_LEN_ERR"; + + case IB_WC_LOC_QP_OP_ERR: + return "WC_LOC_QP_OP_ERR"; + + case IB_WC_LOC_EEC_OP_ERR: + return "WC_LOC_EEC_OP_ERR"; + + case IB_WC_LOC_PROT_ERR: + return "WC_LOC_PROT_ERR"; + + case IB_WC_WR_FLUSH_ERR: + return "WC_WR_FLUSH_ERR"; + + case IB_WC_MW_BIND_ERR: + return "WC_MW_BIND_ERR"; + + case IB_WC_BAD_RESP_ERR: + return "WC_BAD_RESP_ERR"; + + case IB_WC_LOC_ACCESS_ERR: + return "WC_LOC_ACCESS_ERR"; + + case IB_WC_REM_INV_REQ_ERR: + return "WC_REM_INV_REQ_ERR"; + + case IB_WC_REM_ACCESS_ERR: + return "WC_REM_ACCESS_ERR"; + + case IB_WC_REM_OP_ERR: + return "WC_REM_OP_ERR"; + + case IB_WC_RETRY_EXC_ERR: + return "WC_RETRY_EXC_ERR"; + + case IB_WC_RNR_RETRY_EXC_ERR: + return "WC_RNR_RETRY_EXC_ERR"; + + case IB_WC_LOC_RDD_VIOL_ERR: + return "WC_LOC_RDD_VIOL_ERR"; + + case IB_WC_REM_INV_RD_REQ_ERR: + return "WC_REM_INV_RD_REQ_ERR"; + + case IB_WC_REM_ABORT_ERR: + return "WC_REM_ABORT_ERR"; + + case IB_WC_INV_EECN_ERR: + return "WC_INV_EECN_ERR"; + + case IB_WC_INV_EEC_STATE_ERR: + return "WC_INV_EEC_STATE_ERR"; + + case IB_WC_FATAL_ERR: + return "WC_FATAL_ERR"; + + case IB_WC_RESP_TIMEOUT_ERR: + return "WC_RESP_TIMEOUT_ERR"; + + case IB_WC_GENERAL_ERR: + return "WC_GENERAL_ERR"; + + default: + return "UNKNOWN"; + } +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) +static void isert_conn_drained_do_work(void *ctx) +#else +static void isert_conn_drained_do_work(struct work_struct *work) +#endif +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + struct isert_connection *isert_conn = ctx; +#else + struct isert_connection *isert_conn = + container_of(work, struct isert_connection, drain_work); +#endif + + /* notify upper layer */ + if (!test_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags)) + isert_connection_closed(&isert_conn->iscsi); + + isert_conn_free(isert_conn); +} + +static void isert_sched_conn_drained(struct isert_connection *isert_conn) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + INIT_WORK(&isert_conn->drain_work, isert_conn_drained_do_work, isert_conn); +#else + INIT_WORK(&isert_conn->drain_work, isert_conn_drained_do_work); +#endif + isert_conn_queue_work(&isert_conn->drain_work); +} + +static void isert_handle_wc_error(struct ib_wc *wc) +{ + struct isert_wr *wr = _u64_to_ptr(wc->wr_id); + struct isert_cmnd *isert_pdu = wr->pdu; + struct isert_connection *isert_conn = wr->conn; + struct isert_buf *isert_buf = wr->buf; + struct isert_device *isert_dev = wr->isert_dev; + struct ib_device *ib_dev = isert_dev->ib_dev; + + TRACE_ENTRY(); + + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_err("conn:%p wr_id:0x%p status:%s vendor_err:0x%0x\n", + isert_conn, wr, wr_status_str(wc->status), + wc->vendor_err); + + switch (wr->wr_op) { + case ISER_WR_SEND: + if (unlikely(wr->send_wr.num_sge == 0)) /* Drain WR */ + isert_sched_conn_drained(isert_conn); + else + isert_pdu_err(&isert_pdu->iscsi); + break; + case ISER_WR_RDMA_READ: + if (isert_buf->sg_cnt != 0) { + ib_dma_unmap_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, + isert_buf->dma_dir); + isert_buf->sg_cnt = 0; + } + isert_pdu_err(&isert_pdu->iscsi); + break; + case ISER_WR_RECV: + /* this should be the Flush, no task has been created yet */ + break; + case ISER_WR_RDMA_WRITE: + if (isert_buf->sg_cnt != 0) { + ib_dma_unmap_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, + isert_buf->dma_dir); + isert_buf->sg_cnt = 0; + } + /* RDMA-WR and SEND response of a READ task + are sent together, so when receiving RDMA-WR error, + wait until SEND error arrives to complete the task */ + break; + default: + pr_err("unexpected opcode %d, wc:%p wr_id:%p conn:%p\n", + wr->wr_op, wc, wr, isert_conn); + break; + } + + TRACE_EXIT(); +} + +static int isert_poll_cq(struct isert_cq *cq) +{ + int err; + struct ib_wc *wc, *last_wc; + + TRACE_ENTRY(); + + do { + err = ib_poll_cq(cq->cq, ARRAY_SIZE(cq->wc), cq->wc); + last_wc = &cq->wc[err]; + for (wc = cq->wc; wc < last_wc; ++wc) { + if (likely(wc->status == IB_WC_SUCCESS)) + isert_handle_wc(wc); + else + isert_handle_wc_error(wc); + } + + } while (err > 0); + + TRACE_EXIT_RES(err); + return err; +} + +/* callback function for isert_dev->[cq]->cq_comp_work */ +static void isert_cq_comp_work_cb(struct work_struct *work) +{ + struct isert_cq *cq_desc; + int ret; + + TRACE_ENTRY(); + + cq_desc = container_of(work, struct isert_cq, cq_comp_work); + ret = isert_poll_cq(cq_desc); + if (unlikely(ret < 0)) { /* poll error */ + pr_err("ib_poll_cq failed\n"); + goto out; + } + + ib_req_notify_cq(cq_desc->cq, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + /* + * not all HCAs support IB_CQ_REPORT_MISSED_EVENTS, + * so we need to make sure we don't miss any events between + * last call to ib_poll_cq() and ib_req_notify_cq() + */ + isert_poll_cq(cq_desc); + +out: + TRACE_EXIT(); + return; +} + +static void isert_cq_comp_handler(struct ib_cq *cq, void *context) +{ + struct isert_cq *cq_desc = context; + + queue_work_on(smp_processor_id(), cq_desc->cq_workqueue, + &cq_desc->cq_comp_work); +} + +static const char *ib_event_type_str(enum ib_event_type ev_type) +{ + switch (ev_type) { + case IB_EVENT_COMM_EST: + return "COMM_EST"; + case IB_EVENT_QP_FATAL: + return "QP_FATAL"; + case IB_EVENT_QP_REQ_ERR: + return "QP_REQ_ERR"; + case IB_EVENT_QP_ACCESS_ERR: + return "QP_ACCESS_ERR"; + case IB_EVENT_SQ_DRAINED: + return "SQ_DRAINED"; + case IB_EVENT_PATH_MIG: + return "PATH_MIG"; + case IB_EVENT_PATH_MIG_ERR: + return "PATH_MIG_ERR"; + case IB_EVENT_QP_LAST_WQE_REACHED: + return "QP_LAST_WQE_REACHED"; + case IB_EVENT_CQ_ERR: + return "CQ_ERR"; + case IB_EVENT_SRQ_ERR: + return "SRQ_ERR"; + case IB_EVENT_SRQ_LIMIT_REACHED: + return "SRQ_LIMIT_REACHED"; + case IB_EVENT_PORT_ACTIVE: + return "PORT_ACTIVE"; + case IB_EVENT_PORT_ERR: + return "PORT_ERR"; + case IB_EVENT_LID_CHANGE: + return "LID_CHANGE"; + case IB_EVENT_PKEY_CHANGE: + return "PKEY_CHANGE"; + case IB_EVENT_SM_CHANGE: + return "SM_CHANGE"; + case IB_EVENT_CLIENT_REREGISTER: + return "CLIENT_REREGISTER"; + case IB_EVENT_DEVICE_FATAL: + return "DEVICE_FATAL"; + default: + return "UNKNOWN"; + } +} + +static void isert_async_evt_handler(struct ib_event *async_ev, void *context) +{ + struct isert_cq *cq = context; + struct isert_device *isert_dev = cq->dev; + struct ib_device *ib_dev = isert_dev->ib_dev; + char *dev_name = ib_dev->name; + enum ib_event_type ev_type = async_ev->event; + struct isert_connection *isert_conn; + + TRACE_ENTRY(); + + switch (ev_type) { + case IB_EVENT_COMM_EST: + isert_conn = async_ev->element.qp->qp_context; + pr_info("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n", + isert_conn, isert_conn->cm_id, dev_name, + ib_event_type_str(IB_EVENT_COMM_EST)); + /* force "connection established" event */ + rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST); + break; + + /* rest of QP-related events */ + case IB_EVENT_QP_FATAL: + case IB_EVENT_QP_REQ_ERR: + case IB_EVENT_QP_ACCESS_ERR: + case IB_EVENT_SQ_DRAINED: + case IB_EVENT_PATH_MIG: + case IB_EVENT_PATH_MIG_ERR: + case IB_EVENT_QP_LAST_WQE_REACHED: + isert_conn = async_ev->element.qp->qp_context; + pr_err("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n", + isert_conn, isert_conn->cm_id, dev_name, + ib_event_type_str(ev_type)); + break; + + /* CQ-related events */ + case IB_EVENT_CQ_ERR: + pr_err("dev:%s CQ evt: %s\n", dev_name, + ib_event_type_str(ev_type)); + break; + + /* SRQ events */ + case IB_EVENT_SRQ_ERR: + case IB_EVENT_SRQ_LIMIT_REACHED: + pr_err("dev:%s SRQ evt: %s\n", dev_name, + ib_event_type_str(ev_type)); + break; + + /* Port events */ + case IB_EVENT_PORT_ACTIVE: + case IB_EVENT_PORT_ERR: + case IB_EVENT_LID_CHANGE: + case IB_EVENT_PKEY_CHANGE: + case IB_EVENT_SM_CHANGE: + case IB_EVENT_CLIENT_REREGISTER: + pr_err("dev:%s port:%d evt: %s\n", + dev_name, async_ev->element.port_num, + ib_event_type_str(ev_type)); + break; + + /* HCA events */ + case IB_EVENT_DEVICE_FATAL: + pr_err("dev:%s HCA evt: %s\n", dev_name, + ib_event_type_str(ev_type)); + break; + + default: + pr_err("dev:%s evt: %s\n", dev_name, + ib_event_type_str(ev_type)); + break; + } + + TRACE_EXIT(); +} + +static struct isert_device *isert_device_create(struct ib_device *ib_dev) +{ + struct isert_device *isert_dev; + struct ib_device_attr *dev_attr; + int cqe_num, err; + struct ib_pd *pd; + struct ib_mr *mr; + struct ib_cq *cq; + char wq_name[64]; + int i, j; + + TRACE_ENTRY(); + + isert_dev = kzalloc(sizeof(*isert_dev), GFP_KERNEL); + if (unlikely(isert_dev == NULL)) { + pr_err("Failed to allocate iser dev\n"); + err = -ENOMEM; + goto out; + } + + dev_attr = &isert_dev->device_attr; + err = ib_query_device(ib_dev, dev_attr); + if (unlikely(err)) { + pr_err("Failed to query device, err: %d\n", err); + goto fail_query; + } + + isert_dev->num_cqs = min_t(int, num_online_cpus(), + ib_dev->num_comp_vectors); + + isert_dev->cq_qps = kzalloc(sizeof(*isert_dev->cq_qps) * isert_dev->num_cqs, + GFP_KERNEL); + if (unlikely(isert_dev->cq_qps == NULL)) { + pr_err("Failed to allocate iser cq_qps\n"); + err = -ENOMEM; + goto fail_cq_qps; + } + + isert_dev->cq_desc = vmalloc(sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs); + if (unlikely(isert_dev->cq_desc == NULL)) { + pr_err("Failed to allocate %ld bytes for iser cq_desc\n", + sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs); + err = -ENOMEM; + goto fail_alloc_cq_desc; + } + + pd = ib_alloc_pd(ib_dev); + if (unlikely(IS_ERR(pd))) { + err = PTR_ERR(pd); + pr_err("Failed to alloc iser dev pd, err:%d\n", err); + goto fail_pd; + } + + mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); + if (unlikely(IS_ERR(mr))) { + err = PTR_ERR(mr); + pr_err("Failed to get dma mr, err: %d\n", err); + goto fail_mr; + } + + cqe_num = min(isert_dev->device_attr.max_cqe, ISER_CQ_ENTRIES); + cqe_num = cqe_num / isert_dev->num_cqs; + +#ifdef CONFIG_SCST_EXTRACHECKS + if (isert_dev->device_attr.max_cqe == 0) + pr_err("Zero max_cqe encountered: you may have a compilation problem\n"); +#endif + + for (i = 0; i < isert_dev->num_cqs; ++i) { + struct isert_cq *cq_desc = &isert_dev->cq_desc[i]; + + cq_desc->dev = isert_dev; + cq_desc->idx = i; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL); +#else + INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb); +#endif + + snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc); +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) + cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name); +#else +#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36) + cq_desc->cq_workqueue = alloc_workqueue(wq_name, + WQ_CPU_INTENSIVE| + WQ_RESCUER, 1); +#else + cq_desc->cq_workqueue = alloc_workqueue(wq_name, + WQ_CPU_INTENSIVE| + WQ_MEM_RECLAIM, 1); +#endif +#endif + if (unlikely(!cq_desc->cq_workqueue)) { + pr_err("Failed to alloc iser cq work queue for dev:%s\n", + ib_dev->name); + err = -ENOMEM; + goto fail_cq; + } + + cq = ib_create_cq(ib_dev, + isert_cq_comp_handler, + isert_async_evt_handler, + cq_desc, /* context */ + cqe_num, + i); /* completion vector */ + if (unlikely(IS_ERR(cq))) { + cq_desc->cq = NULL; + err = PTR_ERR(cq); + pr_err("Failed to create iser dev cq, err:%d\n", err); + goto fail_cq; + } + + cq_desc->cq = cq; + err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + if (unlikely(err)) { + pr_err("Failed to request notify cq, err: %d\n", err); + goto fail_cq; + } + } + + isert_dev->ib_dev = ib_dev; + isert_dev->pd = pd; + isert_dev->mr = mr; + + INIT_LIST_HEAD(&isert_dev->conn_list); + + lockdep_assert_held(&dev_list_mutex); + + isert_dev_list_add(isert_dev); + + pr_info("iser created device:%p\n", isert_dev); + return isert_dev; + +fail_cq: + for (j = 0; j <= i; ++j) { + if (isert_dev->cq_desc[j].cq) + ib_destroy_cq(isert_dev->cq_desc[j].cq); + if (isert_dev->cq_desc[j].cq_workqueue) + destroy_workqueue(isert_dev->cq_desc[j].cq_workqueue); + } + ib_dereg_mr(mr); +fail_mr: + ib_dealloc_pd(pd); +fail_pd: + vfree(isert_dev->cq_desc); +fail_alloc_cq_desc: + kfree(isert_dev->cq_qps); +fail_cq_qps: +fail_query: + kfree(isert_dev); +out: + TRACE_EXIT_RES(err); + return ERR_PTR(err); +} + +static void isert_device_release(struct isert_device *isert_dev) +{ + int err, i; + + TRACE_ENTRY(); + + lockdep_assert_held(&dev_list_mutex); + + isert_dev_list_remove(isert_dev); /* remove from global list */ + + for (i = 0; i < isert_dev->num_cqs; ++i) { + struct isert_cq *cq_desc = &isert_dev->cq_desc[i]; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) + /* + * cancel_work_sync() was introduced in 2.6.22. We can + * only wait until all scheduled work is done. + */ + flush_workqueue(cq_desc->cq_workqueue); +#else + cancel_work_sync(&cq_desc->cq_comp_work); +#endif + + err = ib_destroy_cq(cq_desc->cq); + if (unlikely(err)) + pr_err("Failed to destroy cq, err:%d\n", err); + + destroy_workqueue(cq_desc->cq_workqueue); + } + + err = ib_dereg_mr(isert_dev->mr); + if (unlikely(err)) + pr_err("Failed to destroy mr, err:%d\n", err); + err = ib_dealloc_pd(isert_dev->pd); + if (unlikely(err)) + pr_err("Failed to destroy pd, err:%d\n", err); + + vfree(isert_dev->cq_desc); + isert_dev->cq_desc = NULL; + + kfree(isert_dev->cq_qps); + isert_dev->cq_qps = NULL; + + kfree(isert_dev); + + TRACE_EXIT(); +} + +static int isert_get_cq_idx(struct isert_device *isert_dev) +{ + int i, min_idx; + + min_idx = 0; + mutex_lock(&dev_list_mutex); + for (i = 0; i < isert_dev->num_cqs; ++i) + if (isert_dev->cq_qps[i] < isert_dev->cq_qps[min_idx]) + min_idx = i; + isert_dev->cq_qps[min_idx]++; + mutex_unlock(&dev_list_mutex); + + return min_idx; +} + +static int isert_conn_qp_create(struct isert_connection *isert_conn) +{ + struct rdma_cm_id *cm_id = isert_conn->cm_id; + struct isert_device *isert_dev = isert_conn->isert_dev; + struct ib_qp_init_attr qp_attr; + int err; + int cq_idx; + int max_wr = ISER_MAX_WCE; + + TRACE_ENTRY(); + + cq_idx = isert_get_cq_idx(isert_dev); + + memset(&qp_attr, 0, sizeof(qp_attr)); + + qp_attr.event_handler = isert_async_evt_handler; + qp_attr.qp_context = isert_conn; + qp_attr.send_cq = isert_dev->cq_desc[cq_idx].cq; + qp_attr.recv_cq = isert_dev->cq_desc[cq_idx].cq; + + isert_conn->cq_desc = &isert_dev->cq_desc[cq_idx]; + + /* + * A quote from the OFED 1.5.3.1 release notes + * (docs/release_notes/mthca_release_notes.txt), section "Known Issues": + * In mem-free devices, RC QPs can be created with a maximum of + * (max_sge - 1) entries only; UD QPs can be created with a maximum of + * (max_sge - 3) entries. + * A quote from the OFED 1.2.5 release notes + * (docs/mthca_release_notes.txt), section "Known Issues": + * In mem-free devices, RC QPs can be created with a maximum of + * (max_sge - 3) entries only. + */ + isert_conn->max_sge = isert_dev->device_attr.max_sge - 3; + + WARN_ON(isert_conn->max_sge < 1); + + qp_attr.cap.max_send_sge = isert_conn->max_sge; + qp_attr.cap.max_recv_sge = 3; + qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + qp_attr.qp_type = IB_QPT_RC; + + do { + if (max_wr < ISER_MIN_SQ_SIZE) { + pr_err("Failed to create qp, not enough memory\n"); + goto fail_create_qp; + } + + qp_attr.cap.max_send_wr = max_wr; + qp_attr.cap.max_recv_wr = max_wr; + + err = rdma_create_qp(cm_id, isert_dev->pd, &qp_attr); + if (err && err != -ENOMEM) { + pr_err("Failed to create qp, err:%d\n", err); + goto fail_create_qp; + } + + max_wr /= 2; + } while (err == -ENOMEM); + + isert_conn->qp = cm_id->qp; + + pr_info("iser created cm_id:%p qp:0x%X\n", cm_id, cm_id->qp->qp_num); + +out: + TRACE_EXIT_RES(err); + return err; + +fail_create_qp: + mutex_lock(&dev_list_mutex); + isert_dev->cq_qps[cq_idx]--; + mutex_unlock(&dev_list_mutex); + goto out; +} + +static void isert_conn_qp_destroy(struct isert_connection *isert_conn) +{ + rdma_destroy_qp(isert_conn->cm_id); + isert_conn->qp = NULL; +} + +static struct isert_connection *isert_conn_create(struct rdma_cm_id *cm_id, + struct isert_device *isert_dev) +{ + struct isert_connection *isert_conn; + int err; + struct isert_cq *cq; + + TRACE_ENTRY(); + + isert_conn = isert_conn_alloc(); + if (unlikely(!isert_conn)) { + pr_err("Unable to allocate iser conn, cm_id:%p\n", cm_id); + err = -ENOMEM; + goto fail_alloc; + } + isert_conn->state = ISER_CONN_INIT; + isert_conn->cm_id = cm_id; + isert_conn->isert_dev = isert_dev; + + INIT_LIST_HEAD(&isert_conn->rx_buf_list); + INIT_LIST_HEAD(&isert_conn->tx_free_list); + INIT_LIST_HEAD(&isert_conn->tx_busy_list); + spin_lock_init(&isert_conn->tx_lock); + spin_lock_init(&isert_conn->post_recv_lock); + + isert_conn->login_req_pdu = isert_rx_pdu_alloc(isert_conn, + ISER_MAX_LOGIN_RDSL); + if (unlikely(!isert_conn->login_req_pdu)) { + pr_err("Failed to init login req rx pdu\n"); + err = -ENOMEM; + goto fail_login_req_pdu; + } + + isert_conn->login_rsp_pdu = isert_tx_pdu_alloc(isert_conn, + ISER_MAX_LOGIN_RDSL); + if (unlikely(!isert_conn->login_rsp_pdu)) { + pr_err("Failed to init login rsp tx pdu\n"); + err = -ENOMEM; + goto fail_login_rsp_pdu; + } + + err = isert_conn_qp_create(isert_conn); + if (unlikely(err)) + goto fail_qp; + + err = isert_post_recv(isert_conn, &isert_conn->login_req_pdu->wr[0], 1); + if (unlikely(err)) { + pr_err("Failed to post recv login req rx buf, err:%d\n", err); + goto fail_post_recv; + } + + kref_init(&isert_conn->kref); + + TRACE_EXIT(); + return isert_conn; + +fail_post_recv: + cq = isert_conn->qp->recv_cq->cq_context; + mutex_lock(&dev_list_mutex); + isert_dev->cq_qps[cq->idx]--; + mutex_unlock(&dev_list_mutex); + isert_conn_qp_destroy(isert_conn); +fail_qp: + isert_pdu_free(isert_conn->login_rsp_pdu); +fail_login_rsp_pdu: + isert_pdu_free(isert_conn->login_req_pdu); +fail_login_req_pdu: + isert_conn_kfree(isert_conn); +fail_alloc: + module_put(THIS_MODULE); + TRACE_EXIT_RES(err); + return ERR_PTR(err); +} + +static void isert_deref_device(struct isert_device *isert_dev) +{ + isert_dev->refcnt--; + if (isert_dev->refcnt == 0) + isert_device_release(isert_dev); +} + +static void isert_kref_free(struct kref *kref) +{ + struct isert_connection *isert_conn = container_of(kref, + struct isert_connection, + kref); + struct isert_device *isert_dev = isert_conn->isert_dev; + struct isert_cq *cq = isert_conn->qp->recv_cq->cq_context; + + TRACE_ENTRY(); + + pr_info("isert_conn_free conn:%p\n", isert_conn); + + isert_free_conn_resources(isert_conn); + + isert_conn_qp_destroy(isert_conn); + + mutex_lock(&dev_list_mutex); + isert_dev->cq_qps[cq->idx]--; + list_del(&isert_conn->portal_node); + isert_deref_device(isert_dev); + if (unlikely(isert_conn->portal->state == ISERT_PORTAL_INACTIVE)) + isert_portal_free(isert_conn->portal); + mutex_unlock(&dev_list_mutex); + + rdma_destroy_id(isert_conn->cm_id); + + isert_conn_kfree(isert_conn); + + module_put(THIS_MODULE); + + TRACE_EXIT(); +} + +void isert_conn_free(struct isert_connection *isert_conn) +{ + kref_put(&isert_conn->kref, isert_kref_free); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) +static void isert_conn_closed_do_work(void *ctx) +#else +static void isert_conn_closed_do_work(struct work_struct *work) +#endif +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + struct isert_connection *isert_conn = ctx; +#else + struct isert_connection *isert_conn = + container_of(work, struct isert_connection, close_work); +#endif + + /* notify upper layer */ + if (test_bit(ISERT_DRAIN_FAILED, &isert_conn->flags)) + isert_connection_closed(&isert_conn->iscsi); + + isert_conn_free(isert_conn); +} + +static void isert_sched_conn_closed(struct isert_connection *isert_conn) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + INIT_WORK(&isert_conn->close_work, isert_conn_closed_do_work, isert_conn); +#else + INIT_WORK(&isert_conn->close_work, isert_conn_closed_do_work); +#endif + isert_conn_queue_work(&isert_conn->close_work); +} + +static int isert_cm_timewait_exit_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct isert_connection *isert_conn = cm_id->qp->qp_context; + + isert_sched_conn_closed(isert_conn); + return 0; +} + +static int isert_cm_conn_req_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + /* passed in rdma_create_id */ + struct isert_portal *portal = cm_id->context; + struct ib_device *ib_dev = cm_id->device; + struct isert_device *isert_dev; + struct isert_connection *isert_conn; + struct rdma_conn_param *ini_conn_param; + struct rdma_conn_param tgt_conn_param; + struct isert_cm_hdr cm_hdr = { 0 }; + int err; + + TRACE_ENTRY(); + + if (unlikely(!try_module_get(THIS_MODULE))) { + err = -EINVAL; + goto fail_get; + } + + mutex_lock(&dev_list_mutex); + isert_dev = isert_device_find(ib_dev); + if (!isert_dev) { + isert_dev = isert_device_create(ib_dev); + if (unlikely(IS_ERR(isert_dev))) { + err = PTR_ERR(isert_dev); + mutex_unlock(&dev_list_mutex); + goto fail_dev_create; + } + } + isert_dev->refcnt++; + mutex_unlock(&dev_list_mutex); + + isert_conn = isert_conn_create(cm_id, isert_dev); + if (unlikely(IS_ERR(isert_conn))) { + err = PTR_ERR(isert_conn); + goto fail_conn_create; + } + + isert_conn->state = ISER_CONN_HANDSHAKE; + isert_conn->portal = portal; + + mutex_lock(&dev_list_mutex); + list_add_tail(&isert_conn->portal_node, &portal->conn_list); + mutex_unlock(&dev_list_mutex); + + /* initiator is dst, target is src */ + memcpy(&isert_conn->peer_addr, &cm_id->route.addr.dst_addr, + sizeof(isert_conn->peer_addr)); + memcpy(&isert_conn->self_addr, &cm_id->route.addr.src_addr, + sizeof(isert_conn->self_addr)); + + ini_conn_param = &event->param.conn; + memset(&tgt_conn_param, 0, sizeof(tgt_conn_param)); + tgt_conn_param.flow_control = + ini_conn_param->flow_control; + tgt_conn_param.rnr_retry_count = + ini_conn_param->rnr_retry_count; + + tgt_conn_param.initiator_depth = isert_dev->device_attr.max_qp_init_rd_atom; + if (tgt_conn_param.initiator_depth > ini_conn_param->initiator_depth) + tgt_conn_param.initiator_depth = ini_conn_param->initiator_depth; + + tgt_conn_param.private_data_len = sizeof(cm_hdr); + tgt_conn_param.private_data = &cm_hdr; + cm_hdr.flags = ISER_ZBVA_NOT_SUPPORTED | ISER_SEND_W_INV_NOT_SUPPORTED; + + err = rdma_accept(cm_id, &tgt_conn_param); + if (unlikely(err)) { + pr_err("Failed to accept conn request, err:%d\n", err); + goto fail_accept; + } + + switch (isert_conn->peer_addr.ss_family) { + case AF_INET: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) + pr_info("iser accepted connection cm_id:%p " + NIPQUAD_FMT "->" NIPQUAD_FMT "\n", cm_id, + NIPQUAD(((struct sockaddr_in *)&isert_conn->peer_addr)->sin_addr.s_addr), + NIPQUAD(((struct sockaddr_in *)&isert_conn->self_addr)->sin_addr.s_addr)); +#else + pr_info("iser accepted connection cm_id:%p " + "%pI4->%pI4\n", cm_id, + &((struct sockaddr_in *)&isert_conn->peer_addr)->sin_addr.s_addr, + &((struct sockaddr_in *)&isert_conn->self_addr)->sin_addr.s_addr); +#endif + break; + case AF_INET6: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) + pr_info("iser accepted connection cm_id:%p " + NIP6_FMT "->" NIP6_FMT "\n", cm_id, + NIP6(((struct sockaddr_in6 *)&isert_conn->peer_addr)->sin6_addr.s_addr), + NIP6(((struct sockaddr_in6 *)&isert_conn->self_addr)->sin6_addr.s_addr)); +#else + pr_info("iser accepted connection cm_id:%p " + "%pI6->%pI6\n", cm_id, + &((struct sockaddr_in6 *)&isert_conn->peer_addr)->sin6_addr, + &((struct sockaddr_in6 *)&isert_conn->self_addr)->sin6_addr); +#endif + break; + default: + pr_info("iser accepted connection cm_id:%p\n", cm_id); + } + +out: + TRACE_EXIT_RES(err); + return err; + +fail_accept: + set_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags); + isert_cm_timewait_exit_handler(cm_id, NULL); + err = 0; + goto out; + +fail_conn_create: + mutex_lock(&dev_list_mutex); + isert_deref_device(isert_dev); + mutex_unlock(&dev_list_mutex); +fail_dev_create: + rdma_reject(cm_id, NULL, 0); +fail_get: + module_put(THIS_MODULE); + goto out; +} + +static int isert_cm_connect_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct isert_connection *isert_conn = cm_id->qp->qp_context; + int push_saved_pdu = 0; + int ret; + + TRACE_ENTRY(); + + if (isert_conn->state == ISER_CONN_HANDSHAKE) + isert_conn->state = ISER_CONN_ACTIVE; + else if (isert_conn->state == ISER_CONN_ACTIVE) + push_saved_pdu = 1; + + ret = isert_get_addr_size((struct sockaddr *)&isert_conn->peer_addr, + &isert_conn->peer_addrsz); + if (unlikely(ret)) + goto out; + + kref_get(&isert_conn->kref); + kref_get(&isert_conn->kref); + /* notify upper layer */ + ret = isert_conn_established(&isert_conn->iscsi, + (struct sockaddr *)&isert_conn->peer_addr, + isert_conn->peer_addrsz); + if (unlikely(ret)) { + set_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags); + isert_conn_free(isert_conn); + goto out; + } + + if (push_saved_pdu) { + pr_info("iser push saved rx pdu\n"); + isert_recv_completion_handler(isert_conn->saved_wr); + isert_conn->saved_wr = NULL; + } + +out: + TRACE_EXIT_RES(ret); + return ret; +} + +static int isert_cm_disconnect_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct isert_connection *isert_conn = cm_id->qp->qp_context; + + isert_conn_disconnect(isert_conn); + + return 0; +} + +static const char *cm_event_type_str(enum rdma_cm_event_type ev_type) +{ + switch (ev_type) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + return "ADDRESS_RESOLVED"; + case RDMA_CM_EVENT_ADDR_ERROR: + return "ADDESS_ERROR"; + case RDMA_CM_EVENT_ROUTE_RESOLVED: + return "ROUTE_RESOLVED"; + case RDMA_CM_EVENT_ROUTE_ERROR: + return "ROUTE_ERROR"; + case RDMA_CM_EVENT_CONNECT_REQUEST: + return "CONNECT_REQUEST"; + case RDMA_CM_EVENT_CONNECT_RESPONSE: + return "CONNECT_RESPONSE"; + case RDMA_CM_EVENT_CONNECT_ERROR: + return "CONNECT_ERROR"; + case RDMA_CM_EVENT_UNREACHABLE: + return "UNREACHABLE"; + case RDMA_CM_EVENT_REJECTED: + return "REJECTED"; + case RDMA_CM_EVENT_ESTABLISHED: + return "ESTABLISHED"; + case RDMA_CM_EVENT_DISCONNECTED: + return "DISCONNECTED"; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + return "DEVICE_REMOVAL"; + case RDMA_CM_EVENT_MULTICAST_JOIN: + return "MULTICAST_JOIN"; + case RDMA_CM_EVENT_MULTICAST_ERROR: + return "MULTICAST_ERROR"; + case RDMA_CM_EVENT_ADDR_CHANGE: + return "ADDR_CHANGE"; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + return "TIMEWAIT_EXIT"; + default: + return "UNKNOWN"; + } +} + +static int isert_handle_failure(struct isert_connection *conn) +{ + isert_conn_disconnect(conn); + return 0; +} + +static int isert_cm_evt_listener_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *cm_ev) +{ + enum rdma_cm_event_type ev_type; + struct isert_portal *portal; + int err = 0; + + ev_type = cm_ev->event; + portal = cm_id->context; + + switch (ev_type) { + case RDMA_CM_EVENT_DEVICE_REMOVAL: + portal->cm_id = NULL; + err = -EINVAL; + break; + + default: + pr_info("Listener event:%s(%d), ignored\n", + cm_event_type_str(ev_type), ev_type); + break; + } + + return err; +} + +static int isert_cm_evt_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *cm_ev) +{ + enum rdma_cm_event_type ev_type; + struct isert_portal *portal; + int err = -EINVAL; + + TRACE_ENTRY(); + + ev_type = cm_ev->event; + portal = cm_id->context; + pr_info("isert_cm_evt:%s(%d) status:%d portal:%p cm_id:%p\n", + cm_event_type_str(ev_type), ev_type, cm_ev->status, + portal, cm_id); + + if (portal->cm_id == cm_id) { + err = isert_cm_evt_listener_handler(cm_id, cm_ev); + goto out; + } + + switch (ev_type) { + case RDMA_CM_EVENT_CONNECT_REQUEST: + err = isert_cm_conn_req_handler(cm_id, cm_ev); + break; + + case RDMA_CM_EVENT_ESTABLISHED: + err = isert_cm_connect_handler(cm_id, cm_ev); + if (unlikely(err)) + err = isert_handle_failure(cm_id->qp->qp_context); + break; + + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_REJECTED: + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_DISCONNECTED: + err = isert_cm_disconnect_handler(cm_id, cm_ev); + break; + + case RDMA_CM_EVENT_DEVICE_REMOVAL: + isert_cm_disconnect_handler(cm_id, cm_ev); + /* fallthrough */ + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + err = isert_cm_timewait_exit_handler(cm_id, cm_ev); + break; + + case RDMA_CM_EVENT_MULTICAST_JOIN: + case RDMA_CM_EVENT_MULTICAST_ERROR: + pr_err("UD-related event:%d, ignored\n", ev_type); + break; + + case RDMA_CM_EVENT_ADDR_RESOLVED: + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_RESOLVED: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_CONNECT_RESPONSE: + pr_err("Active side event:%d, ignored\n", ev_type); + break; + + /* We can receive this instead of RDMA_CM_EVENT_ESTABLISHED */ + case RDMA_CM_EVENT_UNREACHABLE: + { + struct isert_connection *isert_conn; + + isert_conn = cm_id->qp->qp_context; + set_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags); + isert_sched_conn_closed(isert_conn); + err = 0; + } + break; + + default: + pr_err("Illegal event:%d, ignored\n", ev_type); + break; + } + + if (unlikely(err)) + pr_err("Failed to handle rdma cm evt:%d, err:%d\n", + ev_type, err); + +out: + TRACE_EXIT_RES(err); + return err; +} + +/* create a portal, after listening starts all events + * are received in isert_cm_evt_handler() + */ +struct isert_portal *isert_portal_create(void) +{ + struct isert_portal *portal; + struct rdma_cm_id *cm_id; + int err; + + if (unlikely(!try_module_get(THIS_MODULE))) { + pr_err("Unable increment module reference\n"); + portal = ERR_PTR(-EINVAL); + goto out; + } + + portal = kzalloc(sizeof(*portal), GFP_KERNEL); + if (unlikely(!portal)) { + pr_err("Unable to allocate struct portal\n"); + portal = ERR_PTR(-ENOMEM); + goto err_alloc; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) && !defined(RHEL_MAJOR) + cm_id = rdma_create_id(isert_cm_evt_handler, portal, RDMA_PS_TCP); +#else + cm_id = rdma_create_id(isert_cm_evt_handler, portal, RDMA_PS_TCP, + IB_QPT_RC); +#endif + if (unlikely(IS_ERR(cm_id))) { + err = PTR_ERR(cm_id); + pr_err("Failed to create rdma id, err:%d\n", err); + goto create_id_err; + } + portal->cm_id = cm_id; + + INIT_LIST_HEAD(&portal->conn_list); + isert_portal_list_add(portal); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) + rdma_set_afonly(cm_id, 1); +#endif + + pr_info("Created iser portal cm_id:%p\n", cm_id); +out: + return portal; + +create_id_err: + kfree(portal); + portal = ERR_PTR(err); +err_alloc: + module_put(THIS_MODULE); + goto out; +} + +int isert_portal_listen(struct isert_portal *portal, + struct sockaddr *sa, + size_t addr_len) +{ + int err; + + TRACE_ENTRY(); + err = rdma_bind_addr(portal->cm_id, sa); + if (err) { + pr_warn("Failed to bind rdma addr, err:%d\n", err); + goto out; + } + err = rdma_listen(portal->cm_id, ISER_LISTEN_BACKLOG); + if (err) { + pr_err("Failed rdma listen, err:%d\n", err); + goto out; + } + memcpy(&portal->addr, sa, addr_len); + + switch (sa->sa_family) { + case AF_INET: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) + pr_info("iser portal cm_id:%p listens on: " + NIPQUAD_FMT ":%d\n", portal->cm_id, + NIPQUAD(((struct sockaddr_in *)sa)->sin_addr.s_addr), + (int)ntohs(((struct sockaddr_in *)sa)->sin_port)); +#else + pr_info("iser portal cm_id:%p listens on: " + "%pI4:%d\n", portal->cm_id, + &((struct sockaddr_in *)sa)->sin_addr.s_addr, + (int)ntohs(((struct sockaddr_in *)sa)->sin_port)); +#endif + break; + case AF_INET6: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) + pr_info("iser portal cm_id:%p listens on: " + NIP6_FMT " %d\n", + portal->cm_id, + NIP6(((struct sockaddr_in6 *)sa)->sin6_addr.s_addr), + (int)ntohs(((struct sockaddr_in6 *)sa)->sin6_port)); +#else + pr_info("iser portal cm_id:%p listens on: " + "%pI6 %d\n", portal->cm_id, + &((struct sockaddr_in6 *)sa)->sin6_addr, + (int)ntohs(((struct sockaddr_in6 *)sa)->sin6_port)); +#endif + break; + default: + pr_err("Unknown address family\n"); + err = -EINVAL; + goto out; + } + +out: + TRACE_EXIT_RES(err); + return err; +} + +void isert_portal_free(struct isert_portal *portal) +{ + lockdep_assert_held(&dev_list_mutex); + + if (!list_empty(&portal->conn_list)) + return; + + kfree(portal); + module_put(THIS_MODULE); +} + +void isert_portal_release(struct isert_portal *portal) +{ + struct isert_connection *conn; + + pr_info("iser portal cm_id:%p releasing\n", portal->cm_id); + + if (portal->cm_id) { + rdma_destroy_id(portal->cm_id); + portal->cm_id = NULL; + } + + isert_portal_list_remove(portal); + + mutex_lock(&dev_list_mutex); + list_for_each_entry(conn, &portal->conn_list, portal_node) + isert_conn_disconnect(conn); + portal->state = ISERT_PORTAL_INACTIVE; + isert_portal_free(portal); + mutex_unlock(&dev_list_mutex); +} + +struct isert_portal *isert_portal_start(struct sockaddr *sa, size_t addr_len) +{ + struct isert_portal *portal; + int err; + + portal = isert_portal_create(); + if (unlikely(IS_ERR(portal))) + return portal; + + err = isert_portal_listen(portal, sa, addr_len); + if (err) { + isert_portal_release(portal); + portal = ERR_PTR(err); + } + return portal; +} diff --git a/iscsi-scst/kernel/isert-scst/isert.c b/iscsi-scst/kernel/isert-scst/isert.c new file mode 100644 index 00000000..21fd9658 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/isert.c @@ -0,0 +1,499 @@ +/* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#include +#include +#include + +#include "isert.h" +#include "isert_dbg.h" +#include "iscsit_transport.h" +#include "iser_datamover.h" + +#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) +unsigned long isert_trace_flag = ISERT_DEFAULT_LOG_FLAGS; +unsigned long iscsi_trace_flag = ISERT_DEFAULT_LOG_FLAGS; +#endif + +static unsigned int isert_nr_devs = ISERT_NR_DEVS; +module_param(isert_nr_devs, uint, S_IRUGO); +MODULE_PARM_DESC(isert_nr_devs, + "Maximum concurrent number of connection requests to handle."); + +static void isert_mark_conn_closed(struct iscsi_conn *conn, int flags) +{ + TRACE_ENTRY(); + if (flags & ISCSI_CONN_ACTIVE_CLOSE) + conn->active_close = 1; + if (flags & ISCSI_CONN_DELETING) + conn->deleting = 1; + + conn->read_state = 0; + + if (!conn->closing) { + conn->closing = 1; + schedule_work(&conn->close_work); + } + + TRACE_EXIT(); +} + +static void isert_close_conn(struct iscsi_conn *conn, int flags) +{ +} + +static int isert_receive_cmnd_data(struct iscsi_cmnd *cmnd) +{ +#ifdef CONFIG_SCST_EXTRACHECKS + if (cmnd->scst_state == ISCSI_CMD_STATE_RX_CMD) + TRACE_DBG("cmnd %p is still in RX_CMD state", + cmnd); +#endif + EXTRACHECKS_BUG_ON(cmnd->scst_state != ISCSI_CMD_STATE_AFTER_PREPROC); + return 0; +} + +static void isert_update_len_sn(struct iscsi_cmnd *cmnd) +{ + TRACE_ENTRY(); + + iscsi_cmnd_set_length(&cmnd->pdu); + switch (cmnd_opcode(cmnd)) { + case ISCSI_OP_NOP_IN: + if (cmnd->pdu.bhs.itt == ISCSI_RESERVED_TAG) + cmnd->pdu.bhs.sn = (__force u32)cmnd_set_sn(cmnd, 0); + else + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_SCSI_RSP: + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_SCSI_TASK_MGT_RSP: + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_TEXT_RSP: + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_SCSI_DATA_IN: + { + struct iscsi_data_in_hdr *rsp = + (struct iscsi_data_in_hdr *)&cmnd->pdu.bhs; + + cmnd_set_sn(cmnd, (rsp->flags & ISCSI_FLG_FINAL) ? 1 : 0); + break; + } + case ISCSI_OP_LOGOUT_RSP: + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_R2T: + cmnd->pdu.bhs.sn = (__force u32)cmnd_set_sn(cmnd, 0); + break; + case ISCSI_OP_ASYNC_MSG: + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_REJECT: + cmnd_set_sn(cmnd, 1); + break; + default: + PRINT_ERROR("Unexpected cmnd op %x", cmnd_opcode(cmnd)); + break; + } + + TRACE_EXIT(); +} + +static int isert_process_all_writes(struct iscsi_conn *conn) +{ + struct iscsi_cmnd *cmnd; + int res = 0; + + TRACE_ENTRY(); + + while ((cmnd = iscsi_get_send_cmnd(conn)) != NULL) { + isert_update_len_sn(cmnd); + conn_get(conn); + isert_pdu_tx(cmnd); + } + + TRACE_EXIT_RES(res); + return res; +} + +static int isert_send_locally(struct iscsi_cmnd *req, unsigned int cmd_count) +{ + int res = 0; + + TRACE_ENTRY(); + + req_cmnd_pre_release(req); + res = isert_process_all_writes(req->conn); + cmnd_put(req); + + TRACE_EXIT_RES(res); + return res; +} + +static struct iscsi_cmnd *isert_cmnd_alloc(struct iscsi_conn *conn, + struct iscsi_cmnd *parent) +{ + struct iscsi_cmnd *cmnd; + + TRACE_ENTRY(); + + if (likely(parent)) + cmnd = isert_alloc_scsi_rsp_pdu(conn); + else + cmnd = isert_alloc_scsi_fake_pdu(conn); + + iscsi_cmnd_init(conn, cmnd, parent); + + TRACE_EXIT(); + return cmnd; +} + +static void isert_cmnd_free(struct iscsi_cmnd *cmnd) +{ + TRACE_ENTRY(); + +#ifdef CONFIG_SCST_EXTRACHECKS + if (unlikely(cmnd->on_write_list || cmnd->on_write_timeout_list)) { + struct iscsi_scsi_cmd_hdr *req = cmnd_hdr(cmnd); + + PRINT_CRIT_ERROR("cmnd %p still on some list?, %x, %x, %x, " + "%x, %x, %x, %x", cmnd, req->opcode, req->scb[0], + req->flags, req->itt, be32_to_cpu(req->data_length), + req->cmd_sn, + be32_to_cpu((__force __be32)(cmnd->pdu.datasize))); + + if (unlikely(cmnd->parent_req)) { + struct iscsi_scsi_cmd_hdr *preq = + cmnd_hdr(cmnd->parent_req); + PRINT_CRIT_ERROR("%p %x %u", preq, preq->opcode, + preq->scb[0]); + } + sBUG(); + } +#endif + if (cmnd->parent_req) + isert_release_tx_pdu(cmnd); + else + isert_release_rx_pdu(cmnd); + + TRACE_EXIT(); +} + +static void isert_preprocessing_done(struct iscsi_cmnd *req) +{ + req->scst_state = ISCSI_CMD_STATE_AFTER_PREPROC; +} + +static void isert_set_sense_data(struct iscsi_cmnd *rsp, + const u8 *sense_buf, int sense_len) +{ + u8 *buf; + + buf = sg_virt(rsp->sg) + ISER_HDRS_SZ; + + memcpy(buf, &rsp->sense_hdr, sizeof(rsp->sense_hdr)); + memcpy(&buf[sizeof(rsp->sense_hdr)], sense_buf, sense_len); +} + +static void isert_set_req_data(struct iscsi_cmnd *req, struct iscsi_cmnd *rsp) +{ + memcpy(sg_virt(rsp->sg) + ISER_HDRS_SZ, + sg_virt(req->sg) + ISER_HDRS_SZ, req->bufflen); + rsp->bufflen = req->bufflen; +} + +static void isert_send_data_rsp(struct iscsi_cmnd *req, u8 *sense, + int sense_len, u8 status, int is_send_status) +{ + struct iscsi_cmnd *rsp; + + TRACE_ENTRY(); + + sBUG_ON(!is_send_status); + + rsp = create_status_rsp(req, status, sense, sense_len); + + isert_update_len_sn(rsp); + + conn_get(rsp->conn); + if (status != SAM_STAT_CHECK_CONDITION) + isert_send_data_in(req, rsp); + else + isert_pdu_tx(rsp); + + TRACE_EXIT(); +} + +static void isert_make_conn_wr_active(struct iscsi_conn *conn) +{ + isert_process_all_writes(conn); +} + +static int isert_conn_activate(struct iscsi_conn *conn) +{ + return 0; +} + +static void isert_free_conn(struct iscsi_conn *conn) +{ + isert_free_connection(conn); +} + +int isert_handle_close_connection(struct iscsi_conn *conn) +{ + isert_mark_conn_closed(conn, 0); + /* Take care of case where our connection is being closed + * without being connected to a session - if connection allocation + * failed for some reason */ + if (unlikely(!conn->session)) + isert_free_connection(conn); + else + start_close_conn(conn); + return 0; +} + +int isert_pdu_rx(struct iscsi_cmnd *cmnd) +{ + int res = 0; + scst_data_direction dir; + + TRACE_ENTRY(); + +#ifdef CONFIG_SCST_EXTRACHECKS + cmnd->conn->rd_task = current; +#endif + iscsi_cmnd_init(cmnd->conn, cmnd, NULL); + cmnd_rx_start(cmnd); + + if (unlikely(!cmnd->scst_cmd)) { + cmnd_rx_end(cmnd); + goto out; + } + + if (unlikely(scst_cmd_prelim_completed(cmnd->scst_cmd) || + unlikely(cmnd->prelim_compl_flags != 0))) { + set_bit(ISCSI_CMD_PRELIM_COMPLETED, &cmnd->prelim_compl_flags); + cmnd_rx_end(cmnd); + goto out; + } + + dir = scst_cmd_get_data_direction(cmnd->scst_cmd); + + if (dir & SCST_DATA_WRITE) { + res = iscsi_cmnd_set_write_buf(cmnd); + if (unlikely(res)) + goto out; + res = isert_request_data_out(cmnd); + cmnd->r2t_len_to_receive = 0; + cmnd->r2t_len_to_send = 0; + cmnd->outstanding_r2t = 0; + } else { + cmnd_rx_end(cmnd); + } + +out: + TRACE_EXIT_RES(res); + return res; +} + +int isert_data_out_ready(struct iscsi_cmnd *cmnd) +{ + int res = 0; + + TRACE_ENTRY(); +#ifdef CONFIG_SCST_EXTRACHECKS + cmnd->conn->rd_task = current; +#endif + cmnd_rx_end(cmnd); + + TRACE_EXIT_RES(res); + return res; +} + +int isert_data_in_sent(struct iscsi_cmnd *din) +{ + return 0; +} + +void isert_pdu_err(struct iscsi_cmnd *pdu) +{ + struct iscsi_conn *conn = pdu->conn; + + if (!conn->session) /* we are still in login phase */ + return; + + if (pdu->parent_req) { + rsp_cmnd_release(pdu); + conn_put(conn); + } else { + /* + * we will get multiple pdu errors + * for same PDU with multiple RDMAs case + */ + if (pdu->on_write_timeout_list) + req_cmnd_release_force(pdu); + } +} + +int isert_pdu_sent(struct iscsi_cmnd *pdu) +{ + struct iscsi_conn *conn = pdu->conn; + int res = 0; + + TRACE_ENTRY(); + + if (unlikely(pdu->should_close_conn)) { + if (pdu->should_close_all_conn) { + struct iscsi_target *target = pdu->conn->session->target; + + PRINT_INFO("Closing all connections for target %x at " + "initiator's %s request", target->tid, + conn->session->initiator_name); + mutex_lock(&target->target_mutex); + target_del_all_sess(target, 0); + mutex_unlock(&target->target_mutex); + } else { + PRINT_INFO("Closing connection at initiator's %s " + "request", conn->session->initiator_name); + mark_conn_closed(conn); + } + } + + /* we may get NULL parent req for login response */ + if (likely(pdu->parent_req)) { + rsp_cmnd_release(pdu); + conn_put(conn); + } + + TRACE_EXIT_RES(res); + return res; +} + +static ssize_t isert_get_initiator_ip(struct iscsi_conn *conn, + char *buf, int size) +{ + int pos; + struct sockaddr_storage ss; + size_t addr_len; + + TRACE_ENTRY(); + + isert_get_peer_addr(conn, (struct sockaddr *)&ss, &addr_len); + + switch (ss.ss_family) { + case AF_INET: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) + pos = scnprintf(buf, size, + "%u.%u.%u.%u", + NIPQUAD(((struct sockaddr_in *)&ss)->sin_addr.s_addr)); +#else + pos = scnprintf(buf, size, + "%pI4", &((struct sockaddr_in *)&ss)->sin_addr.s_addr); +#endif + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) + pos = scnprintf(buf, size, + "[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]", + NIP6(((struct sockaddr_in6 *)&ss)->sin6_addr.s_addr)); +#else + pos = scnprintf(buf, size, "[%p6]", + &((struct sockaddr_in6 *)&ss)->sin6_addr); +#endif + break; +#endif + default: + pos = scnprintf(buf, size, "Unknown family %d", + ss.ss_family); + break; + } + + TRACE_EXIT_RES(pos); + return pos; +} + +static struct iscsit_transport isert_transport = { + .owner = THIS_MODULE, + .name = "iSER", + .transport_type = ISCSI_RDMA, + .iscsit_conn_alloc = isert_conn_alloc, + .iscsit_conn_activate = isert_conn_activate, + .iscsit_conn_free = isert_free_conn, + .iscsit_alloc_cmd = isert_cmnd_alloc, + .iscsit_free_cmd = isert_cmnd_free, + .iscsit_preprocessing_done = isert_preprocessing_done, + .iscsit_send_data_rsp = isert_send_data_rsp, + .iscsit_make_conn_wr_active = isert_make_conn_wr_active, + .iscsit_get_initiator_ip = isert_get_initiator_ip, + .iscsit_send_locally = isert_send_locally, + .iscsit_mark_conn_closed = isert_mark_conn_closed, + .iscsit_conn_close = isert_close_conn, + .iscsit_set_sense_data = isert_set_sense_data, + .iscsit_set_req_data = isert_set_req_data, + .iscsit_receive_cmnd_data = isert_receive_cmnd_data, + .iscsit_close_all_portals = isert_close_all_portals, +}; + +static void isert_cleanup_module(void) +{ + iscsit_unreg_transport(&isert_transport); + isert_cleanup_login_devs(); +} + +static int __init isert_init_module(void) +{ + int ret; + + ret = iscsit_reg_transport(&isert_transport); + if (unlikely(ret)) + goto out; + + ret = isert_init_login_devs(isert_nr_devs); + +out: + return ret; +} + +MODULE_AUTHOR("Yan Burman"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("iSER target transport driver v3.0.1-pre#" + __stringify(OFED_FLAVOR)); + +module_init(isert_init_module); +module_exit(isert_cleanup_module); diff --git a/iscsi-scst/kernel/isert-scst/isert.h b/iscsi-scst/kernel/isert-scst/isert.h new file mode 100644 index 00000000..178b2284 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/isert.h @@ -0,0 +1,136 @@ +/* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#ifndef __ISERT_H__ +#define __ISERT_H__ + +#include +#include +#include +#include /* size_t, dev_t */ +#include +#include +#include +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37) +#include +#else +#include +#endif + +#ifdef INSIDE_KERNEL_TREE +#include +#include +#include +#else +#include "isert_scst.h" +#include "iscsi_scst.h" +#include "iscsi.h" +#endif + +#include "iser_hdr.h" + +struct iscsi_conn; + +#define ISERT_NR_DEVS 128 + +struct isert_listener_dev { + struct device *dev; + struct cdev cdev; + dev_t devno; + wait_queue_head_t waitqueue; + spinlock_t conn_lock; + struct list_head new_conn_list; + struct list_head curr_conn_list; + struct isert_addr_info info; + atomic_t available; + void *portal_h[ISERT_MAX_PORTALS]; + int free_portal_idx; +}; + +enum isert_conn_dev_state { + CS_INIT, + CS_REQ_BHS, + CS_REQ_DATA, + CS_REQ_FINISHED, + CS_RSP_BHS, + CS_RSP_DATA, + CS_RSP_FINISHED, + CS_DISCONNECTED, +}; + +#define ISERT_CONN_PASSED 0 + +struct isert_conn_dev { + struct device *dev; + struct cdev cdev; + dev_t devno; + wait_queue_head_t waitqueue; + struct list_head conn_list_entry; + struct iscsi_conn *conn; + unsigned int idx; + int occupied; + spinlock_t pdu_lock; + struct iscsi_cmnd *login_req; + struct iscsi_cmnd *login_rsp; + atomic_t available; + size_t read_len; + char *read_buf; + size_t write_len; + char *write_buf; + void *sg_virt; + struct page *pages[DIV_ROUND_UP(ISER_MAX_LOGIN_RDSL, PAGE_SIZE)]; + enum isert_conn_dev_state state; + int is_discovery; + struct timer_list tmo_timer; + int timer_active; + struct kref kref; + unsigned long flags; +}; + +#define ISER_CONN_DEV_PREFIX "isert/conn" + +/* isert_login.c */ +int __init isert_init_login_devs(unsigned int ndevs); +void isert_cleanup_login_devs(void); +int isert_conn_alloc(struct iscsi_session *session, + struct iscsi_kern_conn_info *info, + struct iscsi_conn **new_conn, + struct iscsit_transport *t); +int isert_handle_close_connection(struct iscsi_conn *conn); +void isert_close_all_portals(void); + +#endif /* __ISERT_H__ */ diff --git a/iscsi-scst/kernel/isert-scst/isert_dbg.h b/iscsi-scst/kernel/isert-scst/isert_dbg.h new file mode 100644 index 00000000..064c714b --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/isert_dbg.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2007 - 2014 Vladislav Bolkhovitin + * Copyright (C) 2007 - 2014 Fusion-io, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2 + * of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef ISERT_DBG_H +#define ISERT_DBG_H + +#include + +#ifdef LOG_PREFIX +#undef LOG_PREFIX +#endif + +#define LOG_PREFIX "isert" /* Prefix for SCST tracing macros. */ + +#ifdef INSIDE_KERNEL_TREE +#include +#else +#include +#endif + +#ifdef CONFIG_SCST_DEBUG +#define ISERT_DEFAULT_LOG_FLAGS (TRACE_FUNCTION | TRACE_LINE | TRACE_PID | \ + TRACE_OUT_OF_MEM | TRACE_MGMT | TRACE_MGMT_DEBUG | \ + TRACE_MINOR | TRACE_SPECIAL) +#else +#define ISERT_DEFAULT_LOG_FLAGS (TRACE_OUT_OF_MEM | TRACE_MGMT | \ + TRACE_SPECIAL) +#endif + +#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) +extern unsigned long isert_trace_flag; +#ifdef trace_flag +#undef trace_flag +#endif +#define trace_flag isert_trace_flag +#endif + +#endif diff --git a/iscsi-scst/kernel/isert-scst/isert_login.c b/iscsi-scst/kernel/isert-scst/isert_login.c new file mode 100644 index 00000000..a392679d --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/isert_login.c @@ -0,0 +1,1010 @@ +/* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#include +#include +#include /* everything... */ +#include /* error codes */ +#include +#include +#include + +#ifdef INSIDE_KERNEL_TREE +#include +#else +#include "iscsi.h" +#endif + +#include "isert.h" +#include "isert_dbg.h" +#include "iser_datamover.h" + +static unsigned int n_devs; + +static int isert_major; + +static struct isert_conn_dev *isert_conn_devices; + +static struct isert_listener_dev isert_listen_dev; + +static struct class *isert_class; + +static struct isert_conn_dev *get_available_dev(struct isert_listener_dev *dev, + struct iscsi_conn *conn) +{ + unsigned int i; + struct isert_conn_dev *res = NULL; + + spin_lock(&dev->conn_lock); + for (i = 0; i < n_devs; ++i) { + if (!isert_conn_devices[i].occupied) { + res = &isert_conn_devices[i]; + res->occupied = 1; + res->conn = conn; + isert_set_priv(conn, res); + list_add_tail(&res->conn_list_entry, &dev->new_conn_list); + break; + } + } + spin_unlock(&dev->conn_lock); + + return res; +} + +static void isert_del_timer(struct isert_conn_dev *dev) +{ + if (dev->timer_active) { + dev->timer_active = 0; + del_timer_sync(&dev->tmo_timer); + } +} + +static void isert_kref_release_dev(struct kref *kref) +{ + struct isert_conn_dev *dev = container_of(kref, + struct isert_conn_dev, + kref); + kref_init(&dev->kref); + dev->occupied = 0; + dev->state = CS_INIT; + atomic_set(&dev->available, 1); + list_del_init(&dev->conn_list_entry); + dev->flags = 0; + dev->conn = NULL; +} + +static void isert_dev_release(struct isert_conn_dev *dev) +{ + spin_lock(&isert_listen_dev.conn_lock); + kref_put(&dev->kref, isert_kref_release_dev); + spin_unlock(&isert_listen_dev.conn_lock); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) +static void isert_close_conn_fn(void *ctx) +#else +static void isert_close_conn_fn(struct work_struct *work) +#endif +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + struct iscsi_conn *conn = ctx; +#else + struct iscsi_conn *conn = container_of(work, + struct iscsi_conn, close_work); +#endif + + isert_close_connection(conn); +} + +static void isert_conn_timer_fn(unsigned long arg) +{ + struct isert_conn_dev *conn_dev = (struct isert_conn_dev *)arg; + struct iscsi_conn *conn = conn_dev->conn; + + TRACE_ENTRY(); + + conn_dev->timer_active = 0; + + PRINT_ERROR("Timeout on connection %p\n", conn_dev->conn); + + schedule_work(&conn->close_work); + + TRACE_EXIT(); +} + +static int add_new_connection(struct isert_listener_dev *dev, + struct iscsi_conn *conn) +{ + struct isert_conn_dev *conn_dev = get_available_dev(dev, conn); + int res = 0; + + TRACE_ENTRY(); + + if (!conn_dev) { + PRINT_WARNING("%s", "Unable to allocate new connection"); + res = -ENOSPC; + goto out; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + INIT_WORK(&conn->close_work, isert_close_conn_fn, conn); +#else + INIT_WORK(&conn->close_work, isert_close_conn_fn); +#endif + + init_timer(&conn_dev->tmo_timer); + conn_dev->tmo_timer.function = isert_conn_timer_fn; + conn_dev->tmo_timer.expires = jiffies + 60 * HZ; + conn_dev->tmo_timer.data = (unsigned long)conn_dev; + add_timer(&conn_dev->tmo_timer); + conn_dev->timer_active = 1; + wake_up(&dev->waitqueue); + +out: + TRACE_EXIT_RES(res); + return res; +} + +static bool have_new_connection(struct isert_listener_dev *dev) +{ + bool ret; + + spin_lock(&dev->conn_lock); + ret = !list_empty(&dev->new_conn_list); + spin_unlock(&dev->conn_lock); + + return ret; +} + +int isert_conn_alloc(struct iscsi_session *session, + struct iscsi_kern_conn_info *info, + struct iscsi_conn **new_conn, + struct iscsit_transport *t) +{ + int res = 0; + struct isert_conn_dev *dev; + struct iscsi_conn *conn; + struct iscsi_cmnd *cmnd; + struct file *filp = fget(info->fd); + + TRACE_ENTRY(); + + lockdep_assert_held(&session->target->target_mutex); + + if (unlikely(!filp)) { + res = -EBADF; + goto out; + } + + dev = filp->private_data; + + cmnd = dev->login_rsp; + + sBUG_ON(cmnd == NULL); + dev->login_rsp = NULL; + + *new_conn = dev->conn; + res = isert_set_session_params(dev->conn, &session->sess_params, + &session->tgt_params); + + if (!res) + set_bit(ISERT_CONN_PASSED, &dev->flags); + + fput(filp); + + conn = *new_conn; + + if (unlikely(res)) + goto cleanup_conn; + + conn->transport = t; + + res = iscsi_init_conn(session, info, conn); + if (unlikely(res)) + goto cleanup_conn; + + conn->rd_state = 1; + isert_del_timer(dev); + isert_dev_release(dev); + isert_set_priv(conn, NULL); + + res = isert_login_rsp_tx(cmnd, true, false); + vunmap(dev->sg_virt); + dev->sg_virt = NULL; + + if (unlikely(res)) + goto cleanup_iscsi_conn; + +#ifndef CONFIG_SCST_PROC + res = conn_sysfs_add(conn); + if (unlikely(res)) + goto cleanup_iscsi_conn; +#endif + + list_add_tail(&conn->conn_list_entry, &session->conn_list); + + goto out; + +cleanup_iscsi_conn: + conn->rd_state = 0; + if (conn->nop_in_interval > 0) + cancel_delayed_work_sync(&conn->nop_in_delayed_work); +cleanup_conn: + conn->session = NULL; + isert_close_connection(conn); +out: + TRACE_EXIT_RES(res); + return res; +} + +static unsigned int isert_listen_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct isert_listener_dev *dev = filp->private_data; + unsigned int mask = 0; + + poll_wait(filp, &dev->waitqueue, wait); + + if (have_new_connection(dev)) + mask |= POLLIN | POLLRDNORM; + + return mask; +} + +static int isert_listen_open(struct inode *inode, struct file *filp) +{ + struct isert_listener_dev *dev; + + dev = container_of(inode->i_cdev, struct isert_listener_dev, cdev); + + if (!atomic_dec_and_test(&dev->available)) { + atomic_inc(&dev->available); + return -EBUSY; /* already open */ + } + + filp->private_data = dev; /* for other methods */ + + return 0; +} + +static void isert_delete_conn_dev(struct isert_conn_dev *conn_dev) +{ + isert_del_timer(conn_dev); + + if (!test_and_set_bit(ISERT_CONN_PASSED, &conn_dev->flags)) { + BUG_ON(conn_dev->conn == NULL); + isert_close_connection(conn_dev->conn); + } +} + +static int isert_listen_release(struct inode *inode, struct file *filp) +{ + struct isert_listener_dev *dev = filp->private_data; + struct isert_conn_dev *conn_dev; + + spin_lock(&isert_listen_dev.conn_lock); + list_for_each_entry(conn_dev, &dev->new_conn_list, conn_list_entry) + isert_delete_conn_dev(conn_dev); + + list_for_each_entry(conn_dev, &dev->curr_conn_list, conn_list_entry) + isert_delete_conn_dev(conn_dev); + spin_unlock(&isert_listen_dev.conn_lock); + + atomic_inc(&dev->available); + return 0; +} + +static ssize_t isert_listen_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos) +{ + struct isert_listener_dev *dev = filp->private_data; + struct isert_conn_dev *conn_dev; + int res = 0; + char k_buff[sizeof("/dev/") + sizeof(ISER_CONN_DEV_PREFIX) + 3 + 1]; + + TRACE_ENTRY(); + + if (!have_new_connection(dev)) { +wait_for_connection: + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + res = wait_event_freezable(dev->waitqueue, + !have_new_connection(dev)); + if (res < 0) + goto out; + } + + spin_lock(&dev->conn_lock); + if (list_empty(&dev->new_conn_list)) { + /* could happen if we got disconnect */ + spin_unlock(&dev->conn_lock); + goto wait_for_connection; + } + conn_dev = list_first_entry(&dev->new_conn_list, struct isert_conn_dev, + conn_list_entry); + list_move(&conn_dev->conn_list_entry, &dev->curr_conn_list); + spin_unlock(&dev->conn_lock); + + res = snprintf(k_buff, sizeof(k_buff), "/dev/"ISER_CONN_DEV_PREFIX"%d", + conn_dev->idx); + ++res; /* copy trailing \0 as well */ + + if (unlikely(copy_to_user(buf, k_buff, res))) + res = -EFAULT; + +out: + TRACE_EXIT_RES(res); + return res; +} + +static long isert_listen_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct isert_listener_dev *dev = filp->private_data; + int res = 0, rc; + void __user *ptr = (void __user *)arg; + void *portal; + + TRACE_ENTRY(); + + switch (cmd) { + case SET_LISTEN_ADDR: + rc = copy_from_user(&dev->info, ptr, sizeof(dev->info)); + if (unlikely(rc != 0)) { + PRINT_ERROR("Failed to copy %d user's bytes\n", rc); + res = -EFAULT; + goto out; + } + + if (unlikely(dev->free_portal_idx >= ISERT_MAX_PORTALS)) { + PRINT_ERROR("Maximum number of portals exceeded: %d\n", + ISERT_MAX_PORTALS); + res = -EINVAL; + goto out; + } + + portal = isert_portal_add((struct sockaddr *)&dev->info.addr, + dev->info.addr_len); + if (IS_ERR(portal)) { + PRINT_ERROR("Unable to add portal of size %zu\n", + dev->info.addr_len); + res = PTR_ERR(portal); + goto out; + } + dev->portal_h[dev->free_portal_idx++] = portal; + break; + + default: + PRINT_ERROR("Invalid ioctl cmd %x", cmd); + res = -EINVAL; + } + +out: + TRACE_EXIT_RES(res); + return res; +} + +int isert_conn_established(struct iscsi_conn *iscsi_conn, + struct sockaddr *from_addr, int addr_len) +{ + return add_new_connection(&isert_listen_dev, iscsi_conn); +} + +int isert_connection_closed(struct iscsi_conn *iscsi_conn) +{ + int res = 0; + + TRACE_ENTRY(); + + if (iscsi_conn->rd_state) { + res = isert_handle_close_connection(iscsi_conn); + } else { + struct isert_conn_dev *dev = isert_get_priv(iscsi_conn); + + if (dev) { + isert_del_timer(dev); + dev->state = CS_DISCONNECTED; + if (dev->login_req) { + res = isert_task_abort(dev->login_req); + spin_lock(&dev->pdu_lock); + dev->login_req = NULL; + spin_unlock(&dev->pdu_lock); + } + + wake_up(&dev->waitqueue); + isert_dev_release(dev); + } + + isert_free_connection(iscsi_conn); + } + + TRACE_EXIT_RES(res); + return res; +} + +static bool will_read_block(struct isert_conn_dev *dev) +{ + bool res = true; + + spin_lock(&dev->pdu_lock); + if (dev->login_req != NULL) { + switch (dev->state) { + case CS_REQ_BHS: + case CS_REQ_DATA: + res = false; + break; + default: + ; + } + } + spin_unlock(&dev->pdu_lock); + + return res; +} + +static int isert_open(struct inode *inode, struct file *filp) +{ + struct isert_conn_dev *dev; /* device information */ + int res = 0; + + TRACE_ENTRY(); + + dev = container_of(inode->i_cdev, struct isert_conn_dev, cdev); + + spin_lock(&isert_listen_dev.conn_lock); + if (unlikely(dev->occupied == 0)) { + spin_unlock(&isert_listen_dev.conn_lock); + res = -ENODEV; /* already closed */ + goto out; + } + spin_unlock(&isert_listen_dev.conn_lock); + + if (unlikely(!atomic_dec_and_test(&dev->available))) { + atomic_inc(&dev->available); + res = -EBUSY; /* already open */ + goto out; + } + + spin_lock(&isert_listen_dev.conn_lock); + kref_get(&dev->kref); + spin_unlock(&isert_listen_dev.conn_lock); + + filp->private_data = dev; /* for other methods */ + +out: + TRACE_EXIT_RES(res); + return res; +} + +static int isert_release(struct inode *inode, struct file *filp) +{ + struct isert_conn_dev *dev = filp->private_data; + int res = 0; + + TRACE_ENTRY(); + + vunmap(dev->sg_virt); + dev->sg_virt = NULL; + dev->is_discovery = 0; + + if (!test_and_set_bit(ISERT_CONN_PASSED, &dev->flags)) { + BUG_ON(dev->conn == NULL); + isert_close_connection(dev->conn); + } + + isert_del_timer(dev); + + isert_dev_release(dev); + + TRACE_EXIT_RES(res); + return res; +} + +static char *isert_vmap_sg(struct page **pages, struct scatterlist *sgl, + int n_ents) +{ + unsigned int i; + struct scatterlist *sg; + void *vaddr; + + for_each_sg(sgl, sg, n_ents, i) + pages[i] = sg_page(sg); + + vaddr = vmap(pages, n_ents, 0, PAGE_KERNEL); + + return vaddr; +} + +static ssize_t isert_read(struct file *filp, char __user *buf, size_t count, + loff_t *f_pos) +{ + struct isert_conn_dev *dev = filp->private_data; + size_t to_read; + + if (dev->state == CS_DISCONNECTED) + return -EPIPE; + + if (will_read_block(dev)) { + int ret; + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + ret = wait_event_freezable(dev->waitqueue, + !will_read_block(dev)); + if (ret < 0) + return ret; + } + + to_read = min(count, dev->read_len); + if (copy_to_user(buf, dev->read_buf, to_read)) + return -EFAULT; + + dev->read_len -= to_read; + dev->read_buf += to_read; + + switch (dev->state) { + case CS_REQ_BHS: + if (dev->read_len == 0) { + dev->read_len = dev->login_req->bufflen; + dev->sg_virt = isert_vmap_sg(dev->pages, + dev->login_req->sg, + dev->login_req->sg_cnt); + if (!dev->sg_virt) + return -ENOMEM; + dev->read_buf = dev->sg_virt + ISER_HDRS_SZ; + dev->state = CS_REQ_DATA; + } + break; + + case CS_REQ_DATA: + if (dev->read_len == 0) { + vunmap(dev->sg_virt); + dev->sg_virt = NULL; + + spin_lock(&dev->pdu_lock); + dev->login_req = NULL; + dev->state = CS_REQ_FINISHED; + spin_unlock(&dev->pdu_lock); + } + break; + + default: + PRINT_ERROR("Invalid state in %s (%d)\n", __func__, + dev->state); + to_read = 0; + } + + return to_read; +} + +static ssize_t isert_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct isert_conn_dev *dev = filp->private_data; + size_t to_write; + + if (dev->state == CS_DISCONNECTED) + return -EPIPE; + + to_write = min(count, dev->write_len); + if (copy_from_user(dev->write_buf, buf, to_write)) + return -EFAULT; + + dev->write_len -= to_write; + dev->write_buf += to_write; + + switch (dev->state) { + case CS_RSP_BHS: + if (dev->write_len == 0) { + dev->state = CS_RSP_DATA; + dev->sg_virt = isert_vmap_sg(dev->pages, + dev->login_rsp->sg, + dev->login_rsp->sg_cnt); + if (!dev->sg_virt) + return -ENOMEM; + dev->write_buf = dev->sg_virt + ISER_HDRS_SZ; + dev->write_len = dev->login_rsp->bufflen - + sizeof(dev->login_rsp->pdu.bhs); + iscsi_cmnd_get_length(&dev->login_rsp->pdu); + } + break; + + case CS_RSP_DATA: + break; + + default: + PRINT_ERROR("Invalid state in %s (%d)\n", __func__, + dev->state); + to_write = 0; + } + + return to_write; +} + +static bool is_last_login_rsp(struct iscsi_login_rsp_hdr *rsp) +{ + return (rsp->flags & ISCSI_FLG_TRANSIT) && + ((rsp->flags & ISCSI_FLG_NSG_MASK) == ISCSI_FLG_NSG_FULL_FEATURE); +} + +static long isert_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct isert_conn_dev *dev = filp->private_data; + int res = 0, rc; + int val; + void __user *ptr = (void __user *)arg; + struct iscsi_cmnd *cmnd; + + TRACE_ENTRY(); + + if (dev->state == CS_DISCONNECTED) { + res = -EPIPE; + goto out; + } + + switch (cmd) { + case RDMA_CORK: + rc = copy_from_user(&val, ptr, sizeof(val)); + if (unlikely(rc != 0)) { + PRINT_ERROR("Failed to copy %d user's bytes", rc); + res = -EFAULT; + goto out; + } + if (val) { + if (!dev->login_rsp) { + cmnd = isert_alloc_login_rsp_pdu(dev->conn); + if (unlikely(!cmnd)) { + res = -ENOMEM; + goto out; + } + dev->login_rsp = cmnd; + dev->write_buf = (char *)&cmnd->pdu.bhs; + dev->write_len = sizeof(cmnd->pdu.bhs); + dev->state = CS_RSP_BHS; + } + } else { + struct iscsi_login_rsp_hdr *rsp; + bool last; + + if (unlikely(!dev->login_rsp)) { + res = -EINVAL; + goto out; + } + + dev->state = CS_RSP_FINISHED; + rsp = (struct iscsi_login_rsp_hdr *)(&dev->login_rsp->pdu.bhs); + last = is_last_login_rsp(rsp); + + dev->login_rsp->bufflen -= dev->write_len; + + if (!last || dev->is_discovery) { + spin_lock(&dev->pdu_lock); + dev->login_req = NULL; + spin_unlock(&dev->pdu_lock); + res = isert_login_rsp_tx(dev->login_rsp, + last, + dev->is_discovery); + vunmap(dev->sg_virt); + dev->sg_virt = NULL; + dev->login_rsp = NULL; + } + } + break; + + case GET_PORTAL_ADDR: + { + struct isert_addr_info addr; + + res = isert_get_target_addr(dev->conn, + (struct sockaddr *)&addr.addr, + &addr.addr_len); + if (unlikely(res)) + goto out; + + rc = copy_to_user(ptr, &addr, sizeof(addr)); + if (unlikely(rc != 0)) + res = -EFAULT; + } + break; + + case DISCOVERY_SESSION: + rc = copy_from_user(&val, ptr, sizeof(val)); + if (unlikely(rc != 0)) { + PRINT_ERROR("Failed to copy %d user's bytes", rc); + res = -EFAULT; + goto out; + } + dev->is_discovery = val; + break; + + default: + PRINT_ERROR("Invalid ioctl cmd %x", cmd); + res = -EINVAL; + } + +out: + TRACE_EXIT_RES(res); + return res; +} + +static unsigned int isert_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct isert_conn_dev *dev = filp->private_data; + unsigned int mask = 0; + + poll_wait(filp, &dev->waitqueue, wait); + + if (!dev->conn || dev->state == CS_DISCONNECTED) + mask |= POLLHUP | POLLIN; + else { + if (!will_read_block(dev)) + mask |= POLLIN | POLLRDNORM; + + mask |= POLLOUT | POLLWRNORM; + } + + return mask; +} + +int isert_login_req_rx(struct iscsi_cmnd *login_req) +{ + struct isert_conn_dev *dev = isert_get_priv(login_req->conn); + int res = 0; + + TRACE_ENTRY(); + + if (!dev) { + PRINT_ERROR("Received PDU %p on invalid connection", + login_req); + res = -EINVAL; + goto out; + } + + switch (dev->state) { + case CS_INIT: + case CS_RSP_FINISHED: + if (unlikely(dev->login_req != NULL)) { + sBUG(); + res = -EINVAL; + goto out; + } + break; + + case CS_REQ_BHS: /* Got login request before done handling old one */ + break; + + case CS_REQ_DATA: + case CS_REQ_FINISHED: + case CS_RSP_BHS: + case CS_RSP_DATA: + PRINT_WARNING("Received login PDU while handling previous one. State:%d", + dev->state); + res = -EINVAL; + goto out; + + default: + sBUG(); + res = -EINVAL; + goto out; + } + + + spin_lock(&dev->pdu_lock); + dev->login_req = login_req; + dev->read_len = sizeof(login_req->pdu.bhs); + dev->read_buf = (char *)&login_req->pdu.bhs; + dev->state = CS_REQ_BHS; + spin_unlock(&dev->pdu_lock); + + wake_up(&dev->waitqueue); + +out: + TRACE_EXIT_RES(res); + return res; +} + +static dev_t devno; + +static const struct file_operations listener_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = isert_listen_read, + .unlocked_ioctl = isert_listen_ioctl, + .compat_ioctl = isert_listen_ioctl, + .poll = isert_listen_poll, + .open = isert_listen_open, + .release = isert_listen_release, +}; + +static const struct file_operations conn_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = isert_read, + .write = isert_write, + .unlocked_ioctl = isert_ioctl, + .compat_ioctl = isert_ioctl, + .poll = isert_poll, + .open = isert_open, + .release = isert_release, +}; + +static void __init isert_setup_cdev(struct isert_conn_dev *dev, + unsigned int index) +{ + int err; + + TRACE_ENTRY(); + + dev->devno = MKDEV(isert_major, index + 1); + + cdev_init(&dev->cdev, &conn_fops); + dev->cdev.owner = THIS_MODULE; + dev->cdev.ops = &conn_fops; + dev->idx = index; + init_waitqueue_head(&dev->waitqueue); + dev->login_req = NULL; + dev->login_rsp = NULL; + spin_lock_init(&dev->pdu_lock); + atomic_set(&dev->available, 1); + kref_init(&dev->kref); + dev->state = CS_INIT; + err = cdev_add(&dev->cdev, dev->devno, 1); + /* Fail gracefully if need be */ + if (unlikely(err)) + PRINT_ERROR("Error %d adding "ISER_CONN_DEV_PREFIX"%d", err, + index); + + dev->dev = device_create(isert_class, NULL, dev->devno, NULL, + ISER_CONN_DEV_PREFIX"%d", index); + + TRACE_EXIT(); +} + +static void __init isert_setup_listener_cdev(struct isert_listener_dev *dev) +{ + int err; + + TRACE_ENTRY(); + + dev->devno = MKDEV(isert_major, 0); + + cdev_init(&dev->cdev, &listener_fops); + dev->cdev.owner = THIS_MODULE; + dev->cdev.ops = &listener_fops; + init_waitqueue_head(&dev->waitqueue); + INIT_LIST_HEAD(&dev->new_conn_list); + INIT_LIST_HEAD(&dev->curr_conn_list); + spin_lock_init(&dev->conn_lock); + atomic_set(&dev->available, 1); + err = cdev_add(&dev->cdev, dev->devno, 1); + /* Fail gracefully if need be */ + if (unlikely(err)) + PRINT_ERROR("Error %d adding isert_scst", err); + + dev->dev = device_create(isert_class, NULL, dev->devno, NULL, + "isert_scst"); + + TRACE_EXIT(); +} + +int __init isert_init_login_devs(unsigned int ndevs) +{ + int res; + unsigned int i; + + TRACE_ENTRY(); + + n_devs = ndevs; + + res = alloc_chrdev_region(&devno, 0, n_devs, + "isert_scst"); + isert_major = MAJOR(devno); + + if (unlikely(res < 0)) { + PRINT_ERROR("isert: can't get major %d\n", isert_major); + goto out; + } + + /* + * allocate the devices -- we can't have them static, as the number + * can be specified at load time + */ + isert_conn_devices = kzalloc(n_devs * sizeof(struct isert_conn_dev), + GFP_KERNEL); + if (unlikely(!isert_conn_devices)) { + res = -ENOMEM; + goto fail; /* Make this more graceful */ + } + + isert_class = class_create(THIS_MODULE, "isert_scst"); + + isert_setup_listener_cdev(&isert_listen_dev); + + /* Initialize each device. */ + for (i = 0; i < n_devs; i++) + isert_setup_cdev(&isert_conn_devices[i], i); + + res = isert_datamover_init(); + if (unlikely(res)) { + PRINT_ERROR("Unable to initialize datamover: %d\n", res); + goto fail; + } + +out: + TRACE_EXIT_RES(res); + return res; +fail: + isert_cleanup_login_devs(); + goto out; +} + +void isert_close_all_portals(void) +{ + int i; + + for (i = 0; i < isert_listen_dev.free_portal_idx; ++i) + isert_portal_remove(isert_listen_dev.portal_h[i]); + isert_listen_dev.free_portal_idx = 0; +} + +void isert_cleanup_login_devs(void) +{ + int i; + + TRACE_ENTRY(); + + isert_close_all_portals(); + + isert_datamover_cleanup(); + + if (isert_conn_devices) { + for (i = 0; i < n_devs; i++) { + device_destroy(isert_class, + isert_conn_devices[i].devno); + cdev_del(&isert_conn_devices[i].cdev); + } + kfree(isert_conn_devices); + } + + device_destroy(isert_class, isert_listen_dev.devno); + cdev_del(&isert_listen_dev.cdev); + + if (isert_class) + class_destroy(isert_class); + + unregister_chrdev_region(devno, n_devs); + + TRACE_EXIT(); +} diff --git a/iscsi-scst/kernel/nthread.c b/iscsi-scst/kernel/nthread.c index 44ec3986..4dea0cf5 100644 --- a/iscsi-scst/kernel/nthread.c +++ b/iscsi-scst/kernel/nthread.c @@ -23,6 +23,7 @@ #include "iscsi.h" #include "digest.h" +#include "iscsit_transport.h" /* Read data states */ enum rx_state { @@ -407,10 +408,10 @@ static void close_conn(struct iscsi_conn *conn) if (conn->active_close) { /* We want all our already send operations to complete */ - conn->sock->ops->shutdown(conn->sock, RCV_SHUTDOWN); + conn->transport->iscsit_conn_close(conn, RCV_SHUTDOWN); } else { - conn->sock->ops->shutdown(conn->sock, - RCV_SHUTDOWN|SEND_SHUTDOWN); + conn->transport->iscsit_conn_close(conn, + RCV_SHUTDOWN|SEND_SHUTDOWN); } mutex_lock(&session->target->target_mutex); @@ -486,15 +487,13 @@ static void close_conn(struct iscsi_conn *conn) } } - iscsi_make_conn_wr_active(conn); + conn->transport->iscsit_make_conn_wr_active(conn); /* That's for active close only, actually */ if (time_after(jiffies, start_waiting + CONN_WAIT_TIMEOUT) && !wait_expired) { - TRACE_CONN_CLOSE("Wait time expired (conn %p, " - "sk_state %d)", - conn, conn->sock->sk->sk_state); - conn->sock->ops->shutdown(conn->sock, SEND_SHUTDOWN); + TRACE_CONN_CLOSE("Wait time expired (conn %p)", conn); + conn->transport->iscsit_conn_close(conn, SEND_SHUTDOWN); wait_expired = 1; shut_start_waiting = jiffies; } @@ -504,9 +503,8 @@ static void close_conn(struct iscsi_conn *conn) conn->deleting ? CONN_DEL_SHUT_TIMEOUT : CONN_REG_SHUT_TIMEOUT)) { TRACE_CONN_CLOSE("Wait time after shutdown expired " - "(conn %p, sk_state %d)", conn, - conn->sock->sk->sk_state); - conn->sock->sk->sk_prot->disconnect(conn->sock->sk, 0); + "(conn %p)", conn); + conn->transport->iscsit_conn_close(conn, 0); shut_expired = 1; } @@ -522,17 +520,21 @@ static void close_conn(struct iscsi_conn *conn) trace_conn_close(conn); - /* It might never be called for being closed conn */ - __iscsi_write_space_ready(conn); + if (!conn->session->sess_params.rdma_extensions) { + /* It might never be called for being closed conn */ + __iscsi_write_space_ready(conn); - iscsi_check_closewait(conn); + iscsi_check_closewait(conn); + } } - write_lock_bh(&conn->sock->sk->sk_callback_lock); - conn->sock->sk->sk_state_change = conn->old_state_change; - conn->sock->sk->sk_data_ready = conn->old_data_ready; - conn->sock->sk->sk_write_space = conn->old_write_space; - write_unlock_bh(&conn->sock->sk->sk_callback_lock); + if (!conn->session->sess_params.rdma_extensions) { + write_lock_bh(&conn->sock->sk->sk_callback_lock); + conn->sock->sk->sk_state_change = conn->old_state_change; + conn->sock->sk->sk_data_ready = conn->old_data_ready; + conn->sock->sk->sk_write_space = conn->old_write_space; + write_unlock_bh(&conn->sock->sk->sk_callback_lock); + } while (1) { bool t; @@ -595,7 +597,7 @@ static int close_conn_thr(void *arg) } /* No locks */ -static void start_close_conn(struct iscsi_conn *conn) +void start_close_conn(struct iscsi_conn *conn) { struct task_struct *t; @@ -611,6 +613,7 @@ static void start_close_conn(struct iscsi_conn *conn) TRACE_EXIT(); return; } +EXPORT_SYMBOL(start_close_conn); static inline void iscsi_conn_init_read(struct iscsi_conn *conn, void *data, size_t len) @@ -640,7 +643,7 @@ static void iscsi_conn_prepare_read_ahs(struct iscsi_conn *conn, return; } -static struct iscsi_cmnd *iscsi_get_send_cmnd(struct iscsi_conn *conn) +struct iscsi_cmnd *iscsi_get_send_cmnd(struct iscsi_conn *conn) { struct iscsi_cmnd *cmnd = NULL; @@ -679,6 +682,7 @@ static struct iscsi_cmnd *iscsi_get_send_cmnd(struct iscsi_conn *conn) out: return cmnd; } +EXPORT_SYMBOL(iscsi_get_send_cmnd); /* Returns number of bytes left to receive or <0 for error */ static int do_recv(struct iscsi_conn *conn) @@ -843,7 +847,7 @@ static int process_read_io(struct iscsi_conn *conn, int *closed) switch (conn->read_state) { case RX_INIT_BHS: EXTRACHECKS_BUG_ON(conn->read_cmnd != NULL); - cmnd = cmnd_alloc(conn, NULL); + cmnd = conn->transport->iscsit_alloc_cmd(conn, NULL); conn->read_cmnd = cmnd; iscsi_conn_init_read(cmnd->conn, &cmnd->pdu.bhs, sizeof(cmnd->pdu.bhs)); diff --git a/iscsi-scst/kernel/param.c b/iscsi-scst/kernel/param.c index 2a423093..525cd633 100644 --- a/iscsi-scst/kernel/param.c +++ b/iscsi-scst/kernel/param.c @@ -98,12 +98,13 @@ static void log_params(struct iscsi_sess_params *params) iscsi_get_bool_value(params->data_sequence_inorder), params->error_recovery_level); PRINT_INFO(" HeaderDigest %s, DataDigest %s, OFMarker %s, " - "IFMarker %s, OFMarkInt %d, IFMarkInt %d", + "IFMarker %s, OFMarkInt %d, IFMarkInt %d, RDMAExtensions %s", iscsi_get_digest_name(params->header_digest, hdigest_name), iscsi_get_digest_name(params->data_digest, ddigest_name), iscsi_get_bool_value(params->ofmarker), iscsi_get_bool_value(params->ifmarker), - params->ofmarkint, params->ifmarkint); + params->ofmarkint, params->ifmarkint, + iscsi_get_bool_value(params->rdma_extensions)); } /* target_mutex supposed to be locked */ @@ -135,6 +136,11 @@ static void sess_params_check(struct iscsi_kern_params_info *info) CHECK_PARAM(info, iparams, ofmarker, 0, 0); CHECK_PARAM(info, iparams, ifmarker, 0, 0); + /* iSER related parameters */ + CHECK_PARAM(info, iparams, rdma_extensions, 0, 1); + CHECK_PARAM(info, iparams, target_recv_data_length, 512, max_len); + CHECK_PARAM(info, iparams, initiator_recv_data_length, 512, max_len); + return; } @@ -163,6 +169,11 @@ static void sess_params_set(struct iscsi_sess_params *params, SET_PARAM(params, info, iparams, ifmarker); SET_PARAM(params, info, iparams, ofmarkint); SET_PARAM(params, info, iparams, ifmarkint); + + /* iSER related parameters */ + SET_PARAM(params, info, iparams, rdma_extensions); + SET_PARAM(params, info, iparams, target_recv_data_length); + SET_PARAM(params, info, iparams, initiator_recv_data_length); return; } @@ -190,6 +201,11 @@ static void sess_params_get(struct iscsi_sess_params *params, GET_PARAM(params, info, iparams, ifmarker); GET_PARAM(params, info, iparams, ofmarkint); GET_PARAM(params, info, iparams, ifmarkint); + + /* iSER related parameters */ + GET_PARAM(params, info, iparams, rdma_extensions); + GET_PARAM(params, info, iparams, target_recv_data_length); + GET_PARAM(params, info, iparams, initiator_recv_data_length); return; } diff --git a/iscsi-scst/kernel/target.c b/iscsi-scst/kernel/target.c index fdb132e5..b12fa688 100644 --- a/iscsi-scst/kernel/target.c +++ b/iscsi-scst/kernel/target.c @@ -331,9 +331,11 @@ void target_del_all_sess(struct iscsi_target *target, int flags) TRACE_EXIT(); return; } +EXPORT_SYMBOL(target_del_all_sess); void target_del_all(void) { + struct iscsit_transport *transport; struct iscsi_target *target, *t; bool first = true; @@ -341,6 +343,14 @@ void target_del_all(void) TRACE_MGMT_DBG("%s", "Deleting all targets"); + transport = iscsit_get_transport(ISCSI_TCP); + if (transport && transport->iscsit_close_all_portals) + transport->iscsit_close_all_portals(); + + transport = iscsit_get_transport(ISCSI_RDMA); + if (transport && transport->iscsit_close_all_portals) + transport->iscsit_close_all_portals(); + /* Not the best, ToDo */ while (1) { mutex_lock(&target_mgmt_mutex); diff --git a/iscsi-scst/usr/ctldev.c b/iscsi-scst/usr/ctldev.c index 4bb76055..f8366867 100644 --- a/iscsi-scst/usr/ctldev.c +++ b/iscsi-scst/usr/ctldev.c @@ -27,59 +27,17 @@ #include "iscsid.h" -#define CTL_DEVICE "/dev/iscsi-scst-ctl" +#define CTL_DEVICE "iscsi-scst-ctl" int kernel_open(void) { - FILE *f; - char devname[256]; - char buf[256]; - int devn; int ctlfd = -1; int err; struct iscsi_kern_register_info reg; - if (!(f = fopen("/proc/devices", "r"))) { - err = -errno; - perror("Cannot open control path to the driver"); - goto out_err; - } - - devn = 0; - while (!feof(f)) { - if (!fgets(buf, sizeof(buf), f)) { - break; - } - if (sscanf(buf, "%d %s", &devn, devname) != 2) { - continue; - } - if (!strcmp(devname, "iscsi-scst-ctl")) { - break; - } - devn = 0; - } - - fclose(f); - if (!devn) { - err = -ENOENT; - printf("cannot find iscsictl in /proc/devices - " - "make sure the module is loaded\n"); - goto out_err; - } - - unlink(CTL_DEVICE); - if (mknod(CTL_DEVICE, (S_IFCHR | 0600), (devn << 8))) { - err = -errno; - printf("cannot create %s %s\n", CTL_DEVICE, strerror(errno)); - goto out_err; - } - - ctlfd = open(CTL_DEVICE, O_RDWR); - if (ctlfd < 0) { - err = -errno; - printf("cannot open %s %s\n", CTL_DEVICE, strerror(errno)); - goto out_err; - } + ctlfd = create_and_open_dev(CTL_DEVICE, 0); + if (ctlfd < 0) + goto out; memset(®, 0, sizeof(reg)); reg.version = (uintptr_t)ISCSI_SCST_INTERFACE_VERSION; @@ -103,7 +61,6 @@ int kernel_open(void) out_close: close(ctlfd); -out_err: ctlfd = err; goto out; } diff --git a/iscsi-scst/usr/iscsi_scstd.c b/iscsi-scst/usr/iscsi_scstd.c index f78506a5..322b4c32 100644 --- a/iscsi-scst/usr/iscsi_scstd.c +++ b/iscsi-scst/usr/iscsi_scstd.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -169,6 +170,224 @@ static void create_listen_socket(struct pollfd *array) exit(1); } +static struct connection *alloc_and_init_conn(int fd) +{ + struct pollfd *pollfd; + struct connection *conn = NULL; + int i; + + for (i = 0; i < INCOMING_MAX; i++) { + if (!incoming[i]) + break; + } + if (i >= INCOMING_MAX) { + log_error("Unable to find incoming slot? %d\n", i); + goto out; + } + + conn = conn_alloc(); + if (!conn) { + log_error("Fail to allocate %s", "conn\n"); + goto out; + } + + conn->fd = fd; + incoming[i] = conn; + + pollfd = &poll_array[POLL_INCOMING + i]; + pollfd->fd = fd; + pollfd->events = POLLIN; + pollfd->revents = 0; + + conn_read_pdu(conn); + set_non_blocking(fd); + +out: + return conn; +} + +static int transmit_iser(int fd, bool start) +{ + int opt = start; + return ioctl(fd, RDMA_CORK, &opt, sizeof(opt)); +} + +static int cork_transmit_iser(int fd) +{ + return transmit_iser(fd, true); +} + +static int uncork_transmit_iser(int fd) +{ + return transmit_iser(fd, false); +} + +static void create_iser_listen_socket(struct pollfd *array) +{ + struct addrinfo hints, *res, *res0; + char servname[64]; + int rc, i; + int iser_fd; + struct isert_addr_info info; + + iser_fd = create_and_open_dev("isert_scst", 1); + + poll_array[POLL_ISER_LISTEN].fd = iser_fd; + if (iser_fd != -1) { + poll_array[POLL_ISER_LISTEN].events = POLLIN; + + /* RDMAExtensions */ + session_keys[key_rdma_extensions].max = 1; + session_keys[key_rdma_extensions].local_def = 1; + } else { + poll_array[POLL_ISER_LISTEN].events = 0; + return; + } + + memset(servname, 0, sizeof(servname)); + snprintf(servname, sizeof(servname), "%d", server_port); + + memset(&hints, 0, sizeof(hints)); + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_PASSIVE; + + rc = getaddrinfo(server_address, servname, &hints, &res0); + if (rc != 0) { + log_error("Unable to get address info (%s)!", + get_error_str(rc)); + exit(1); + } + + i = 0; + for (res = res0; res && i < ISERT_MAX_PORTALS; res = res->ai_next) { + memcpy(&info.addr, res->ai_addr, res->ai_addrlen); + info.addr_len = res->ai_addrlen; + + rc = ioctl(iser_fd, SET_LISTEN_ADDR, &info); + if (rc != 0) { + log_error("Unable to set listen address (%s)!", + strerror(errno)); + } + ++i; + } + + freeaddrinfo(res0); +} + +static int iser_getsockname(int fd, struct sockaddr *name, socklen_t *namelen) +{ + struct isert_addr_info addr; + int ret; + + ret = ioctl(fd, GET_PORTAL_ADDR, &addr, sizeof(addr)); + if (ret) + return ret; + + memcpy(name, &addr.addr, addr.addr_len); + *namelen = addr.addr_len; + + return ret; +} + +static int iser_is_discovery(int fd) +{ + int val = 1; + + return ioctl(fd, DISCOVERY_SESSION, &val, sizeof(val)); +} + +static void iser_accept(int fd) +{ + char buff[256]; + int ret, conn_fd; + struct connection *conn; + char target_portal[ISCSI_PORTAL_LEN], target_portal_port[NI_MAXSERV]; + struct isert_addr_info addr; + + ret = read(fd, buff, sizeof(buff)); + if (ret == -1) + goto out; + + conn_fd = open(buff, O_RDWR); + if (conn_fd == -1) { + log_error("open(iser_connection) %s failed: %s\n", + buff, strerror(errno)); + goto out; + } + + ret = ioctl(conn_fd, GET_PORTAL_ADDR, &addr, sizeof(addr)); + if (ret) { + log_error("ioctl(GET_PORTAL_ADDR) failed: %s\n", + strerror(errno)); + goto out_close; + } + + ret = getnameinfo((struct sockaddr *)&addr, sizeof(addr), target_portal, + sizeof(target_portal), target_portal_port, + sizeof(target_portal_port), + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret != 0) { + log_error("Target portal getnameinfo() failed: %s!", + get_error_str(ret)); + goto out_close; + } + + log_info("iSER Connect to %s:%s", target_portal, target_portal_port); + + if (conn_blocked) { + log_warning("Connection refused due to blocking\n"); + goto out_close; + } + + conn = alloc_and_init_conn(conn_fd); + if (!conn) + goto out_close; + + conn->target_portal = strdup(target_portal); + if (conn->target_portal == NULL) { + log_error("Unable to duplicate target portal %s", target_portal); + goto out_free; + } + + conn->cork_transmit = cork_transmit_iser; + conn->uncork_transmit = uncork_transmit_iser; + conn->getsockname = iser_getsockname; + conn->is_discovery = iser_is_discovery; + conn->is_iser = true; + incoming_cnt++; + +out: + return; + +out_free: + conn_free(conn); + +out_close: + close(conn_fd); + goto out; +} + +static int transmit_sock(int fd, bool start) +{ + int opt = start; + return setsockopt(fd, SOL_TCP, TCP_CORK, &opt, sizeof(opt)); +} + +static int cork_transmit_sock(int fd) +{ + return transmit_sock(fd, true); +} + +static int uncork_transmit_sock(int fd) +{ + return transmit_sock(fd, false); +} + +static int tcp_is_discovery(int fd) +{ + return 0; +} + static void accept_connection(int listen) { union { @@ -177,9 +396,8 @@ static void accept_connection(int listen) struct sockaddr_in6 sin6; } from, to; socklen_t namesize; - struct pollfd *pollfd; struct connection *conn; - int fd, i, rc; + int fd, rc; char initiator_addr[ISCSI_PORTAL_LEN], initiator_port[NI_MAXSERV]; char target_portal[ISCSI_PORTAL_LEN], target_portal_port[NI_MAXSERV]; @@ -238,36 +456,22 @@ static void accept_connection(int listen) goto out_close; } - for (i = 0; i < INCOMING_MAX; i++) { - if (!incoming[i]) - break; - } - if (i >= INCOMING_MAX) { - log_error("Unable to find incoming slot? %d\n", i); - goto out_close; - } - - if (!(conn = conn_alloc())) { - log_error("Fail to allocate %s", "conn\n"); + conn = alloc_and_init_conn(fd); + if (!conn) goto out_close; - } - conn->fd = fd; conn->target_portal = strdup(target_portal); if (conn->target_portal == NULL) { log_error("Unable to duplicate target portal %s", target_portal); goto out_free; } - incoming[i] = conn; + conn->cork_transmit = cork_transmit_sock; + conn->uncork_transmit = uncork_transmit_sock; + conn->getsockname = getsockname; + conn->is_discovery = tcp_is_discovery; conn_read_pdu(conn); - set_non_blocking(fd); - pollfd = &poll_array[POLL_INCOMING + i]; - pollfd->fd = fd; - pollfd->events = POLLIN; - pollfd->revents = 0; - incoming_cnt++; out: @@ -296,7 +500,7 @@ void isns_set_fd(int isns, int scn_listen, int scn) static void event_conn(struct connection *conn, struct pollfd *pollfd) { - int res, opt; + int res; again: switch (conn->iostate) { @@ -367,8 +571,7 @@ static void event_conn(struct connection *conn, struct pollfd *pollfd) case IOSTATE_WRITE_AHS: case IOSTATE_WRITE_DATA: write_again: - opt = 1; - setsockopt(pollfd->fd, SOL_TCP, TCP_CORK, &opt, sizeof(opt)); + conn->cork_transmit(pollfd->fd); res = write(pollfd->fd, conn->buffer, conn->rwsize); if (res < 0) { if (errno != EINTR && errno != EAGAIN) { @@ -408,8 +611,7 @@ static void event_conn(struct connection *conn, struct pollfd *pollfd) goto write_again; } case IOSTATE_WRITE_DATA: - opt = 0; - setsockopt(pollfd->fd, SOL_TCP, TCP_CORK, &opt, sizeof(opt)); + conn->uncork_transmit(pollfd->fd); cmnd_finish(conn); switch (conn->state) { @@ -447,6 +649,7 @@ static void event_loop(void) int res, i; create_listen_socket(poll_array + POLL_LISTEN); + create_iser_listen_socket(poll_array); poll_array[POLL_IPC].fd = ipc_fd; poll_array[POLL_IPC].events = POLLIN; @@ -520,6 +723,9 @@ static void event_loop(void) if (poll_array[POLL_SCN].revents) isns_scn_handle(0); + if (poll_array[POLL_ISER_LISTEN].revents) + iser_accept(poll_array[POLL_ISER_LISTEN].fd); + for (i = 0; i < INCOMING_MAX; i++) { struct connection *conn = incoming[i]; struct pollfd *pollfd = &poll_array[POLL_INCOMING + i]; diff --git a/iscsi-scst/usr/iscsid.c b/iscsi-scst/usr/iscsid.c index dfe92780..1e4ee87b 100644 --- a/iscsi-scst/usr/iscsid.c +++ b/iscsi-scst/usr/iscsid.c @@ -41,6 +41,10 @@ static struct iscsi_key login_keys[] = { {"InitiatorAlias",}, {"SessionType",}, {"TargetName",}, + {"InitiatorRecvDataSegmentLength",}, + {"MaxAHSLength",}, + {"TaggedBufferForSolicitedDataOnly",}, + {"iSERHelloRequired",}, {NULL,}, }; @@ -369,6 +373,26 @@ static void text_scan_login(struct connection *conn) } } + if (conn->is_iser) { + switch (idx) { + case key_rdma_extensions: + if (val != 1) { + login_rsp_ini_err(conn, ISCSI_STATUS_INIT_ERR); + goto out; + } + break; + case key_initial_r2t: + val = 1; + break; + case key_immediate_data: + val = 0; + break; + } + } else if (idx == key_rdma_extensions && val != 0) { + login_rsp_ini_err(conn, ISCSI_STATUS_INIT_ERR); + goto out; + } + params_check_val(session_keys, idx, &val); params_set_val(session_keys, conn->session_params, idx, &val); @@ -502,6 +526,11 @@ static void login_start(struct connection *conn) if (session_type) { if (!strcmp(session_type, "Discovery")) { + int ret = conn->is_discovery(conn->fd); + if (ret) { + login_rsp_tgt_err(conn, ISCSI_STATUS_MISSING_FIELDS); + return; + } conn->session_type = SESSION_DISCOVERY; } else if (strcmp(session_type, "Normal")) { login_rsp_ini_err(conn, ISCSI_STATUS_INV_SESSION_TYPE); @@ -613,6 +642,15 @@ static int login_finish(struct connection *conn) { int res = 0; + if (conn->is_iser && + conn->session_params[key_target_recv_data_length].key_state == KEY_STATE_START) { + char buf[32] = "\0"; + params_val_to_str(session_keys, key_target_recv_data_length, + session_keys[key_target_recv_data_length].local_def, + buf, sizeof(buf)); + text_key_add(conn, "TargetRecvDataSegmentLength", buf); + } + switch (conn->session_type) { case SESSION_NORMAL: if (!conn->sess) diff --git a/iscsi-scst/usr/iscsid.h b/iscsi-scst/usr/iscsid.h index 670e8095..a245b351 100644 --- a/iscsi-scst/usr/iscsid.h +++ b/iscsi-scst/usr/iscsid.h @@ -27,8 +27,10 @@ #include "types.h" #ifdef INSIDE_KERNEL_TREE #include +#include #else #include "iscsi_scst.h" +#include "isert_scst.h" #endif #include "iscsi_hdr.h" #include "param.h" @@ -126,6 +128,13 @@ struct connection { } auth; struct __qelem clist; + + bool is_iser; + + int (*cork_transmit)(int fd); + int (*uncork_transmit)(int fd); + int (*getsockname)(int fd, struct sockaddr *name, socklen_t *namelen); + int (*is_discovery)(int fd); }; #define IOSTATE_FREE 0 @@ -221,6 +230,7 @@ extern int conn_blocked; enum { POLL_LISTEN, POLL_IPC = POLL_LISTEN + LISTEN_MAX, + POLL_ISER_LISTEN, POLL_NL, POLL_ISNS, POLL_SCN_LISTEN, diff --git a/iscsi-scst/usr/misc.c b/iscsi-scst/usr/misc.c index 7ef434b6..357be4d1 100644 --- a/iscsi-scst/usr/misc.c +++ b/iscsi-scst/usr/misc.c @@ -18,9 +18,74 @@ #include #include #include +#include +#include +#include +#include #include "iscsid.h" +int create_and_open_dev(const char *dev, int readonly) +{ + FILE *f; + char devname[256]; + char buf[256]; + int devn; + int ctlfd = -1; + int err; + int flags; + + f = fopen("/proc/devices", "r"); + if (!f) { + err = -errno; + perror("Cannot open control path to the driver"); + goto out; + } + + devn = 0; + while (!feof(f)) { + if (!fgets(buf, sizeof(buf), f)) + break; + if (sscanf(buf, "%d %s", &devn, devname) != 2) + continue; + if (!strcmp(devname, dev)) + break; + devn = 0; + } + + fclose(f); + if (!devn) { + err = -ENOENT; + printf("cannot find %s in /proc/devices - " + "make sure the module is loaded\n", dev); + goto out; + } + + sprintf(devname, "/dev/%s", dev); + + unlink(devname); + if (mknod(devname, (S_IFCHR | 0600), (devn << 8))) { + err = -errno; + printf("cannot create %s %s\n", devname, strerror(errno)); + goto out; + } + + if (readonly) + flags = O_RDONLY; + else + flags = O_RDWR; + + err = ctlfd = open(devname, flags); + if (ctlfd < 0) { + err = -errno; + printf("cannot open %s %s\n", devname, strerror(errno)); + goto out; + } + +out: + return err; +} + void set_non_blocking(int fd) { int res = fcntl(fd, F_GETFL); diff --git a/iscsi-scst/usr/misc.h b/iscsi-scst/usr/misc.h index 9b972ebd..879a79ca 100644 --- a/iscsi-scst/usr/misc.h +++ b/iscsi-scst/usr/misc.h @@ -107,5 +107,6 @@ static inline int list_length_is_one(const struct __qelem *head) extern void set_non_blocking(int fd); extern void sock_set_keepalive(int sock, int timeout); +extern int create_and_open_dev(const char *dev, int readonly); #endif diff --git a/iscsi-scst/usr/param.c b/iscsi-scst/usr/param.c index 007271e7..09a93e3c 100644 --- a/iscsi-scst/usr/param.c +++ b/iscsi-scst/usr/param.c @@ -382,5 +382,12 @@ struct iscsi_key session_keys[] = { {"IFMarker", 0, 0, 0, 0, 0, &and_ops}, {"OFMarkInt", 2048, 2048, 1, 65535, 0, &marker_ops}, {"IFMarkInt", 2048, 2048, 1, 65535, 0, &marker_ops}, + {"RDMAExtensions", 0, 0, 0, 0, 1, &and_ops}, + {"TargetRecvDataSegmentLength", 8192, 512, 512, -1, 0, &minimum_ops}, + {"InitiatorRecvDataSegmentLength", 8192, -1, 512, -1, 0, &minimum_ops}, + {"MaxAHSLength", 256, 0, 0, -1, 0, &minimum_ops}, + {"TaggedBufferForSolicitedDataOnly", 0, 0, 0, 0, 0, &and_ops}, + {"iSERHelloRequired", 0, 0, 0, 0, 0, &and_ops}, + {"MaxOutstandingUnexpectedPDUs", 0, 0, 0, -1, 0, &minimum_ops}, {NULL,}, }; diff --git a/iscsi-scst/usr/target.c b/iscsi-scst/usr/target.c index cfab907a..030d9afa 100644 --- a/iscsi-scst/usr/target.c +++ b/iscsi-scst/usr/target.c @@ -252,10 +252,11 @@ void target_list_build(struct connection *conn, char *target_name) char portal[NI_MAXHOST]; int family, i; - if (getsockname(conn->fd, (struct sockaddr *) &ss1, &slen)) { + if (conn->getsockname(conn->fd, (struct sockaddr *) &ss1, &slen)) { log_error("getsockname failed: %m"); return; } + family = ss1.ss_family; list_for_each_entry(target, &targets_list, tlist) { diff --git a/scripts/generate-kernel-patch b/scripts/generate-kernel-patch index dbcf623f..23135edd 100755 --- a/scripts/generate-kernel-patch +++ b/scripts/generate-kernel-patch @@ -458,9 +458,16 @@ make -s -C iscsi-scst include/iscsi_scst_itf_ver.h ( for f in $(ls iscsi-scst/include/*h 2>/dev/null) do - if [ "${f}" != "iscsi-scst/include/iscsi_scst_itf_ver.h" ]; then - add_file "${f}" "include/scst/${f#iscsi-scst/include/}" - fi + case "${f}" in + "iscsi-scst/include/iscsi_scst_itf_ver.h") + ;; + "iscsi-scst/include/iscsit_transport.h") + add_file "${f}" "drivers/scst/iscsi-scst/${f#iscsi-scst/include/}" + ;; + *) + add_file "${f}" "include/scst/${f#iscsi-scst/include/}" + ;; + esac done add_file "iscsi-scst/include/iscsi_scst_itf_ver.h" "include/scst/iscsi_scst_itf_ver.h" @@ -474,6 +481,11 @@ for f in $(ls iscsi-scst/kernel/*.[ch] 2>/dev/null) do add_file "${f}" "drivers/scst/iscsi-scst/${f#iscsi-scst/kernel/}" done + +for f in $(ls iscsi-scst/kernel/isert-scst/*.[ch] 2>/dev/null) +do + add_file "${f}" "drivers/scst/iscsi-scst/${f#iscsi-scst/kernel/isert-scst/}" +done ) \ | process_patch "iscsi-scst.diff" diff --git a/scst-dkms.spec.in b/scst-dkms.spec.in index ea33b642..e62d3cb6 100644 --- a/scst-dkms.spec.in +++ b/scst-dkms.spec.in @@ -165,6 +165,7 @@ BUILT_MODULE_NAME[12]="scst_raid" BUILT_MODULE_NAME[13]="scst_tape" BUILT_MODULE_NAME[14]="scst_user" BUILT_MODULE_NAME[15]="scst_vdisk" +BUILT_MODULE_NAME[16]="isert-scst" DEST_MODULE_LOCATION[ 0]="/extra" DEST_MODULE_LOCATION[ 1]="/extra" DEST_MODULE_LOCATION[ 2]="/extra" @@ -181,6 +182,7 @@ DEST_MODULE_LOCATION[12]="/extra/dev_handlers" DEST_MODULE_LOCATION[13]="/extra/dev_handlers" DEST_MODULE_LOCATION[14]="/extra/dev_handlers" DEST_MODULE_LOCATION[15]="/extra/dev_handlers" +DEST_MODULE_LOCATION[16]="/extra" EOF %clean diff --git a/scst.spec.in b/scst.spec.in index 6ad861f1..9f243056 100644 --- a/scst.spec.in +++ b/scst.spec.in @@ -148,6 +148,7 @@ rm -rf /usr/local/include/scst /lib/modules/%{kversion}/extra/fcst.ko /lib/modules/%{kversion}/extra/ib_srpt.ko /lib/modules/%{kversion}/extra/iscsi-scst.ko +/lib/modules/%{kversion}/extra/isert-scst.ko /lib/modules/%{kversion}/extra/qla2x00tgt.ko /lib/modules/%{kversion}/extra/qla2xxx_scst.ko /lib/modules/%{kversion}/extra/scst.ko diff --git a/scstadmin/init.d/scst b/scstadmin/init.d/scst index c92180a0..99bc1f6c 100755 --- a/scstadmin/init.d/scst +++ b/scstadmin/init.d/scst @@ -159,6 +159,7 @@ parse_scst_conf() { x86_64|i686) SCST_OPT_MODULES="crc32c-intel $SCST_OPT_MODULES";; esac + SCST_MODULES="$SCST_MODULES isert_scst" SCST_OPT_MODULES="crc32c $SCST_OPT_MODULES" SCST_DAEMONS="${ISCSI_DAEMON} $SCST_DAEMONS" fi