From dad91d3fdd6982b9bff2ca336c838832f62c1826 Mon Sep 17 00:00:00 2001 From: LiLi Date: Thu, 4 Nov 2021 09:04:34 +0800 Subject: [PATCH 1/3] [website][upgrade]feat: docs migration - 2.7.1 / deploy Signed-off-by: LiLi --- .../version-2.7.1/deploy-aws.md | 274 +++++++++ .../deploy-bare-metal-multi-cluster.md | 483 ++++++++++++++++ .../version-2.7.1/deploy-bare-metal.md | 546 ++++++++++++++++++ .../version-2.7.1/deploy-dcos.md | 202 +++++++ .../version-2.7.1/deploy-docker.md | 64 ++ .../version-2.7.1/deploy-kubernetes.md | 15 + .../version-2.7.1/deploy-monitoring.md | 125 ++++ .../version-2.7.1-sidebars.json | 34 ++ 8 files changed, 1743 insertions(+) create mode 100644 site2/website-next/versioned_docs/version-2.7.1/deploy-aws.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/deploy-bare-metal-multi-cluster.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/deploy-bare-metal.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/deploy-dcos.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/deploy-docker.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/deploy-kubernetes.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/deploy-monitoring.md diff --git a/site2/website-next/versioned_docs/version-2.7.1/deploy-aws.md b/site2/website-next/versioned_docs/version-2.7.1/deploy-aws.md new file mode 100644 index 0000000000000..78defa1e3e5ec --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/deploy-aws.md @@ -0,0 +1,274 @@ +--- +id: deploy-aws +title: Deploying a Pulsar cluster on AWS using Terraform and Ansible +sidebar_label: "Amazon Web Services" +original_id: deploy-aws +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +> For instructions on deploying a single Pulsar cluster manually rather than using Terraform and Ansible, see [Deploying a Pulsar cluster on bare metal](deploy-bare-metal.md). 
For instructions on manually deploying a multi-cluster Pulsar instance, see [Deploying a Pulsar instance on bare metal](deploy-bare-metal-multi-cluster). + +One of the easiest ways to get a Pulsar [cluster](reference-terminology.md#cluster) running on [Amazon Web Services](https://aws.amazon.com/) (AWS) is to use the [Terraform](https://terraform.io) infrastructure provisioning tool and the [Ansible](https://www.ansible.com) server automation tool. Terraform can create the resources necessary for running the Pulsar cluster---[EC2](https://aws.amazon.com/ec2/) instances, networking and security infrastructure, etc.---while Ansible can install and run Pulsar on the provisioned resources. + +## Requirements and setup + +In order to install a Pulsar cluster on AWS using Terraform and Ansible, you need to prepare the following things: + +* An [AWS account](https://aws.amazon.com/account/) and the [`aws`](https://aws.amazon.com/cli/) command-line tool +* Python and [pip](https://pip.pypa.io/en/stable/) +* The [`terraform-inventory`](https://github.com/adammck/terraform-inventory) tool, which enables Ansible to use Terraform artifacts + +You also need to make sure that you are currently logged into your AWS account via the `aws` tool: + +```bash + +$ aws configure + +``` + +## Installation + +You can install Ansible on Linux or macOS using pip. + +```bash + +$ pip install ansible + +``` + +You can install Terraform using the instructions [here](https://www.terraform.io/intro/getting-started/install.html). + +You also need to have the Terraform and Ansible configuration for Pulsar locally on your machine. 
You can find them in the [GitHub repository](https://github.com/apache/pulsar) of Pulsar, which you can fetch using Git commands: + +```bash + +$ git clone https://github.com/apache/pulsar +$ cd pulsar/deployment/terraform-ansible/aws + +``` + +## SSH setup + +> If you already have an SSH key and want to use it, you can skip the step of generating an SSH key and update `private_key_file` setting +> in `ansible.cfg` file and `public_key_path` setting in `terraform.tfvars` file. +> +> For example, if you already have a private SSH key in `~/.ssh/pulsar_aws` and a public key in `~/.ssh/pulsar_aws.pub`, +> follow the steps below: +> +> 1. update `ansible.cfg` with following values: +> + +> ```shell +> +> private_key_file=~/.ssh/pulsar_aws +> +> +> ``` + +> +> 2. update `terraform.tfvars` with following values: +> + +> ```shell +> +> public_key_path=~/.ssh/pulsar_aws.pub +> +> +> ``` + +In order to create the necessary AWS resources using Terraform, you need to create an SSH key. Enter the following commands to create a private SSH key in `~/.ssh/id_rsa` and a public key in `~/.ssh/id_rsa.pub`: + +```bash + +$ ssh-keygen -t rsa + +``` + +Do *not* enter a passphrase (hit **Enter** instead when the prompt comes out). Enter the following command to verify that a key has been created: + +```bash + +$ ls ~/.ssh +id_rsa id_rsa.pub + +``` + +## Create AWS resources using Terraform + +To start building AWS resources with Terraform, you need to install all Terraform dependencies. Enter the following command: + +```bash + +$ terraform init +# This will create a .terraform folder + +``` + +After that, you can apply the default Terraform configuration by entering this command: + +```bash + +$ terraform apply + +``` + +Then you see this prompt below: + +```bash + +Do you want to perform these actions? + Terraform will perform the actions described above. + Only 'yes' will be accepted to approve. + + Enter a value: + +``` + +Type `yes` and hit **Enter**. 
Applying the configuration could take several minutes. When the configuration applying finishes, you can see `Apply complete!` along with some other information, including the number of resources created. + +### Apply a non-default configuration + +You can apply a non-default Terraform configuration by changing the values in the `terraform.tfvars` file. The following variables are available: + +Variable name | Description | Default +:-------------|:------------|:------- +`public_key_path` | The path of the public key that you have generated. | `~/.ssh/id_rsa.pub` +`region` | The AWS region in which the Pulsar cluster runs | `us-west-2` +`availability_zone` | The AWS availability zone in which the Pulsar cluster runs | `us-west-2a` +`aws_ami` | The [Amazon Machine Image](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) (AMI) that the cluster uses | `ami-9fa343e7` +`num_zookeeper_nodes` | The number of [ZooKeeper](https://zookeeper.apache.org) nodes in the ZooKeeper cluster | 3 +`num_bookie_nodes` | The number of bookies that runs in the cluster | 3 +`num_broker_nodes` | The number of Pulsar brokers that runs in the cluster | 2 +`num_proxy_nodes` | The number of Pulsar proxies that runs in the cluster | 1 +`base_cidr_block` | The root [CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) that network assets uses for the cluster | `10.0.0.0/16` +`instance_types` | The EC2 instance types to be used. 
This variable is a map with four keys: `zookeeper` for the ZooKeeper instances, `bookie` for the BookKeeper bookies, and `broker` and `proxy` for Pulsar brokers and proxies | `t2.small` (ZooKeeper), `i3.xlarge` (BookKeeper) and `c5.2xlarge` (Brokers/Proxies) + +### What is installed + +When you run the Ansible playbook, the following AWS resources are used: + +* 9 total [Elastic Compute Cloud](https://aws.amazon.com/ec2) (EC2) instances running the [ami-9fa343e7](https://access.redhat.com/articles/3135091) Amazon Machine Image (AMI), which runs [Red Hat Enterprise Linux (RHEL) 7.4](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html-single/7.4_release_notes/index). By default, that includes: + * 3 small VMs for ZooKeeper ([t2.small](https://www.ec2instances.info/?selected=t2.small) instances) + * 3 larger VMs for BookKeeper [bookies](reference-terminology.md#bookie) ([i3.xlarge](https://www.ec2instances.info/?selected=i3.xlarge) instances) + * 2 larger VMs for Pulsar [brokers](reference-terminology.md#broker) ([c5.2xlarge](https://www.ec2instances.info/?selected=c5.2xlarge) instances) + * 1 larger VM for Pulsar [proxy](reference-terminology.md#proxy) ([c5.2xlarge](https://www.ec2instances.info/?selected=c5.2xlarge) instance) +* An EC2 [security group](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-network-security.html) +* A [virtual private cloud](https://aws.amazon.com/vpc/) (VPC) for security +* An [API Gateway](https://aws.amazon.com/api-gateway/) for connections from the outside world +* A [route table](http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/VPC_Route_Tables.html) for the Pulsar cluster's VPC +* A [subnet](http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/VPC_Subnets.html) for the VPC + +All EC2 instances for the cluster run in the [us-west-2](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html) region. 
+ +### Fetch your Pulsar connection URL + +When you apply the Terraform configuration by entering the command `terraform apply`, Terraform outputs a value for the `pulsar_service_url`. The value should look something like this: + +``` + +pulsar://pulsar-elb-1800761694.us-west-2.elb.amazonaws.com:6650 + +``` + +You can fetch that value at any time by entering the command `terraform output pulsar_service_url` or parsing the `terraform.tfstate` file (which is JSON, even though the filename does not reflect that): + +```bash + +$ cat terraform.tfstate | jq .modules[0].outputs.pulsar_service_url.value + +``` + +### Destroy your cluster + +At any point, you can destroy all AWS resources associated with your cluster using Terraform's `destroy` command: + +```bash + +$ terraform destroy + +``` + +## Setup Disks + +Before you run the Pulsar playbook, you need to mount the disks to the correct directories on those bookie nodes. Since different types of machines have different disk layouts, you need to update the task defined in the `setup-disk.yaml` file after changing the `instance_types` in your Terraform config. + +To set up disks on bookie nodes, enter this command: + +```bash + +$ ansible-playbook \ + --user='ec2-user' \ + --inventory=`which terraform-inventory` \ + setup-disk.yaml + +``` + +After that, the disks are mounted under `/mnt/journal` as the journal disk, and `/mnt/storage` as the ledger disk. +Remember to enter this command only once. If you attempt to enter this command again after you have run the Pulsar playbook, your disks might potentially be erased again, causing the bookies to fail to start up. + +## Run the Pulsar playbook + +Once you have created the necessary AWS resources using Terraform, you can install and run Pulsar on the Terraform-created EC2 instances using Ansible. 
+ +(Optional) If you want to use any [built-in IO connectors](io-connectors), edit the `Download Pulsar IO packages` task in the `deploy-pulsar.yaml` file and uncomment the connectors you want to use. + +To run the playbook, enter this command: + +```bash + +$ ansible-playbook \ + --user='ec2-user' \ + --inventory=`which terraform-inventory` \ + ../deploy-pulsar.yaml + +``` + +If you have created a private SSH key at a location different from `~/.ssh/id_rsa`, you can specify the different location using the `--private-key` flag in the following command: + +```bash + +$ ansible-playbook \ + --user='ec2-user' \ + --inventory=`which terraform-inventory` \ + --private-key="~/.ssh/some-non-default-key" \ + ../deploy-pulsar.yaml + +``` + +## Access the cluster + +You can now access your running Pulsar cluster using the unique Pulsar connection URL for your cluster, which you can obtain following the instructions [above](#fetch-your-pulsar-connection-url). + +For a quick demonstration of accessing the cluster, we can use the Python client for Pulsar and the Python shell. First, install the Pulsar Python module using pip: + +```bash + +$ pip install pulsar-client + +``` + +Now, open up the Python shell using the `python` command: + +```bash + +$ python + +``` + +Once you are in the shell, enter the following command: + +```python + +>>> import pulsar +>>> client = pulsar.Client('pulsar://pulsar-elb-1800761694.us-west-2.elb.amazonaws.com:6650') +# Make sure to use your connection URL +>>> producer = client.create_producer('persistent://public/default/test-topic') +>>> producer.send('Hello world') +>>> client.close() + +``` + +If all of these commands are successful, Pulsar clients can now use your cluster! 
diff --git a/site2/website-next/versioned_docs/version-2.7.1/deploy-bare-metal-multi-cluster.md b/site2/website-next/versioned_docs/version-2.7.1/deploy-bare-metal-multi-cluster.md new file mode 100644 index 0000000000000..783b17104dd1f --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/deploy-bare-metal-multi-cluster.md @@ -0,0 +1,483 @@ +--- +id: deploy-bare-metal-multi-cluster +title: Deploying a multi-cluster on bare metal +sidebar_label: "Bare metal multi-cluster" +original_id: deploy-bare-metal-multi-cluster +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +> ### Tips +> +> 1. Single-cluster Pulsar installations should be sufficient for all but the most ambitious use cases. If you are interested in experimenting with +> Pulsar or using it in a startup or on a single team, you had better opt for a single cluster. For instructions on deploying a single cluster, +> see the guide [here](deploy-bare-metal). +> +> 2. If you want to use all builtin [Pulsar IO](io-overview) connectors in your Pulsar deployment, you need to download `apache-pulsar-io-connectors` +> package and install `apache-pulsar-io-connectors` under `connectors` directory in the pulsar directory on every broker node or on every function-worker node if you +> run a separate cluster of function workers for [Pulsar Functions](functions-overview). +> +> 3. If you want to use [Tiered Storage](concepts-tiered-storage) feature in your Pulsar deployment, you need to download `apache-pulsar-offloaders` +> package and install `apache-pulsar-offloaders` under `offloaders` directory in the pulsar directory on every broker node. For more details of how to configure +> this feature, you can refer to the [Tiered storage cookbook](cookbooks-tiered-storage). + +A Pulsar *instance* consists of multiple Pulsar clusters working in unison. 
You can distribute clusters across data centers or geographical regions and replicate the clusters amongst themselves using [geo-replication](administration-geo). Deploying a multi-cluster Pulsar instance involves the following basic steps: + +* Deploying two separate [ZooKeeper](#deploy-zookeeper) quorums: a [local](#deploy-local-zookeeper) quorum for each cluster in the instance and a [configuration store](#configuration-store) quorum for instance-wide tasks +* Initializing [cluster metadata](#cluster-metadata-initialization) for each cluster +* Deploying a [BookKeeper cluster](#deploy-bookkeeper) of bookies in each Pulsar cluster +* Deploying [brokers](#deploy-brokers) in each Pulsar cluster + +If you want to deploy a single Pulsar cluster, see [Clusters and Brokers](getting-started-standalone.md#start-the-cluster). + +> #### Run Pulsar locally or on Kubernetes? +> This guide shows you how to deploy Pulsar in production in a non-Kubernetes environment. If you want to run a standalone Pulsar cluster on a single machine for development purposes, see the [Setting up a local cluster](getting-started-standalone.md) guide. If you want to run Pulsar on [Kubernetes](https://kubernetes.io), see the [Pulsar on Kubernetes](deploy-kubernetes) guide, which includes sections on running Pulsar on Kubernetes on [Google Kubernetes Engine](deploy-kubernetes#pulsar-on-google-kubernetes-engine) and on [Amazon Web Services](deploy-kubernetes#pulsar-on-amazon-web-services). + +## System requirement +Currently, Pulsar is available for 64-bit **macOS**, **Linux**, and **Windows**. To use Pulsar, you need to install 64-bit JRE/JDK 8 or later versions. 
+ +## Install Pulsar + +To get started running Pulsar, download a binary tarball release in one of the following ways: + +* by clicking the link below and downloading the release from an Apache mirror: + + * Pulsar @pulsar:version@ binary release + +* from the Pulsar [downloads page](pulsar:download_page_url) +* from the Pulsar [releases page](https://github.com/apache/pulsar/releases/latest) +* using [wget](https://www.gnu.org/software/wget): + + ```shell + + $ wget 'https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=pulsar/pulsar-@pulsar:version@/apache-pulsar-@pulsar:version@-bin.tar.gz' -O apache-pulsar-@pulsar:version@-bin.tar.gz + + ``` + +Once you download the tarball, untar it and `cd` into the resulting directory: + +```bash + +$ tar xvfz apache-pulsar-@pulsar:version@-bin.tar.gz +$ cd apache-pulsar-@pulsar:version@ + +``` + +## What your package contains + +The Pulsar binary package initially contains the following directories: + +Directory | Contains +:---------|:-------- +`bin` | [Command-line tools](reference-cli-tools) of Pulsar, such as [`pulsar`](reference-cli-tools.md#pulsar) and [`pulsar-admin`](https://pulsar.apache.org/tools/pulsar-admin/) +`conf` | Configuration files for Pulsar, including for [broker configuration](reference-configuration.md#broker), [ZooKeeper configuration](reference-configuration.md#zookeeper), and more +`examples` | A Java JAR file containing example [Pulsar Functions](functions-overview) +`lib` | The [JAR](https://en.wikipedia.org/wiki/JAR_(file_format)) files that Pulsar uses +`licenses` | License files, in `.txt` form, for various components of the Pulsar codebase + +The following directories are created once you begin running Pulsar: + +Directory | Contains +:---------|:-------- +`data` | The data storage directory that ZooKeeper and BookKeeper use +`instances` | Artifacts created for [Pulsar Functions](functions-overview) +`logs` | Logs that the installation creates + + +## Deploy ZooKeeper + 
+Each Pulsar instance relies on two separate ZooKeeper quorums. + +* [Local ZooKeeper](#deploy-local-zookeeper) operates at the cluster level and provides cluster-specific configuration management and coordination. Each Pulsar cluster needs to have a dedicated ZooKeeper cluster. +* [Configuration Store](#deploy-the-configuration-store) operates at the instance level and provides configuration management for the entire system (and thus across clusters). An independent cluster of machines or the same machines that local ZooKeeper uses can provide the configuration store quorum. + +The configuration store quorum can be provided by an independent cluster of machines or by the same machines used by local ZooKeeper. + + +### Deploy local ZooKeeper + +ZooKeeper manages a variety of essential coordination-related and configuration-related tasks for Pulsar. + +You need to stand up one local ZooKeeper cluster *per Pulsar cluster* for deploying a Pulsar instance. + +To begin, add all ZooKeeper servers to the quorum configuration specified in the [`conf/zookeeper.conf`](reference-configuration.md#zookeeper) file. Add a `server.N` line for each node in the cluster to the configuration, where `N` is the number of the ZooKeeper node. The following is an example for a three-node cluster: + +```properties + +server.1=zk1.us-west.example.com:2888:3888 +server.2=zk2.us-west.example.com:2888:3888 +server.3=zk3.us-west.example.com:2888:3888 + +``` + +On each host, you need to specify the ID of the node in the `myid` file of each node, which is in `data/zookeeper` folder of each server by default (you can change the file location via the [`dataDir`](reference-configuration.md#zookeeper-dataDir) parameter). + +> See the [Multi-server setup guide](https://zookeeper.apache.org/doc/r3.4.10/zookeeperAdmin.html#sc_zkMulitServerSetup) in the ZooKeeper documentation for detailed information on `myid` and more. 
+ +On a ZooKeeper server at `zk1.us-west.example.com`, for example, you could set the `myid` value like this: + +```shell + +$ mkdir -p data/zookeeper +$ echo 1 > data/zookeeper/myid + +``` + +On `zk2.us-west.example.com` the command looks like `echo 2 > data/zookeeper/myid` and so on. + +Once you add each server to the `zookeeper.conf` configuration and each server has the appropriate `myid` entry, you can start ZooKeeper on all hosts (in the background, using nohup) with the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool: + +```shell + +$ bin/pulsar-daemon start zookeeper + +``` + +### Deploy the configuration store + +The ZooKeeper cluster that is configured and started up in the section above is a *local* ZooKeeper cluster that you can use to manage a single Pulsar cluster. In addition to a local cluster, however, a full Pulsar instance also requires a configuration store for handling some instance-level configuration and coordination tasks. + +If you deploy a [single-cluster](#single-cluster-pulsar-instance) instance, you do not need a separate cluster for the configuration store. If, however, you deploy a [multi-cluster](#multi-cluster-pulsar-instance) instance, you should stand up a separate ZooKeeper cluster for configuration tasks. + +#### Single-cluster Pulsar instance + +If your Pulsar instance consists of just one cluster, then you can deploy a configuration store on the same machines as the local ZooKeeper quorum but run on different TCP ports. + +To deploy a ZooKeeper configuration store in a single-cluster instance, add the same ZooKeeper servers that the local quorum uses to the configuration file in [`conf/global_zookeeper.conf`](reference-configuration.md#configuration-store) using the same method as for [local ZooKeeper](#deploy-local-zookeeper), but make sure to use a different port (2181 is the default for ZooKeeper). 
The following is an example that uses port 2184 for a three-node ZooKeeper cluster: + +```properties + +clientPort=2184 +server.1=zk1.us-west.example.com:2185:2186 +server.2=zk2.us-west.example.com:2185:2186 +server.3=zk3.us-west.example.com:2185:2186 + +``` + +As before, create the `myid` files for each server in `data/global-zookeeper/myid`. + +#### Multi-cluster Pulsar instance + +When you deploy a global Pulsar instance, with clusters distributed across different geographical regions, the configuration store serves as a highly available and strongly consistent metadata store that can tolerate failures and partitions spanning whole regions. + +The key here is to make sure the ZK quorum members are spread across at least 3 regions and that other regions run as observers. + +Again, given the very low expected load on the configuration store servers, you can +share the same hosts used for the local ZooKeeper quorum. + +For example, assume a Pulsar instance with the following clusters: `us-west`, +`us-east`, `us-central`, `eu-central`, `ap-south`. Also assume that each cluster has its own local ZK servers, named as follows: + +``` + +zk[1-3].${CLUSTER}.example.com + +``` + +In this scenario, you want to pick the quorum participants from a few clusters and +let all the others be ZK observers. For example, to form a 7-server quorum, you can pick 3 servers from `us-west`, 2 from `us-central` and 2 from `us-east`. + +This method guarantees that writes to the configuration store are possible even if one of these regions is unreachable. 
+ +The ZK configuration in all the servers looks like: + +```properties + +clientPort=2184 +server.1=zk1.us-west.example.com:2185:2186 +server.2=zk2.us-west.example.com:2185:2186 +server.3=zk3.us-west.example.com:2185:2186 +server.4=zk1.us-central.example.com:2185:2186 +server.5=zk2.us-central.example.com:2185:2186 +server.6=zk3.us-central.example.com:2185:2186:observer +server.7=zk1.us-east.example.com:2185:2186 +server.8=zk2.us-east.example.com:2185:2186 +server.9=zk3.us-east.example.com:2185:2186:observer +server.10=zk1.eu-central.example.com:2185:2186:observer +server.11=zk2.eu-central.example.com:2185:2186:observer +server.12=zk3.eu-central.example.com:2185:2186:observer +server.13=zk1.ap-south.example.com:2185:2186:observer +server.14=zk2.ap-south.example.com:2185:2186:observer +server.15=zk3.ap-south.example.com:2185:2186:observer + +``` + +Additionally, ZK observers need to have the following parameter: + +```properties + +peerType=observer + +``` + +##### Start the service + +Once your configuration store configuration is in place, you can start up the service using [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon): + +```shell + +$ bin/pulsar-daemon start configuration-store + +``` + +## Cluster metadata initialization + +Once you set up the cluster-specific ZooKeeper and configuration store quorums for your instance, you need to write some metadata to ZooKeeper for each cluster in your instance. **You only need to write this metadata once**. + +You can initialize this metadata using the [`initialize-cluster-metadata`](reference-cli-tools.md#pulsar-initialize-cluster-metadata) command of the [`pulsar`](reference-cli-tools.md#pulsar) CLI tool. 
The following is an example: + +```shell + +$ bin/pulsar initialize-cluster-metadata \ + --cluster us-west \ + --zookeeper zk1.us-west.example.com:2181 \ + --configuration-store zk1.us-west.example.com:2184 \ + --web-service-url http://pulsar.us-west.example.com:8080/ \ + --web-service-url-tls https://pulsar.us-west.example.com:8443/ \ + --broker-service-url pulsar://pulsar.us-west.example.com:6650/ \ + --broker-service-url-tls pulsar+ssl://pulsar.us-west.example.com:6651/ + +``` + +As you can see from the example above, you need to specify the following: + +* The name of the cluster +* The local ZooKeeper connection string for the cluster +* The configuration store connection string for the entire instance +* The web service URL for the cluster +* A broker service URL enabling interaction with the [brokers](reference-terminology.md#broker) in the cluster + +If you use [TLS](security-tls-transport), you also need to specify a TLS web service URL for the cluster as well as a TLS broker service URL for the brokers in the cluster. + +Make sure to run `initialize-cluster-metadata` for each cluster in your instance. + +## Deploy BookKeeper + +BookKeeper provides [persistent message storage](concepts-architecture-overview.md#persistent-storage) for Pulsar. + +Each Pulsar cluster needs to have its own cluster of bookies. The BookKeeper cluster shares a local ZooKeeper quorum with the Pulsar cluster. + +### Configure bookies + +You can configure BookKeeper bookies using the [`conf/bookkeeper.conf`](reference-configuration.md#bookkeeper) configuration file. The most important aspect of configuring each bookie is ensuring that the [`zkServers`](reference-configuration.md#bookkeeper-zkServers) parameter is set to the connection string for the local ZooKeeper of the Pulsar cluster. + +### Start bookies + +You can start a bookie in two ways: in the foreground or as a background daemon. 
+ +To start a bookie in the background, use the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool: + +```bash + +$ bin/pulsar-daemon start bookie + +``` + +You can verify that the bookie works properly using the `bookiesanity` command for the [BookKeeper shell](reference-cli-tools.md#bookkeeper-shell): + +```shell + +$ bin/bookkeeper shell bookiesanity + +``` + +This command creates a new ledger on the local bookie, writes a few entries, reads them back and finally deletes the ledger. + +After you have started all bookies, you can use the `simpletest` command for [BookKeeper shell](reference-cli-tools.md#shell) on any bookie node, to verify that all bookies in the cluster are running. + +```bash + +$ bin/bookkeeper shell simpletest --ensemble <num-bookies> --writeQuorum <num-bookies> --ackQuorum <num-bookies> --numEntries <num-entries> + +``` + +Bookie hosts are responsible for storing message data on disk. In order for bookies to provide optimal performance, having a suitable hardware configuration is essential for the bookies. The following are key dimensions for bookie hardware capacity. + +* Disk I/O capacity read/write +* Storage capacity + +Message entries written to bookies are always synced to disk before returning an acknowledgement to the Pulsar broker. To ensure low write latency, BookKeeper is +designed to use multiple devices: + +* A **journal** to ensure durability. For sequential writes, having fast [fsync](https://linux.die.net/man/2/fsync) operations on bookie hosts is critical. Typically, small and fast [solid-state drives](https://en.wikipedia.org/wiki/Solid-state_drive) (SSDs) should suffice, or [hard disk drives](https://en.wikipedia.org/wiki/Hard_disk_drive) (HDDs) with a [RAID](https://en.wikipedia.org/wiki/RAID) controller and a battery-backed write cache. Both solutions can reach fsync latency of ~0.4 ms. +* A **ledger storage device** is where data is stored until all consumers acknowledge the message. Writes happen in the background, so write I/O is not a big concern. 
Reads happen sequentially most of the time and the backlog is drained only in case of consumer drain. To store large amounts of data, a typical configuration involves multiple HDDs with a RAID controller. + + + +## Deploy brokers + +Once you set up ZooKeeper, initialize cluster metadata, and spin up BookKeeper bookies, you can deploy brokers. + +### Broker configuration + +You can configure brokers using the [`conf/broker.conf`](reference-configuration.md#broker) configuration file. + +The most important element of broker configuration is ensuring that each broker is aware of its local ZooKeeper quorum as well as the configuration store quorum. Make sure that you set the [`zookeeperServers`](reference-configuration.md#broker-zookeeperServers) parameter to reflect the local quorum and the [`configurationStoreServers`](reference-configuration.md#broker-configurationStoreServers) parameter to reflect the configuration store quorum (although you need to specify only those ZooKeeper servers located in the same cluster). + +You also need to specify the name of the [cluster](reference-terminology.md#cluster) to which the broker belongs using the [`clusterName`](reference-configuration.md#broker-clusterName) parameter. In addition, you need to match the broker and web service ports provided when you initialize the metadata (especially when you use a different port from default) of the cluster. + +The following is an example configuration: + +```properties + +# Local ZooKeeper servers +zookeeperServers=zk1.us-west.example.com:2181,zk2.us-west.example.com:2181,zk3.us-west.example.com:2181 + +# Configuration store quorum connection string. 
+configurationStoreServers=zk1.us-west.example.com:2184,zk2.us-west.example.com:2184,zk3.us-west.example.com:2184 + +clusterName=us-west + +# Broker data port +brokerServicePort=6650 + +# Broker data port for TLS +brokerServicePortTls=6651 + +# Port to use to server HTTP request +webServicePort=8080 + +# Port to use to server HTTPS request +webServicePortTls=8443 + +``` + +### Broker hardware + +Pulsar brokers do not require any special hardware since they do not use the local disk. You had better choose fast CPUs and 10Gbps [NIC](https://en.wikipedia.org/wiki/Network_interface_controller) so that the software can take full advantage of that. + +### Start the broker service + +You can start a broker in the background by using [nohup](https://en.wikipedia.org/wiki/Nohup) with the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool: + +```shell + +$ bin/pulsar-daemon start broker + +``` + +You can also start brokers in the foreground by using [`pulsar broker`](reference-cli-tools.md#broker): + +```shell + +$ bin/pulsar broker + +``` + +## Service discovery + +[Clients](getting-started-clients) connecting to Pulsar brokers need to be able to communicate with an entire Pulsar instance using a single URL. Pulsar provides a built-in service discovery mechanism that you can set up using the instructions [immediately below](#service-discovery-setup). + +You can also use your own service discovery system if you want. If you use your own system, you only need to satisfy just one requirement: when a client performs an HTTP request to an [endpoint](reference-configuration) for a Pulsar cluster, such as `http://pulsar.us-west.example.com:8080`, the client needs to be redirected to *some* active broker in the desired cluster, whether via DNS, an HTTP or IP redirect, or some other means. 
+ +> #### Service discovery already provided by many scheduling systems +> Many large-scale deployment systems, such as [Kubernetes](deploy-kubernetes), have service discovery systems built in. If you run Pulsar on such a system, you may not need to provide your own service discovery mechanism. + + +### Service discovery setup + +The service discovery mechanism that is included with Pulsar maintains a list of active brokers, which is stored in ZooKeeper, and supports lookup using HTTP and also the [binary protocol](developing-binary-protocol) of Pulsar. + +To get started setting up the built-in service discovery of Pulsar, you need to change a few parameters in the [`conf/discovery.conf`](reference-configuration.md#service-discovery) configuration file. Set the [`zookeeperServers`](reference-configuration.md#service-discovery-zookeeperServers) parameter to the ZooKeeper quorum connection string of the cluster and the [`configurationStoreServers`](reference-configuration.md#service-discovery-configurationStoreServers) setting to the [configuration +store](reference-terminology.md#configuration-store) quorum connection string. + +```properties + +# Zookeeper quorum connection string +zookeeperServers=zk1.us-west.example.com:2181,zk2.us-west.example.com:2181,zk3.us-west.example.com:2181 + +# Global configuration store connection string +configurationStoreServers=zk1.us-west.example.com:2184,zk2.us-west.example.com:2184,zk3.us-west.example.com:2184 + +``` + +To start the discovery service: + +```shell + +$ bin/pulsar-daemon start discovery + +``` + +## Admin client and verification + +At this point your Pulsar instance should be ready to use. You can now configure client machines that can serve as [administrative clients](admin-api-overview) for each cluster. You can use the [`conf/client.conf`](reference-configuration.md#client) configuration file to configure admin clients.
+ +The most important thing is that you point the [`serviceUrl`](reference-configuration.md#client-serviceUrl) parameter to the correct service URL for the cluster: + +```properties + +serviceUrl=http://pulsar.us-west.example.com:8080/ + +``` + +## Provision new tenants + +Pulsar is built as a fundamentally multi-tenant system. + + +If a new tenant wants to use the system, you need to create a new one. You can create a new tenant by using the [`pulsar-admin`](reference-pulsar-admin.md#tenants) CLI tool: + +```shell + +$ bin/pulsar-admin tenants create test-tenant \ + --allowed-clusters us-west \ + --admin-roles test-admin-role + +``` + +In this command, users who identify with `test-admin-role` role can administer the configuration for the `test-tenant` tenant. The `test-tenant` tenant can only use the `us-west` cluster. From now on, this tenant can manage its resources. + +Once you create a tenant, you need to create [namespaces](reference-terminology.md#namespace) for topics within that tenant. + + +The first step is to create a namespace. A namespace is an administrative unit that can contain many topics. A common practice is to create a namespace for each different use case from a single tenant. + +```shell + +$ bin/pulsar-admin namespaces create test-tenant/ns1 + +``` + +##### Test producer and consumer + + +Everything is now ready to send and receive messages. The quickest way to test the system is through the [`pulsar-perf`](reference-cli-tools.md#pulsar-perf) client tool. + + +You can use a topic in the namespace that you have just created. Topics are automatically created the first time when a producer or a consumer tries to use them. 
+ +The topic name in this case could be: + +```http + +persistent://test-tenant/ns1/my-topic + +``` + +Start a consumer that creates a subscription on the topic and waits for messages: + +```shell + +$ bin/pulsar-perf consume persistent://test-tenant/ns1/my-topic + +``` + +Start a producer that publishes messages at a fixed rate and reports stats every 10 seconds: + +```shell + +$ bin/pulsar-perf produce persistent://test-tenant/ns1/my-topic + +``` + +To report the topic stats: + +```shell + +$ bin/pulsar-admin topics stats persistent://test-tenant/ns1/my-topic + +``` + diff --git a/site2/website-next/versioned_docs/version-2.7.1/deploy-bare-metal.md b/site2/website-next/versioned_docs/version-2.7.1/deploy-bare-metal.md new file mode 100644 index 0000000000000..678231342f478 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/deploy-bare-metal.md @@ -0,0 +1,546 @@ +--- +id: deploy-bare-metal +title: Deploy a cluster on bare metal +sidebar_label: "Bare metal" +original_id: deploy-bare-metal +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + + +> ### Tips +> +> 1. Single-cluster Pulsar installations should be sufficient for all but the most ambitious use cases. If you are interested in experimenting with +> Pulsar or using Pulsar in a startup or on a single team, it is simplest to opt for a single cluster. If you do need to run a multi-cluster Pulsar instance, +> see the guide [here](deploy-bare-metal-multi-cluster). +> +> 2. If you want to use all builtin [Pulsar IO](io-overview) connectors in your Pulsar deployment, you need to download `apache-pulsar-io-connectors` +> package and install `apache-pulsar-io-connectors` under `connectors` directory in the pulsar directory on every broker node or on every function-worker node if you +> have run a separate cluster of function workers for [Pulsar Functions](functions-overview). +> +> 3. 
If you want to use [Tiered Storage](concepts-tiered-storage) feature in your Pulsar deployment, you need to download `apache-pulsar-offloaders` +> package and install `apache-pulsar-offloaders` under `offloaders` directory in the pulsar directory on every broker node. For more details of how to configure +> this feature, you can refer to the [Tiered storage cookbook](cookbooks-tiered-storage). + +Deploying a Pulsar cluster involves doing the following (in order): + +* Deploy a [ZooKeeper](#deploy-a-zookeeper-cluster) cluster (optional) +* Initialize [cluster metadata](#initialize-cluster-metadata) +* Deploy a [BookKeeper](#deploy-a-bookkeeper-cluster) cluster +* Deploy one or more Pulsar [brokers](#deploy-pulsar-brokers) + +## Preparation + +### Requirements + +Currently, Pulsar is available for 64-bit **macOS**, **Linux**, and **Windows**. To use Pulsar, you need to install 64-bit JRE/JDK 8 or later versions. + +> If you already have an existing ZooKeeper cluster and want to reuse it, you do not need to prepare the machines +> for running ZooKeeper. + +To run Pulsar on bare metal, the following configuration is recommended: + +* At least 6 Linux machines or VMs + * 3 for running [ZooKeeper](https://zookeeper.apache.org) + * 3 for running a Pulsar broker, and a [BookKeeper](https://bookkeeper.apache.org) bookie +* A single [DNS](https://en.wikipedia.org/wiki/Domain_Name_System) name covering all of the Pulsar broker hosts + +> If you do not have enough machines, or to try out Pulsar in cluster mode (and expand the cluster later), +> you can deploy a full Pulsar configuration on one node, where Zookeeper, the bookie and broker are run on the same machine. + +> If you do not have a DNS server, you can use the multi-host format in the service URL instead. + +Each machine in your cluster needs to have [Java 8](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or a more recent version of Java installed. 
+ +The following is a diagram showing the basic setup: + +![alt-text](/assets/pulsar-basic-setup.png) + +In this diagram, connecting clients need to be able to communicate with the Pulsar cluster using a single URL. In this case, `pulsar-cluster.acme.com` abstracts over all of the message-handling brokers. Pulsar message brokers run on machines alongside BookKeeper bookies; brokers and bookies, in turn, rely on ZooKeeper. + +### Hardware considerations + +When you deploy a Pulsar cluster, keep in mind the following basic recommendations when you do the capacity planning. + +#### ZooKeeper + +For machines running ZooKeeper, it is recommended to use less powerful machines or VMs. Pulsar uses ZooKeeper only for periodic coordination-related and configuration-related tasks, *not* for basic operations. If you run Pulsar on [Amazon Web Services](https://aws.amazon.com/) (AWS), for example, a [t2.small](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/t2-instances.html) instance is likely to suffice. + +#### Bookies and Brokers + +For machines running a bookie and a Pulsar broker, more powerful machines are required. For an AWS deployment, for example, [i3.4xlarge](https://aws.amazon.com/blogs/aws/now-available-i3-instances-for-demanding-io-intensive-applications/) instances may be appropriate. On those machines you can use the following: + +* Fast CPUs and 10Gbps [NIC](https://en.wikipedia.org/wiki/Network_interface_controller) (for Pulsar brokers) +* Small and fast [solid-state drives](https://en.wikipedia.org/wiki/Solid-state_drive) (SSDs) or [hard disk drives](https://en.wikipedia.org/wiki/Hard_disk_drive) (HDDs) with a [RAID](https://en.wikipedia.org/wiki/RAID) controller and a battery-backed write cache (for BookKeeper bookies) + +## Install the Pulsar binary package + +> You need to install the Pulsar binary package on *each machine in the cluster*, including machines running [ZooKeeper](#deploy-a-zookeeper-cluster) and [BookKeeper](#deploy-a-bookkeeper-cluster).
+ +To get started deploying a Pulsar cluster on bare metal, you need to download a binary tarball release in one of the following ways: + +* By clicking on the link below directly, which automatically triggers a download: + * Pulsar @pulsar:version@ binary release +* From the Pulsar [downloads page](pulsar:download_page_url) +* From the Pulsar [releases page](https://github.com/apache/pulsar/releases/latest) on [GitHub](https://github.com) +* Using [wget](https://www.gnu.org/software/wget): + +```bash + +$ wget pulsar:binary_release_url + +``` + +Once you download the tarball, untar it and `cd` into the resulting directory: + +```bash + +$ tar xvzf apache-pulsar-@pulsar:version@-bin.tar.gz +$ cd apache-pulsar-@pulsar:version@ + +``` + +The extracted directory contains the following subdirectories: + +Directory | Contains +:---------|:-------- +`bin` |[command-line tools](reference-cli-tools) of Pulsar, such as [`pulsar`](reference-cli-tools.md#pulsar) and [`pulsar-admin`](https://pulsar.apache.org/tools/pulsar-admin/) +`conf` | Configuration files for Pulsar, including for [broker configuration](reference-configuration.md#broker), [ZooKeeper configuration](reference-configuration.md#zookeeper), and more +`data` | The data storage directory that ZooKeeper and BookKeeper use +`lib` | The [JAR](https://en.wikipedia.org/wiki/JAR_(file_format)) files that Pulsar uses +`logs` | Logs that the installation creates + +## [Install Builtin Connectors (optional)]( https://pulsar.apache.org/docs/en/next/standalone/#install-builtin-connectors-optional) + +> Since Pulsar release `2.1.0-incubating`, Pulsar provides a separate binary distribution, containing all the `builtin` connectors. +> If you want to enable those `builtin` connectors, you can follow the instructions as below; otherwise you can +> skip this section for now. 
+ +To get started using builtin connectors, you need to download the connectors tarball release on every broker node in one of the following ways: + +* by clicking the link below and downloading the release from an Apache mirror: + + * Pulsar IO Connectors @pulsar:version@ release + +* from the Pulsar [downloads page](pulsar:download_page_url) +* from the Pulsar [releases page](https://github.com/apache/pulsar/releases/latest) +* using [wget](https://www.gnu.org/software/wget): + + ```shell + + $ wget pulsar:connector_release_url/{connector}-@pulsar:version@.nar + + ``` + +Once you download the .nar file, copy the file to directory `connectors` in the pulsar directory. +For example, if you download the connector file `pulsar-io-aerospike-@pulsar:version@.nar`: + +```bash + +$ mkdir connectors +$ mv pulsar-io-aerospike-@pulsar:version@.nar connectors + +$ ls connectors +pulsar-io-aerospike-@pulsar:version@.nar +... + +``` + +## [Install Tiered Storage Offloaders (optional)](https://pulsar.apache.org/docs/en/next/standalone/#install-tiered-storage-offloaders-optional) + +> Since Pulsar release `2.2.0`, Pulsar releases a separate binary distribution, containing the tiered storage offloaders. +> If you want to enable tiered storage feature, you can follow the instructions as below; otherwise you can +> skip this section for now. 
+ +To get started using tiered storage offloaders, you need to download the offloaders tarball release on every broker node in one of the following ways: + +* by clicking the link below and downloading the release from an Apache mirror: + + * Pulsar Tiered Storage Offloaders @pulsar:version@ release + +* from the Pulsar [downloads page](pulsar:download_page_url) +* from the Pulsar [releases page](https://github.com/apache/pulsar/releases/latest) +* using [wget](https://www.gnu.org/software/wget): + + ```shell + + $ wget pulsar:offloader_release_url + + ``` + +Once you download the tarball, in the pulsar directory, untar the offloaders package and copy the offloaders as `offloaders` in the pulsar directory: + +```bash + +$ tar xvfz apache-pulsar-offloaders-@pulsar:version@-bin.tar.gz + +// you can find a directory named `apache-pulsar-offloaders-@pulsar:version@` in the pulsar directory +// then copy the offloaders + +$ mv apache-pulsar-offloaders-@pulsar:version@/offloaders offloaders + +$ ls offloaders +tiered-storage-jcloud-@pulsar:version@.nar + +``` + +For more details of how to configure tiered storage feature, you can refer to the [Tiered storage cookbook](cookbooks-tiered-storage) + + +## Deploy a ZooKeeper cluster + +> If you already have an existing zookeeper cluster and want to use it, you can skip this section. + +[ZooKeeper](https://zookeeper.apache.org) manages a variety of essential coordination- and configuration-related tasks for Pulsar. To deploy a Pulsar cluster, you need to deploy ZooKeeper first (before all other components). A 3-node ZooKeeper cluster is the recommended configuration. Pulsar does not make heavy use of ZooKeeper, so more lightweight machines or VMs should suffice for running ZooKeeper. + +To begin, add all ZooKeeper servers to the configuration specified in [`conf/zookeeper.conf`](reference-configuration.md#zookeeper) (in the Pulsar directory that you create [above](#install-the-pulsar-binary-package)). 
The following is an example: + +```properties + +server.1=zk1.us-west.example.com:2888:3888 +server.2=zk2.us-west.example.com:2888:3888 +server.3=zk3.us-west.example.com:2888:3888 + +``` + +> If you only have one machine on which to deploy Pulsar, you only need to add one server entry in the configuration file. + +On each host, you need to specify the ID of the node in the `myid` file, which is in the `data/zookeeper` folder of each server by default (you can change the file location via the [`dataDir`](reference-configuration.md#zookeeper-dataDir) parameter). + +> See the [Multi-server setup guide](https://zookeeper.apache.org/doc/r3.4.10/zookeeperAdmin.html#sc_zkMulitServerSetup) in the ZooKeeper documentation for detailed information on `myid` and more. + +For example, on a ZooKeeper server like `zk1.us-west.example.com`, you can set the `myid` value as follows: + +```bash + +$ mkdir -p data/zookeeper +$ echo 1 > data/zookeeper/myid + +``` + +On `zk2.us-west.example.com`, the command is `echo 2 > data/zookeeper/myid` and so on. + +Once you add each server to the `zookeeper.conf` configuration and have the appropriate `myid` entry, you can start ZooKeeper on all hosts (in the background, using nohup) with the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool: + +```bash + +$ bin/pulsar-daemon start zookeeper + +``` + +> If you plan to deploy Zookeeper with the Bookie on the same node, you +> need to start zookeeper by using different stats port. + +Start zookeeper with [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool like: + +```bash + +$ PULSAR_EXTRA_OPTS="-Dstats_server_port=8001" bin/pulsar-daemon start zookeeper + +``` + +## Initialize cluster metadata + +Once you deploy ZooKeeper for your cluster, you need to write some metadata to ZooKeeper for each cluster in your instance. You only need to write this data **once**. 
+ +You can initialize this metadata using the [`initialize-cluster-metadata`](reference-cli-tools.md#pulsar-initialize-cluster-metadata) command of the [`pulsar`](reference-cli-tools.md#pulsar) CLI tool. This command can be run on any machine in your ZooKeeper cluster. The following is an example: + +```shell + +$ bin/pulsar initialize-cluster-metadata \ + --cluster pulsar-cluster-1 \ + --zookeeper zk1.us-west.example.com:2181 \ + --configuration-store zk1.us-west.example.com:2181 \ + --web-service-url http://pulsar.us-west.example.com:8080 \ + --web-service-url-tls https://pulsar.us-west.example.com:8443 \ + --broker-service-url pulsar://pulsar.us-west.example.com:6650 \ + --broker-service-url-tls pulsar+ssl://pulsar.us-west.example.com:6651 + +``` + +As you can see from the example above, you will need to specify the following: + +Flag | Description +:----|:----------- +`--cluster` | A name for the cluster +`--zookeeper` | A "local" ZooKeeper connection string for the cluster. This connection string only needs to include *one* machine in the ZooKeeper cluster. +`--configuration-store` | The configuration store connection string for the entire instance. As with the `--zookeeper` flag, this connection string only needs to include *one* machine in the ZooKeeper cluster. +`--web-service-url` | The web service URL for the cluster, plus a port. This URL should be a standard DNS name. The default port is 8080 (you had better not use a different port). +`--web-service-url-tls` | If you use [TLS](security-tls-transport), you also need to specify a TLS web service URL for the cluster. The default port is 8443 (you had better not use a different port). +`--broker-service-url` | A broker service URL enabling interaction with the brokers in the cluster. This URL should use the same DNS name as the web service URL but should use the `pulsar` scheme instead. The default port is 6650 (you had better not use a different port).
+`--broker-service-url-tls` | If you use [TLS](security-tls-transport), you also need to specify a TLS web service URL for the cluster as well as a TLS broker service URL for the brokers in the cluster. The default port is 6651 (you had better not use a different port). + + +> If you do not have a DNS server, you can use multi-host format in the service URL with the following settings: +> + +> ```properties +> +> --web-service-url http://host1:8080,host2:8080,host3:8080 \ +> --web-service-url-tls https://host1:8443,host2:8443,host3:8443 \ +> --broker-service-url pulsar://host1:6650,host2:6650,host3:6650 \ +> --broker-service-url-tls pulsar+ssl://host1:6651,host2:6651,host3:6651 +> +> +> ``` + +> +> If you want to use an existing BookKeeper cluster, you can add the `--existing-bk-metadata-service-uri` flag as follows: +> + +> ```properties +> +> --existing-bk-metadata-service-uri "zk+null://zk1:2181;zk2:2181/ledgers" \ +> --web-service-url http://host1:8080,host2:8080,host3:8080 \ +> --web-service-url-tls https://host1:8443,host2:8443,host3:8443 \ +> --broker-service-url pulsar://host1:6650,host2:6650,host3:6650 \ +> --broker-service-url-tls pulsar+ssl://host1:6651,host2:6651,host3:6651 +> +> +> ``` + +> You can obtain the metadata service URI of the existing BookKeeper cluster by using the `bin/bookkeeper shell whatisinstanceid` command. You must enclose the value in double quotes since the multiple metadata service URIs are separated with semicolons. + +## Deploy a BookKeeper cluster + +[BookKeeper](https://bookkeeper.apache.org) handles all persistent data storage in Pulsar. You need to deploy a cluster of BookKeeper bookies to use Pulsar. You can choose to run a **3-bookie BookKeeper cluster**. + +You can configure BookKeeper bookies using the [`conf/bookkeeper.conf`](reference-configuration.md#bookkeeper) configuration file. 
The most important step in configuring bookies for our purposes here is ensuring that [`zkServers`](reference-configuration.md#bookkeeper-zkServers) is set to the connection string for the ZooKeeper cluster. The following is an example: + +```properties + +zkServers=zk1.us-west.example.com:2181,zk2.us-west.example.com:2181,zk3.us-west.example.com:2181 + +``` + +Once you appropriately modify the `zkServers` parameter, you can make any other configuration changes that you require. You can find a full listing of the available BookKeeper configuration parameters [here](reference-configuration.md#bookkeeper). However, consulting the [BookKeeper documentation](http://bookkeeper.apache.org/docs/latest/reference/config/) for a more in-depth guide might be a better choice. + +Once you apply the desired configuration in `conf/bookkeeper.conf`, you can start up a bookie on each of your BookKeeper hosts. You can start up each bookie either in the background, using [nohup](https://en.wikipedia.org/wiki/Nohup), or in the foreground. + +To start the bookie in the background, use the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool: + +```bash + +$ bin/pulsar-daemon start bookie + +``` + +To start the bookie in the foreground: + +```bash + +$ bin/pulsar bookie + +``` + +You can verify that a bookie works properly by running the `bookiesanity` command on the [BookKeeper shell](reference-cli-tools.md#shell): + +```bash + +$ bin/bookkeeper shell bookiesanity + +``` + +This command creates an ephemeral BookKeeper ledger on the local bookie, writes a few entries, reads them back, and finally deletes the ledger. + +After you start all the bookies, you can use the `simpletest` command of the [BookKeeper shell](reference-cli-tools.md#shell) on any bookie node to verify that all the bookies in the cluster are up and running.
+ +```bash + +$ bin/bookkeeper shell simpletest --ensemble <num-bookies> --writeQuorum <num-bookies> --ackQuorum <num-bookies> --numEntries <num-entries> + +``` + +This command creates a `num-bookies` sized ledger on the cluster, writes a few entries, and finally deletes the ledger. + + +## Deploy Pulsar brokers + +Pulsar brokers are the last thing you need to deploy in your Pulsar cluster. Brokers handle Pulsar messages and provide the administrative interface of Pulsar. A good choice is to run **3 brokers**, one for each machine that already runs a BookKeeper bookie. + +### Configure Brokers + +The most important element of broker configuration is ensuring that each broker is aware of the ZooKeeper cluster that you have deployed. Ensure that the [`zookeeperServers`](reference-configuration.md#broker-zookeeperServers) and [`configurationStoreServers`](reference-configuration.md#broker-configurationStoreServers) parameters are correct. In this case, since you only have 1 cluster and no configuration store setup, the `configurationStoreServers` parameter points to the same servers as `zookeeperServers`. + +```properties + +zookeeperServers=zk1.us-west.example.com:2181,zk2.us-west.example.com:2181,zk3.us-west.example.com:2181 +configurationStoreServers=zk1.us-west.example.com:2181,zk2.us-west.example.com:2181,zk3.us-west.example.com:2181 + +``` + +You also need to specify the cluster name (matching the name that you provided when you [initialize the metadata of the cluster](#initialize-cluster-metadata)): + +```properties + +clusterName=pulsar-cluster-1 + +``` + +In addition, you need to match the broker and web service ports provided when you initialize the metadata of the cluster (especially when you use a different port than the default): + +```properties + +brokerServicePort=6650 +brokerServicePortTls=6651 +webServicePort=8080 +webServicePortTls=8443 + +``` + +> If you deploy Pulsar in a one-node cluster, you should update the replication settings in `conf/broker.conf` to `1`.
+> + +> ```properties +> +> # Number of bookies to use when creating a ledger +> managedLedgerDefaultEnsembleSize=1 +> +> # Number of copies to store for each message +> managedLedgerDefaultWriteQuorum=1 +> +> # Number of guaranteed copies (acks to wait before write is complete) +> managedLedgerDefaultAckQuorum=1 +> +> +> ``` + +### Enable Pulsar Functions (optional) + +If you want to enable [Pulsar Functions](functions-overview), you can follow the instructions as below: + +1. Edit `conf/broker.conf` to enable functions worker, by setting `functionsWorkerEnabled` to `true`. + + ```conf + + functionsWorkerEnabled=true + + ``` + +2. Edit `conf/functions_worker.yml` and set `pulsarFunctionsCluster` to the cluster name that you provide when you [initialize the metadata of the cluster](#initialize-cluster-metadata). + + ```conf + + pulsarFunctionsCluster: pulsar-cluster-1 + + ``` + +If you want to learn more options about deploying the functions worker, check out [Deploy and manage functions worker](functions-worker). + +### Start Brokers + +You can then provide any other configuration changes that you want in the [`conf/broker.conf`](reference-configuration.md#broker) file. Once you decide on a configuration, you can start up the brokers for your Pulsar cluster. Like ZooKeeper and BookKeeper, you can start brokers either in the foreground or in the background, using nohup. + +You can start a broker in the foreground using the [`pulsar broker`](reference-cli-tools.md#pulsar-broker) command: + +```bash + +$ bin/pulsar broker + +``` + +You can start a broker in the background using the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool: + +```bash + +$ bin/pulsar-daemon start broker + +``` + +Once you successfully start up all the brokers that you intend to use, your Pulsar cluster should be ready to go! + +## Connect to the running cluster + +Once your Pulsar cluster is up and running, you should be able to connect with it using Pulsar clients. 
One such client is the [`pulsar-client`](reference-cli-tools.md#pulsar-client) tool, which is included with the Pulsar binary package. The `pulsar-client` tool can publish messages to and consume messages from Pulsar topics and thus provides a simple way to make sure that your cluster runs properly. + +To use the `pulsar-client` tool, first modify the client configuration file in [`conf/client.conf`](reference-configuration.md#client) in your binary package. You need to change the values for `webServiceUrl` and `brokerServiceUrl`, substituting `localhost` (which is the default), with the DNS name that you assign to your broker/bookie hosts. The following is an example: + +```properties + +webServiceUrl=http://us-west.example.com:8080 +brokerServiceUrl=pulsar://us-west.example.com:6650 + +``` + +> If you do not have a DNS server, you can specify multiple hosts in the service URL as follows: +> + +> ```properties +> +> webServiceUrl=http://host1:8080,host2:8080,host3:8080 +> brokerServiceUrl=pulsar://host1:6650,host2:6650,host3:6650 +> +> +> ``` + +Once that is complete, you can publish a message to the Pulsar topic: + +```bash + +$ bin/pulsar-client produce \ + persistent://public/default/test \ + -n 1 \ + -m "Hello Pulsar" + +``` + +> You may need to use a different cluster name in the topic if you specify a cluster name other than `pulsar-cluster-1`. + +This command publishes a single message to the Pulsar topic. In addition, you can subscribe to the Pulsar topic in a different terminal before publishing messages as below: + +```bash + +$ bin/pulsar-client consume \ + persistent://public/default/test \ + -n 100 \ + -s "consumer-test" \ + -t "Exclusive" + +``` + +Once you successfully publish the above message to the topic, you should see it in the standard output: + +```bash + +----- got message ----- +Hello Pulsar + +``` + +## Run Functions + +> If you have [enabled](#enable-pulsar-functions-optional) Pulsar Functions, you can try out the Pulsar Functions now.
+ +Create an ExclamationFunction `exclamation`. + +```bash + +bin/pulsar-admin functions create \ + --jar examples/api-examples.jar \ + --classname org.apache.pulsar.functions.api.examples.ExclamationFunction \ + --inputs persistent://public/default/exclamation-input \ + --output persistent://public/default/exclamation-output \ + --tenant public \ + --namespace default \ + --name exclamation + +``` + +Check whether the function runs as expected by [triggering](functions-deploying.md#triggering-pulsar-functions) the function. + +```bash + +bin/pulsar-admin functions trigger --name exclamation --trigger-value "hello world" + +``` + +You should see the following output: + +```shell + +hello world! + +``` + diff --git a/site2/website-next/versioned_docs/version-2.7.1/deploy-dcos.md b/site2/website-next/versioned_docs/version-2.7.1/deploy-dcos.md new file mode 100644 index 0000000000000..14a36352e6872 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/deploy-dcos.md @@ -0,0 +1,202 @@ +--- +id: deploy-dcos +title: Deploy Pulsar on DC/OS +sidebar_label: "DC/OS" +original_id: deploy-dcos +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +> ### Tips +> +> If you want to enable all builtin [Pulsar IO](io-overview) connectors in your Pulsar deployment, you can choose to use `apachepulsar/pulsar-all` image instead of +> `apachepulsar/pulsar` image. `apachepulsar/pulsar-all` image has already bundled [all builtin connectors](io-overview.md#working-with-connectors). + +[DC/OS](https://dcos.io/) (the DataCenter Operating System) is a distributed operating system used for deploying and managing applications and systems on [Apache Mesos](http://mesos.apache.org/). DC/OS is an open-source tool that [Mesosphere](https://mesosphere.com/) creates and maintains . 
+ +Apache Pulsar is available as a [Marathon Application Group](https://mesosphere.github.io/marathon/docs/application-groups.html), which runs multiple applications as manageable sets. + +## Prerequisites + +In order to run Pulsar on DC/OS, you need the following: + +* DC/OS version [1.9](https://docs.mesosphere.com/1.9/) or higher +* A [DC/OS cluster](https://docs.mesosphere.com/1.9/installing/) with at least three agent nodes +* The [DC/OS CLI tool](https://docs.mesosphere.com/1.9/cli/install/) installed +* The [`PulsarGroups.json`](https://github.com/apache/pulsar/blob/master/deployment/dcos/PulsarGroups.json) configuration file from the Pulsar GitHub repo. + + ```bash + + $ curl -O https://raw.githubusercontent.com/apache/pulsar/master/deployment/dcos/PulsarGroups.json + + ``` + +Each node in the DC/OS-managed Mesos cluster must have at least: + +* 4 CPU +* 4 GB of memory +* 60 GB of total persistent disk + +Alternatively, you can change the configuration in `PulsarGroups.json` to match the resources of your DC/OS cluster. + +## Deploy Pulsar using the DC/OS command interface + +You can deploy Pulsar on DC/OS using this command: + +```bash + +$ dcos marathon group add PulsarGroups.json + +``` + +This command deploys Docker container instances in three groups, which together comprise a Pulsar cluster: + +* 3 bookies (1 [bookie](reference-terminology.md#bookie) on each agent node and 1 [bookie recovery](http://bookkeeper.apache.org/docs/latest/admin/autorecovery/) instance) +* 3 Pulsar [brokers](reference-terminology.md#broker) (1 broker on each node and 1 admin instance) +* 1 [Prometheus](http://prometheus.io/) instance and 1 [Grafana](https://grafana.com/) instance + + +> When you run DC/OS, a ZooKeeper cluster already runs at `master.mesos:2181`, thus you do not have to install or start up ZooKeeper separately.
+ +After executing the `dcos` command above, click on the **Services** tab in the DC/OS [GUI interface](https://docs.mesosphere.com/latest/gui/), which you can access at [http://m1.dcos](http://m1.dcos) in this example. You should see several applications in the process of deploying. + +![DC/OS command executed](/assets/dcos_command_execute.png) + +![DC/OS command executed2](/assets/dcos_command_execute2.png) + +## The BookKeeper group + +To monitor the status of the BookKeeper cluster deployment, click on the **bookkeeper** group in the parent **pulsar** group. + +![DC/OS bookkeeper status](/assets/dcos_bookkeeper_status.png) + +At this point, 3 [bookies](reference-terminology.md#bookie) should be shown as green, which means that the bookies have been deployed successfully and are now running. + +![DC/OS bookkeeper running](/assets/dcos_bookkeeper_run.png) + +You can also click into each bookie instance to get more detailed information, such as the bookie running log. + +![DC/OS bookie log](/assets/dcos_bookie_log.png) + +To display information about the BookKeeper in ZooKeeper, you can visit [http://m1.dcos/exhibitor](http://m1.dcos/exhibitor). In this example, 3 bookies are under the `available` directory. + +![DC/OS bookkeeper in zk](/assets/dcos_bookkeeper_in_zookeeper.png) + +## The Pulsar broker Group + +Similar to the BookKeeper group above, click into the **brokers** to check the status of the Pulsar brokers. + +![DC/OS broker status](/assets/dcos_broker_status.png) + +![DC/OS broker running](/assets/dcos_broker_run.png) + +You can also click into each broker instance to get more detailed information, such as the broker running log. + +![DC/OS broker log](/assets/dcos_broker_log.png) + +Broker cluster information in Zookeeper is also available through the web UI. In this example, you can see that the `loadbalance` and `managed-ledgers` directories have been created. 
+ +![DC/OS broker in zk](/assets/dcos_broker_in_zookeeper.png) + +## Monitor Group + +The **monitor** group consists of Prometheus and Grafana. + +![DC/OS monitor status](/assets/dcos_monitor_status.png) + +### Prometheus + +Click into the instance of `prom` to get the endpoint of Prometheus, which is `192.168.65.121:9090` in this example. + +![DC/OS prom endpoint](/assets/dcos_prom_endpoint.png) + +If you click that endpoint, you can see the Prometheus dashboard. The [http://192.168.65.121:9090/targets](http://192.168.65.121:9090/targets) URL displays all the bookies and brokers. + +![DC/OS prom targets](/assets/dcos_prom_targets.png) + +### Grafana + +Click into `grafana` to get the endpoint for Grafana, which is `192.168.65.121:3000` in this example. + +![DC/OS grafana endpoint](/assets/dcos_grafana_endpoint.png) + +If you click that endpoint, you can access the Grafana dashboard. + +![DC/OS grafana targets](/assets/dcos_grafana_dashboard.png) + +## Run a simple Pulsar consumer and producer on DC/OS + +Now that you have a fully deployed Pulsar cluster, you can run a simple consumer and producer to show Pulsar on DC/OS in action. + +### Download and prepare the Pulsar Java tutorial + +You can clone a [Pulsar Java tutorial](https://github.com/streamlio/pulsar-java-tutorial) repo. This repo contains a simple Pulsar consumer and producer (you can find more information in the `README` file of the repo). + +```bash + +$ git clone https://github.com/streamlio/pulsar-java-tutorial + +``` + +Change the `SERVICE_URL` from `pulsar://localhost:6650` to `pulsar://a1.dcos:6650` in both [`ConsumerTutorial.java`](https://github.com/streamlio/pulsar-java-tutorial/blob/master/src/main/java/tutorial/ConsumerTutorial.java) and [`ProducerTutorial.java`](https://github.com/streamlio/pulsar-java-tutorial/blob/master/src/main/java/tutorial/ProducerTutorial.java). +The `pulsar://a1.dcos:6650` endpoint is for the broker service. 
You can fetch the endpoint details for each broker instance from the DC/OS GUI. `a1.dcos` is a DC/OS client agent, which runs a broker. The client agent IP address can also replace this. + +Now, change the message number from 10 to 10000000 in main method of [`ProducerTutorial.java`](https://github.com/streamlio/pulsar-java-tutorial/blob/master/src/main/java/tutorial/ProducerTutorial.java) so that it can produce more messages. + +Now compile the project code using the command below: + +```bash + +$ mvn clean package + +``` + +### Run the consumer and producer + +Execute this command to run the consumer: + +```bash + +$ mvn exec:java -Dexec.mainClass="tutorial.ConsumerTutorial" + +``` + +Execute this command to run the producer: + +```bash + +$ mvn exec:java -Dexec.mainClass="tutorial.ProducerTutorial" + +``` + +You can see the producer producing messages and the consumer consuming messages through the DC/OS GUI. + +![DC/OS pulsar producer](/assets/dcos_producer.png) + +![DC/OS pulsar consumer](/assets/dcos_consumer.png) + +### View Grafana metric output + +While the producer and consumer run, you can access running metrics information from Grafana. + +![DC/OS pulsar dashboard](/assets/dcos_metrics.png) + + +## Uninstall Pulsar + +You can shut down and uninstall the `pulsar` application from DC/OS at any time in the following two ways: + +1. Using the DC/OS GUI, you can choose **Delete** at the right end of Pulsar group. + + ![DC/OS pulsar uninstall](/assets/dcos_uninstall.png) + +2. 
You can use the following command: + + ```bash + + $ dcos marathon group remove /pulsar + + ``` + diff --git a/site2/website-next/versioned_docs/version-2.7.1/deploy-docker.md b/site2/website-next/versioned_docs/version-2.7.1/deploy-docker.md new file mode 100644 index 0000000000000..f76318f67daaf --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/deploy-docker.md @@ -0,0 +1,64 @@ +--- +id: deploy-docker +title: Deploy a cluster on Docker +sidebar_label: "Docker" +original_id: deploy-docker +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +To deploy a Pulsar cluster on Docker, complete the following steps: +1. Deploy a ZooKeeper cluster (optional) +2. Initialize cluster metadata +3. Deploy a BookKeeper cluster +4. Deploy one or more Pulsar brokers + +## Prepare + +To run Pulsar on Docker, you need to create a container for each Pulsar component: ZooKeeper, BookKeeper and broker. You can pull the images of ZooKeeper and BookKeeper separately on [Docker Hub](https://hub.docker.com/), and pull a [Pulsar image](https://hub.docker.com/r/apachepulsar/pulsar-all/tags) for the broker. You can also pull only one [Pulsar image](https://hub.docker.com/r/apachepulsar/pulsar-all/tags) and create three containers with this image. This tutorial takes the second option as an example. + +### Pull a Pulsar image +You can pull a Pulsar image from [Docker Hub](https://hub.docker.com/r/apachepulsar/pulsar-all/tags) with the following command. + +``` + +docker pull apachepulsar/pulsar-all:latest + +``` + +### Create three containers +Create containers for ZooKeeper, BookKeeper and broker. In this example, they are named as `zookeeper`, `bookkeeper` and `broker` respectively. You can name them as you want with the `--name` flag. By default, the container names are created randomly. 
+ +``` + +docker run -it --name bookkeeper apachepulsar/pulsar-all:latest /bin/bash +docker run -it --name zookeeper apachepulsar/pulsar-all:latest /bin/bash +docker run -it --name broker apachepulsar/pulsar-all:latest /bin/bash + +``` + +### Create a network +To deploy a Pulsar cluster on Docker, you need to create a `network` and connect the containers of ZooKeeper, BookKeeper and broker to this network. The following command creates the network `pulsar`: + +``` + +docker network create pulsar + +``` + +### Connect containers to network +Connect the containers of ZooKeeper, BookKeeper and broker to the `pulsar` network with the following commands. + +``` + +docker network connect pulsar zookeeper +docker network connect pulsar bookkeeper +docker network connect pulsar broker + +``` + +To check whether the containers are successfully connected to the network, enter the `docker network inspect pulsar` command. + +For detailed information about how to deploy ZooKeeper cluster, BookKeeper cluster, brokers, see [deploy a cluster on bare metal](deploy-bare-metal). diff --git a/site2/website-next/versioned_docs/version-2.7.1/deploy-kubernetes.md b/site2/website-next/versioned_docs/version-2.7.1/deploy-kubernetes.md new file mode 100644 index 0000000000000..f8f450042c4b9 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/deploy-kubernetes.md @@ -0,0 +1,15 @@ +--- +id: deploy-kubernetes +title: Deploy Pulsar on Kubernetes +sidebar_label: "Kubernetes" +original_id: deploy-kubernetes +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +To get up and running with these charts as fast as possible, in a **non-production** use case, we provide +a [quick start guide](getting-started-helm) for Proof of Concept (PoC) deployments. + +To configure and install a Pulsar cluster on Kubernetes for production usage, follow the complete [Installation Guide](helm-install). 
\ No newline at end of file diff --git a/site2/website-next/versioned_docs/version-2.7.1/deploy-monitoring.md b/site2/website-next/versioned_docs/version-2.7.1/deploy-monitoring.md new file mode 100644 index 0000000000000..6923caa805da8 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/deploy-monitoring.md @@ -0,0 +1,125 @@ +--- +id: deploy-monitoring +title: Monitor +sidebar_label: "Monitor" +original_id: deploy-monitoring +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +You can use different ways to monitor a Pulsar cluster, exposing both metrics related to the usage of topics and the overall health of the individual components of the cluster. + +## Collect metrics + +You can collect broker stats, ZooKeeper stats, and BookKeeper stats. + +### Broker stats + +You can collect Pulsar broker metrics from brokers and export the metrics in JSON format. The Pulsar broker metrics mainly have two types: + +* *Destination dumps*, which contain stats for each individual topic. You can fetch the destination dumps using the command below: + + ```shell + + bin/pulsar-admin broker-stats destinations + + ``` + +* Broker metrics, which contain the broker information and topics stats aggregated at namespace level. You can fetch the broker metrics by using the following command: + + ```shell + + bin/pulsar-admin broker-stats monitoring-metrics + + ``` + +All the message rates are updated every minute. + +The aggregated broker metrics are also exposed in the [Prometheus](https://prometheus.io) format at: + +```shell + +http://$BROKER_ADDRESS:8080/metrics + +``` + +### ZooKeeper stats + +The local ZooKeeper, configuration store server and clients that are shipped with Pulsar can expose detailed stats through Prometheus. + +```shell + +http://$LOCAL_ZK_SERVER:8000/metrics +http://$GLOBAL_ZK_SERVER:8001/metrics + +``` + +The default port of local ZooKeeper is `8000` and the default port of configuration store is `8001`. 
You can change the default port of local ZooKeeper and configuration store by specifying system property `stats_server_port`. + +### BookKeeper stats + +You can configure the stats frameworks for BookKeeper by modifying the `statsProviderClass` in the `conf/bookkeeper.conf` file. + +The default BookKeeper configuration enables the Prometheus exporter. The configuration is included with Pulsar distribution. + +```shell + +http://$BOOKIE_ADDRESS:8000/metrics + +``` + +The default port for bookie is `8000`. You can change the port by configuring `prometheusStatsHttpPort` in the `conf/bookkeeper.conf` file. + +### Managed cursor acknowledgment state +The acknowledgment state is persisted to the ledger first. When the acknowledgment state fails to be persisted to the ledger, it is persisted to ZooKeeper. To track the stats of acknowledgment, you can configure the metrics for the managed cursor. + +``` + +brk_ml_cursor_persistLedgerSucceed(namespace="", ledger_name="", cursor_name:"") +brk_ml_cursor_persistLedgerErrors(namespace="", ledger_name="", cursor_name:"") +brk_ml_cursor_persistZookeeperSucceed(namespace="", ledger_name="", cursor_name:"") +brk_ml_cursor_persistZookeeperErrors(namespace="", ledger_name="", cursor_name:"") + +``` + +Those metrics are added in the Prometheus interface, so you can monitor and check the metrics stats in Grafana. + +## Configure Prometheus + +You can use Prometheus to collect all the metrics exposed for Pulsar components and set up [Grafana](https://grafana.com/) dashboards to display the metrics and monitor your Pulsar cluster. For details, refer to [Prometheus guide](https://prometheus.io/docs/introduction/getting_started/). + +When you run Pulsar on bare metal, you can provide the list of nodes to be probed. When you deploy Pulsar in a Kubernetes cluster, the monitoring is set up automatically. For details, refer to [Kubernetes instructions](helm-deploy). 
+ +## Dashboards + +When you collect time series statistics, the major problem is to make sure the number of dimensions attached to the data does not explode. Thus you only need to collect time series of metrics aggregated at the namespace level. + +### Pulsar per-topic dashboard + +The per-topic dashboard instructions are available at [Pulsar manager](administration-pulsar-manager). + +### Grafana + +You can use Grafana to create dashboards driven by the data that is stored in Prometheus. + +When you deploy Pulsar on Kubernetes, a `pulsar-grafana` Docker image is enabled by default. You can use the Docker image with the principal dashboards. + +Enter the command below to use the dashboard manually: + +```shell + +docker run -p3000:3000 \ + -e PROMETHEUS_URL=http://$PROMETHEUS_HOST:9090/ \ + apachepulsar/pulsar-grafana:latest + +``` + +The following are some Grafana dashboard examples: + +- [pulsar-grafana](http://pulsar.apache.org/docs/en/deploy-monitoring/#grafana): a Grafana dashboard that displays metrics collected in Prometheus for Pulsar clusters running on Kubernetes. +- [apache-pulsar-grafana-dashboard](https://github.com/streamnative/apache-pulsar-grafana-dashboard): a collection of Grafana dashboard templates for different Pulsar components running on both Kubernetes and on-premises machines. + + ## Alerting rules + You can set alerting rules according to your Pulsar environment. To configure alerting rules for Apache Pulsar, refer to [alerting rules](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/). 
diff --git a/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json b/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json index 2900041919a67..5244cdccc2e66 100644 --- a/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json +++ b/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json @@ -261,6 +261,40 @@ "id": "version-2.7.1/helm-tools" } ] + }, + { + "type": "category", + "label": "Deployment", + "items": [ + { + "type": "doc", + "id": "version-2.7.1/deploy-aws" + }, + { + "type": "doc", + "id": "version-2.7.1/deploy-kubernetes" + }, + { + "type": "doc", + "id": "version-2.7.1/deploy-bare-metal" + }, + { + "type": "doc", + "id": "version-2.7.1/deploy-bare-metal-multi-cluster" + }, + { + "type": "doc", + "id": "version-2.7.1/deploy-dcos" + }, + { + "type": "doc", + "id": "version-2.7.1/deploy-docker" + }, + { + "type": "doc", + "id": "version-2.7.1/deploy-monitoring" + } + ] } ] } \ No newline at end of file From 21d8bbb1fe8c9f645c6aabb14a4c0ac42a3da378 Mon Sep 17 00:00:00 2001 From: LiLi Date: Thu, 4 Nov 2021 09:09:23 +0800 Subject: [PATCH 2/3] [website][upgrade]feat: docs migration - 2.7.1 / administration Signed-off-by: LiLi --- .../version-2.7.1/administration-geo.md | 218 ++++++++++ .../version-2.7.1/administration-isolation.md | 129 ++++++ .../administration-load-balance.md | 204 ++++++++++ .../version-2.7.1/administration-proxy.md | 90 +++++ .../administration-pulsar-manager.md | 209 ++++++++++ .../version-2.7.1/administration-stats.md | 68 ++++ .../version-2.7.1/administration-upgrade.md | 172 ++++++++ .../version-2.7.1/administration-zk-bk.md | 381 ++++++++++++++++++ .../version-2.7.1-sidebars.json | 38 ++ 9 files changed, 1509 insertions(+) create mode 100644 site2/website-next/versioned_docs/version-2.7.1/administration-geo.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/administration-isolation.md create mode 100644 
site2/website-next/versioned_docs/version-2.7.1/administration-load-balance.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/administration-proxy.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/administration-pulsar-manager.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/administration-stats.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/administration-upgrade.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/administration-zk-bk.md diff --git a/site2/website-next/versioned_docs/version-2.7.1/administration-geo.md b/site2/website-next/versioned_docs/version-2.7.1/administration-geo.md new file mode 100644 index 0000000000000..37b52e27d1532 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/administration-geo.md @@ -0,0 +1,218 @@ +--- +id: administration-geo +title: Pulsar geo-replication +sidebar_label: "Geo-replication" +original_id: administration-geo +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +*Geo-replication* is the replication of persistently stored message data across multiple clusters of a Pulsar instance. + +## How geo-replication works + +The diagram below illustrates the process of geo-replication across Pulsar clusters: + +![Replication Diagram](/assets/geo-replication.png) + +In this diagram, whenever **P1**, **P2**, and **P3** producers publish messages to the **T1** topic on **Cluster-A**, **Cluster-B**, and **Cluster-C** clusters respectively, those messages are instantly replicated across clusters. Once the messages are replicated, **C1** and **C2** consumers can consume those messages from their respective clusters. + +Without geo-replication, **C1** and **C2** consumers are not able to consume messages that **P3** producer publishes. + +## Geo-replication and Pulsar properties + +You must enable geo-replication on a per-tenant basis in Pulsar. 
You can enable geo-replication between clusters only when a tenant is created that allows access to both clusters. + +Although geo-replication must be enabled between two clusters, actually geo-replication is managed at the namespace level. You must complete the following tasks to enable geo-replication for a namespace: + +* [Enable geo-replication namespaces](#enable-geo-replication-namespaces) +* Configure that namespace to replicate across two or more provisioned clusters + +Any message published on *any* topic in that namespace is replicated to all clusters in the specified set. + +## Local persistence and forwarding + +When messages are produced on a Pulsar topic, messages are first persisted in the local cluster, and then forwarded asynchronously to the remote clusters. + +In normal cases, when there are no connectivity issues, messages are replicated immediately, at the same time as they are dispatched to local consumers. Typically, the network [round-trip time](https://en.wikipedia.org/wiki/Round-trip_delay_time) (RTT) between the remote regions defines end-to-end delivery latency. + +Applications can create producers and consumers in any of the clusters, even when the remote clusters are not reachable (like during a network partition). + +Producers and consumers can publish messages to and consume messages from any cluster in a Pulsar instance. However, subscriptions are not only local to the cluster where they are created but can also be transferred between clusters after replicated subscription is enabled. Once replicated subscription is enabled, you can keep subscription state in synchronization. Therefore, a topic can be asynchronously replicated across multiple geographical regions. In case of failover, a consumer can restart consuming messages from the failure point in a different cluster. + +In the aforementioned example, the **T1** topic is replicated among three clusters, **Cluster-A**, **Cluster-B**, and **Cluster-C**. 
+ +All messages produced in any of the three clusters are delivered to all subscriptions in other clusters. In this case, **C1** and **C2** consumers receive all messages that **P1**, **P2**, and **P3** producers publish. Ordering is still guaranteed on a per-producer basis. + +## Configure replication + +As stated in [Geo-replication and Pulsar properties](#geo-replication-and-pulsar-properties) section, geo-replication in Pulsar is managed at the [tenant](reference-terminology.md#tenant) level. + +The following example connects three clusters: **us-east**, **us-west**, and **us-cent**. + +### Connect replication clusters + +To replicate data among clusters, you need to configure each cluster to connect to the other. You can use the [`pulsar-admin`](https://pulsar.apache.org/tools/pulsar-admin/) tool to create a connection. + +**Example** + +Suppose that you have 3 replication clusters: `us-west`, `us-cent`, and `us-east`. + +1. Configure the connection from `us-west` to `us-east`. + + Run the following command on `us-west`. + +```shell + +$ bin/pulsar-admin clusters create \ + --broker-url pulsar://: \ + --url http://: \ + us-east + +``` + +:::tip + +- If you want to use a secure connection for a cluster, you can use the flags `--broker-url-secure` and `--url-secure`. For more information, see [pulsar-admin clusters create](https://pulsar.apache.org/tools/pulsar-admin/). +- Different clusters may have different authentications. You can use the authentication flag `--auth-plugin` and `--auth-parameters` together to set cluster authentication, which overrides `brokerClientAuthenticationPlugin` and `brokerClientAuthenticationParameters` if `authenticationEnabled` sets to `true` in `broker.conf` and `standalone.conf`. For more information, see [authentication and authorization](concepts-authentication). + +::: + +2. Configure the connection from `us-west` to `us-cent`. + + Run the following command on `us-west`. 
+ +```shell + +$ bin/pulsar-admin clusters create \ + --broker-url pulsar://: \ + --url http://: \ + us-cent + +``` + +3. Run similar commands on `us-east` and `us-cent` to create connections among clusters. + +### Grant permissions to properties + +To replicate to a cluster, the tenant needs permission to use that cluster. You can grant permission to the tenant when you create the tenant or grant it later. + +Specify all the intended clusters when you create a tenant: + +```shell + +$ bin/pulsar-admin tenants create my-tenant \ + --admin-roles my-admin-role \ + --allowed-clusters us-west,us-east,us-cent + +``` + +To update permissions of an existing tenant, use `update` instead of `create`. + +### Enable geo-replication namespaces + +You can create a namespace with the following command sample. + +```shell + +$ bin/pulsar-admin namespaces create my-tenant/my-namespace + +``` + +Initially, the namespace is not assigned to any cluster. You can assign the namespace to clusters using the `set-clusters` subcommand: + +```shell + +$ bin/pulsar-admin namespaces set-clusters my-tenant/my-namespace \ + --clusters us-west,us-east,us-cent + +``` + +You can change the replication clusters for a namespace at any time, without disruption to ongoing traffic. Replication channels are immediately set up or stopped in all clusters as soon as the configuration changes. + +### Use topics with geo-replication + +Once you create a geo-replication namespace, any topics that producers or consumers create within that namespace are replicated across clusters. Typically, each application uses the `serviceUrl` for the local cluster. + +#### Selective replication + +By default, messages are replicated to all clusters configured for the namespace. You can restrict replication selectively by specifying a replication list for a message, and then that message is replicated only to the subset in the replication list. + +The following is an example for the [Java API](client-libraries-java). 
Note the use of the `setReplicationClusters` method when you construct the {@inject: javadoc:Message:/client/org/apache/pulsar/client/api/Message} object: + +```java + +List restrictReplicationTo = Arrays.asList( + "us-west", + "us-east" +); + +Producer producer = client.newProducer() + .topic("some-topic") + .create(); + +producer.newMessage() + .value("my-payload".getBytes()) + .setReplicationClusters(restrictReplicationTo) + .send(); + +``` + +#### Topic stats + +Topic-specific statistics for geo-replication topics are available via the [`pulsar-admin`](reference-pulsar-admin) tool and {@inject: rest:REST:/} API: + +```shell + +$ bin/pulsar-admin persistent stats persistent://my-tenant/my-namespace/my-topic + +``` + +Each cluster reports its own local stats, including the incoming and outgoing replication rates and backlogs. + +#### Delete a geo-replication topic + +Given that geo-replication topics exist in multiple regions, directly deleting a geo-replication topic is not possible. Instead, you should rely on automatic topic garbage collection. + +In Pulsar, a topic is automatically deleted when the topic meets the following three conditions: +- no producers or consumers are connected to it; +- no subscriptions to it; +- no more messages are kept for retention. +For geo-replication topics, each region uses a fault-tolerant mechanism to decide when deleting the topic locally is safe. + +You can explicitly disable topic garbage collection by setting `brokerDeleteInactiveTopicsEnabled` to `false` in your [broker configuration](reference-configuration.md#broker). + +To delete a geo-replication topic, close all producers and consumers on the topic, and delete all of its local subscriptions in every replication cluster. When Pulsar determines that no valid subscription for the topic remains across the system, it will garbage collect the topic. 
+ +## Replicated subscriptions + +Pulsar supports replicated subscriptions, so you can keep subscription state in sync, within a sub-second timeframe, in the context of a topic that is being asynchronously replicated across multiple geographical regions. + +In case of failover, a consumer can restart consuming from the failure point in a different cluster. + +### Enable replicated subscription + +Replicated subscription is disabled by default. You can enable replicated subscription when creating a consumer. + +```java + +Consumer consumer = client.newConsumer(Schema.STRING) + .topic("my-topic") + .subscriptionName("my-subscription") + .replicateSubscriptionState(true) + .subscribe(); + +``` + +### Advantages + + * It is easy to implement the logic. + * You can choose to enable or disable replicated subscription. + * When you enable it, the overhead is low, and it is easy to configure. + * When you disable it, the overhead is zero. + +### Limitations + +When you enable replicated subscription, you're creating a consistent distributed snapshot to establish an association between message ids from different clusters. The snapshots are taken periodically. The default value is `1 second`. It means that a consumer failing over to a different cluster can potentially receive 1 second of duplicates. You can also configure the frequency of the snapshot in the `broker.conf` file. 
diff --git a/site2/website-next/versioned_docs/version-2.7.1/administration-isolation.md b/site2/website-next/versioned_docs/version-2.7.1/administration-isolation.md new file mode 100644 index 0000000000000..f57844b1ee22a --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/administration-isolation.md @@ -0,0 +1,129 @@ +--- +id: administration-isolation +title: Pulsar isolation +sidebar_label: "Pulsar isolation" +original_id: administration-isolation +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +In an organization, a Pulsar instance provides services to multiple teams. When organizing the resources across multiple teams, you want to make a suitable isolation plan to avoid the resource competition between different teams and applications and provide high-quality messaging service. In this case, you need to take resource isolation into consideration and weigh your intended actions against expected and unexpected consequences. + +To enforce resource isolation, you can use the Pulsar isolation policy, which allows you to allocate resources (**broker** and **bookie**) for the namespace. + +## Broker isolation + +In Pulsar, when namespaces (more specifically, namespace bundles) are assigned dynamically to brokers, the namespace isolation policy limits the set of brokers that can be used for assignment. Before topics are assigned to brokers, you can set the namespace isolation policy with a primary or a secondary regex to select desired brokers. + +You can set a namespace isolation policy for a cluster using one of the following methods. + + + + + +``` + +pulsar-admin ns-isolation-policy set options + +``` + +For more information about the command `pulsar-admin ns-isolation-policy set options`, see [here](https://pulsar.apache.org/tools/pulsar-admin/). 
+ +**Example** + +```shell + +bin/pulsar-admin ns-isolation-policy set \ +--auto-failover-policy-type min_available \ +--auto-failover-policy-params min_limit=1,usage_threshold=80 \ +--namespaces my-tenant/my-namespace \ +--primary 10.193.216.* my-cluster policy-name + +``` + + + + +[PUT /admin/v2/namespaces/{tenant}/{namespace}](https://pulsar.apache.org/admin-rest-api/?version=2.7.0&apiversion=v2#operation/createNamespace) + + + + +For how to set namespace isolation policy using Java admin API, see [here](https://github.com/apache/pulsar/blob/master/pulsar-client-admin/src/main/java/org/apache/pulsar/client/admin/internal/NamespacesImpl.java#L251). + + + + + +## Bookie isolation + +A namespace can be isolated into user-defined groups of bookies, which guarantees all the data that belongs to the namespace is stored in desired bookies. The bookie affinity group uses the BookKeeper [rack-aware placement policy](https://bookkeeper.apache.org/docs/latest/api/javadoc/org/apache/bookkeeper/client/EnsemblePlacementPolicy.html) and it is a way to feed rack information which is stored as JSON format in znode. + +You can set a bookie affinity group using one of the following methods. + + + + + +``` + +pulsar-admin namespaces set-bookie-affinity-group options + +``` + +For more information about the command `pulsar-admin namespaces set-bookie-affinity-group options`, see [here](https://pulsar.apache.org/tools/pulsar-admin/). 
+ +**Example** + +```shell + +bin/pulsar-admin namespaces set-bookie-affinity-group public/default \ +--primary-group group-bookie1 + +``` + + + + +[POST /admin/v2/namespaces/{tenant}/{namespace}/persistence/bookieAffinity](https://pulsar.apache.org/admin-rest-api/?version=2.7.0&apiversion=v2#operation/setBookieAffinityGroup) + + + + +For how to set bookie affinity group for a namespace using Java admin API, see [here](https://github.com/apache/pulsar/blob/master/pulsar-client-admin/src/main/java/org/apache/pulsar/client/admin/internal/NamespacesImpl.java#L1164). + + + + diff --git a/site2/website-next/versioned_docs/version-2.7.1/administration-load-balance.md b/site2/website-next/versioned_docs/version-2.7.1/administration-load-balance.md new file mode 100644 index 0000000000000..a14142bcac7de --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/administration-load-balance.md @@ -0,0 +1,204 @@ +--- +id: administration-load-balance +title: Pulsar load balance +sidebar_label: "Load balance" +original_id: administration-load-balance +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +## Load balance across Pulsar brokers + +Pulsar is an horizontally scalable messaging system, so the traffic +in a logical cluster must be spread across all the available Pulsar brokers as evenly as possible, which is a core requirement. + +You can use multiple settings and tools to control the traffic distribution which require a bit of context to understand how the traffic is managed in Pulsar. Though, in most cases, the core requirement mentioned above is true out of the box and you should not worry about it. + +## Pulsar load manager architecture + +The following part introduces the basic architecture of the Pulsar load manager. + +### Assign topics to brokers dynamically + +Topics are dynamically assigned to brokers based on the load conditions of all brokers in the cluster. 
+ +When a client starts using new topics that are not assigned to any broker, a process is triggered to choose the best suited broker to acquire ownership of these topics according to the load conditions. + +In case of partitioned topics, different partitions are assigned to different brokers. Here "topic" means either a non-partitioned topic or one partition of a topic. + +The assignment is "dynamic" because the assignment changes quickly. For example, if the broker owning the topic crashes, the topic is reassigned immediately to another broker. Another scenario is that the broker owning the topic becomes overloaded. In this case, the topic is reassigned to a less loaded broker. + +The stateless nature of brokers makes the dynamic assignment possible, so you can quickly expand or shrink the cluster based on usage. + +#### Assignment granularity + +The assignment of topics or partitions to brokers is not done at the topics or partitions level, but done at the Bundle level (a higher level). The reason is to amortize the amount of information that you need to keep track of. Based on CPU, memory, traffic load and other indexes, topics are assigned to a particular broker dynamically. + +Instead of individual topic or partition assignment, each broker takes ownership of a subset of the topics for a namespace. This subset is called a "*bundle*" and effectively this subset is a sharding mechanism. + +The namespace is the "administrative" unit: many config knobs or operations are done at the namespace level. + +For assignment, a namespace is sharded into a list of "bundles", with each bundle comprising +a portion of the overall hash range of the namespace. + +Topics are assigned to a particular bundle by taking the hash of the topic name and checking which +bundle the hash falls into. + +Each bundle is independent of the others and thus is independently assigned to different brokers. 
+ +### Create namespaces and bundles + +When you create a new namespace, the new namespace is set to use the default number of bundles. You can set this in `conf/broker.conf`: + +```properties + +# When a namespace is created without specifying the number of bundle, this +# value will be used as the default +defaultNumberOfNamespaceBundles=4 + +``` + +You can either change the system default, or override it when you create a new namespace: + +```shell + +$ bin/pulsar-admin namespaces create my-tenant/my-namespace --clusters us-west --bundles 16 + +``` + +With this command, you create a namespace with 16 initial bundles. Therefore the topics for this namespace can immediately be spread across up to 16 brokers. + +In general, if you know the expected traffic and number of topics in advance, it is better to start with a reasonable number of bundles instead of waiting for the system to auto-correct the distribution. + +On the same note, it is beneficial to start with more bundles than the number of brokers, because of the hashing nature of the distribution of topics into bundles. For example, for a namespace with 1000 topics, using something like 64 bundles achieves a good distribution of traffic across 16 brokers. + +### Unload topics and bundles + +You can "unload" a topic in Pulsar with an admin operation. Unloading means to close the topics, +release ownership and reassign the topics to a new broker, based on current load. + +When unloading happens, the client experiences a small latency blip, typically in the order of tens of milliseconds, while the topic is reassigned. + +Unloading is the mechanism that the load-manager uses to perform the load shedding, but you can also trigger the unloading manually, for example to correct the assignments and redistribute traffic even before having any broker overloaded. 
+ +Unloading a topic has no effect on the assignment, but just closes and reopens the particular topic: + +```shell + +pulsar-admin topics unload persistent://tenant/namespace/topic + +``` + +To unload all topics for a namespace and trigger reassignments: + +```shell + +pulsar-admin namespaces unload tenant/namespace + +``` + +### Split namespace bundles + +Since the load for the topics in a bundle might change over time, or predicting upfront might just be hard, brokers can split bundles into two. The new smaller bundles can be reassigned to different brokers. + +The splitting happens based on some tunable thresholds. Any existing bundle that exceeds any of the threshold is a candidate to be split. By default the newly split bundles are also immediately offloaded to other brokers, to facilitate the traffic distribution. + +```properties + +# enable/disable namespace bundle auto split +loadBalancerAutoBundleSplitEnabled=true + +# enable/disable automatic unloading of split bundles +loadBalancerAutoUnloadSplitBundlesEnabled=true + +# maximum topics in a bundle, otherwise bundle split will be triggered +loadBalancerNamespaceBundleMaxTopics=1000 + +# maximum sessions (producers + consumers) in a bundle, otherwise bundle split will be triggered +loadBalancerNamespaceBundleMaxSessions=1000 + +# maximum msgRate (in + out) in a bundle, otherwise bundle split will be triggered +loadBalancerNamespaceBundleMaxMsgRate=30000 + +# maximum bandwidth (in + out) in a bundle, otherwise bundle split will be triggered +loadBalancerNamespaceBundleMaxBandwidthMbytes=100 + +# maximum number of bundles in a namespace (for auto-split) +loadBalancerNamespaceMaximumBundles=128 + +``` + +### Shed load automatically + +The support for automatic load shedding is available in the load manager of Pulsar. This means that whenever the system recognizes a particular broker is overloaded, the system forces some traffic to be reassigned to less loaded brokers. 
+ +When a broker is identified as overloaded, the broker is forced to "unload" a subset of the bundles, the +ones with higher traffic, that make up for the overload percentage. + +For example, the default threshold is 85% and if a broker is over quota at 95% CPU usage, then the broker unloads the percent difference plus a 5% margin: `(95% - 85%) + 5% = 15%`. + +Given the selection of bundles to offload is based on traffic (as a proxy measure for cpu, network +and memory), the broker unloads bundles for at least 15% of traffic. + +The automatic load shedding is enabled by default and you can disable the automatic load shedding with this setting: + +```properties + +# Enable/disable automatic bundle unloading for load-shedding +loadBalancerSheddingEnabled=true + +``` + +Additional settings that apply to shedding: + +```properties + +# Load shedding interval. Broker periodically checks whether some traffic should be offload from +# some over-loaded broker to other under-loaded brokers +loadBalancerSheddingIntervalMinutes=1 + +# Prevent the same topics to be shed and moved to other brokers more that once within this timeframe +loadBalancerSheddingGracePeriodMinutes=30 + +``` + +#### Broker overload thresholds + +The determination of when a broker is overloaded is based on thresholds of CPU, network and memory usage. Whenever any of those metrics reaches the threshold, the system triggers the shedding (if enabled). + +By default, overload threshold is set at 85%: + +```properties + +# Usage threshold to determine a broker as over-loaded +loadBalancerBrokerOverloadedThresholdPercentage=85 + +``` + +Pulsar gathers the usage stats from the system metrics. + +In case of network utilization, in some cases the network interface speed that Linux reports is +not correct and needs to be manually overridden. This is the case in AWS EC2 instances with 1Gbps +NIC speed for which the OS reports 10Gbps speed. 
+ +Because of the incorrect max speed, the Pulsar load manager might think the broker has not reached the NIC capacity, while in fact the broker already uses all the bandwidth and the traffic is slowed down. + +You can use the following setting to correct the max NIC speed: + +```properties + +# Override the auto-detection of the network interfaces max speed. +# This option is useful in some environments (eg: EC2 VMs) where the max speed +# reported by Linux is not reflecting the real bandwidth available to the broker. +# Since the network usage is employed by the load manager to decide when a broker +# is overloaded, it is important to make sure the info is correct or override it +# with the right value here. The configured value can be a double (eg: 0.8) and that +# can be used to trigger load-shedding even before hitting on NIC limits. +loadBalancerOverrideBrokerNicSpeedGbps= + +``` + +When the value is empty, Pulsar uses the value that the OS reports. + diff --git a/site2/website-next/versioned_docs/version-2.7.1/administration-proxy.md b/site2/website-next/versioned_docs/version-2.7.1/administration-proxy.md new file mode 100644 index 0000000000000..821aa4d905a02 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/administration-proxy.md @@ -0,0 +1,90 @@ +--- +id: administration-proxy +title: Pulsar proxy +sidebar_label: "Pulsar proxy" +original_id: administration-proxy +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +Pulsar proxy is an optional gateway. Pulsar proxy is used when direct connections between clients and Pulsar brokers are either infeasible or undesirable. For example, when you run Pulsar in a cloud environment or on [Kubernetes](https://kubernetes.io) or an analogous platform, you can run Pulsar proxy. + +## Configure the proxy + +Before using the proxy, you need to configure it with the broker addresses in the cluster. 
You can configure the proxy to connect directly to service discovery, or specify a broker URL in the configuration. + +### Use service discovery + +Pulsar uses [ZooKeeper](https://zookeeper.apache.org) for service discovery. To connect the proxy to ZooKeeper, specify the following in `conf/proxy.conf`. + +```properties + +zookeeperServers=zk-0,zk-1,zk-2 +configurationStoreServers=zk-0:2184,zk-remote:2184 + +``` + +> To use service discovery, you need to open the network ACLs, so the proxy can connect to the ZooKeeper nodes through the ZooKeeper client port (port `2181`) and the configuration store client port (port `2184`). + +> However, using service discovery is not secure: if the network ACL is open and someone compromises a proxy, they have full access to ZooKeeper. + +### Use broker URLs + +It is more secure to specify a URL to connect to the brokers. + +Proxy authorization requires access to ZooKeeper, so if you use these broker URLs to connect to the brokers, you need to disable authorization at the Proxy level. Brokers still authorize requests after the proxy forwards them. + +You can configure the broker URLs in `conf/proxy.conf` as follows. + +```properties + +brokerServiceURL=pulsar://brokers.example.com:6650 +brokerWebServiceURL=http://brokers.example.com:8080 +functionWorkerWebServiceURL=http://function-workers.example.com:8080 + +``` + +If you use TLS, configure the broker URLs in the following way: + +```properties + +brokerServiceURLTLS=pulsar+ssl://brokers.example.com:6651 +brokerWebServiceURLTLS=https://brokers.example.com:8443 +functionWorkerWebServiceURL=https://function-workers.example.com:8443 + +``` + +The hostname in the URLs provided should be a DNS entry which points to multiple brokers or a virtual IP address, which is backed by multiple broker IP addresses, so that the proxy does not lose connectivity to the Pulsar cluster if a single broker becomes unavailable. 
+ +The ports to connect to the brokers (6650 and 8080, or in the case of TLS, 6651 and 8443) should be open in the network ACLs. + +Note that if you do not use functions, you do not need to configure `functionWorkerWebServiceURL`. + +## Start the proxy + +To start the proxy: + +```bash + +$ cd /path/to/pulsar/directory +$ bin/pulsar proxy + +``` + +> You can run multiple instances of the Pulsar proxy in a cluster. + +## Stop the proxy + +Pulsar proxy runs in the foreground by default. To stop the proxy, simply stop the process in which the proxy is running. + +## Proxy frontends + +You can run Pulsar proxy behind some kind of load-distributing frontend, such as an [HAProxy](https://www.digitalocean.com/community/tutorials/an-introduction-to-haproxy-and-load-balancing-concepts) load balancer. + +## Use Pulsar clients with the proxy + +Once your Pulsar proxy is up and running, preferably behind a load-distributing [frontend](#proxy-frontends), clients can connect to the proxy via whichever address that the frontend uses. If the address is the DNS address `pulsar.cluster.default`, for example, the connection URL for clients is `pulsar://pulsar.cluster.default:6650`. + +For more information on Proxy configuration, refer to [Pulsar proxy](reference-configuration.md#pulsar-proxy). 
diff --git a/site2/website-next/versioned_docs/version-2.7.1/administration-pulsar-manager.md b/site2/website-next/versioned_docs/version-2.7.1/administration-pulsar-manager.md new file mode 100644 index 0000000000000..6c8945e9e9783 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/administration-pulsar-manager.md @@ -0,0 +1,209 @@ +--- +id: administration-pulsar-manager +title: Pulsar Manager +sidebar_label: "Pulsar Manager" +original_id: administration-pulsar-manager +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +Pulsar Manager is a web-based GUI management and monitoring tool that helps administrators and users manage and monitor tenants, namespaces, topics, subscriptions, brokers, clusters, and so on, and supports dynamic configuration of multiple environments. + +:::note + +If you monitor your current stats with [Pulsar dashboard](administration-dashboard), you can try to use Pulsar Manager instead. Pulsar dashboard is deprecated. + +::: + +## Install + +The easiest way to use the Pulsar Manager is to run it inside a [Docker](https://www.docker.com/products/docker) container. + +```shell + +docker pull apachepulsar/pulsar-manager:v0.2.0 +docker run -it \ + -p 9527:9527 -p 7750:7750 \ + -e SPRING_CONFIGURATION_FILE=/pulsar-manager/pulsar-manager/application.properties \ + apachepulsar/pulsar-manager:v0.2.0 + +``` + +* `SPRING_CONFIGURATION_FILE`: Default configuration file for spring. 
+ +### Set administrator account and password + + ```shell + + CSRF_TOKEN=$(curl http://localhost:7750/pulsar-manager/csrf-token) + curl \ + -H "X-XSRF-TOKEN: $CSRF_TOKEN" \ + -H "Cookie: XSRF-TOKEN=$CSRF_TOKEN;" \ + -H "Content-Type: application/json" \ + -X PUT http://localhost:7750/pulsar-manager/users/superuser \ + -d '{"name": "admin", "password": "apachepulsar", "description": "test", "email": "username@test.org"}' + + ``` + +You can find the docker image in the [Docker Hub](https://github.com/apache/pulsar-manager/tree/master/docker) directory and build an image from the source code as well: + +``` + +git clone https://github.com/apache/pulsar-manager +cd pulsar-manager/front-end +npm install --save +npm run build:prod +cd .. +./gradlew build -x test +cd .. +docker build -f docker/Dockerfile --build-arg BUILD_DATE=`date -u +"%Y-%m-%dT%H:%M:%SZ"` --build-arg VCS_REF=`latest` --build-arg VERSION=`latest` -t apachepulsar/pulsar-manager . + +``` + +### Use custom databases + +If you have a large amount of data, you can use a custom database. The following is an example of PostgreSQL. + +1. Initialize database and table structures using the [file](https://github.com/apache/pulsar-manager/tree/master/src/main/resources/META-INF/sql/postgresql-schema.sql). + +2. Modify the [configuration file](https://github.com/apache/pulsar-manager/blob/master/src/main/resources/application.properties) and add PostgreSQL configuration. + +``` + +spring.datasource.driver-class-name=org.postgresql.Driver +spring.datasource.url=jdbc:postgresql://127.0.0.1:5432/pulsar_manager +spring.datasource.username=postgres +spring.datasource.password=postgres + +``` + +3. Compile to generate a new executable jar package. + +``` + +./gradlew build -x test + +``` + +### Enable JWT authentication + +If you want to turn on JWT authentication, configure the following parameters: + +* `backend.jwt.token`: token for the superuser. You need to configure this parameter during cluster initialization. 
+* `jwt.broker.token.mode`: multiple modes of generating token, including PUBLIC, PRIVATE, and SECRET. +* `jwt.broker.public.key`: configure this option if you use the PUBLIC mode. +* `jwt.broker.private.key`: configure this option if you use the PRIVATE mode. +* `jwt.broker.secret.key`: configure this option if you use the SECRET mode. + +For more information, see [Token Authentication Admin of Pulsar](http://pulsar.apache.org/docs/en/security-token-admin/). + + +If you want to enable JWT authentication, use one of the following methods. + + +* Method 1: use command-line tool + +``` + +wget https://dist.apache.org/repos/dist/release/pulsar/pulsar-manager/apache-pulsar-manager-0.2.0/apache-pulsar-manager-0.2.0-bin.tar.gz +tar -zxvf apache-pulsar-manager-0.2.0-bin.tar.gz +cd pulsar-manager +tar -zxvf pulsar-manager.tar +cd pulsar-manager +cp -r ../dist ui +./bin/pulsar-manager --redirect.host=http://localhost --redirect.port=9527 insert.stats.interval=600000 --backend.jwt.token=token --jwt.broker.token.mode=PRIVATE --jwt.broker.private.key=file:///path/broker-private.key --jwt.broker.public.key=file:///path/broker-public.key + +``` + +Firstly, [set the administrator account and password](#set-administrator-account-and-password) + +Secondly, log in to Pulsar manager through http://localhost:7750/ui/index.html. + +* Method 2: configure the application.properties file + +``` + +backend.jwt.token=token + +jwt.broker.token.mode=PRIVATE +jwt.broker.public.key=file:///path/broker-public.key +jwt.broker.private.key=file:///path/broker-private.key + +or +jwt.broker.token.mode=SECRET +jwt.broker.secret.key=file:///path/broker-secret.key + +``` + +* Method 3: use Docker and enable token authentication. 
+ +``` + +export JWT_TOKEN="your-token" +docker run -it -p 9527:9527 -p 7750:7750 -e REDIRECT_HOST=http://localhost -e REDIRECT_PORT=9527 -e DRIVER_CLASS_NAME=org.postgresql.Driver -e URL='jdbc:postgresql://127.0.0.1:5432/pulsar_manager' -e USERNAME=pulsar -e PASSWORD=pulsar -e LOG_LEVEL=DEBUG -e JWT_TOKEN=$JWT_TOKEN -v $PWD:/data apachepulsar/pulsar-manager:v0.2.0 /bin/sh + +``` + +* `JWT_TOKEN`: the token of the superuser configured for the broker. It is generated by the `bin/pulsar tokens create --secret-key` or `bin/pulsar tokens create --private-key` command. +* `REDIRECT_HOST`: the IP address of the front-end server. +* `REDIRECT_PORT`: the port of the front-end server. +* `DRIVER_CLASS_NAME`: the driver class name of the PostgreSQL database. +* `URL`: the JDBC URL of your PostgreSQL database, such as jdbc:postgresql://127.0.0.1:5432/pulsar_manager. The docker image automatically starts a local instance of the PostgreSQL database. +* `USERNAME`: the username of PostgreSQL. +* `PASSWORD`: the password of PostgreSQL. +* `LOG_LEVEL`: the level of log. + +* Method 4: use Docker and turn on **token authentication** and **token management** by private key and public key. + +``` + +export JWT_TOKEN="your-token" +export PRIVATE_KEY="file:///pulsar-manager/secret/my-private.key" +export PUBLIC_KEY="file:///pulsar-manager/secret/my-public.key" +docker run -it -p 9527:9527 -p 7750:7750 -e REDIRECT_HOST=http://localhost -e REDIRECT_PORT=9527 -e DRIVER_CLASS_NAME=org.postgresql.Driver -e URL='jdbc:postgresql://127.0.0.1:5432/pulsar_manager' -e USERNAME=pulsar -e PASSWORD=pulsar -e LOG_LEVEL=DEBUG -e JWT_TOKEN=$JWT_TOKEN -e PRIVATE_KEY=$PRIVATE_KEY -e PUBLIC_KEY=$PUBLIC_KEY -v $PWD:/data -v $PWD/secret:/pulsar-manager/secret apachepulsar/pulsar-manager:v0.2.0 /bin/sh + +``` + +* `JWT_TOKEN`: the token of the superuser configured for the broker. It is generated by the `bin/pulsar tokens create --private-key` command. 
+* `PRIVATE_KEY`: private key path mounted in container, generated by `bin/pulsar tokens create-key-pair` command. +* `PUBLIC_KEY`: public key path mounted in container, generated by `bin/pulsar tokens create-key-pair` command. +* `$PWD/secret`: the folder where the private key and public key generated by the `bin/pulsar tokens create-key-pair` command are placed locally +* `REDIRECT_HOST`: the IP address of the front-end server. +* `REDIRECT_PORT`: the port of the front-end server. +* `DRIVER_CLASS_NAME`: the driver class name of the PostgreSQL database. +* `URL`: the JDBC URL of your PostgreSQL database, such as jdbc:postgresql://127.0.0.1:5432/pulsar_manager. The docker image automatically starts a local instance of the PostgreSQL database. +* `USERNAME`: the username of PostgreSQL. +* `PASSWORD`: the password of PostgreSQL. +* `LOG_LEVEL`: the level of log. + +* Method 5: use Docker and turn on **token authentication** and **token management** by secret key. + +``` + +export JWT_TOKEN="your-token" +export SECRET_KEY="file:///pulsar-manager/secret/my-secret.key" +docker run -it -p 9527:9527 -p 7750:7750 -e REDIRECT_HOST=http://localhost -e REDIRECT_PORT=9527 -e DRIVER_CLASS_NAME=org.postgresql.Driver -e URL='jdbc:postgresql://127.0.0.1:5432/pulsar_manager' -e USERNAME=pulsar -e PASSWORD=pulsar -e LOG_LEVEL=DEBUG -e JWT_TOKEN=$JWT_TOKEN -e SECRET_KEY=$SECRET_KEY -v $PWD:/data -v $PWD/secret:/pulsar-manager/secret apachepulsar/pulsar-manager:v0.2.0 /bin/sh + +``` + +* `JWT_TOKEN`: the token of the superuser configured for the broker. It is generated by the `bin/pulsar tokens create --secret-key` command. +* `SECRET_KEY`: secret key path mounted in container, generated by `bin/pulsar tokens create-secret-key` command. +* `$PWD/secret`: the folder where the secret key generated by the `bin/pulsar tokens create-secret-key` command is placed locally +* `REDIRECT_HOST`: the IP address of the front-end server. +* `REDIRECT_PORT`: the port of the front-end server. 
+* `DRIVER_CLASS_NAME`: the driver class name of the PostgreSQL database. +* `URL`: the JDBC URL of your PostgreSQL database, such as jdbc:postgresql://127.0.0.1:5432/pulsar_manager. The docker image automatically starts a local instance of the PostgreSQL database. +* `USERNAME`: the username of PostgreSQL. +* `PASSWORD`: the password of PostgreSQL. +* `LOG_LEVEL`: the level of log. + +* For more information about backend configurations, see [here](https://github.com/apache/pulsar-manager/blob/master/src/README). +* For more information about frontend configurations, see [here](https://github.com/apache/pulsar-manager/tree/master/front-end). + +## Log in + +[Set the administrator account and password](#set-administrator-account-and-password). + +Visit http://localhost:9527 to log in. diff --git a/site2/website-next/versioned_docs/version-2.7.1/administration-stats.md b/site2/website-next/versioned_docs/version-2.7.1/administration-stats.md new file mode 100644 index 0000000000000..029ebf2d1d8f8 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/administration-stats.md @@ -0,0 +1,68 @@ +--- +id: administration-stats +title: Pulsar stats +sidebar_label: "Pulsar statistics" +original_id: administration-stats +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +## Partitioned topics + +|Stat|Description| +|---|---| +|msgRateIn| The sum of publish rates of all local and replication publishers in messages per second.| +|msgThroughputIn| Same as msgRateIn but in bytes per second instead of messages per second.| +|msgRateOut| The sum of dispatch rates of all local and replication consumers in messages per second.| +|msgThroughputOut| Same as msgRateOut but in bytes per second instead of messages per second.| +|averageMsgSize| Average message size, in bytes, from this publisher within the last interval.| +|storageSize| The sum of storage size of the ledgers for this topic.| +|publishers| The list of all local publishers into 
the topic. Publishers can be anywhere from zero to thousands.| +|producerId| Internal identifier for this producer on this topic.| +|producerName| Internal identifier for this producer, generated by the client library.| +|address| IP address and source port for the connection of this producer.| +|connectedSince| Timestamp this producer is created or last reconnected.| +|subscriptions| The list of all local subscriptions to the topic.| +|my-subscription| The name of this subscription (client defined).| +|msgBacklog| The count of messages in backlog for this subscription.| +|type| This subscription type.| +|msgRateExpired| The rate at which messages are discarded instead of dispatched from this subscription due to TTL.| +|consumers| The list of connected consumers for this subscription.| +|consumerName| Internal identifier for this consumer, generated by the client library.| +|availablePermits| The number of messages this consumer has space for in the listen queue of client library. A value of 0 means the queue of client library is full and receive() is not being called. A nonzero value means this consumer is ready to be dispatched messages.| +|replication| This section gives the stats for cross-colo replication of this topic.| +|replicationBacklog| The outbound replication backlog in messages.| +|connected| Whether the outbound replicator is connected.| +|replicationDelayInSeconds| How long the oldest message has been waiting to be sent through the connection, if connected is true.| +|inboundConnection| The IP and port of the broker in the publisher connection of remote cluster to this broker. | +|inboundConnectedSince| The TCP connection being used to publish messages to the remote cluster. 
If no local publishers are connected, this connection is automatically closed after a minute.| + + +## Topics + +|Stat|Description| +|---|---| +|entriesAddedCounter| Messages published since this broker loads this topic.| +|numberOfEntries| Total number of messages being tracked.| +|totalSize| Total storage size in bytes of all messages.| +|currentLedgerEntries| Count of messages written to the ledger currently open for writing.| +|currentLedgerSize| Size in bytes of messages written to ledger currently open for writing.| +|lastLedgerCreatedTimestamp| Time when last ledger is created.| +|lastLedgerCreationFailureTimestamp| Time when last ledger is failed.| +|waitingCursorsCount| How many cursors are caught up and waiting for a new message to be published.| +|pendingAddEntriesCount| How many messages have (asynchronous) write requests you are waiting on completion.| +|lastConfirmedEntry| The ledgerid:entryid of the last message successfully written. If the entryid is -1, then the ledger is opened or is being currently opened but has no entries written yet.| +|state| The state of the cursor ledger. Open means you have a cursor ledger for saving updates of the markDeletePosition.| +|ledgers| The ordered list of all ledgers for this topic holding its messages.| +|cursors| The list of all cursors on this topic. 
Every subscription you saw in the topic stats has one.| +|markDeletePosition| The ack position: the last message the subscriber acknowledges receiving.| +|readPosition| The latest position of subscriber for reading message.| +|waitingReadOp| This is true when the subscription reads the latest message that is published to the topic and waits on new messages to be published.| +|pendingReadOps| The counter for how many outstanding read requests to the BookKeepers you have in progress.| +|messagesConsumedCounter| Number of messages this cursor acks since this broker loads this topic.| +|cursorLedger| The ledger used to persistently store the current markDeletePosition.| +|cursorLedgerLastEntry| The last entryid used to persistently store the current markDeletePosition.| +|individuallyDeletedMessages| If Acks are done out of order, shows the ranges of messages Acked between the markDeletePosition and the read-position.| +|lastLedgerSwitchTimestamp| The last time the cursor ledger is rolled over.| diff --git a/site2/website-next/versioned_docs/version-2.7.1/administration-upgrade.md b/site2/website-next/versioned_docs/version-2.7.1/administration-upgrade.md new file mode 100644 index 0000000000000..aefdd8125ddcc --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/administration-upgrade.md @@ -0,0 +1,172 @@ +--- +id: administration-upgrade +title: Upgrade Guide +sidebar_label: "Upgrade" +original_id: administration-upgrade +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +## Upgrade guidelines + +Apache Pulsar is comprised of multiple components, ZooKeeper, bookies, and brokers. These components are either stateful or stateless. You do not have to upgrade ZooKeeper nodes unless you have special requirement. While you upgrade, you need to pay attention to bookies (stateful), brokers and proxies (stateless). + +The following are some guidelines on upgrading a Pulsar cluster. Read the guidelines before upgrading. 
+ +- Back up all your configuration files before upgrading. +- Read the guide entirely, make a plan, and then execute the plan. When you make an upgrade plan, you need to take your specific requirements and environment into consideration. +- Pay attention to the upgrading order of components. In general, you do not need to upgrade your ZooKeeper or configuration store cluster (the global ZooKeeper cluster). You need to upgrade bookies first, and then upgrade brokers, proxies, and your clients. +- If `autorecovery` is enabled, you need to disable `autorecovery` in the upgrade process, and re-enable it after completing the process. +- Read the release notes carefully for each release. Release notes contain features and configuration changes that might impact your upgrade. +- Upgrade a small subset of nodes of each type to canary test the new version before upgrading all nodes of that type in the cluster. When you have upgraded the canary nodes, run for a while to ensure that they work correctly. +- Upgrade one data center to verify the new version before upgrading all data centers if your cluster runs in multi-cluster replicated mode. + +> Note: Currently, Apache Pulsar is compatible between versions. + +## Upgrade sequence + +To upgrade an Apache Pulsar cluster, follow the upgrade sequence. + +1. Upgrade ZooKeeper (optional) +- Canary test: test an upgraded version in one or a small set of ZooKeeper nodes. +- Rolling upgrade: roll out the upgraded version to all ZooKeeper servers incrementally, one at a time. Monitor your dashboard during the whole rolling upgrade process. +2. Upgrade bookies +- Canary test: test an upgraded version in one or a small set of bookies. +- Rolling upgrade: + - a. Disable `autorecovery` with the following command. + + ```shell + + bin/bookkeeper shell autorecovery -disable + + ``` + + + - b. Roll out the upgraded version to all bookies in the cluster after you determine that a version is safe after canary. + - c. 
After you upgrade all bookies, re-enable `autorecovery` with the following command. + + ```shell + + bin/bookkeeper shell autorecovery -enable + + ``` + +3. Upgrade brokers +- Canary test: test an upgraded version in one or a small set of brokers. +- Rolling upgrade: rollout the upgraded version to all brokers in the cluster after you determine that a version is safe after canary. +4. Upgrade proxies +- Canary test: test an upgraded version in one or a small set of proxies. +- Rolling upgrade: rollout the upgraded version to all proxies in the cluster after you determine that a version is safe after canary. + +## Upgrade ZooKeeper (optional) +While you upgrade ZooKeeper servers, you can do canary test first, and then upgrade all ZooKeeper servers in the cluster. + +### Canary test + +You can test an upgraded version in one of ZooKeeper servers before upgrading all ZooKeeper servers in your cluster. + +To upgrade ZooKeeper server to a new version, complete the following steps: + +1. Stop a ZooKeeper server. +2. Upgrade the binary and configuration files. +3. Start the ZooKeeper server with the new binary files. +4. Use `pulsar zookeeper-shell` to connect to the newly upgraded ZooKeeper server and run a few commands to verify if it works as expected. +5. Run the ZooKeeper server for a few days, observe and make sure the ZooKeeper cluster runs well. + +#### Canary rollback + +If issues occur during canary test, you can shut down the problematic ZooKeeper node, revert the binary and configuration, and restart the ZooKeeper with the reverted binary. + +### Upgrade all ZooKeeper servers + +After canary test to upgrade one ZooKeeper in your cluster, you can upgrade all ZooKeeper servers in your cluster. + +You can upgrade all ZooKeeper servers one by one by following steps in canary test. + +## Upgrade bookies + +While you upgrade bookies, you can do canary test first, and then upgrade all bookies in the cluster. 
+For more details, you can read the Apache BookKeeper [Upgrade guide](http://bookkeeper.apache.org/docs/latest/admin/upgrade). + +### Canary test + +You can test an upgraded version in one or a small set of bookies before upgrading all bookies in your cluster. + +To upgrade a bookie to a new version, complete the following steps: + +1. Stop a bookie. +2. Upgrade the binary and configuration files. +3. Start the bookie in `ReadOnly` mode to verify if the bookie of this new version runs well for read workload. + + ```shell + + bin/pulsar bookie --readOnly + + ``` + +4. When the bookie runs successfully in `ReadOnly` mode, stop the bookie and restart it in `Write/Read` mode. + + ```shell + + bin/pulsar bookie + + ``` + +5. Observe and make sure the cluster serves both write and read traffic. + +#### Canary rollback + +If issues occur during the canary test, you can shut down the problematic bookie node. Other bookies in the cluster replace this problematic bookie node with autorecovery. + +### Upgrade all bookies + +After canary test to upgrade some bookies in your cluster, you can upgrade all bookies in your cluster. + +Before upgrading, you have to decide whether to upgrade the whole cluster at once (a downtime upgrade) or one bookie at a time (a rolling upgrade). + +In a rolling upgrade scenario, upgrade one bookie at a time. In a downtime upgrade scenario, shut down the entire cluster, upgrade each bookie, and then start the cluster. + +While you upgrade in both scenarios, the procedure is the same for each bookie. + +1. Stop the bookie. +2. Upgrade the software (either new binary or new configuration files). +3. Start the bookie. + +> **Advanced operations** +> When you upgrade a large BookKeeper cluster in a rolling upgrade scenario, upgrading one bookie at a time is slow. If you configure rack-aware or region-aware placement policy, you can upgrade bookies rack by rack or region by region, which speeds up the whole upgrade process. 
+ +## Upgrade brokers and proxies + +The upgrade procedure for brokers and proxies is the same. Brokers and proxies are `stateless`, so upgrading the two services is easy. + +### Canary test + +You can test an upgraded version in one or a small set of nodes before upgrading all nodes in your cluster. + +To upgrade to a new version, complete the following steps: + +1. Stop a broker (or proxy). +2. Upgrade the binary and configuration file. +3. Start a broker (or proxy). + +#### Canary rollback + +If issues occur during canary test, you can shut down the problematic broker (or proxy) node. Revert to the old version and restart the broker (or proxy). + +### Upgrade all brokers or proxies + +After canary test to upgrade some brokers or proxies in your cluster, you can upgrade all brokers or proxies in your cluster. + +Before upgrading, you have to decide whether to upgrade the whole cluster at once, including downtime and rolling upgrade scenarios. + +In a rolling upgrade scenario, you can upgrade one broker or one proxy at a time if the size of the cluster is small. If your cluster is large, you can upgrade brokers or proxies in batches. When you upgrade a batch of brokers or proxies, make sure the remaining brokers and proxies in the cluster have enough capacity to handle the traffic during upgrade. + +In a downtime upgrade scenario, shut down the entire cluster, upgrade each broker or proxy, and then start the cluster. + +While you upgrade in both scenarios, the procedure is the same for each broker or proxy. + +1. Stop the broker or proxy. +2. Upgrade the software (either new binary or new configuration files). +3. Start the broker or proxy. 
diff --git a/site2/website-next/versioned_docs/version-2.7.1/administration-zk-bk.md b/site2/website-next/versioned_docs/version-2.7.1/administration-zk-bk.md new file mode 100644 index 0000000000000..a4da422aaa960 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/administration-zk-bk.md @@ -0,0 +1,381 @@ +--- +id: administration-zk-bk +title: ZooKeeper and BookKeeper administration +sidebar_label: "ZooKeeper and BookKeeper" +original_id: administration-zk-bk +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +Pulsar relies on two external systems for essential tasks: + +* [ZooKeeper](https://zookeeper.apache.org/) is responsible for a wide variety of configuration-related and coordination-related tasks. +* [BookKeeper](http://bookkeeper.apache.org/) is responsible for [persistent storage](concepts-architecture-overview.md#persistent-storage) of message data. + +ZooKeeper and BookKeeper are both open-source [Apache](https://www.apache.org/) projects. + +> Skip to the [How Pulsar uses ZooKeeper and BookKeeper](#how-pulsar-uses-zookeeper-and-bookkeeper) section below for a more schematic explanation of the role of these two systems in Pulsar. + + +## ZooKeeper + +Each Pulsar instance relies on two separate ZooKeeper quorums. + +* [Local ZooKeeper](#deploy-local-zookeeper) operates at the cluster level and provides cluster-specific configuration management and coordination. Each Pulsar cluster needs to have a dedicated ZooKeeper cluster. +* [Configuration Store](#deploy-configuration-store) operates at the instance level and provides configuration management for the entire system (and thus across clusters). An independent cluster of machines or the same machines that local ZooKeeper uses can provide the configuration store quorum. + +### Deploy local ZooKeeper + +ZooKeeper manages a variety of essential coordination-related and configuration-related tasks for Pulsar. 
+ +To deploy a Pulsar instance, you need to stand up one local ZooKeeper cluster *per Pulsar cluster*. + +To begin, add all ZooKeeper servers to the quorum configuration specified in the [`conf/zookeeper.conf`](reference-configuration.md#zookeeper) file. Add a `server.N` line for each node in the cluster to the configuration, where `N` is the number of the ZooKeeper node. The following is an example for a three-node cluster: + +```properties + +server.1=zk1.us-west.example.com:2888:3888 +server.2=zk2.us-west.example.com:2888:3888 +server.3=zk3.us-west.example.com:2888:3888 + +``` + +On each host, you need to specify the node ID in `myid` file of each node, which is in `data/zookeeper` folder of each server by default (you can change the file location via the [`dataDir`](reference-configuration.md#zookeeper-dataDir) parameter). + +> See the [Multi-server setup guide](https://zookeeper.apache.org/doc/r3.4.10/zookeeperAdmin.html#sc_zkMulitServerSetup) in the ZooKeeper documentation for detailed information on `myid` and more. + + +On a ZooKeeper server at `zk1.us-west.example.com`, for example, you can set the `myid` value like this: + +```shell + +$ mkdir -p data/zookeeper +$ echo 1 > data/zookeeper/myid + +``` + +On `zk2.us-west.example.com` the command is `echo 2 > data/zookeeper/myid` and so on. + +Once you add each server to the `zookeeper.conf` configuration and each server has the appropriate `myid` entry, you can start ZooKeeper on all hosts (in the background, using nohup) with the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool: + +```shell + +$ bin/pulsar-daemon start zookeeper + +``` + +### Deploy configuration store + +The ZooKeeper cluster configured and started up in the section above is a *local* ZooKeeper cluster that you can use to manage a single Pulsar cluster. In addition to a local cluster, however, a full Pulsar instance also requires a configuration store for handling some instance-level configuration and coordination tasks. 
+ +If you deploy a [single-cluster](#single-cluster-pulsar-instance) instance, you do not need a separate cluster for the configuration store. If, however, you deploy a [multi-cluster](#multi-cluster-pulsar-instance) instance, you need to stand up a separate ZooKeeper cluster for configuration tasks. + +#### Single-cluster Pulsar instance + +If your Pulsar instance consists of just one cluster, then you can deploy a configuration store on the same machines as the local ZooKeeper quorum but run on different TCP ports. + +To deploy a ZooKeeper configuration store in a single-cluster instance, add the same ZooKeeper servers that the local quorum uses to the configuration file in [`conf/global_zookeeper.conf`](reference-configuration.md#configuration-store) using the same method for [local ZooKeeper](#local-zookeeper), but make sure to use a different port (2181 is the default for ZooKeeper). The following is an example that uses port 2184 for a three-node ZooKeeper cluster: + +```properties + +clientPort=2184 +server.1=zk1.us-west.example.com:2185:2186 +server.2=zk2.us-west.example.com:2185:2186 +server.3=zk3.us-west.example.com:2185:2186 + +``` + +As before, create the `myid` files for each server on `data/global-zookeeper/myid`. + +#### Multi-cluster Pulsar instance + +When you deploy a global Pulsar instance, with clusters distributed across different geographical regions, the configuration store serves as a highly available and strongly consistent metadata store that can tolerate failures and partitions spanning whole regions. + +The key here is to make sure the ZK quorum members are spread across at least 3 regions and that other regions run as observers. + +Again, given the very low expected load on the configuration store servers, you can share the same hosts used for the local ZooKeeper quorum. + +For example, you can assume a Pulsar instance with the following clusters `us-west`, `us-east`, `us-central`, `eu-central`, `ap-south`. 
Also, you can assume that each cluster has its own local ZK servers, named as follows: + +``` + +zk[1-3].${CLUSTER}.example.com + +``` + +In this scenario you want to pick the quorum participants from a few clusters and let all the others be ZK observers. For example, to form a 7-server quorum, you can pick 3 servers from `us-west`, 2 from `us-central` and 2 from `us-east`. + +This guarantees that writes to the configuration store are possible even if one of these regions is unreachable. + +The ZK configuration in all the servers looks like: + +```properties + +clientPort=2184 +server.1=zk1.us-west.example.com:2185:2186 +server.2=zk2.us-west.example.com:2185:2186 +server.3=zk3.us-west.example.com:2185:2186 +server.4=zk1.us-central.example.com:2185:2186 +server.5=zk2.us-central.example.com:2185:2186 +server.6=zk3.us-central.example.com:2185:2186:observer +server.7=zk1.us-east.example.com:2185:2186 +server.8=zk2.us-east.example.com:2185:2186 +server.9=zk3.us-east.example.com:2185:2186:observer +server.10=zk1.eu-central.example.com:2185:2186:observer +server.11=zk2.eu-central.example.com:2185:2186:observer +server.12=zk3.eu-central.example.com:2185:2186:observer +server.13=zk1.ap-south.example.com:2185:2186:observer +server.14=zk2.ap-south.example.com:2185:2186:observer +server.15=zk3.ap-south.example.com:2185:2186:observer + +``` + +Additionally, ZK observers need to have: + +```properties + +peerType=observer + +``` + +##### Start the service + +Once your configuration store configuration is in place, you can start up the service using [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon): + +```shell + +$ bin/pulsar-daemon start configuration-store + +``` + +### ZooKeeper configuration + +In Pulsar, ZooKeeper configuration is handled by two separate configuration files in the `conf` directory of your Pulsar installation: `conf/zookeeper.conf` for [local ZooKeeper](#local-zookeeper) and `conf/global-zookeeper.conf` for [configuration store](#configuration-store).
+ +#### Local ZooKeeper + +The [`conf/zookeeper.conf`](reference-configuration.md#zookeeper) file handles the configuration for local ZooKeeper. The table below shows the available parameters: + +|Name|Description|Default| +|---|---|---| +|tickTime| The tick is the basic unit of time in ZooKeeper, measured in milliseconds and used to regulate things like heartbeats and timeouts. tickTime is the length of a single tick. |2000| +|initLimit| The maximum time, in ticks, that the leader ZooKeeper server allows follower ZooKeeper servers to successfully connect and sync. The tick time is set in milliseconds using the tickTime parameter. |10| +|syncLimit| The maximum time, in ticks, that a follower ZooKeeper server is allowed to sync with other ZooKeeper servers. The tick time is set in milliseconds using the tickTime parameter. |5| +|dataDir| The location where ZooKeeper stores in-memory database snapshots as well as the transaction log of updates to the database. |data/zookeeper| +|clientPort| The port on which the ZooKeeper server listens for connections. |2181| +|autopurge.snapRetainCount| In ZooKeeper, auto purge determines how many recent snapshots of the database stored in dataDir to retain within the time interval specified by autopurge.purgeInterval (while deleting the rest). |3| +|autopurge.purgeInterval| The time interval, in hours, which triggers the ZooKeeper database purge task. Setting to a non-zero number enables auto purge; setting to 0 disables. Read this guide before enabling auto purge. |1| +|maxClientCnxns| The maximum number of client connections. Increase this if you need to handle more ZooKeeper clients. |60| + + +#### Configuration Store + +The [`conf/global-zookeeper.conf`](reference-configuration.md#configuration-store) file handles the configuration for the configuration store. The table below shows the available parameters: + + +## BookKeeper + +BookKeeper stores all durable messages in Pulsar.
BookKeeper is a distributed [write-ahead log](https://en.wikipedia.org/wiki/Write-ahead_logging) (WAL) system that guarantees read consistency of independent message logs called ledgers. Individual BookKeeper servers are also called *bookies*. + +> To manage message persistence, retention, and expiry in Pulsar, refer to [cookbook](cookbooks-retention-expiry). + +### Hardware requirements + +Bookie hosts store message data on disk. To provide optimal performance, ensure that the bookies have a suitable hardware configuration. The following are two key dimensions of bookie hardware capacity: + +- Disk I/O capacity read/write +- Storage capacity + +Message entries written to bookies are always synced to disk before returning an acknowledgement to the Pulsar broker by default. To ensure low write latency, BookKeeper is designed to use multiple devices: + +- A **journal** to ensure durability. For sequential writes, it is critical to have fast [fsync](https://linux.die.net/man/2/fsync) operations on bookie hosts. Typically, small and fast [solid-state drives](https://en.wikipedia.org/wiki/Solid-state_drive) (SSDs) should suffice, or [hard disk drives](https://en.wikipedia.org/wiki/Hard_disk_drive) (HDDs) with a [RAID](https://en.wikipedia.org/wiki/RAID) controller and a battery-backed write cache. Both solutions can reach fsync latency of ~0.4 ms. +- A **ledger storage device** stores data. Writes happen in the background, so write I/O is not a big concern. Reads happen sequentially most of the time and the backlog is drained only in case of consumer drain. To store large amounts of data, a typical configuration involves multiple HDDs with a RAID controller. + +### Configure BookKeeper + +You can configure BookKeeper bookies using the [`conf/bookkeeper.conf`](reference-configuration.md#bookkeeper) configuration file.
When you configure each bookie, ensure that the [`zkServers`](reference-configuration.md#bookkeeper-zkServers) parameter is set to the connection string for local ZooKeeper of the Pulsar cluster. + +The minimum configuration changes required in `conf/bookkeeper.conf` are as follows: + +```properties + +# Change to point to journal disk mount point +journalDirectory=data/bookkeeper/journal + +# Point to ledger storage disk mount point +ledgerDirectories=data/bookkeeper/ledgers + +# Point to local ZK quorum +zkServers=zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181 + +``` + +To change the ZooKeeper root path that BookKeeper uses, use `zkLedgersRootPath=/MY-PREFIX/ledgers` instead of `zkServers=localhost:2181/MY-PREFIX`. + +> For more information about BookKeeper, refer to the official [BookKeeper docs](http://bookkeeper.apache.org). + +### Deploy BookKeeper + +BookKeeper provides [persistent message storage](concepts-architecture-overview.md#persistent-storage) for Pulsar. Each Pulsar broker has its own cluster of bookies. The BookKeeper cluster shares a local ZooKeeper quorum with the Pulsar cluster. + +### Start bookies manually + +You can start a bookie in the foreground or as a background daemon. + +To start a bookie in the foreground, use the [`bookkeeper`](reference-cli-tools.md#bookkeeper) CLI tool: + +```bash + +$ bin/bookkeeper bookie + +``` + +To start a bookie in the background, use the [`pulsar-daemon`](reference-cli-tools.md#pulsar-daemon) CLI tool: + +```bash + +$ bin/pulsar-daemon start bookie + +``` + +You can verify whether the bookie works properly with the `bookiesanity` command for the [BookKeeper shell](reference-cli-tools.md#bookkeeper-shell): + +```shell + +$ bin/bookkeeper shell bookiesanity + +``` + +When you use this command, you create a new ledger on the local bookie, write a few entries, read them back and finally delete the ledger. 
+ +### Decommission bookies cleanly + +Before you decommission a bookie, you need to check your environment and meet the following requirements. + +1. Ensure the state of your cluster supports decommissioning the target bookie. Check if `EnsembleSize >= Write Quorum >= Ack Quorum` is `true` with one less bookie. + +2. Ensure the target bookie is listed after using the `listbookies` command. + +3. Ensure that no other process is ongoing (upgrade, etc.). + +And then you can decommission bookies safely. To decommission bookies, complete the following steps. + +1. Log in to the bookie node and check if there are underreplicated ledgers. The decommission command forces the replication of the underreplicated ledgers. +`$ bin/bookkeeper shell listunderreplicated` + +2. Stop the bookie by killing the bookie process. Make sure that no liveness/readiness probes are set up for the bookies to spin them back up if you deploy it in a Kubernetes environment. + +3. Run the decommission command. + - If you have logged in to the node to be decommissioned, you do not need to provide `-bookieid`. + - If you are running the decommission command for the target bookie node from another bookie node, you should mention the target bookie ID in the arguments for `-bookieid` + `$ bin/bookkeeper shell decommissionbookie` + or + `$ bin/bookkeeper shell decommissionbookie -bookieid <target bookieid>` + +4. Validate that no ledgers are on the decommissioned bookie. +`$ bin/bookkeeper shell listledgers -bookieid <target bookieid>` + +You can run the following command to check if the bookie you have decommissioned is listed in the bookies list: + +```bash + +./bookkeeper shell listbookies -rw -h +./bookkeeper shell listbookies -ro -h + +``` + +## BookKeeper persistence policies + +In Pulsar, you can set *persistence policies* at the namespace level, which determines how BookKeeper handles persistent storage of messages. Policies determine four things: + +* The number of acks (guaranteed copies) to wait for each ledger entry.
+* The number of bookies to use for a topic. +* The number of writes to make for each ledger entry. +* The throttling rate for mark-delete operations. + +### Set persistence policies + +You can set persistence policies for BookKeeper at the [namespace](reference-terminology.md#namespace) level. + +#### Pulsar-admin + +Use the [`set-persistence`](reference-pulsar-admin.md#namespaces-set-persistence) subcommand and specify a namespace as well as any policies that you want to apply. The available flags are: + +Flag | Description | Default +:----|:------------|:------- +`-a`, `--bookkeeper-ack-quorum` | The number of acks (guaranteed copies) to wait on for each entry | 0 +`-e`, `--bookkeeper-ensemble` | The number of [bookies](reference-terminology.md#bookie) to use for topics in the namespace | 0 +`-w`, `--bookkeeper-write-quorum` | The number of writes to make for each entry | 0 +`-r`, `--ml-mark-delete-max-rate` | Throttling rate for mark-delete operations (0 means no throttle) | 0 + +The following is an example: + +```shell + +$ pulsar-admin namespaces set-persistence my-tenant/my-ns \ + --bookkeeper-ack-quorum 3 \ + --bookkeeper-ensemble 2 + +``` + +#### REST API + +{@inject: endpoint|POST|/admin/v2/namespaces/:tenant/:namespace/persistence|operation/setPersistence?version=@pulsar:version_number@} + +#### Java + +```java + +int bkEnsemble = 2; +int bkQuorum = 3; +int bkAckQuorum = 2; +double markDeleteRate = 0.7; +PersistencePolicies policies = + new PersistencePolicies(bkEnsemble, bkQuorum, bkAckQuorum, markDeleteRate); +admin.namespaces().setPersistence(namespace, policies); + +``` + +### List persistence policies + +You can see which persistence policy currently applies to a namespace. + +#### Pulsar-admin + +Use the [`get-persistence`](reference-pulsar-admin.md#namespaces-get-persistence) subcommand and specify the namespace.
+ +The following is an example: + +```shell + +$ pulsar-admin namespaces get-persistence my-tenant/my-ns +{ + "bookkeeperEnsemble": 1, + "bookkeeperWriteQuorum": 1, + "bookkeeperAckQuorum": 1, + "managedLedgerMaxMarkDeleteRate": 0 +} + +``` + +#### REST API + +{@inject: endpoint|GET|/admin/v2/namespaces/:tenant/:namespace/persistence|operation/getPersistence?version=@pulsar:version_number@} + +#### Java + +```java + +PersistencePolicies policies = admin.namespaces().getPersistence(namespace); + +``` + +## How Pulsar uses ZooKeeper and BookKeeper + +This diagram illustrates the role of ZooKeeper and BookKeeper in a Pulsar cluster: + +![ZooKeeper and BookKeeper](/assets/pulsar-system-architecture.png) + +Each Pulsar cluster consists of one or more message brokers. Each broker relies on an ensemble of bookies. diff --git a/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json b/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json index 5244cdccc2e66..a95a781eb8b8b 100644 --- a/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json +++ b/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json @@ -295,6 +295,44 @@ "id": "version-2.7.1/deploy-monitoring" } ] + }, + { + "type": "category", + "label": "Administration", + "items": [ + { + "type": "doc", + "id": "version-2.7.1/administration-zk-bk" + }, + { + "type": "doc", + "id": "version-2.7.1/administration-geo" + }, + { + "type": "doc", + "id": "version-2.7.1/administration-pulsar-manager" + }, + { + "type": "doc", + "id": "version-2.7.1/administration-stats" + }, + { + "type": "doc", + "id": "version-2.7.1/administration-load-balance" + }, + { + "type": "doc", + "id": "version-2.7.1/administration-proxy" + }, + { + "type": "doc", + "id": "version-2.7.1/administration-upgrade" + }, + { + "type": "doc", + "id": "version-2.7.1/administration-isolation" + } + ] } ] } \ No newline at end of file From 81990a2456fefc8af47b1dce88afbc0fe424c767 Mon Sep 17 00:00:00 2001 From: LiLi
Date: Thu, 4 Nov 2021 09:10:44 +0800 Subject: [PATCH 3/3] [website][upgrade]feat: docs migration - 2.7.1 / security Signed-off-by: LiLi --- .../version-2.7.1/security-athenz.md | 102 ++++ .../version-2.7.1/security-authorization.md | 118 +++++ .../version-2.7.1/security-bouncy-castle.md | 161 +++++++ .../version-2.7.1/security-encryption.md | 194 ++++++++ .../version-2.7.1/security-extending.md | 211 +++++++++ .../version-2.7.1/security-jwt.md | 344 ++++++++++++++ .../version-2.7.1/security-kerberos.md | 447 ++++++++++++++++++ .../version-2.7.1/security-oauth2.md | 231 +++++++++ .../version-2.7.1/security-overview.md | 39 ++ .../security-tls-authentication.md | 224 +++++++++ .../version-2.7.1/security-tls-keystore.md | 326 +++++++++++++ .../version-2.7.1/security-tls-transport.md | 292 ++++++++++++ .../version-2.7.1-sidebars.json | 54 +++ 13 files changed, 2743 insertions(+) create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-athenz.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-authorization.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-bouncy-castle.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-encryption.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-extending.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-jwt.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-kerberos.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-oauth2.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-overview.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-tls-authentication.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-tls-keystore.md create mode 100644 site2/website-next/versioned_docs/version-2.7.1/security-tls-transport.md diff --git 
a/site2/website-next/versioned_docs/version-2.7.1/security-athenz.md b/site2/website-next/versioned_docs/version-2.7.1/security-athenz.md new file mode 100644 index 0000000000000..c4f79cb7f229c --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-athenz.md @@ -0,0 +1,102 @@ +--- +id: security-athenz +title: Authentication using Athenz +sidebar_label: "Authentication using Athenz" +original_id: security-athenz +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +[Athenz](https://github.com/AthenZ/athenz) is a role-based authentication/authorization system. In Pulsar, you can use Athenz role tokens (also known as *z-tokens*) to establish the identity of the client. + +## Athenz authentication settings + +A [decentralized Athenz system](https://github.com/AthenZ/athenz/blob/master/docs/decent_authz_flow.md) contains an [authori**Z**ation **M**anagement **S**ystem](https://github.com/AthenZ/athenz/blob/master/docs/setup_zms.md) (ZMS) server and an [authori**Z**ation **T**oken **S**ystem](https://github.com/AthenZ/athenz/blob/master/docs/setup_zts) (ZTS) server. + +To begin, you need to set up Athenz service access control. You need to create domains for the *provider* (which provides some resources to other services with some authentication/authorization policies) and the *tenant* (which is provisioned to access some resources in a provider). In this case, the provider corresponds to the Pulsar service itself and the tenant corresponds to each application using Pulsar (typically, a [tenant](reference-terminology.md#tenant) in Pulsar). + +### Create the tenant domain and service + +On the [tenant](reference-terminology.md#tenant) side, you need to do the following things: + +1. Create a domain, such as `shopping` +2. Generate a private/public key pair +3.
Create a service, such as `some_app`, on the domain with the public key + +Note that you need to specify the private key generated in step 2 when the Pulsar client connects to the [broker](reference-terminology.md#broker) (see client configuration examples for [Java](client-libraries-java.md#tls-authentication) and [C++](client-libraries-cpp.md#tls-authentication)). + +For more specific steps involving the Athenz UI, refer to [Example Service Access Control Setup](https://github.com/AthenZ/athenz/blob/master/docs/example_service_athenz_setup.md#client-tenant-domain). + +### Create the provider domain and add the tenant service to some role members + +On the provider side, you need to do the following things: + +1. Create a domain, such as `pulsar` +2. Create a role +3. Add the tenant service to members of the role + +Note that you can specify any action and resource in step 2 since they are not used on Pulsar. In other words, Pulsar uses the Athenz role token only for authentication, *not* for authorization. + +For more specific steps involving UI, refer to [Example Service Access Control Setup](https://github.com/AthenZ/athenz/blob/master/docs/example_service_athenz_setup.md#server-provider-domain). + +## Configure the broker for Athenz + +> ### TLS encryption +> +> Note that when you are using Athenz as an authentication provider, you had better use TLS encryption +> as it can protect role tokens from being intercepted and reused. (for more details involving TLS encryption see [Architecture - Data Model](https://github.com/AthenZ/athenz/blob/master/docs/data_model)). + +In the `conf/broker.conf` configuration file in your Pulsar installation, you need to provide the class name of the Athenz authentication provider as well as a comma-separated list of provider domain names. 
+ +```properties + +# Add the Athenz auth provider +authenticationEnabled=true +authorizationEnabled=true +authenticationProviders=org.apache.pulsar.broker.authentication.AuthenticationProviderAthenz +athenzDomainNames=pulsar + +# Enable TLS +tlsEnabled=true +tlsCertificateFilePath=/path/to/broker-cert.pem +tlsKeyFilePath=/path/to/broker-key.pem + +# Authentication settings of the broker itself. Used when the broker connects to other brokers, either in same or other clusters +brokerClientAuthenticationPlugin=org.apache.pulsar.client.impl.auth.AuthenticationAthenz +brokerClientAuthenticationParameters={"tenantDomain":"shopping","tenantService":"some_app","providerDomain":"pulsar","privateKey":"file:///path/to/private.pem","keyId":"v1"} + +``` + +> A full listing of parameters is available in the `conf/broker.conf` file, you can also find the default +> values for those parameters in [Broker Configuration](reference-configuration.md#broker). + +## Configure clients for Athenz + +For more information on Pulsar client authentication using Athenz, see the following language-specific docs: + +* [Java client](client-libraries-java.md#athenz) + +## Configure CLI tools for Athenz + +[Command-line tools](reference-cli-tools.md) like [`pulsar-admin`](reference-pulsar-admin), [`pulsar-perf`](reference-cli-tools.md#pulsar-perf), and [`pulsar-client`](reference-cli-tools.md#pulsar-client) use the `conf/client.conf` config file in a Pulsar installation. 
+ +You need to add the following authentication parameters to the `conf/client.conf` config file to use Athenz with CLI tools of Pulsar: + +```properties + +# URL for the broker +serviceUrl=https://broker.example.com:8443/ + +# Set Athenz auth plugin and its parameters +authPlugin=org.apache.pulsar.client.impl.auth.AuthenticationAthenz +authParams={"tenantDomain":"shopping","tenantService":"some_app","providerDomain":"pulsar","privateKey":"file:///path/to/private.pem","keyId":"v1"} + +# Enable TLS +useTls=true +tlsAllowInsecureConnection=false +tlsTrustCertsFilePath=/path/to/cacert.pem + +``` + diff --git a/site2/website-next/versioned_docs/version-2.7.1/security-authorization.md b/site2/website-next/versioned_docs/version-2.7.1/security-authorization.md new file mode 100644 index 0000000000000..9ca30ea1dd999 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-authorization.md @@ -0,0 +1,118 @@ +--- +id: security-authorization +title: Authentication and authorization in Pulsar +sidebar_label: "Authorization and ACLs" +original_id: security-authorization +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + + +In Pulsar, the [authentication provider](security-overview.md#authentication-providers) is responsible for properly identifying clients and associating the clients with [role tokens](security-overview.md#role-tokens). If you only enable authentication, an authenticated role token has the ability to access all resources in the cluster. *Authorization* is the process that determines *what* clients are able to do. + +The role tokens with the most privileges are the *superusers*. The *superusers* can create and destroy tenants, along with having full access to all tenant resources. + +When a superuser creates a [tenant](reference-terminology.md#tenant), that tenant is assigned an admin role. 
A client with the admin role token can then create, modify and destroy namespaces, and grant and revoke permissions to *other role tokens* on those namespaces. + +## Broker and Proxy Setup + +### Enable authorization and assign superusers +You can enable the authorization and assign the superusers in the broker ([`conf/broker.conf`](reference-configuration.md#broker)) configuration files. + +```properties + +authorizationEnabled=true +superUserRoles=my-super-user-1,my-super-user-2 + +``` + +> A full list of parameters is available in the `conf/broker.conf` file. +> You can also find the default values for those parameters in [Broker Configuration](reference-configuration.md#broker). + +Typically, you use superuser roles for administrators, clients as well as broker-to-broker authorization. When you use [geo-replication](concepts-replication), every broker needs to be able to publish to all the other topics of clusters. + +You can also enable the authorization for the proxy in the proxy configuration file (`conf/proxy.conf`). Once you enable the authorization on the proxy, the proxy does an additional authorization check before forwarding the request to a broker. +If you enable authorization on the broker, the broker checks the authorization of the request when the broker receives the forwarded request. + +### Proxy Roles + +By default, the broker treats the connection between a proxy and the broker as a normal user connection. The broker authenticates the user as the role configured in `proxy.conf`(see ["Enable TLS Authentication on Proxies"](security-tls-authentication.md#enable-tls-authentication-on-proxies)). However, when the user connects to the cluster through a proxy, the user rarely requires the authentication. The user expects to be able to interact with the cluster as the role for which they have authenticated with the proxy. + +Pulsar uses *Proxy roles* to enable the authentication. 
Proxy roles are specified in the broker configuration file, [`conf/broker.conf`](reference-configuration.md#broker). If a client that is authenticated with a broker is one of its ```proxyRoles```, all requests from that client must also carry information about the role of the client that is authenticated with the proxy. This information is called the *original principal*. If the *original principal* is absent, the client is not able to access anything. + +You must authorize both the *proxy role* and the *original principal* to access a resource to ensure that the resource is accessible via the proxy. Administrators can take two approaches to authorize the *proxy role* and the *original principal*. + +The more secure approach is to grant access to the proxy roles each time you grant access to a resource. For example, if you have a proxy role named `proxy1`, when the superuser creates a tenant, you should specify `proxy1` as one of the admin roles. When a role is granted permissions to produce or consume from a namespace, if that client wants to produce or consume through a proxy, you should also grant `proxy1` the same permissions. + +Another approach is to make the proxy role a superuser. This allows the proxy to access all resources. The client still needs to authenticate with the proxy, and all requests made through the proxy have their role downgraded to the *original principal* of the authenticated client. However, if the proxy is compromised, a bad actor could get full access to your cluster. + +You can specify the roles as proxy roles in [`conf/broker.conf`](reference-configuration.md#broker). 
+ +``` + +proxyRoles=my-proxy-role + +# if you want to allow superusers to use the proxy (see above) +superUserRoles=my-super-user-1,my-super-user-2,my-proxy-role + +``` + +## Administer tenants + +Pulsar [instance](reference-terminology.md#instance) administrators or some kind of self-service portal typically provisions a Pulsar [tenant](reference-terminology.md#tenant). + +You can manage tenants using the [`pulsar-admin`](reference-pulsar-admin) tool. + +### Create a new tenant + +The following is an example tenant creation command: + +``` + +$ bin/pulsar-admin tenants create my-tenant \ + --admin-roles my-admin-role \ + --allowed-clusters us-west,us-east + +``` + +This command creates a new tenant `my-tenant` that is allowed to use the clusters `us-west` and `us-east`. + +A client that successfully identifies itself as having the role `my-admin-role` is allowed to perform all administrative tasks on this tenant. + +The structure of topic names in Pulsar reflects the hierarchy between tenants, namespaces, and topics: + +``` + +persistent://tenant/namespace/topic + +``` + +### Manage permissions + +You can use [Pulsar Admin Tools](admin-api-permissions) for managing permissions in Pulsar. 
+ +### Pulsar admin authentication + +``` + +PulsarAdmin admin = PulsarAdmin.builder() + .serviceHttpUrl("http://broker:8080") + .authentication("com.org.MyAuthPluginClass", "param1:value1") + .build(); + +``` + +To use TLS: + +``` + +PulsarAdmin admin = PulsarAdmin.builder() + .serviceHttpUrl("https://broker:8080") + .authentication("com.org.MyAuthPluginClass", "param1:value1") + .tlsTrustCertsFilePath("/path/to/trust/cert") + .build(); + +``` + diff --git a/site2/website-next/versioned_docs/version-2.7.1/security-bouncy-castle.md b/site2/website-next/versioned_docs/version-2.7.1/security-bouncy-castle.md new file mode 100644 index 0000000000000..b9bb8155bab83 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-bouncy-castle.md @@ -0,0 +1,161 @@ +--- +id: security-bouncy-castle +title: Bouncy Castle Providers +sidebar_label: "Bouncy Castle Providers" +original_id: security-bouncy-castle +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +## BouncyCastle Introduce + +`Bouncy Castle` is a Java library that complements the default Java Cryptographic Extension (JCE), +and it provides more cipher suites and algorithms than the default JCE provided by Sun. + +In addition to that, `Bouncy Castle` has lots of utilities for reading arcane formats like PEM and ASN.1 that no sane person would want to rewrite themselves. + +In Pulsar, security and crypto have dependencies on BouncyCastle Jars. For the detailed installing and configuring Bouncy Castle FIPS, see [BC FIPS Documentation](https://www.bouncycastle.org/documentation.html), especially the **User Guides** and **Security Policy** PDFs. + +`Bouncy Castle` provides both [FIPS](https://www.bouncycastle.org/fips_faq.html) and non-FIPS version. But in a JVM, you can not include both of the 2 versions, and you need to exclude the current version before include the other. 
+ +In Pulsar, the security and crypto methods also depend on `Bouncy Castle`, especially in [TLS Authentication](security-tls-authentication.md) and [Transport Encryption](security-encryption). This document contains the configuration between BouncyCastle FIPS (BC-FIPS) and non-FIPS (BC-non-FIPS) versions while using Pulsar. + +## How BouncyCastle modules packaged in Pulsar + +In Pulsar's `bouncy-castle` module, we provide 2 sub modules: `bouncy-castle-bc` (for non-FIPS version) and `bouncy-castle-bcfips` (for FIPS version), to package BC jars together to make the include and exclude of `Bouncy Castle` easier. + +To achieve this goal, we will need to package several `bouncy-castle` jars together into `bouncy-castle-bc` or `bouncy-castle-bcfips` jar. +Each of the original bouncy-castle jars is related to security, so BouncyCastle dutifully supplies a signed version of each JAR. +But when we do the re-package, Maven shade explodes the BouncyCastle jar file which puts the signatures into META-INF, +these signatures aren't valid for this new, uber-jar (signatures are only for the original BC jar). +Usually, you will meet an error like `java.lang.SecurityException: Invalid signature file digest for Manifest main attributes`. + +You could exclude these signatures in mvn pom file to avoid the above error, by + +```xml + +<exclude>META-INF/*.SF</exclude> +<exclude>META-INF/*.DSA</exclude> +<exclude>META-INF/*.RSA</exclude> + +``` + +But it can also lead to new, cryptic errors, e.g. `java.security.NoSuchAlgorithmException: PBEWithSHA256And256BitAES-CBC-BC SecretKeyFactory not available` +By explicitly specifying where to find the algorithm like this: `SecretKeyFactory.getInstance("PBEWithSHA256And256BitAES-CBC-BC","BC")` +It will get the real error: `java.security.NoSuchProviderException: JCE cannot authenticate the provider BC` + +So, we used an [executable packer plugin](https://github.com/nthuemmel/executable-packer-maven-plugin) that uses a jar-in-jar approach to preserve the BouncyCastle signature in a single, executable jar. 
+ +### Include dependencies of BC-non-FIPS + +Pulsar module `bouncy-castle-bc`, which defined by `bouncy-castle/bc/pom.xml` contains the needed non-FIPS jars for Pulsar, and packaged as a jar-in-jar(need to provide `pkg`). + +```xml + + + org.bouncycastle + bcpkix-jdk15on + ${bouncycastle.version} + + + + org.bouncycastle + bcprov-ext-jdk15on + ${bouncycastle.version} + + +``` + +By using this `bouncy-castle-bc` module, you can easily include and exclude BouncyCastle non-FIPS jars. + +### Modules that include BC-non-FIPS module (`bouncy-castle-bc`) + +For Pulsar client, user need the bouncy-castle module, so `pulsar-client-original` will include the `bouncy-castle-bc` module, and have `pkg` set to reference the `jar-in-jar` package. +It is included as following example: + +```xml + + + org.apache.pulsar + bouncy-castle-bc + ${pulsar.version} + pkg + + +``` + +By default `bouncy-castle-bc` already included in `pulsar-client-original`, And `pulsar-client-original` has been included in a lot of other modules like `pulsar-client-admin`, `pulsar-broker`. +But for the above shaded jar and signatures reason, we should not package Pulsar's `bouncy-castle` module into `pulsar-client-all` other shaded modules directly, such as `pulsar-client-shaded`, `pulsar-client-admin-shaded` and `pulsar-broker-shaded`. +So in the shaded modules, we will exclude the `bouncy-castle` modules. + +```xml + + + + org.apache.pulsar:pulsar-client-original + + ** + + + org/bouncycastle/** + + + + +``` + +That means, `bouncy-castle` related jars are not shaded in these fat jars. + +### Module BC-FIPS (`bouncy-castle-bcfips`) + +Pulsar module `bouncy-castle-bcfips`, which defined by `bouncy-castle/bcfips/pom.xml` contains the needed FIPS jars for Pulsar. +Similar to `bouncy-castle-bc`, `bouncy-castle-bcfips` also packaged as a `jar-in-jar` package for easy include/exclude. 
+ +```xml + + + org.bouncycastle + bc-fips + ${bouncycastlefips.version} + + + + org.bouncycastle + bcpkix-fips + ${bouncycastlefips.version} + + +``` + +### Exclude BC-non-FIPS and include BC-FIPS + +If you want to switch from BC-non-FIPS to BC-FIPS version, Here is an example for `pulsar-broker` module: + +```xml + + + org.apache.pulsar + pulsar-broker + ${pulsar.version} + + + org.apache.pulsar + bouncy-castle-bc + + + + + + org.apache.pulsar + bouncy-castle-bcfips + ${pulsar.version} + pkg + + +``` + + +For more example, you can reference module `bcfips-include-test`. + diff --git a/site2/website-next/versioned_docs/version-2.7.1/security-encryption.md b/site2/website-next/versioned_docs/version-2.7.1/security-encryption.md new file mode 100644 index 0000000000000..b0f2f4616b649 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-encryption.md @@ -0,0 +1,194 @@ +--- +id: security-encryption +title: Pulsar Encryption +sidebar_label: "End-to-End Encryption" +original_id: security-encryption +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +Applications can use Pulsar encryption to encrypt messages at the producer side and decrypt messages at the consumer side. You can use the public and private key pair that the application configures to perform encryption. Only the consumers with a valid key can decrypt the encrypted messages. + +## Asymmetric and symmetric encryption + +Pulsar uses dynamically generated symmetric AES key to encrypt messages(data). You can use the application provided ECDSA/RSA key pair to encrypt the AES key(data key), so you do not have to share the secret with everyone. + +Key is a public and private key pair used for encryption or decryption. The producer key is the public key of the key pair, and the consumer key is the private key of the key pair. + +The application configures the producer with the public key. You can use this key to encrypt the AES data key. 
The encrypted data key is sent as part of the message header. Only entities with the private key (in this case the consumer) are able to decrypt the data key which is used to decrypt the message. + +You can encrypt a message with more than one key. Any one of the keys used for encrypting the message is sufficient to decrypt the message. + +Pulsar does not store the encryption key anywhere in the Pulsar service. If you lose or delete the private key, your message is irretrievably lost, and is unrecoverable. + +## Producer +![alt text](/assets/pulsar-encryption-producer.jpg "Pulsar Encryption Producer") + +## Consumer +![alt text](/assets/pulsar-encryption-consumer.jpg "Pulsar Encryption Consumer") + +## Get started + +1. Enter the commands below to create your ECDSA or RSA public and private key pair. + +```shell + +openssl ecparam -name secp521r1 -genkey -param_enc explicit -out test_ecdsa_privkey.pem +openssl ec -in test_ecdsa_privkey.pem -pubout -outform pem -out test_ecdsa_pubkey.pem + +``` + +2. Add the public and private key to your key management system and configure your producers to retrieve public keys and consumer clients to retrieve private keys. + +3. Implement the CryptoKeyReader interface, specifically CryptoKeyReader.getPublicKey() for the producer and CryptoKeyReader.getPrivateKey() for the consumer, which the Pulsar client invokes to load the key. + +4. Add the encryption key name to the producer builder: PulsarClient.newProducer().addEncryptionKey("myapp.key"). + +5. Add the CryptoKeyReader implementation to the producer or consumer builder: PulsarClient.newProducer().cryptoKeyReader(keyReader) / PulsarClient.newConsumer().cryptoKeyReader(keyReader). + +6. 
Sample producer application: + +```java + +class RawFileKeyReader implements CryptoKeyReader { + + String publicKeyFile = ""; + String privateKeyFile = ""; + + RawFileKeyReader(String pubKeyFile, String privKeyFile) { + publicKeyFile = pubKeyFile; + privateKeyFile = privKeyFile; + } + + @Override + public EncryptionKeyInfo getPublicKey(String keyName, Map keyMeta) { + EncryptionKeyInfo keyInfo = new EncryptionKeyInfo(); + try { + keyInfo.setKey(Files.readAllBytes(Paths.get(publicKeyFile))); + } catch (IOException e) { + System.out.println("ERROR: Failed to read public key from file " + publicKeyFile); + e.printStackTrace(); + } + return keyInfo; + } + + @Override + public EncryptionKeyInfo getPrivateKey(String keyName, Map keyMeta) { + EncryptionKeyInfo keyInfo = new EncryptionKeyInfo(); + try { + keyInfo.setKey(Files.readAllBytes(Paths.get(privateKeyFile))); + } catch (IOException e) { + System.out.println("ERROR: Failed to read private key from file " + privateKeyFile); + e.printStackTrace(); + } + return keyInfo; + } +} + +PulsarClient pulsarClient = PulsarClient.builder().serviceUrl("pulsar://localhost:6650").build(); + +Producer producer = pulsarClient.newProducer() + .topic("persistent://my-tenant/my-ns/my-topic") + .addEncryptionKey("myappkey") + .cryptoKeyReader(new RawFileKeyReader("test_ecdsa_pubkey.pem", "test_ecdsa_privkey.pem")) + .create(); + +for (int i = 0; i < 10; i++) { + producer.send("my-message".getBytes()); +} + +producer.close(); +pulsarClient.close(); + +``` + +7. 
Sample Consumer Application: + +```java + +class RawFileKeyReader implements CryptoKeyReader { + + String publicKeyFile = ""; + String privateKeyFile = ""; + + RawFileKeyReader(String pubKeyFile, String privKeyFile) { + publicKeyFile = pubKeyFile; + privateKeyFile = privKeyFile; + } + + @Override + public EncryptionKeyInfo getPublicKey(String keyName, Map keyMeta) { + EncryptionKeyInfo keyInfo = new EncryptionKeyInfo(); + try { + keyInfo.setKey(Files.readAllBytes(Paths.get(publicKeyFile))); + } catch (IOException e) { + System.out.println("ERROR: Failed to read public key from file " + publicKeyFile); + e.printStackTrace(); + } + return keyInfo; + } + + @Override + public EncryptionKeyInfo getPrivateKey(String keyName, Map keyMeta) { + EncryptionKeyInfo keyInfo = new EncryptionKeyInfo(); + try { + keyInfo.setKey(Files.readAllBytes(Paths.get(privateKeyFile))); + } catch (IOException e) { + System.out.println("ERROR: Failed to read private key from file " + privateKeyFile); + e.printStackTrace(); + } + return keyInfo; + } +} + +PulsarClient pulsarClient = PulsarClient.builder().serviceUrl("pulsar://localhost:6650").build(); +Consumer consumer = pulsarClient.newConsumer() + .topic("persistent://my-tenant/my-ns/my-topic") + .subscriptionName("my-subscriber-name") + .cryptoKeyReader(new RawFileKeyReader("test_ecdsa_pubkey.pem", "test_ecdsa_privkey.pem")) + .subscribe(); +Message msg = null; + +for (int i = 0; i < 10; i++) { + msg = consumer.receive(); + // do something + System.out.println("Received: " + new String(msg.getData())); +} + +// Acknowledge the consumption of all messages at once +consumer.acknowledgeCumulative(msg); +consumer.close(); +pulsarClient.close(); + +``` + +## Key rotation +Pulsar generates a new AES data key every 4 hours or after publishing a certain number of messages. A producer fetches the asymmetric public key every 4 hours by calling CryptoKeyReader.getPublicKey() to retrieve the latest version. 
+ +## Enable encryption at the producer application +If you produce messages that are consumed across application boundaries, you need to ensure that consumers in other applications have access to one of the private keys that can decrypt the messages. You can do this in two ways: +1. The consumer application provides you with access to its public key, which you add to your producer keys. +2. You grant access to one of the private keys from the pairs that the producer uses. + +When producers want to encrypt the messages with multiple keys, producers add all such keys to the config. A consumer can decrypt the message as long as the consumer has access to at least one of the keys. + +If you need to encrypt the messages using 2 keys (myapp.messagekey1 and myapp.messagekey2), refer to the following example. + +```java + +PulsarClient.newProducer().addEncryptionKey("myapp.messagekey1").addEncryptionKey("myapp.messagekey2"); + +``` + +## Decrypt encrypted messages at the consumer application +Consumers require access to one of the private keys to decrypt messages that the producer produces. If you want to receive encrypted messages, create a public and private key pair and give your public key to the producer application to encrypt messages using your public key. + +## Handle failures +* Producer/Consumer loses access to the key + * Producer action fails indicating the cause of the failure. The application has the option to proceed with sending unencrypted messages in such cases. Call PulsarClient.newProducer().cryptoFailureAction(ProducerCryptoFailureAction) to control the producer behavior. The default behavior is to fail the request. + * If consumption fails due to decryption failure or missing keys in the consumer, the application has the option to consume the encrypted message or discard it. Call PulsarClient.newConsumer().cryptoFailureAction(ConsumerCryptoFailureAction) to control the consumer behavior. The default behavior is to fail the request. 
Application is never able to decrypt the messages if the private key is permanently lost. +* Batch messaging + * If decryption fails and the message contains batch messages, client is not able to retrieve individual messages in the batch, hence message consumption fails even if cryptoFailureAction() is set to ConsumerCryptoFailureAction.CONSUME. +* If decryption fails, the message consumption stops and application notices backlog growth in addition to decryption failure messages in the client log. If application does not have access to the private key to decrypt the message, the only option is to skip or discard backlogged messages. diff --git a/site2/website-next/versioned_docs/version-2.7.1/security-extending.md b/site2/website-next/versioned_docs/version-2.7.1/security-extending.md new file mode 100644 index 0000000000000..57128da01c25d --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-extending.md @@ -0,0 +1,211 @@ +--- +id: security-extending +title: Extending Authentication and Authorization in Pulsar +sidebar_label: "Extending" +original_id: security-extending +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +Pulsar provides a way to use custom authentication and authorization mechanisms. + +## Authentication + +Pulsar supports mutual TLS and Athenz authentication plugins. For how to use these authentication plugins, you can refer to the description in [Security](security-overview). + +You can use a custom authentication mechanism by providing the implementation in the form of two plugins. One plugin is for the Client library and the other plugin is for the Pulsar Proxy and/or Pulsar Broker to validate the credentials. + +### Client authentication plugin + +For the client library, you need to implement `org.apache.pulsar.client.api.Authentication`. 
By entering the command below you can pass this class when you create a Pulsar client: + +```java + +PulsarClient client = PulsarClient.builder() + .serviceUrl("pulsar://localhost:6650") + .authentication(new MyAuthentication()) + .build(); + +``` + +You can use 2 interfaces to implement on the client side: + * `Authentication` -> http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/Authentication.html + * `AuthenticationDataProvider` -> http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/AuthenticationDataProvider.html + + +This in turn needs to provide the client credentials in the form of `org.apache.pulsar.client.api.AuthenticationDataProvider`. This leaves the chance to return different kinds of authentication token for different types of connection or by passing a certificate chain to use for TLS. + + +You can find examples for client authentication providers at: + + * Mutual TLS Auth -- https://github.com/apache/pulsar/tree/master/pulsar-client/src/main/java/org/apache/pulsar/client/impl/auth + * Athenz -- https://github.com/apache/pulsar/tree/master/pulsar-client-auth-athenz/src/main/java/org/apache/pulsar/client/impl/auth + +### Proxy/Broker authentication plugin + +On the proxy/broker side, you need to configure the corresponding plugin to validate the credentials that the client sends. The Proxy and Broker can support multiple authentication providers at the same time. 
+ +In `conf/broker.conf` you can choose to specify a list of valid providers: + +```properties + +# Authentication provider name list, which is comma separated list of class names +authenticationProviders= + +``` + +To implement `org.apache.pulsar.broker.authentication.AuthenticationProvider` on one single interface: + +```java + +/** + * Provider of authentication mechanism + */ +public interface AuthenticationProvider extends Closeable { + + /** + * Perform initialization for the authentication provider + * + * @param config + * broker config object + * @throws IOException + * if the initialization fails + */ + void initialize(ServiceConfiguration config) throws IOException; + + /** + * @return the authentication method name supported by this provider + */ + String getAuthMethodName(); + + /** + * Validate the authentication for the given credentials with the specified authentication data + * + * @param authData + * provider specific authentication data + * @return the "role" string for the authenticated connection, if the authentication was successful + * @throws AuthenticationException + * if the credentials are not valid + */ + String authenticate(AuthenticationDataSource authData) throws AuthenticationException; + +} + +``` + +The following is the example for Broker authentication plugins: + + * Mutual TLS -- https://github.com/apache/pulsar/blob/master/pulsar-broker-common/src/main/java/org/apache/pulsar/broker/authentication/AuthenticationProviderTls.java + * Athenz -- https://github.com/apache/pulsar/blob/master/pulsar-broker-auth-athenz/src/main/java/org/apache/pulsar/broker/authentication/AuthenticationProviderAthenz.java + +## Authorization + +Authorization is the operation that checks whether a particular "role" or "principal" has permission to perform a certain operation. + +By default, you can use the embedded authorization provider provided by Pulsar. You can also configure a different authorization provider through a plugin. 
+Note that although the Authentication plugin is designed for use in both the Proxy and Broker, +the Authorization plugin is designed only for use on the Broker however the Proxy does perform some simple Authorization checks of Roles if authorization is enabled. + +To provide a custom provider, you need to implement the `org.apache.pulsar.broker.authorization.AuthorizationProvider` interface, put this class in the Pulsar broker classpath and configure the class in `conf/broker.conf`: + + ```properties + + # Authorization provider fully qualified class-name + authorizationProvider=org.apache.pulsar.broker.authorization.PulsarAuthorizationProvider + + ``` + +```java + +/** + * Provider of authorization mechanism + */ +public interface AuthorizationProvider extends Closeable { + + /** + * Perform initialization for the authorization provider + * + * @param conf + * broker config object + * @param configCache + * pulsar zk configuration cache service + * @throws IOException + * if the initialization fails + */ + void initialize(ServiceConfiguration conf, ConfigurationCacheService configCache) throws IOException; + + /** + * Check if the specified role has permission to send messages to the specified fully qualified topic name. + * + * @param topicName + * the fully qualified topic name associated with the topic. + * @param role + * the app id used to send messages to the topic. + */ + CompletableFuture canProduceAsync(TopicName topicName, String role, + AuthenticationDataSource authenticationData); + + /** + * Check if the specified role has permission to receive messages from the specified fully qualified topic name. + * + * @param topicName + * the fully qualified topic name associated with the topic. + * @param role + * the app id used to receive messages from the topic. 
+ * @param subscription + * the subscription name defined by the client + */ + CompletableFuture canConsumeAsync(TopicName topicName, String role, + AuthenticationDataSource authenticationData, String subscription); + + /** + * Check whether the specified role can perform a lookup for the specified topic. + * + * For that the caller needs to have producer or consumer permission. + * + * @param topicName + * @param role + * @return + * @throws Exception + */ + CompletableFuture canLookupAsync(TopicName topicName, String role, + AuthenticationDataSource authenticationData); + + /** + * + * Grant authorization-action permission on a namespace to the given client + * + * @param namespace + * @param actions + * @param role + * @param authDataJson + * additional authdata in json format + * @return CompletableFuture + * @completesWith
+ * IllegalArgumentException when namespace not found
+ * IllegalStateException when failed to grant permission + */ + CompletableFuture grantPermissionAsync(NamespaceName namespace, Set actions, String role, + String authDataJson); + + /** + * Grant authorization-action permission on a topic to the given client + * + * @param topicName + * @param role + * @param authDataJson + * additional authdata in json format + * @return CompletableFuture + * @completesWith
+ * IllegalArgumentException when namespace not found
+ * IllegalStateException when failed to grant permission + */ + CompletableFuture grantPermissionAsync(TopicName topicName, Set actions, String role, + String authDataJson); + +} + +``` + diff --git a/site2/website-next/versioned_docs/version-2.7.1/security-jwt.md b/site2/website-next/versioned_docs/version-2.7.1/security-jwt.md new file mode 100644 index 0000000000000..006c6642c8b36 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-jwt.md @@ -0,0 +1,344 @@ +--- +id: security-jwt +title: Client authentication using tokens based on JSON Web Tokens +sidebar_label: "Authentication using JWT" +original_id: security-jwt +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +## Token authentication overview + +Pulsar supports authenticating clients using security tokens that are based on [JSON Web Tokens](https://jwt.io/introduction/) ([RFC-7519](https://tools.ietf.org/html/rfc7519)). + +You can use tokens to identify a Pulsar client and associate with some "principal" (or "role") that +is permitted to do some actions (eg: publish to a topic or consume from a topic). + +A user typically gets a token string from the administrator (or some automated service). + +The compact representation of a signed JWT is a string that looks like as the following: + +``` + +eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJKb2UifQ.ipevRNuRP6HflG8cFKnmUPtypruRC4fb1DWtoLL62SY + +``` + +Application specifies the token when you create the client instance. An alternative is to pass a "token supplier" (a function that returns the token when the client library needs one). + +> #### Always use TLS transport encryption +> Sending a token is equivalent to sending a password over the wire. You had better use TLS encryption all the time when you connect to the Pulsar service. See +> [Transport Encryption using TLS](security-tls-transport) for more details. 
+ +### CLI Tools + +[Command-line tools](reference-cli-tools.md) like [`pulsar-admin`](reference-pulsar-admin), [`pulsar-perf`](reference-cli-tools.md#pulsar-perf), and [`pulsar-client`](reference-cli-tools.md#pulsar-client) use the `conf/client.conf` config file in a Pulsar installation. + +You need to add the following parameters to that file to use the token authentication with CLI tools of Pulsar: + +```properties + +webServiceUrl=http://broker.example.com:8080/ +brokerServiceUrl=pulsar://broker.example.com:6650/ +authPlugin=org.apache.pulsar.client.impl.auth.AuthenticationToken +authParams=token:eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJKb2UifQ.ipevRNuRP6HflG8cFKnmUPtypruRC4fb1DWtoLL62SY + +``` + +The token string can also be read from a file, for example: + +``` + +authParams=file:///path/to/token/file + +``` + +### Pulsar client + +You can use tokens to authenticate the following Pulsar clients. + + + + +```java + +PulsarClient client = PulsarClient.builder() + .serviceUrl("pulsar://broker.example.com:6650/") + .authentication( + AuthenticationFactory.token("eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJKb2UifQ.ipevRNuRP6HflG8cFKnmUPtypruRC4fb1DWtoLL62SY")) + .build(); + +``` + +Similarly, you can also pass a `Supplier`: + +```java + +PulsarClient client = PulsarClient.builder() + .serviceUrl("pulsar://broker.example.com:6650/") + .authentication( + AuthenticationFactory.token(() -> { + // Read token from custom source + return readToken(); + })) + .build(); + +``` + + + + +```python + +from pulsar import Client, AuthenticationToken + +client = Client('pulsar://broker.example.com:6650/', + authentication=AuthenticationToken('eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJKb2UifQ.ipevRNuRP6HflG8cFKnmUPtypruRC4fb1DWtoLL62SY')) + +``` + +Alternatively, you can also pass a `Supplier`: + +```python + +def read_token(): + with open('/path/to/token.txt') as tf: + return tf.read().strip() + +client = Client('pulsar://broker.example.com:6650/', + authentication=AuthenticationToken(read_token)) + +``` + + + 
+ +```go + +client, err := NewClient(ClientOptions{ + URL: "pulsar://localhost:6650", + Authentication: NewAuthenticationToken("eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJKb2UifQ.ipevRNuRP6HflG8cFKnmUPtypruRC4fb1DWtoLL62SY"), +}) + +``` + +Similarly, you can also pass a `Supplier`: + +```go + +client, err := NewClient(ClientOptions{ + URL: "pulsar://localhost:6650", + Authentication: NewAuthenticationTokenSupplier(func () string { + // Read token from custom source + return readToken() + }), +}) + +``` + + + + +```c++ + +#include <pulsar/Client.h> + +pulsar::ClientConfiguration config; +config.setAuth(pulsar::AuthToken::createWithToken("eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJKb2UifQ.ipevRNuRP6HflG8cFKnmUPtypruRC4fb1DWtoLL62SY")); + +pulsar::Client client("pulsar://broker.example.com:6650/", config); + +``` + + + + +```c# + +var client = PulsarClient.Builder() + .AuthenticateUsingToken("eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJKb2UifQ.ipevRNuRP6HflG8cFKnmUPtypruRC4fb1DWtoLL62SY") + .Build(); + +``` + + + + + +## Enable token authentication + +On how to enable token authentication on a Pulsar cluster, you can refer to the guide below. + +JWT supports two different kinds of keys in order to generate and validate the tokens: + + * Symmetric : + - You can use a single ***Secret*** key to generate and validate tokens. + * Asymmetric: A pair of keys consists of the Private key and the Public key. + - You can use the ***Private*** key to generate tokens. + - You can use the ***Public*** key to validate tokens. + +### Create a secret key + +When you use a secret key, the administrator creates the key and uses the key to generate the client tokens. You can also configure this key to brokers in order to validate the clients. + +The output file is generated in the root of your Pulsar installation directory. You can also provide an absolute path for the output file using the command below. + +```shell + +$ bin/pulsar tokens create-secret-key --output my-secret.key + +``` + +Enter this command to generate a base64-encoded secret key. 
+ +```shell + +$ bin/pulsar tokens create-secret-key --output /opt/my-secret.key --base64 + +``` + +### Create a key pair + +With Public and Private keys, you need to create a pair of keys. Pulsar supports all algorithms that the Java JWT library (shown [here](https://github.com/jwtk/jjwt#signature-algorithms-keys)) supports. + +Output file is generated in the root of your Pulsar installation directory. You can also provide absolute path for the output file using the command below. + +```shell + +$ bin/pulsar tokens create-key-pair --output-private-key my-private.key --output-public-key my-public.key + +``` + + * Store `my-private.key` in a safe location and only administrator can use `my-private.key` to generate new tokens. + * `my-public.key` is distributed to all Pulsar brokers. You can publicly share this file without any security concern. + +### Generate tokens + +A token is the credential associated with a user. The association is done through the "principal" or "role". In the case of JWT tokens, this field is typically referred as **subject**, though they are exactly the same concept. + +Then, you need to use this command to require the generated token to have a **subject** field set. + +```shell + +$ bin/pulsar tokens create --secret-key file:///path/to/my-secret.key \ + --subject test-user + +``` + +This command prints the token string on stdout. + +Similarly, you can create a token by passing the "private" key using the command below: + +```shell + +$ bin/pulsar tokens create --private-key file:///path/to/my-private.key \ + --subject test-user + +``` + +Finally, you can enter the following command to create a token with a pre-defined TTL. And then the token is automatically invalidated. + +```shell + +$ bin/pulsar tokens create --secret-key file:///path/to/my-secret.key \ + --subject test-user \ + --expiry-time 1y + +``` + +### Authorization + +The token itself does not have any permission associated. 
The authorization engine determines whether the token should have permissions or not. Once you have created the token, you can grant permission for this token to do certain actions. The following is an example. + +```shell + +$ bin/pulsar-admin namespaces grant-permission my-tenant/my-namespace \ + --role test-user \ + --actions produce,consume + +``` + +### Enable token authentication on Brokers + +To configure brokers to authenticate clients, add the following parameters to `broker.conf`: + +```properties + +# Configuration to enable authentication and authorization +authenticationEnabled=true +authorizationEnabled=true +authenticationProviders=org.apache.pulsar.broker.authentication.AuthenticationProviderToken + +# Authentication settings of the broker itself. Used when the broker connects to other brokers, either in same or other clusters +brokerClientTlsEnabled=true +brokerClientAuthenticationPlugin=org.apache.pulsar.client.impl.auth.AuthenticationToken +brokerClientAuthenticationParameters={"token":"eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ0ZXN0LXVzZXIifQ.9OHgE9ZUDeBTZs7nSMEFIuGNEX18FLR3qvy8mqxSxXw"} +# Or, alternatively, read token from file +# brokerClientAuthenticationParameters={"file":"///path/to/proxy-token.txt"} +brokerClientTrustCertsFilePath=/path/my-ca/certs/ca.cert.pem + +# If this flag is set then the broker authenticates the original Auth data +# else it just accepts the originalPrincipal and authorizes it (if required). +authenticateOriginalAuthData=true + +# If using secret key +tokenSecretKey=file:///path/to/secret.key +# The key can also be passed inline: +# tokenSecretKey=data:;base64,FLFyW0oLJ2Fi22KKCm21J18mbAdztfSHN/lAT5ucEKU= + +# If using public/private +# tokenPublicKey=file:///path/to/public.key + +``` + +### Enable token authentication on Proxies + +To configure proxies to authenticate clients, add the following parameters to `proxy.conf`: + +The proxy uses its own token when connecting to brokers. 
You need to configure the role token for this key pair in the `proxyRoles` of the brokers. For more details, see the [authorization guide](security-authorization). + +```properties + +# For clients connecting to the proxy +authenticationEnabled=true +authorizationEnabled=true +authenticationProviders=org.apache.pulsar.broker.authentication.AuthenticationProviderToken +tokenSecretKey=file:///path/to/secret.key + +# For the proxy to connect to brokers +brokerClientAuthenticationPlugin=org.apache.pulsar.client.impl.auth.AuthenticationToken +brokerClientAuthenticationParameters={"token":"eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ0ZXN0LXVzZXIifQ.9OHgE9ZUDeBTZs7nSMEFIuGNEX18FLR3qvy8mqxSxXw"} +# Or, alternatively, read token from file +# brokerClientAuthenticationParameters={"file":"///path/to/proxy-token.txt"} + +# Whether client authorization credentials are forwarded to the broker for re-authorization. +# Authentication must be enabled via authenticationEnabled=true for this to take effect. +forwardAuthorizationCredentials=true + +``` + diff --git a/site2/website-next/versioned_docs/version-2.7.1/security-kerberos.md b/site2/website-next/versioned_docs/version-2.7.1/security-kerberos.md new file mode 100644 index 0000000000000..32e12f1b6988f --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-kerberos.md @@ -0,0 +1,447 @@ +--- +id: security-kerberos +title: Authentication using Kerberos +sidebar_label: "Authentication using Kerberos" +original_id: security-kerberos +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +[Kerberos](https://web.mit.edu/kerberos/) is a network authentication protocol. By using secret-key cryptography, [Kerberos](https://web.mit.edu/kerberos/) is designed to provide strong authentication for client applications and server applications. + +In Pulsar, you can use Kerberos with [SASL](https://en.wikipedia.org/wiki/Simple_Authentication_and_Security_Layer) as a choice for authentication. 
And Pulsar uses the [Java Authentication and Authorization Service (JAAS)](https://en.wikipedia.org/wiki/Java_Authentication_and_Authorization_Service) for SASL configuration. You need to provide JAAS configurations for Kerberos authentication. + +This document introduces how to configure `Kerberos` with `SASL` between Pulsar clients and brokers and how to configure Kerberos for Pulsar proxy in detail. + +## Configuration for Kerberos between Client and Broker + +### Prerequisites + +To begin, you need to set up (or already have) a [Key Distribution Center (KDC)](https://en.wikipedia.org/wiki/Key_distribution_center). You also need to configure and run the [Key Distribution Center (KDC)](https://en.wikipedia.org/wiki/Key_distribution_center) in advance. + +If your organization already uses a Kerberos server (for example, by using `Active Directory`), you do not have to install a new server for Pulsar. If your organization does not use a Kerberos server, you need to install one. Your Linux vendor might have packages for `Kerberos`. For instructions on how to install and configure Kerberos, refer to [Ubuntu](https://help.ubuntu.com/community/Kerberos), +[Redhat](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Managing_Smart_Cards/installing-kerberos.html). + +Note that if you use Oracle Java, you need to download JCE policy files for your Java version and copy them to the `$JAVA_HOME/jre/lib/security` directory. + +#### Kerberos principals + +If you use the existing Kerberos system, ask your Kerberos administrator for a principal for each broker in your cluster and for every operating system user that accesses Pulsar with Kerberos authentication (via clients and tools). 
+ +If you have installed your own Kerberos system, you can create these principals with the following commands: + +```shell + +### add Principals for broker +sudo /usr/sbin/kadmin.local -q 'addprinc -randkey broker/{hostname}@{REALM}' +sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{broker-keytabname}.keytab broker/{hostname}@{REALM}" +### add Principals for client +sudo /usr/sbin/kadmin.local -q 'addprinc -randkey client/{hostname}@{REALM}' +sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{client-keytabname}.keytab client/{hostname}@{REALM}" + +``` + +Note that *Kerberos* requires that all your hosts can be resolved with their FQDNs. + +The first part of Broker principal (for example, `broker` in `broker/{hostname}@{REALM}`) is the `serverType` of each host. The suggested values of `serverType` are `broker` (host machine runs service Pulsar Broker) and `proxy` (host machine runs service Pulsar Proxy). + +#### Configure how to connect to KDC + +You need to enter the command below to specify the path to the `krb5.conf` file for the client side and the broker side. The content of `krb5.conf` file indicates the default Realm and KDC information. See [JDK’s Kerberos Requirements](https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html) for more details. + +```shell + +-Djava.security.krb5.conf=/etc/pulsar/krb5.conf + +``` + +Here is an example of the krb5.conf file: + +In the configuration file, `EXAMPLE.COM` is the default realm; `kdc = localhost:62037` is the kdc server url for realm `EXAMPLE.COM `: + +``` + +[libdefaults] + default_realm = EXAMPLE.COM + +[realms] + EXAMPLE.COM = { + kdc = localhost:62037 + } + +``` + +Usually machines configured with kerberos already have a system wide configuration and this configuration is optional. + +#### JAAS configuration file + +You need JAAS configuration file for the client side and the broker side. 
The JAAS configuration file provides the section of information that is used to connect to the KDC. Here is an example named `pulsar_jaas.conf`: + +``` + + PulsarBroker { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + useTicketCache=false + keyTab="/etc/security/keytabs/pulsarbroker.keytab" + principal="broker/localhost@EXAMPLE.COM"; +}; + + PulsarClient { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + useTicketCache=false + keyTab="/etc/security/keytabs/pulsarclient.keytab" + principal="client/localhost@EXAMPLE.COM"; +}; + +``` + +You need to set the `JAAS` configuration file path as a JVM parameter for the client and the broker. For example: + +```shell + + -Djava.security.auth.login.config=/etc/pulsar/pulsar_jaas.conf + +``` + +In the `pulsar_jaas.conf` file above: + +1. `PulsarBroker` is a section name in the JAAS file that each broker uses. This section tells the broker which principal to use inside Kerberos and the location of the keytab where the principal is stored. `PulsarBroker` allows the broker to use the keytab specified in this section. +2. `PulsarClient` is a section name in the JAAS file that each client uses. This section tells the client which principal to use inside Kerberos and the location of the keytab where the principal is stored. `PulsarClient` allows the client to use the keytab specified in this section. + The following example also reuses this `PulsarClient` section in both the Pulsar internal admin configuration and in CLI command of `bin/pulsar-client`, `bin/pulsar-perf` and `bin/pulsar-admin`. You can also add different sections for different use cases. + +You can have 2 separate JAAS configuration files: +* the file for a broker that has sections of both `PulsarBroker` and `PulsarClient`; +* the file for a client that only has a `PulsarClient` section. 
+ + +### Kerberos configuration for Brokers + +#### Configure the `broker.conf` file + + In the `broker.conf` file, set Kerberos related configurations. + + - Set `authenticationEnabled` to `true`; + - Set `authenticationProviders` to choose `AuthenticationProviderSasl`; + - Set `saslJaasClientAllowedIds` regex for principal that is allowed to connect to broker; + - Set `saslJaasBrokerSectionName` that corresponds to the section in JAAS configuration file for broker; + + To make Pulsar internal admin client work properly, you need to set the configuration in the `broker.conf` file as below: + - Set `brokerClientAuthenticationPlugin` to client plugin `AuthenticationSasl`; + - Set `brokerClientAuthenticationParameters` to value in JSON string `{"saslJaasClientSectionName":"PulsarClient", "serverType":"broker"}`, in which `PulsarClient` is the section name in the `pulsar_jaas.conf` file, and `"serverType":"broker"` indicates that the internal admin client connects to a Pulsar Broker; + + Here is an example: + +``` + +authenticationEnabled=true +authenticationProviders=org.apache.pulsar.broker.authentication.AuthenticationProviderSasl +saslJaasClientAllowedIds=.*client.* +saslJaasBrokerSectionName=PulsarBroker + +## Authentication settings of the broker itself. Used when the broker connects to other brokers +brokerClientAuthenticationPlugin=org.apache.pulsar.client.impl.auth.AuthenticationSasl +brokerClientAuthenticationParameters={"saslJaasClientSectionName":"PulsarClient", "serverType":"broker"} + +``` + +#### Set Broker JVM parameter + + Set JVM parameters for JAAS configuration file and krb5 configuration file with additional options. 
+ +```shell + + -Djava.security.auth.login.config=/etc/pulsar/pulsar_jaas.conf -Djava.security.krb5.conf=/etc/pulsar/krb5.conf + +``` + +You can add this at the end of `PULSAR_EXTRA_OPTS` in the file [`pulsar_env.sh`](https://github.com/apache/pulsar/blob/master/conf/pulsar_env.sh) + +You must ensure that the operating system user who starts broker can reach the keytabs configured in the `pulsar_jaas.conf` file and kdc server in the `krb5.conf` file. + +### Kerberos configuration for clients + +#### Java Client and Java Admin Client + +In client application, include `pulsar-client-auth-sasl` in your project dependency. + +``` + + + org.apache.pulsar + pulsar-client-auth-sasl + ${pulsar.version} + + +``` + +Configure the authentication type to use `AuthenticationSasl`, and also provide the authentication parameters to it. + +You need 2 parameters: +- `saslJaasClientSectionName`. This parameter corresponds to the section in JAAS configuration file for client; +- `serverType`. This parameter stands for whether this client connects to broker or proxy. And client uses this parameter to know which server side principal should be used. + +When you authenticate between client and broker with the setting in above JAAS configuration file, we need to set `saslJaasClientSectionName` to `PulsarClient` and set `serverType` to `broker`. 
+ +The following is an example of creating a Java client: + + ```java + + System.setProperty("java.security.auth.login.config", "/etc/pulsar/pulsar_jaas.conf"); + System.setProperty("java.security.krb5.conf", "/etc/pulsar/krb5.conf"); + + Map authParams = Maps.newHashMap(); + authParams.put("saslJaasClientSectionName", "PulsarClient"); + authParams.put("serverType", "broker"); + + Authentication saslAuth = AuthenticationFactory + .create(org.apache.pulsar.client.impl.auth.AuthenticationSasl.class.getName(), authParams); + + PulsarClient client = PulsarClient.builder() + .serviceUrl("pulsar://my-broker.com:6650") + .authentication(saslAuth) + .build(); + + ``` + +> The first two lines in the example above are hard coded, alternatively, you can set additional JVM parameters for JAAS and krb5 configuration file when you run the application like below: + +``` + +java -cp -Djava.security.auth.login.config=/etc/pulsar/pulsar_jaas.conf -Djava.security.krb5.conf=/etc/pulsar/krb5.conf $APP-jar-with-dependencies.jar $CLASSNAME + +``` + +You must ensure that the operating system user who starts pulsar client can reach the keytabs configured in the `pulsar_jaas.conf` file and kdc server in the `krb5.conf` file. + +#### Configure CLI tools + +If you use a command-line tool (such as `bin/pulsar-client`, `bin/pulsar-perf` and `bin/pulsar-admin`), you need to perform the following steps: + +Step 1. Enter the command below to configure your `client.conf`. + +```shell + +authPlugin=org.apache.pulsar.client.impl.auth.AuthenticationSasl +authParams={"saslJaasClientSectionName":"PulsarClient", "serverType":"broker"} + +``` + +Step 2. Enter the command below to set JVM parameters for JAAS configuration file and krb5 configuration file with additional options. 
+ +```shell + + -Djava.security.auth.login.config=/etc/pulsar/pulsar_jaas.conf -Djava.security.krb5.conf=/etc/pulsar/krb5.conf + +``` + +You can add this at the end of `PULSAR_EXTRA_OPTS` in the file [`pulsar_tools_env.sh`](https://github.com/apache/pulsar/blob/master/conf/pulsar_tools_env.sh), +or add this line `OPTS="$OPTS -Djava.security.auth.login.config=/etc/pulsar/pulsar_jaas.conf -Djava.security.krb5.conf=/etc/pulsar/krb5.conf "` directly to the CLI tool script. + +The meaning of configurations is the same as the meaning of configurations in Java client section. + +## Kerberos configuration for working with Pulsar Proxy + +With the above configuration, client and broker can do authentication using Kerberos. + +A client that connects to Pulsar Proxy is a little different. Pulsar Proxy (as a SASL Server in Kerberos) authenticates Client (as a SASL client in Kerberos) first; and then Pulsar broker authenticates Pulsar Proxy. + +Now in comparison with the above configuration between client and broker, we show you how to configure Pulsar Proxy as follows. + +### Create principal for Pulsar Proxy in Kerberos + +You need to add new principals for Pulsar Proxy comparing with the above configuration. If you already have principals for client and broker, you only need to add the proxy principal here. 
+ +```shell + +### add Principals for Pulsar Proxy +sudo /usr/sbin/kadmin.local -q 'addprinc -randkey proxy/{hostname}@{REALM}' +sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{proxy-keytabname}.keytab proxy/{hostname}@{REALM}" +### add Principals for broker +sudo /usr/sbin/kadmin.local -q 'addprinc -randkey broker/{hostname}@{REALM}' +sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{broker-keytabname}.keytab broker/{hostname}@{REALM}" +### add Principals for client +sudo /usr/sbin/kadmin.local -q 'addprinc -randkey client/{hostname}@{REALM}' +sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{client-keytabname}.keytab client/{hostname}@{REALM}" + +``` + +### Add a section in JAAS configuration file for Pulsar Proxy + +In comparison with the above configuration, add a new section for Pulsar Proxy in JAAS configuration file. + +Here is an example named `pulsar_jaas.conf`: + +``` + + PulsarBroker { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + useTicketCache=false + keyTab="/etc/security/keytabs/pulsarbroker.keytab" + principal="broker/localhost@EXAMPLE.COM"; +}; + + PulsarProxy { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + useTicketCache=false + keyTab="/etc/security/keytabs/pulsarproxy.keytab" + principal="proxy/localhost@EXAMPLE.COM"; +}; + + PulsarClient { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + useTicketCache=false + keyTab="/etc/security/keytabs/pulsarclient.keytab" + principal="client/localhost@EXAMPLE.COM"; +}; + +``` + +### Proxy client configuration + +Pulsar client configuration is similar with client and broker configuration, except that you need to set `serverType` to `proxy` instead of `broker`, for the reason that you need to do the Kerberos authentication between client and proxy. 
+ + ```java + + System.setProperty("java.security.auth.login.config", "/etc/pulsar/pulsar_jaas.conf"); + System.setProperty("java.security.krb5.conf", "/etc/pulsar/krb5.conf"); + + Map authParams = Maps.newHashMap(); + authParams.put("saslJaasClientSectionName", "PulsarClient"); + authParams.put("serverType", "proxy"); // ** here is the different ** + + Authentication saslAuth = AuthenticationFactory + .create(org.apache.pulsar.client.impl.auth.AuthenticationSasl.class.getName(), authParams); + + PulsarClient client = PulsarClient.builder() + .serviceUrl("pulsar://my-broker.com:6650") + .authentication(saslAuth) + .build(); + + ``` + +> The first two lines in the example above are hard coded, alternatively, you can set additional JVM parameters for JAAS and krb5 configuration file when you run the application like below: + +``` + +java -cp -Djava.security.auth.login.config=/etc/pulsar/pulsar_jaas.conf -Djava.security.krb5.conf=/etc/pulsar/krb5.conf $APP-jar-with-dependencies.jar $CLASSNAME + +``` + +### Kerberos configuration for Pulsar proxy service + +In the `proxy.conf` file, set Kerberos related configuration. Here is an example: + +```shell + +## related to authenticate client. +authenticationEnabled=true +authenticationProviders=org.apache.pulsar.broker.authentication.AuthenticationProviderSasl +saslJaasClientAllowedIds=.*client.* +saslJaasBrokerSectionName=PulsarProxy + +## related to be authenticated by broker +brokerClientAuthenticationPlugin=org.apache.pulsar.client.impl.auth.AuthenticationSasl +brokerClientAuthenticationParameters={"saslJaasClientSectionName":"PulsarProxy", "serverType":"broker"} +forwardAuthorizationCredentials=true + +``` + +The first part relates to authenticating between client and Pulsar Proxy. In this phase, client works as SASL client, while Pulsar Proxy works as SASL server. + +The second part relates to authenticating between Pulsar Proxy and Pulsar Broker. 
In this phase, Pulsar Proxy works as SASL client, while Pulsar Broker works as SASL server. + +### Broker side configuration. + +The broker side configuration file is the same as the `broker.conf` shown above; you do not need any special configuration for Pulsar Proxy. + +``` + +authenticationEnabled=true +authenticationProviders=org.apache.pulsar.broker.authentication.AuthenticationProviderSasl +saslJaasClientAllowedIds=.*client.* +saslJaasBrokerSectionName=PulsarBroker + +``` + +## Regarding authorization and role token + +For Kerberos authentication, we usually use the authenticated principal as the role token for Pulsar authorization. For more information about authorization in Pulsar, see [security authorization](security-authorization). + +If you enable `authorizationEnabled`, you need to set `superUserRoles` in `broker.conf` to the name registered in the KDC. + +For example: + +```bash + +superUserRoles=client/{clientIp}@EXAMPLE.COM + +``` + +## Regarding authentication between ZooKeeper and Broker + +Pulsar Broker acts as a Kerberos client when you authenticate with ZooKeeper. According to [ZooKeeper document](https://cwiki.apache.org/confluence/display/ZOOKEEPER/Client-Server+mutual+authentication), you need these settings in `conf/zookeeper.conf`: + +``` + +authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider +requireClientAuthScheme=sasl + +``` + +Add the following `Client` section to the `pulsar_jaas.conf` file that Pulsar Broker uses: + +``` + + Client { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + useTicketCache=false + keyTab="/etc/security/keytabs/pulsarbroker.keytab" + principal="broker/localhost@EXAMPLE.COM"; +}; + +``` + +In this setting, the principal of Pulsar Broker and keyTab file indicates the role of Broker when you authenticate with ZooKeeper. 
+ +## Regarding authentication between BookKeeper and Broker + +Pulsar Broker acts as a Kerberos client when you authenticate with Bookie. According to [BookKeeper document](http://bookkeeper.apache.org/docs/latest/security/sasl/), you need to add `bookkeeperClientAuthenticationPlugin` parameter in `broker.conf`: + +``` + +bookkeeperClientAuthenticationPlugin=org.apache.bookkeeper.sasl.SASLClientProviderFactory + +``` + +In this setting, `SASLClientProviderFactory` creates a BookKeeper SASL client in a Broker, and the Broker uses the created SASL client to authenticate with a Bookie node. + +Enter the following commands to add a section of `BookKeeper` configurations in the `pulsar_jaas.conf` that Pulsar Broker uses: + +``` + + BookKeeper { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + useTicketCache=false + keyTab="/etc/security/keytabs/pulsarbroker.keytab" + principal="broker/localhost@EXAMPLE.COM"; +}; + +``` + +In this setting, the principal of Pulsar Broker and keyTab file indicates the role of Broker when you authenticate with Bookie. diff --git a/site2/website-next/versioned_docs/version-2.7.1/security-oauth2.md b/site2/website-next/versioned_docs/version-2.7.1/security-oauth2.md new file mode 100644 index 0000000000000..3596302d59834 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-oauth2.md @@ -0,0 +1,231 @@ +--- +id: security-oauth2 +title: Client authentication using OAuth 2.0 access tokens +sidebar_label: "Authentication using OAuth 2.0 access tokens" +original_id: security-oauth2 +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +Pulsar supports authenticating clients using OAuth 2.0 access tokens. You can use OAuth 2.0 access tokens to identify a Pulsar client and associate the Pulsar client with some "principal" (or "role"), which is permitted to do some actions, such as publishing messages to a topic or consume messages from a topic. 
+ +This module is used to support the Pulsar client authentication plugin for OAuth 2.0. After communicating with the Oauth 2.0 server, the Pulsar client gets an `access token` from the Oauth 2.0 server, and passes this `access token` to the Pulsar broker to do the authentication. The broker can use the `org.apache.pulsar.broker.authentication.AuthenticationProviderToken`. Or, you can add your own `AuthenticationProvider` to make it with this module. + +## Authentication provider configuration + +This library allows you to authenticate the Pulsar client by using an access token that is obtained from an OAuth 2.0 authorization service, which acts as a _token issuer_. + +### Authentication types + +The authentication type determines how to obtain an access token through an OAuth 2.0 authorization flow. + +#### Note +> Currently, the Pulsar Java client only supports the `client_credentials` authentication type . + +#### Client credentials + +The following table lists parameters supported for the `client credentials` authentication type. + +| Parameter | Description | Example | Required or not | +| --- | --- | --- | --- | +| `type` | Oauth 2.0 authentication type. | `client_credentials` (default) | Optional | +| `issuerUrl` | URL of the authentication provider which allows the Pulsar client to obtain an access token | `https://accounts.google.com` | Required | +| `privateKey` | URL to a JSON credentials file | Support the following pattern formats:
  • `file:///path/to/file`
  • `file:/path/to/file`
  • `data:application/json;base64,<base64-encoded value>`
  • | Required | +| `audience` | An OAuth 2.0 "resource server" identifier for the Pulsar cluster | `https://broker.example.com` | Required | + +The credentials file contains service account credentials used with the client authentication type. The following shows an example of a credentials file `credentials_file.json`. + +```json + +{ + "type": "client_credentials", + "client_id": "d9ZyX97q1ef8Cr81WHVC4hFQ64vSlDK3", + "client_secret": "on1uJ...k6F6R", + "client_email": "1234567890-abcdefghijklmnopqrstuvwxyz@developer.gserviceaccount.com", + "issuer_url": "https://accounts.google.com" +} + +``` + +In the above example, the authentication type is set to `client_credentials` by default. And the fields "client_id" and "client_secret" are required. + +### Typical original OAuth2 request mapping + +The following shows a typical original OAuth2 request, which is used to obtain the access token from the OAuth2 server. + +```bash + +curl --request POST \ + --url https://dev-kt-aa9ne.us.auth0.com \ + --header 'content-type: application/json' \ + --data '{ + "client_id":"Xd23RHsUnvUlP7wchjNYOaIfazgeHd9x", + "client_secret":"rT7ps7WY8uhdVuBTKWZkttwLdQotmdEliaM5rLfmgNibvqziZ-g07ZH52N_poGAb", + "audience":"https://dev-kt-aa9ne.us.auth0.com/api/v2/", + "grant_type":"client_credentials"}' + +``` + +In the above example, the mapping relationship is shown as below. + +- The `issuerUrl` parameter in this plugin is mapped to `--url https://dev-kt-aa9ne.us.auth0.com`. +- The `privateKey` file parameter in this plugin should at least contains the `client_id` and `client_secret` fields. +- The `audience` parameter in this plugin is mapped to `"audience":"https://dev-kt-aa9ne.us.auth0.com/api/v2/"`. + +## Client Configuration + +You can use the OAuth2 authentication provider with the following Pulsar clients. + +### Java + +You can use the factory method to configure authentication for Pulsar Java client. 
+ +```java + +String issuerUrl = "https://dev-kt-aa9ne.us.auth0.com"; +String credentialsUrl = "file:///path/to/KeyFile.json"; +String audience = "https://dev-kt-aa9ne.us.auth0.com/api/v2/"; + +PulsarClient client = PulsarClient.builder() + .serviceUrl("pulsar://broker.example.com:6650/") + .authentication( + AuthenticationFactoryOAuth2.clientCredentials(issuerUrl, credentialsUrl, audience)) + .build(); + +``` + +In addition, you can also use the encoded parameters to configure authentication for Pulsar Java client. + +```java + +Authentication auth = AuthenticationFactory + .create(AuthenticationOAuth2.class.getName(), "{"type":"client_credentials","privateKey":"./key/path/..","issuerUrl":"...","audience":"..."}"); +PulsarClient client = PulsarClient.builder() + .serviceUrl("pulsar://broker.example.com:6650/") + .authentication(auth) + .build(); + +``` + +### C++ client + +The C++ client is similar to the Java client. You need to provide parameters of `issuerUrl`, `private_key` (the credentials file path), and the audience. + +```c++ + +#include + +pulsar::ClientConfiguration config; +std::string params = R"({ + "issuer_url": "https://dev-kt-aa9ne.us.auth0.com", + "private_key": "../../pulsar-broker/src/test/resources/authentication/token/cpp_credentials_file.json", + "audience": "https://dev-kt-aa9ne.us.auth0.com/api/v2/"})"; + +config.setAuth(pulsar::AuthOauth2::create(params)); + +pulsar::Client client("pulsar://broker.example.com:6650/", config); + +``` + +### Go client + +To enable OAuth2 authentication in Go client, you need to configure OAuth2 authentication. +This example shows how to configure OAuth2 authentication in Go client. 
+ +```go + +oauth := pulsar.NewAuthenticationOAuth2(map[string]string{ + "type": "client_credentials", + "issuerUrl": "https://dev-kt-aa9ne.us.auth0.com", + "audience": "https://dev-kt-aa9ne.us.auth0.com/api/v2/", + "privateKey": "/path/to/privateKey", + "clientId": "0Xx...Yyxeny", + }) +client, err := pulsar.NewClient(pulsar.ClientOptions{ + URL: "pulsar://my-cluster:6650", + Authentication: oauth, +}) + +``` + +### Python client + +To enable OAuth2 authentication in Python client, you need to configure OAuth2 authentication. +This example shows how to configure OAuth2 authentication in Python client. + +```python + +from pulsar import Client, AuthenticationOauth2 + +params = ''' +{ + "issuer_url": "https://dev-kt-aa9ne.us.auth0.com", + "private_key": "/path/to/privateKey", + "audience": "https://dev-kt-aa9ne.us.auth0.com/api/v2/" +} +''' + +client = Client("pulsar://my-cluster:6650", authentication=AuthenticationOauth2(params)) + +``` + +## CLI configuration + +This section describes how to use Pulsar CLI tools to connect to a cluster through the OAuth2 authentication plugin. + +### pulsar-admin + +This example shows how to use pulsar-admin to connect to a cluster through the OAuth2 authentication plugin. + +```shell script + +bin/pulsar-admin --admin-url https://streamnative.cloud:443 \ +--auth-plugin org.apache.pulsar.client.impl.auth.oauth2.AuthenticationOAuth2 \ +--auth-params '{"privateKey":"file:///path/to/key/file.json", + "issuerUrl":"https://dev-kt-aa9ne.us.auth0.com", + "audience":"https://dev-kt-aa9ne.us.auth0.com/api/v2/"}' \ +tenants list + +``` + +Set the `admin-url` parameter to the Web service URL. A Web service URL is a combination of the protocol, hostname and port ID, such as `http://localhost:8080`. +Set the `privateKey`, `issuerUrl`, and `audience` parameters to the values based on the configuration in the key file. For details, see [authentication types](#authentication-types). 
+ +### pulsar-client + +This example shows how to use pulsar-client to connect to a cluster through the OAuth2 authentication plugin. + +```shell script + +bin/pulsar-client \ +--url SERVICE_URL \ +--auth-plugin org.apache.pulsar.client.impl.auth.oauth2.AuthenticationOAuth2 \ +--auth-params '{"privateKey":"file:///path/to/key/file.json", + "issuerUrl":"https://dev-kt-aa9ne.us.auth0.com", + "audience":"https://dev-kt-aa9ne.us.auth0.com/api/v2/"}' \ +produce test-topic -m "test-message" -n 10 + +``` + +Set the `url` parameter to the service URL. A service URL is a combination of the protocol, hostname and port ID, such as `pulsar://localhost:6650`. +Set the `privateKey`, `issuerUrl`, and `audience` parameters to the values based on the configuration in the key file. For details, see [authentication types](#authentication-types). + +### pulsar-perf + +This example shows how to use pulsar-perf to connect to a cluster through the OAuth2 authentication plugin. + +```shell script + +bin/pulsar-perf produce --service-url pulsar+ssl://streamnative.cloud:6651 \ +--auth_plugin org.apache.pulsar.client.impl.auth.oauth2.AuthenticationOAuth2 \ +--auth-params '{"privateKey":"file:///path/to/key/file.json", + "issuerUrl":"https://dev-kt-aa9ne.us.auth0.com", + "audience":"https://dev-kt-aa9ne.us.auth0.com/api/v2/"}' \ +-r 1000 -s 1024 test-topic + +``` + +Set the `service-url` parameter to the service URL. A service URL is a combination of the protocol, hostname and port ID, such as `pulsar://localhost:6650`. +Set the `privateKey`, `issuerUrl`, and `audience` parameters to the values based on the configuration in the key file. For details, see [authentication types](#authentication-types). 
\ No newline at end of file diff --git a/site2/website-next/versioned_docs/version-2.7.1/security-overview.md b/site2/website-next/versioned_docs/version-2.7.1/security-overview.md new file mode 100644 index 0000000000000..989c08359bb3c --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-overview.md @@ -0,0 +1,39 @@ +--- +id: security-overview +title: Pulsar security overview +sidebar_label: "Overview" +original_id: security-overview +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +As the central message bus for a business, Apache Pulsar is frequently used for storing mission-critical data. Therefore, enabling security features in Pulsar is crucial. + +By default, Pulsar configures no encryption, authentication, or authorization. Any client can communicate to Apache Pulsar via plain text service URLs. So we must ensure that Pulsar accessing via these plain text service URLs is restricted to trusted clients only. In such cases, you can use Network segmentation and/or authorization ACLs to restrict access to trusted IPs. If you use neither, the state of cluster is wide open and anyone can access the cluster. + +Pulsar supports a pluggable authentication mechanism. And Pulsar clients use this mechanism to authenticate with brokers and proxies. You can also configure Pulsar to support multiple authentication sources. + +The Pulsar broker validates the authentication credentials when a connection is established. After the initial connection is authenticated, the "principal" token is stored for authorization though the connection is not re-authenticated. The broker periodically checks the expiration status of every `ServerCnx` object. You can set the `authenticationRefreshCheckSeconds` on the broker to control the frequency to check the expiration status. By default, the `authenticationRefreshCheckSeconds` is set to 60s. When the authentication is expired, the broker forces to re-authenticate the connection. 
If the re-authentication fails, the broker disconnects the client. + +The broker supports learning whether a particular client supports authentication refreshing. If a client supports authentication refreshing and the credential is expired, the authentication provider calls the `refreshAuthentication` method to initiate the refreshing process. If a client does not support authentication refreshing and the credential is expired, the broker disconnects the client. + +You should secure the service components in your Apache Pulsar deployment. + +## Role tokens + +In Pulsar, a *role* is a string, like `admin` or `app1`, which can represent a single client or multiple clients. You can use roles to control permission for clients to produce or consume from certain topics, administer the configuration for tenants, and so on. + +Apache Pulsar uses an [Authentication Provider](#authentication-providers) to establish the identity of a client and then assigns a *role token* to that client. This role token is then used for [Authorization and ACLs](security-authorization) to determine what the client is authorized to do. 
+ +## Authentication providers + +Currently Pulsar supports the following authentication providers: + +- [TLS Authentication](security-tls-authentication) +- [Athenz](security-athenz) +- [Kerberos](security-kerberos) +- [JSON Web Token Authentication](security-jwt) + + diff --git a/site2/website-next/versioned_docs/version-2.7.1/security-tls-authentication.md b/site2/website-next/versioned_docs/version-2.7.1/security-tls-authentication.md new file mode 100644 index 0000000000000..c1fe1d8fb2295 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-tls-authentication.md @@ -0,0 +1,224 @@ +--- +id: security-tls-authentication +title: Authentication using TLS +sidebar_label: "Authentication using TLS" +original_id: security-tls-authentication +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +## TLS authentication overview + +TLS authentication is an extension of [TLS transport encryption](security-tls-transport). Not only servers have keys and certs that the client uses to verify the identity of servers, clients also have keys and certs that the server uses to verify the identity of clients. You must have TLS transport encryption configured on your cluster before you can use TLS authentication. This guide assumes you already have TLS transport encryption configured. + +`Bouncy Castle Provider` provides TLS related cipher suites and algorithms in Pulsar. If you need [FIPS](https://www.bouncycastle.org/fips_faq.html) version of `Bouncy Castle Provider`, please reference [Bouncy Castle page](security-bouncy-castle). + +### Create client certificates + +Client certificates are generated using the certificate authority. Server certificates are also generated with the same certificate authority. + +The biggest difference between client certs and server certs is that the **common name** for the client certificate is the **role token** which that client is authenticated as. 
+ +First, you need to enter the following command to generate the key : + +```bash + +$ openssl genrsa -out admin.key.pem 2048 + +``` + +Similar to the broker, the client expects the key to be in [PKCS 8](https://en.wikipedia.org/wiki/PKCS_8) format, so you need to convert it by entering the following command: + +```bash + +$ openssl pkcs8 -topk8 -inform PEM -outform PEM \ + -in admin.key.pem -out admin.key-pk8.pem -nocrypt + +``` + +Next, enter the command below to generate the certificate request. When you are asked for a **common name**, enter the **role token** that you want this key pair to authenticate a client as. + +```bash + +$ openssl req -config openssl.cnf \ + -key admin.key.pem -new -sha256 -out admin.csr.pem + +``` + +:::note + +If openssl.cnf is not specified, read [Certificate authority](http://pulsar.apache.org/docs/en/security-tls-transport/#certificate-authority) to get the openssl.cnf. + +::: + +Then, enter the command below to sign with request with the certificate authority. Note that the client certs uses the **usr_cert** extension, which allows the cert to be used for client authentication. + +```bash + +$ openssl ca -config openssl.cnf -extensions usr_cert \ + -days 1000 -notext -md sha256 \ + -in admin.csr.pem -out admin.cert.pem + +``` + +You can get a cert, `admin.cert.pem`, and a key, `admin.key-pk8.pem` from this command. With `ca.cert.pem`, clients can use this cert and this key to authenticate themselves to brokers and proxies as the role token ``admin``. + +:::note + +If the "unable to load CA private key" error occurs and the reason of this error is "No such file or directory: /etc/pki/CA/private/cakey.pem" in this step. Try the command below: + +```bash + +$ cd /etc/pki/tls/misc/CA +$ ./CA -newca + +``` + +to generate `cakey.pem` . 
+ +::: + +## Enable TLS authentication on brokers + +To configure brokers to authenticate clients, add the following parameters to `broker.conf`, alongside [the configuration to enable tls transport](security-tls-transport.md#broker-configuration): + +```properties + +# Configuration to enable authentication +authenticationEnabled=true +authenticationProviders=org.apache.pulsar.broker.authentication.AuthenticationProviderTls + +# operations and publish/consume from all topics +superUserRoles=admin + +# Authentication settings of the broker itself. Used when the broker connects to other brokers, either in same or other clusters +brokerClientTlsEnabled=true +brokerClientAuthenticationPlugin=org.apache.pulsar.client.impl.auth.AuthenticationTls +brokerClientAuthenticationParameters={"tlsCertFile":"/path/my-ca/admin.cert.pem","tlsKeyFile":"/path/my-ca/admin.key-pk8.pem"} +brokerClientTrustCertsFilePath=/path/my-ca/certs/ca.cert.pem + +``` + +## Enable TLS authentication on proxies + +To configure proxies to authenticate clients, add the following parameters to `proxy.conf`, alongside [the configuration to enable tls transport](security-tls-transport.md#proxy-configuration): + +The proxy should have its own client key pair for connecting to brokers. You need to configure the role token for this key pair in the ``proxyRoles`` of the brokers. See the [authorization guide](security-authorization) for more details. + +```properties + +# For clients connecting to the proxy +authenticationEnabled=true +authenticationProviders=org.apache.pulsar.broker.authentication.AuthenticationProviderTls + +# For the proxy to connect to brokers +brokerClientAuthenticationPlugin=org.apache.pulsar.client.impl.auth.AuthenticationTls +brokerClientAuthenticationParameters=tlsCertFile:/path/to/proxy.cert.pem,tlsKeyFile:/path/to/proxy.key-pk8.pem + +``` + +## Client configuration + +When you use TLS authentication, client connects via TLS transport. 
You need to configure the client to use ```https://``` and 8443 port for the web service URL, ```pulsar+ssl://``` and 6651 port for the broker service URL. + +### CLI tools + +[Command-line tools](reference-cli-tools.md) like [`pulsar-admin`](reference-pulsar-admin), [`pulsar-perf`](reference-cli-tools.md#pulsar-perf), and [`pulsar-client`](reference-cli-tools.md#pulsar-client) use the `conf/client.conf` config file in a Pulsar installation. + +You need to add the following parameters to that file to use TLS authentication with the CLI tools of Pulsar: + +``` + +webServiceUrl=https://broker.example.com:8443/ +brokerServiceUrl=pulsar+ssl://broker.example.com:6651/ +useTls=true +tlsAllowInsecureConnection=false +tlsTrustCertsFilePath=/path/to/ca.cert.pem +authPlugin=org.apache.pulsar.client.impl.auth.AuthenticationTls +authParams=tlsCertFile:/path/to/my-role.cert.pem,tlsKeyFile:/path/to/my-role.key-pk8.pem + +``` + +### Java client + +``` + +import org.apache.pulsar.client.api.PulsarClient; + +PulsarClient client = PulsarClient.builder() + .serviceUrl("pulsar+ssl://broker.example.com:6651/") + .enableTls(true) + .tlsTrustCertsFilePath("/path/to/ca.cert.pem") + .authentication("org.apache.pulsar.client.impl.auth.AuthenticationTls", + "tlsCertFile:/path/to/my-role.cert.pem,tlsKeyFile:/path/to/my-role.key-pk8.pem") + .build(); + +``` + +### Python client + +``` + +from pulsar import Client, AuthenticationTLS + +auth = AuthenticationTLS("/path/to/my-role.cert.pem", "/path/to/my-role.key-pk8.pem") +client = Client("pulsar+ssl://broker.example.com:6651/", + tls_trust_certs_file_path="/path/to/ca.cert.pem", + tls_allow_insecure_connection=False, + authentication=auth) + +``` + +### C++ client + +``` + +#include <pulsar/Client.h> + +pulsar::ClientConfiguration config; +config.setUseTls(true); +config.setTlsTrustCertsFilePath("/path/to/ca.cert.pem"); +config.setTlsAllowInsecureConnection(false); + +pulsar::AuthenticationPtr auth = pulsar::AuthTls::create("/path/to/my-role.cert.pem", + 
"/path/to/my-role.key-pk8.pem") +config.setAuth(auth); + +pulsar::Client client("pulsar+ssl://broker.example.com:6651/", config); + +``` + +### Node.js client + +``` + +const Pulsar = require('pulsar-client'); + +(async () => { + const auth = new Pulsar.AuthenticationTls({ + certificatePath: '/path/to/my-role.cert.pem', + privateKeyPath: '/path/to/my-role.key-pk8.pem', + }); + + const client = new Pulsar.Client({ + serviceUrl: 'pulsar+ssl://broker.example.com:6651/', + authentication: auth, + tlsTrustCertsFilePath: '/path/to/ca.cert.pem', + }); +})(); + +``` + +### C# client + +``` + +var clientCertificate = new X509Certificate2("admin.pfx"); +var client = PulsarClient.Builder() + .AuthenticateUsingClientCertificate(clientCertificate) + .Build(); + +``` + diff --git a/site2/website-next/versioned_docs/version-2.7.1/security-tls-keystore.md b/site2/website-next/versioned_docs/version-2.7.1/security-tls-keystore.md new file mode 100644 index 0000000000000..94f20d6dcb730 --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-tls-keystore.md @@ -0,0 +1,326 @@ +--- +id: security-tls-keystore +title: Using TLS with KeyStore configure +sidebar_label: "Using TLS with KeyStore configure" +original_id: security-tls-keystore +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +## Overview + +Apache Pulsar supports [TLS encryption](security-tls-transport.md) and [TLS authentication](security-tls-authentication) between clients and Apache Pulsar service. +By default it uses PEM format file configuration. This page tries to describe use [KeyStore](https://en.wikipedia.org/wiki/Java_KeyStore) type configure for TLS. + + +## TLS encryption with KeyStore configure + +### Generate TLS key and certificate + +The first step of deploying TLS is to generate the key and the certificate for each machine in the cluster. +You can use Java’s `keytool` utility to accomplish this task. 
We will generate the key into a temporary keystore +initially for broker, so that we can export and sign it later with CA. + +```shell + +keytool -keystore broker.keystore.jks -alias localhost -validity {validity} -genkeypair -keyalg RSA + +``` + +You need to specify two parameters in the above command: + +1. `keystore`: the keystore file that stores the certificate. The *keystore* file contains the private key of + the certificate; hence, it needs to be kept safely. +2. `validity`: the valid time of the certificate in days. + +> Ensure that common name (CN) matches exactly with the fully qualified domain name (FQDN) of the server. +The client compares the CN with the DNS domain name to ensure that it is indeed connecting to the desired server, not a malicious one. + +### Creating your own CA + +After the first step, each broker in the cluster has a public-private key pair, and a certificate to identify the machine. +The certificate, however, is unsigned, which means that an attacker can create such a certificate to pretend to be any machine. + +Therefore, it is important to prevent forged certificates by signing them for each machine in the cluster. +A `certificate authority (CA)` is responsible for signing certificates. CA works likes a government that issues passports — +the government stamps (signs) each passport so that the passport becomes difficult to forge. Other governments verify the stamps +to ensure the passport is authentic. Similarly, the CA signs the certificates, and the cryptography guarantees that a signed +certificate is computationally difficult to forge. Thus, as long as the CA is a genuine and trusted authority, the clients have +high assurance that they are connecting to the authentic machines. + +```shell + +openssl req -new -x509 -keyout ca-key -out ca-cert -days 365 + +``` + +The generated CA is simply a *public-private* key pair and certificate, and it is intended to sign other certificates. 
+ +The next step is to add the generated CA to the clients' truststore so that the clients can trust this CA: + +```shell + +keytool -keystore client.truststore.jks -alias CARoot -import -file ca-cert + +``` + +NOTE: If you configure the brokers to require client authentication by setting `tlsRequireTrustedClientCertOnConnect` to `true` on the +broker configuration, then you must also provide a truststore for the brokers and it should have all the CA certificates that clients keys were signed by. + +```shell + +keytool -keystore broker.truststore.jks -alias CARoot -import -file ca-cert + +``` + +In contrast to the keystore, which stores each machine’s own identity, the truststore of a client stores all the certificates +that the client should trust. Importing a certificate into one’s truststore also means trusting all certificates that are signed +by that certificate. As the analogy above, trusting the government (CA) also means trusting all passports (certificates) that +it has issued. This attribute is called the chain of trust, and it is particularly useful when deploying TLS on a large BookKeeper cluster. +You can sign all certificates in the cluster with a single CA, and have all machines share the same truststore that trusts the CA. +That way all machines can authenticate all other machines. + + +### Signing the certificate + +The next step is to sign all certificates in the keystore with the CA we generated. 
First, you need to export the certificate from the keystore: + +```shell + +keytool -keystore broker.keystore.jks -alias localhost -certreq -file cert-file + +``` + +Then sign it with the CA: + +```shell + +openssl x509 -req -CA ca-cert -CAkey ca-key -in cert-file -out cert-signed -days {validity} -CAcreateserial -passin pass:{ca-password} + +``` + +Finally, you need to import both the certificate of the CA and the signed certificate into the keystore: + +```shell + +keytool -keystore broker.keystore.jks -alias CARoot -import -file ca-cert +keytool -keystore broker.keystore.jks -alias localhost -import -file cert-signed + +``` + +The definitions of the parameters are the following: + +1. `keystore`: the location of the keystore +2. `ca-cert`: the certificate of the CA +3. `ca-key`: the private key of the CA +4. `ca-password`: the passphrase of the CA +5. `cert-file`: the exported, unsigned certificate of the broker +6. `cert-signed`: the signed certificate of the broker + +### Configuring brokers + +Brokers enable TLS by provide valid `brokerServicePortTls` and `webServicePortTls`, and also need set `tlsEnabledWithKeyStore` to `true` for using KeyStore type configuration. +Besides this, KeyStore path, KeyStore password, TrustStore path, and TrustStore password need to provided. +And since broker will create internal client/admin client to communicate with other brokers, user also need to provide config for them, this is similar to how user config the outside client/admin-client. +If `tlsRequireTrustedClientCertOnConnect` is `true`, broker will reject the Connection if the Client Certificate is not trusted. 
+ +The following TLS configs are needed on the broker side: + +```properties + +tlsEnabledWithKeyStore=true +# key store +tlsKeyStoreType=JKS +tlsKeyStore=/var/private/tls/broker.keystore.jks +tlsKeyStorePassword=brokerpw + +# trust store +tlsTrustStoreType=JKS +tlsTrustStore=/var/private/tls/broker.truststore.jks +tlsTrustStorePassword=brokerpw + +# internal client/admin-client config +brokerClientTlsEnabled=true +brokerClientTlsEnabledWithKeyStore=true +brokerClientTlsTrustStoreType=JKS +brokerClientTlsTrustStore=/var/private/tls/client.truststore.jks +brokerClientTlsTrustStorePassword=clientpw + +``` + +NOTE: it is important to restrict access to the store files via filesystem permissions. + +Optional settings that may be worth considering: + +1. tlsClientAuthentication=false: Enable/Disable using TLS for authentication. This config when enabled will authenticate the other end + of the communication channel. It should be enabled on both brokers and clients for mutual TLS. +2. tlsCiphers=[TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256], A cipher suite is a named combination of authentication, encryption, MAC and key exchange + algorithm used to negotiate the security settings for a network connection using TLS network protocol. By default, + it is null. [OpenSSL Ciphers](https://www.openssl.org/docs/man1.0.2/apps/ciphers.html) + [JDK Ciphers](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#ciphersuites) +3. tlsProtocols=[TLSv1.2,TLSv1.1,TLSv1] (list out the TLS protocols that you are going to accept from clients). + By default, it is not set. + +### Configuring Clients + +This is similar to [TLS encryption configuring for client with PEM type](security-tls-transport.md#client-configuration). +For a minimal configuration, you need to provide the TrustStore information. + +e.g. +1. 
for [Command-line tools](reference-cli-tools) like [`pulsar-admin`](reference-cli-tools#pulsar-admin), [`pulsar-perf`](reference-cli-tools#pulsar-perf), and [`pulsar-client`](reference-cli-tools#pulsar-client) use the `conf/client.conf` config file in a Pulsar installation. + + ```properties + + webServiceUrl=https://broker.example.com:8443/ + brokerServiceUrl=pulsar+ssl://broker.example.com:6651/ + useKeyStoreTls=true + tlsTrustStoreType=JKS + tlsTrustStorePath=/var/private/tls/client.truststore.jks + tlsTrustStorePassword=clientpw + + ``` + +1. for java client + + ```java + + import org.apache.pulsar.client.api.PulsarClient; + + PulsarClient client = PulsarClient.builder() + .serviceUrl("pulsar+ssl://broker.example.com:6651/") + .enableTls(true) + .useKeyStoreTls(true) + .tlsTrustStorePath("/var/private/tls/client.truststore.jks") + .tlsTrustStorePassword("clientpw") + .allowTlsInsecureConnection(false) + .build(); + + ``` + +1. for java admin client + +```java + + PulsarAdmin amdin = PulsarAdmin.builder().serviceHttpUrl("https://broker.example.com:8443") + .useKeyStoreTls(true) + .tlsTrustStorePath("/var/private/tls/client.truststore.jks") + .tlsTrustStorePassword("clientpw") + .allowTlsInsecureConnection(false) + .build(); + +``` + +## TLS authentication with KeyStore configure + +This similar to [TLS authentication with PEM type](security-tls-authentication) + +### broker authentication config + +`broker.conf` + +```properties + +# Configuration to enable authentication +authenticationEnabled=true +authenticationProviders=org.apache.pulsar.broker.authentication.AuthenticationProviderTls + +# this should be the CN for one of client keystore. 
+superUserRoles=admin + +# Enable KeyStore type +tlsEnabledWithKeyStore=true +requireTrustedClientCertOnConnect=true + +# key store +tlsKeyStoreType=JKS +tlsKeyStore=/var/private/tls/broker.keystore.jks +tlsKeyStorePassword=brokerpw + +# trust store +tlsTrustStoreType=JKS +tlsTrustStore=/var/private/tls/broker.truststore.jks +tlsTrustStorePassword=brokerpw + +# internal client/admin-client config +brokerClientTlsEnabled=true +brokerClientTlsEnabledWithKeyStore=true +brokerClientTlsTrustStoreType=JKS +brokerClientTlsTrustStore=/var/private/tls/client.truststore.jks +brokerClientTlsTrustStorePassword=clientpw +# internal auth config +brokerClientAuthenticationPlugin=org.apache.pulsar.client.impl.auth.AuthenticationKeyStoreTls +brokerClientAuthenticationParameters={"keyStoreType":"JKS","keyStorePath":"/var/private/tls/client.keystore.jks","keyStorePassword":"clientpw"} +# currently websocket not support keystore type +webSocketServiceEnabled=false + +``` + +### client authentication configuring + +Besides the TLS encryption configuring. The main work is configuring the KeyStore, which contains a valid CN as client role, for client. + +e.g. +1. for [Command-line tools](reference-cli-tools) like [`pulsar-admin`](reference-cli-tools#pulsar-admin), [`pulsar-perf`](reference-cli-tools#pulsar-perf), and [`pulsar-client`](reference-cli-tools#pulsar-client) use the `conf/client.conf` config file in a Pulsar installation. + + ```properties + + webServiceUrl=https://broker.example.com:8443/ + brokerServiceUrl=pulsar+ssl://broker.example.com:6651/ + useKeyStoreTls=true + tlsTrustStoreType=JKS + tlsTrustStorePath=/var/private/tls/client.truststore.jks + tlsTrustStorePassword=clientpw + authPlugin=org.apache.pulsar.client.impl.auth.AuthenticationKeyStoreTls + authParams={"keyStoreType":"JKS","keyStorePath":"/path/to/keystorefile","keyStorePassword":"keystorepw"} + + ``` + +1. 
for java client + + ```java + + import org.apache.pulsar.client.api.PulsarClient; + + PulsarClient client = PulsarClient.builder() + .serviceUrl("pulsar+ssl://broker.example.com:6651/") + .enableTls(true) + .useKeyStoreTls(true) + .tlsTrustStorePath("/var/private/tls/client.truststore.jks") + .tlsTrustStorePassword("clientpw") + .allowTlsInsecureConnection(false) + .authentication( + "org.apache.pulsar.client.impl.auth.AuthenticationKeyStoreTls", + "keyStoreType:JKS,keyStorePath:/var/private/tls/client.keystore.jks,keyStorePassword:clientpw") + .build(); + + ``` + +1. for java admin client + + ```java + + PulsarAdmin amdin = PulsarAdmin.builder().serviceHttpUrl("https://broker.example.com:8443") + .useKeyStoreTls(true) + .tlsTrustStorePath("/var/private/tls/client.truststore.jks") + .tlsTrustStorePassword("clientpw") + .allowTlsInsecureConnection(false) + .authentication( + "org.apache.pulsar.client.impl.auth.AuthenticationKeyStoreTls", + "keyStoreType:JKS,keyStorePath:/var/private/tls/client.keystore.jks,keyStorePassword:clientpw") + .build(); + + ``` + +## Enabling TLS Logging + +You can enable TLS debug logging at the JVM level by starting the brokers and/or clients with `javax.net.debug` system property. For example: + +```shell + +-Djavax.net.debug=all + +``` + +You can find more details on this in [Oracle documentation](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html) on [debugging SSL/TLS connections](http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/ReadDebug.html). 
diff --git a/site2/website-next/versioned_docs/version-2.7.1/security-tls-transport.md b/site2/website-next/versioned_docs/version-2.7.1/security-tls-transport.md new file mode 100644 index 0000000000000..22e5d4b58366c --- /dev/null +++ b/site2/website-next/versioned_docs/version-2.7.1/security-tls-transport.md @@ -0,0 +1,292 @@ +--- +id: security-tls-transport +title: Transport Encryption using TLS +sidebar_label: "Transport Encryption using TLS" +original_id: security-tls-transport +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +## TLS overview + +By default, Apache Pulsar clients communicate with the Apache Pulsar service in plain text. This means that all data is sent in the clear. You can use TLS to encrypt this traffic to protect the traffic from the snooping of a man-in-the-middle attacker. + +You can also configure TLS for both encryption and authentication. Use this guide to configure just TLS transport encryption and refer to [here](security-tls-authentication.md) for TLS authentication configuration. Alternatively, you can use [another authentication mechanism](security-athenz) on top of TLS transport encryption. + +> Note that enabling TLS may impact the performance due to encryption overhead. + +## TLS concepts + +TLS is a form of [public key cryptography](https://en.wikipedia.org/wiki/Public-key_cryptography). Using key pairs consisting of a public key and a private key can perform the encryption. The public key encrpyts the messages and the private key decrypts the messages. + +To use TLS transport encryption, you need two kinds of key pairs, **server key pairs** and a **certificate authority**. + +You can use a third kind of key pair, **client key pairs**, for [client authentication](security-tls-authentication). + +You should store the **certificate authority** private key in a very secure location (a fully encrypted, disconnected, air gapped computer). 
As for the certificate authority public key, the **trust cert**, you can freely shared it. + +For both client and server key pairs, the administrator first generates a private key and a certificate request, then uses the certificate authority private key to sign the certificate request, finally generates a certificate. This certificate is the public key for the server/client key pair. + +For TLS transport encryption, the clients can use the **trust cert** to verify that the server has a key pair that the certificate authority signed when the clients are talking to the server. A man-in-the-middle attacker does not have access to the certificate authority, so they couldn't create a server with such a key pair. + +For TLS authentication, the server uses the **trust cert** to verify that the client has a key pair that the certificate authority signed. The common name of the **client cert** is then used as the client's role token (see [Overview](security-overview)). + +`Bouncy Castle Provider` provides cipher suites and algorithms in Pulsar. If you need [FIPS](https://www.bouncycastle.org/fips_faq.html) version of `Bouncy Castle Provider`, please reference [Bouncy Castle page](security-bouncy-castle). + +## Create TLS certificates + +Creating TLS certificates for Pulsar involves creating a [certificate authority](#certificate-authority) (CA), [server certificate](#server-certificate), and [client certificate](#client-certificate). + +Follow the guide below to set up a certificate authority. You can also refer to plenty of resources on the internet for more details. We recommend [this guide](https://jamielinux.com/docs/openssl-certificate-authority/index.html) for your detailed reference. + +### Certificate authority + +1. Create the certificate for the CA. You can use CA to sign both the broker and client certificates. This ensures that each party will trust the others. 
You should store CA in a very secure location (ideally completely disconnected from networks, air gapped, and fully encrypted). + +2. Entering the following command to create a directory for your CA, and place [this openssl configuration file](https://github.com/apache/pulsar/tree/master/site2/website/static/examples/openssl.cnf) in the directory. You may want to modify the default answers for company name and department in the configuration file. Export the location of the CA directory to the environment variable, CA_HOME. The configuration file uses this environment variable to find the rest of the files and directories that the CA needs. + +```bash + +mkdir my-ca +cd my-ca +wget https://raw.githubusercontent.com/apache/pulsar/master/site2/website/static/examples/openssl.cnf +export CA_HOME=$(pwd) + +``` + +3. Enter the commands below to create the necessary directories, keys and certs. + +```bash + +mkdir certs crl newcerts private +chmod 700 private/ +touch index.txt +echo 1000 > serial +openssl genrsa -aes256 -out private/ca.key.pem 4096 +chmod 400 private/ca.key.pem +openssl req -config openssl.cnf -key private/ca.key.pem \ + -new -x509 -days 7300 -sha256 -extensions v3_ca \ + -out certs/ca.cert.pem +chmod 444 certs/ca.cert.pem + +``` + +4. After you answer the question prompts, CA-related files are stored in the `./my-ca` directory. Within that directory: + +* `certs/ca.cert.pem` is the public certificate. This public certificates is meant to be distributed to all parties involved. +* `private/ca.key.pem` is the private key. You only need it when you are signing a new certificate for either broker or clients and you must safely guard this private key. + +### Server certificate + +Once you have created a CA certificate, you can create certificate requests and sign them with the CA. + +The following commands ask you a few questions and then create the certificates. When you are asked for the common name, you should match the hostname of the broker. 
You can also use a wildcard to match a group of broker hostnames, for example, `*.broker.usw.example.com`. This ensures that multiple machines can reuse the same certificate. + +> #### Tips +> +> Sometimes matching the hostname is not possible or makes no sense, +> such as when you create the brokers with random hostnames, or you +> plan to connect to the hosts via their IP. In these cases, you +> should configure the client to disable TLS hostname verification. For more +> details, you can see [the host verification section in client configuration](#hostname-verification). + +1. Enter the command below to generate the key. + +```bash + +openssl genrsa -out broker.key.pem 2048 + +``` + +The broker expects the key to be in [PKCS 8](https://en.wikipedia.org/wiki/PKCS_8) format, so enter the following command to convert it. + +```bash + +openssl pkcs8 -topk8 -inform PEM -outform PEM \ + -in broker.key.pem -out broker.key-pk8.pem -nocrypt + +``` + +2. Enter the following command to generate the certificate request. + +```bash + +openssl req -config openssl.cnf \ + -key broker.key.pem -new -sha256 -out broker.csr.pem + +``` + +3. Sign it with the certificate authority by entering the command below. + +```bash + +openssl ca -config openssl.cnf -extensions server_cert \ + -days 1000 -notext -md sha256 \ + -in broker.csr.pem -out broker.cert.pem + +``` + +At this point, you have a cert, `broker.cert.pem`, and a key, `broker.key-pk8.pem`, which you can use along with `ca.cert.pem` to configure TLS transport encryption for your broker and proxy nodes. + +## Broker Configuration + +To configure a Pulsar [broker](reference-terminology.md#broker) to use TLS transport encryption, you need to make some changes to `broker.conf`, which locates in the `conf` directory of your [Pulsar installation](getting-started-standalone). 
+ +Add these values to the configuration file (substituting the appropriate certificate paths where necessary): + +```properties + +tlsEnabled=true +tlsCertificateFilePath=/path/to/broker.cert.pem +tlsKeyFilePath=/path/to/broker.key-pk8.pem +tlsTrustCertsFilePath=/path/to/ca.cert.pem + +``` + +> You can find a full list of parameters available in the `conf/broker.conf` file, +> as well as the default values for those parameters, in [Broker Configuration](reference-configuration.md#broker). +> +### TLS Protocol Version and Cipher + +You can configure the broker (and proxy) to require specific TLS protocol versions and ciphers for TLS negotiation. You can use the TLS protocol versions and ciphers to stop clients from requesting downgraded TLS protocol versions or ciphers that may have weaknesses. + +Both the TLS protocol versions and cipher properties can take multiple values, separated by commas. The possible values for protocol version and ciphers depend on the TLS provider that you are using. Pulsar uses OpenSSL if it is available; otherwise, Pulsar falls back to the JDK implementation. + +```properties + +tlsProtocols=TLSv1.2,TLSv1.1 +tlsCiphers=TLS_DH_RSA_WITH_AES_256_GCM_SHA384,TLS_DH_RSA_WITH_AES_256_CBC_SHA + +``` + +OpenSSL currently supports ```SSL2```, ```SSL3```, ```TLSv1```, ```TLSv1.1``` and ```TLSv1.2``` for the protocol version. You can acquire a list of supported ciphers from the openssl ciphers command, for example, ```openssl ciphers -tls1_2```. + +For JDK 8, you can obtain a list of supported values from the documentation: +- [TLS protocol](https://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#SSLContext) +- [Ciphers](https://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#ciphersuites) + +## Proxy Configuration + +Proxies need to configure TLS in two directions, for clients connecting to the proxy, and for the proxy connecting to brokers. 
+ +``` + +# For clients connecting to the proxy +tlsEnabledInProxy=true +tlsCertificateFilePath=/path/to/broker.cert.pem +tlsKeyFilePath=/path/to/broker.key-pk8.pem +tlsTrustCertsFilePath=/path/to/ca.cert.pem + +# For the proxy to connect to brokers +tlsEnabledWithBroker=true +brokerClientTrustCertsFilePath=/path/to/ca.cert.pem + +``` + +## Client configuration + +When you enable the TLS transport encryption, you need to configure the client to use `https://` and port 8443 for the web service URL, and `pulsar+ssl://` and port 6651 for the broker service URL. + +As the server certificate that you generated above does not belong to any of the default trust chains, you also need to either specify the path to the **trust cert** (recommended), or tell the client to allow untrusted server certs. + +### Hostname verification + +Hostname verification is a TLS security feature whereby a client can refuse to connect to a server if the "CommonName" does not match the hostname to which the client is connecting. By default, Pulsar clients disable hostname verification, as it requires that each broker has a DNS record and a unique cert. + +Moreover, as the administrator has full control of the certificate authority, a bad actor is unlikely to be able to pull off a man-in-the-middle attack. "allowInsecureConnection" allows the client to connect to servers whose cert has not been signed by an approved CA. The client disables "allowInsecureConnection" by default, and you should always disable "allowInsecureConnection" in production environments. As long as you disable "allowInsecureConnection", a man-in-the-middle attack requires that the attacker has access to the CA. + +One scenario where you may want to enable hostname verification is where you have multiple proxy nodes behind a VIP, and the VIP has a DNS record, for example, pulsar.mycompany.com. In this case, you can generate a TLS cert with pulsar.mycompany.com as the "CommonName," and then enable hostname verification on the client. 
+ +The examples below show hostname verification being disabled for the Java client, though you can omit this as the client disables the hostname verification by default. C++/Python/Node.js clients do now allow configuring this at the moment. + +### CLI tools + +[Command-line tools](reference-cli-tools) like [`pulsar-admin`](reference-cli-tools.md#pulsar-admin), [`pulsar-perf`](reference-cli-tools.md#pulsar-perf), and [`pulsar-client`](reference-cli-tools.md#pulsar-client) use the `conf/client.conf` config file in a Pulsar installation. + +You need to add the following parameters to that file to use TLS transport with the CLI tools of Pulsar: + +```properties + +webServiceUrl=https://broker.example.com:8443/ +brokerServiceUrl=pulsar+ssl://broker.example.com:6651/ +useTls=true +tlsAllowInsecureConnection=false +tlsTrustCertsFilePath=/path/to/ca.cert.pem +tlsEnableHostnameVerification=false + +``` + +#### Java client + +```java + +import org.apache.pulsar.client.api.PulsarClient; + +PulsarClient client = PulsarClient.builder() + .serviceUrl("pulsar+ssl://broker.example.com:6651/") + .enableTls(true) + .tlsTrustCertsFilePath("/path/to/ca.cert.pem") + .enableTlsHostnameVerification(false) // false by default, in any case + .allowTlsInsecureConnection(false) // false by default, in any case + .build(); + +``` + +#### Python client + +```python + +from pulsar import Client + +client = Client("pulsar+ssl://broker.example.com:6651/", + tls_hostname_verification=True, + tls_trust_certs_file_path="/path/to/ca.cert.pem", + tls_allow_insecure_connection=False) # defaults to false from v2.2.0 onwards + +``` + +#### C++ client + +```c++ + +#include <pulsar/Client.h> + +ClientConfiguration config = ClientConfiguration(); +config.setUseTls(true); // shouldn't be needed soon +config.setTlsTrustCertsFilePath(caPath); +config.setTlsAllowInsecureConnection(false); +config.setAuth(pulsar::AuthTls::create(clientPublicKeyPath, clientPrivateKeyPath)); +config.setValidateHostName(true); + +``` + +#### 
Node.js client + +```JavaScript + +const Pulsar = require('pulsar-client'); + +(async () => { + const client = new Pulsar.Client({ + serviceUrl: 'pulsar+ssl://broker.example.com:6651/', + tlsTrustCertsFilePath: '/path/to/ca.cert.pem', + }); +})(); + +``` + +#### C# client + +```c# + +var certificate = new X509Certificate2("ca.cert.pem"); +var client = PulsarClient.Builder() + .TrustedCertificateAuthority(certificate) //If the CA is not trusted on the host, you can add it explicitly. + .VerifyCertificateAuthority(true) //Default is 'true' + .VerifyCertificateName(false) //Default is 'false' + .Build(); + +``` + diff --git a/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json b/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json index a95a781eb8b8b..fc2a1071bc584 100644 --- a/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json +++ b/site2/website-next/versioned_sidebars/version-2.7.1-sidebars.json @@ -333,6 +333,60 @@ "id": "version-2.7.1/administration-isolation" } ] + }, + { + "type": "category", + "label": "Security", + "items": [ + { + "type": "doc", + "id": "version-2.7.1/security-overview" + }, + { + "type": "doc", + "id": "version-2.7.1/security-tls-transport" + }, + { + "type": "doc", + "id": "version-2.7.1/security-tls-authentication" + }, + { + "type": "doc", + "id": "version-2.7.1/security-tls-keystore" + }, + { + "type": "doc", + "id": "version-2.7.1/security-jwt" + }, + { + "type": "doc", + "id": "version-2.7.1/security-athenz" + }, + { + "type": "doc", + "id": "version-2.7.1/security-kerberos" + }, + { + "type": "doc", + "id": "version-2.7.1/security-oauth2" + }, + { + "type": "doc", + "id": "version-2.7.1/security-authorization" + }, + { + "type": "doc", + "id": "version-2.7.1/security-encryption" + }, + { + "type": "doc", + "id": "version-2.7.1/security-extending" + }, + { + "type": "doc", + "id": "version-2.7.1/security-bouncy-castle" + } + ] } ] } \ No newline at end of file