This repository has been archived by the owner on Mar 5, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathsequins.conf.example
141 lines (109 loc) · 5.73 KB
/
sequins.conf.example
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# This configuration file is in the toml format, which is defined here:
# https://github.com/toml-lang/toml
# Unless specified otherwise, the below values are the defaults.
source = "hdfs://namenode:8020/path/to/sequins"
# The url or directory where the sequencefiles are. This can be a local
# directory, an HDFS url of the form hdfs://<namenode>:<port>/path/to/stuff,
# or an S3 url of the form s3://<bucket>/path/to/stuff. This should be a
# directory of directories of directories; each first level represents a
# 'database', and each subdirectory therein represents a 'version' of that
# database. See the README for more information. This must be set, but can be
# overridden from the command line with --source.
# bind = "0.0.0.0:9599"
# The address to bind on. This can be overridden from the command line with
# --bind.
# local_store = "/var/sequins/"
# This is where sequins will store its internal copy of all the data it ingests.
# This can be overridden from the command line with --local-store.
# max_parallel_loads = 4
# Unset by default. If this flag is set, sequins will only update this many
# databases at a time, minimizing disk usage while new data is being loaded. If
# you set this to 1, then loads will be completely serialized.
# throttle_loads = "800μs"
# Unset by default. If this flag is set, sequins will sleep this long between
# writes while loading data, artificially slowing down loads and reducing disk
# i/o. If you are using disks where the latency is extremely sensitive to
# activity, then loading large amounts of data can negatively impact your
# latency, and you may want to experiment with this setting.
# refresh_period = "10m"
# Unset by default. If this is specified, sequins will periodically download new
# data at this interval (a duration such as "10m"). If you enable this, you
# should also enable 'require_success_file', or sequins may start automatically
# downloading a partially-created set of files.
# require_success_file = false
# If this flag is set, sequins will only ingest data from directories that have
# a _SUCCESS file (which is produced by hadoop when it completes a job).
# content_type = "application/json"
# Unset by default. If this is set, sequins will set this Content-Type header on
# responses.
[storage]
# compression = "snappy"
# This can be either 'snappy' or 'none', and defines how data is compressed
# on disk.
# block_size = 4096
# This controls the block size for on-disk compression.
[s3]
# region = "us-west-1"
# Unset by default. The S3 region for the bucket where your data is. If unset,
# and sequins is running on EC2, this will be set to the instance region.
# access_key_id = "AKIAIOSFODNN7EXAMPLE"
# secret_access_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
# Unset by default. The access key and secret to use for S3. If unset, the env
# variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY will be used, or IAM
# instance role credentials if they are available.
[sharding]
# enabled = false
# If true, sequins will attempt to connect to zookeeper at the specified
# addresses (see below), and coordinate with peer instances to shard datasets.
# For a complete description of the sharding algorithm, see the manual.
# replication = 2
# This is the number of replicas responsible for each partition.
# time_to_converge = "10s"
# Upon startup, sequins will wait this long for the set of known peers to
# stabilize.
# proxy_timeout = "100ms"
# This is the total timeout (connect + request) for proxied requests to peers
# in a sequins cluster. You may want to increase this if you're running on
# particularly cold storage, or if there are other factors significantly
# increasing request time.
# proxy_stage_timeout = "50ms"
# Unset by default. After this interval, sequins will try another peer
# concurrently with the first, as long as there are other peers available and
# the total time is less than 'proxy_timeout'. If left unset, this defaults to
# the 'proxy_timeout' divided by 'replication' - enough time for all
# peers to be tried within the total timeout.
# cluster_name = "sequins"
# This defines the root prefix to use for zookeeper state. If you are running
# multiple sequins clusters using the same zookeeper for coordination, you
# should change this so they can't conflict.
# advertised_hostname = "sequins1.example.com"
# Unset by default. This is the hostname sequins uses to advertise itself to
# peers in a cluster. It should be resolvable by those peers. If left unset, it
# will be set to the hostname of the server.
# shard_id = "sequins1"
# Unset by default. The shard ID is used to determine which partitions
# the node is responsible for. By default, it is the same as
# 'advertised_hostname'. Unlike the hostname, however, it doesn't have to be
# unique; two nodes can have the same shard_id, in which case they will download
# the same partitions. This can be useful if you don't have stable hostnames,
# but want to be able to rebuild a server to take the place of a dead or
# decommissioning one.
[zk]
# servers = ["localhost:2181"]
# If set and 'sharding.enabled' is true, sequins will connect to zookeeper at
# the given addresses.
# connect_timeout = "1s"
# This specifies how long to wait while connecting to zookeeper.
# session_timeout = "10s"
# This specifies the session timeout to use with zookeeper. The
# actual timeout is negotiated between server and client, but will never be
# lower than this number.
[debug]
# bind = "localhost:6060"
# Unset by default. If set, binds the golang debug http server, which can serve
# expvars and profiling information, to the specified address.
# expvars = true
# If set, this adds expvars to the debug HTTP server, including the default ones
# and a few sequins-specific ones.
# pprof = false
# If set, this adds the default pprof handlers to the debug HTTP server.