-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathswift.conf
278 lines (260 loc) · 14.2 KB
/
swift.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
sites: [localhost]
# sites can be set to a comma separated list of remote sites you have access to
# eg.: sites: [beagle] or sites: [beagle, midway]
# The sites definition in the config can be overridden by specifying -sites on the
# swift command line as follows : swift -sites beagle p4.swift
# Default site for examples 1-3
# This site runs tasks on the local machine
site.localhost {
execution {
type : "local" # Execution is local
URL : "localhost" # Point at localhost to run locally
}
staging : direct # Files are on the same machine, so can be accessed "directly"
workDirectory : "/tmp/"${env.USER}"/swiftwork" # Directory where work is done
maxParallelTasks : 101 # Maximum number of parallel tasks
initialParallelTasks: 100 # Maximum number of tasks at start
app.ALL { executable: "*" } # All tasks to be found from commandline
}
# Instructions for Beagle
# 1. If you are running on the beagle login nodes, set jobManager: "local:pbs"
# 2. Find your project name/allocation and set jobProject : "YOUR_PROJECT_ON_BEAGLE"
# 3. Set userHomeOverride : "/lustre/beagle2/YOUR_USERNAME_ON_BEAGLE/swiftwork"
# 4. Set workDirectory : "/tmp/YOUR_USERNAME_ON_BEAGLE/swiftwork"
site.beagle {
execution {
type : "coaster" # Use coasters to run on remote sites
URL : "login4.beagle.ci.uchicago.edu" # Beagle login URL
jobManager: "ssh-cl:pbs" # use ssh-cl to connect, pbs is the Local Resource manager(LRM)
options {
maxJobs : 1 # Max jobs submitted to LRM
nodeGranularity : 1 # Nodes per job
maxNodesPerJob : 1 # Nodes per job
tasksPerNode : 4 # Tasks per Node
jobQueue : "development" # Select queue
jobProject : ${env.BEAGLE_PROJECT} # Project|Allocation on Beagle
userHomeOverride: "/lustre/beagle2/"${env.BEAGLE_USERNAME}"/swiftwork" # Point to lustre shared-filesystem
maxJobTime : "00:25:00" # Time requested per job
jobOptions {
pbs.aprun: true # Submit jobs via aprun mechanism
pbs.mpp : true # Mpp enabled
depth : "4" # 4 cores per task
}
}
}
staging : "local" # Stage files from "local" system to Beagle
workDirectory : "/tmp/"${env.BEAGLE_USERNAME}"/swiftwork" # Location for intermediate files
maxParallelTasks : 101 # Maximum number of parallel tasks
initialParallelTasks: 100 # Maximum number of tasks at start
app.ALL { executable: "*" } # All tasks to be found from commandline
}
# Instructions for Midway:
# 1. If you are running on the midway login nodes set jobManager: "local:slurm"
# 2. Set workDirectory to /tmp/your_username_on_midway
site.midway {
execution {
type : "coaster" # Use coasters to run on remote sites
URL : "swift.rcc.uchicago.edu" # Midway login node | Alternatively use midway.rcc.uchicago.edu
jobManager: "ssh-cl:slurm" # Use ssh-cl to connect, slurm is the Local resource manager
options {
maxJobs : 1 # Max jobs submitted to LRM
nodeGranularity : 1 # Nodes per job
maxNodesPerJob : 1 # Nodes per job
tasksPerNode : 4 # Tasks per Node
jobQueue : "sandyb" # Select queue from (sandyb, westmere, ...)
maxJobTime : "00:25:00" # Time requested per job
}
}
staging : "local" # Stage files from "local" system to Midway
workDirectory : "/tmp/"${env.MIDWAY_USERNAME} # Location for intermediate files
maxParallelTasks : 101 # Maximum number of parallel tasks
initialParallelTasks: 100 # Maximum number of tasks at start
app.ALL { executable: "*" } # All tasks to be found from commandline
}
# Instruction for OSGConnect
# 1. If you are running on the OSGConnect login node set jobManager: "local:condor"
# 2. Set projectname : "YOUR_PROJECTNAME_ON_OSG"
# 3. Set workDirectory : "/tmp/YOUR_USERNAME_ON_OSG"
site.osgc {
execution {
type : "coaster" # Use coasters to run on remote sites
URL : "login.osgconnect.net" # OSGConnect login node
jobManager : "ssh-cl:condor" # Use ssh-cl to connect, Condor is the Local resource manager
options {
maxJobs : 1 # Max jobs submitted to LRM
nodeGranularity : 1 # Nodes per job
maxNodesPerJob : 1 # Nodes per job
tasksPerNode : 1 # Tasks per Node
maxJobTime : "00:25:00" # Time requested per job
jobOptions.condor {
projectname : ${env.OSG_PROJECT} # Set the Project name of your allocation on OSG
# Using requirements directive, you can add any requirement attributes for condor. Eg:
# requirements : "(HAS_CVMFS_oasis_opensciencegrid_org =?= TRUE)"
}
}
}
staging : "local" # Stage files from "local" system to OSGConnect
workDirectory : "/tmp/"${env.OSG_USERNAME} # Location for intermediate files
maxParallelTasks : 101 # Maximum number of parallel tasks
initialParallelTasks: 100 # Maximum number of tasks at start
app.ALL { executable: "*" } # All tasks to be found from commandline
}
# Instructions for Amazon EC2
# 1. Set ec2CredentialsFile: "ABSOLUTE_PATH_TO_YOUR_AWS_CREDENTIALS_FILE"
# 2. Ensure that ec2KeypairFile points to a valid path
# 3. If you update ec2WorkerImage, ensure that it is a ubuntu image
site.aws {
execution {
type : "coaster" # Use coasters to run on remote sites
URL : "127.0.0.1" # We use a local service to connect to AWS
jobManager : "local:ec2-cloud" # ec2-cloud provider manages the cloud resources
options {
maxJobs : 1 # Max jobs requested from AWS
tasksPerNode : 2 # Tasks per Node
maxJobTime : "00:25:00" # Time requested per job
jobOptions {
ec2CredentialsFile: ${env.AWS_CREDENTIALS_FILE} # The credentials required to authenticate with AWS
ec2SecurityGroup : swift_security_group # This security group will be generated
ec2KeypairName : swift-test-pair # Key-pair to connect to nodes
ec2KeypairFile : ${env.HOME}/.ssh/swift-test-pair.pem # Path to create the key-pair
ec2WorkerImage : ami-23700813 # Amazon image id
ec2WorkerType : t1.micro # Instance type
}
}
}
staging : "local" # Stage files from "local" system to AWS
workDirectory : "/tmp/"${env.USER}"/work" # Location for intermediate files
maxParallelTasks : 20 # Maximum number of parallel tasks
initialParallelTasks: 20 # Maximum number of tasks at start
app.ALL { executable: "*" } # All tasks to be found from commandline
}
# Instructions for a pool of Ad-hoc machines
# Assume that you have access to N machines ( alpha.foo.net, beta.foo.net ... omega.foo.net )
# Replicate the site.ad-hoc-1 site definition block for each site you have access to.
# Set the site.<SITE_NAME> with say site.alpha, site.beta etc..
# Set the site definition for each site with the correct URL
# Set the first line of this file to a comma separated list of sites, eg:
# sites: [ alpha, beta, omega ]
# Instructions for Ad-hoc-1
# 1. Set URL : URL_OF_AD_HOC_MACHINE_1
# 2. Set workDirectory : "/tmp/USERNAME_ON_AD-HOC-1/work"
site.ad-hoc-1 {
execution {
type : "coaster" # Use coasters to run on remote sites
URL : ${env.URL_OF_AD_HOC_MACHINE_1} # URL of the remote machine to connect to
jobManager : "ssh-cl:local" # ssh-cl to connect to machine, run worker locally
options {
maxJobs : 2 # Max jobs sent to remote node
tasksPerNode : 10 # Tasks per Node
maxJobTime : "00:25:00" # Time requested per job
}
}
staging : "local" # Stage files from "local" system to ad-hoc-1
workDirectory : "/tmp/"${env.USER}"/work" # Location for intermediate files
maxParallelTasks : 20 # Maximum number of parallel tasks
initialParallelTasks: 20 # Maximum number of tasks at start
app.ALL { executable: "*" } # All tasks to be found from commandline
}
# Site definition for use at Legion UCL
# parallel environments have been disabled in config
# Use http://users.rcc.uchicago.edu/~yadunand/swift-trunk-sge-mod.tar.gz
site.Legion {
execution {
type: "coaster"
jobManager: "local:sge"
URL : "localhost"
options {
maxJobs: 5
nodeGranularity: 1
maxNodesPerJob: 1
tasksPerNode: 1
jobProject: "Training"
jobQueue: "Tarvek"
maxJobTime: "00:08:20"
}
}
initialParallelTasks : 5
staging: local
workDirectory: "/tmp/"${env.USER}
app.ALL {
executable: "*"
maxWallTime: "00:05:00"
}
}
# Instructions for running work on a remote cloud setup
# 1. Follow instructions here to setup a cluster :
# https://github.com/swift-lang/swift-on-cloud/tree/master/aws
# 2. Set the CLOUD_HEADNODE to point at persistent service
# running on the headnode of the cluster.
# The coaster service is usually running on port 50010.
site.remote-cloud {
execution {
type : "coaster-persistent"
URL : ${env.CLOUD_HEADNODE} # URL of the remote machine to connect to
jobManager : "local:local" # ssh-cl to connect to machine, run worker locally
options {
maxJobs : 2 # Max jobs sent to remote node
tasksPerNode : 10 # Tasks per Node
maxJobTime : "00:25:00" # Time requested per job
}
}
staging : "local" # Stage files from "local" system to ad-hoc-1
workDirectory : "/tmp/"${env.USER}"/work" # Location for intermediate files
maxParallelTasks : 20 # Maximum number of parallel tasks
initialParallelTasks: 20 # Maximum number of tasks at start
app.ALL { executable: "*" } # All tasks to be found from commandline
}
# Instructions for Blues
# 1. If you are running on the blues login, set jobManager: "local:pbs"
# 2. Set workDirectory : "/home/YOUR_USERNAME_ON_BLUES/swiftwork"
site.blues {
execution {
type : "coaster" # Use coasters to run on remote sites
URL : "blues.lcrc.anl.gov" # Blues login URL
jobManager: "ssh-cl:pbs" # use ssh-cl to connect, pbs is the Local Resource manager(LRM)
options {
maxJobs : 4 # Max jobs submitted to LRM
nodeGranularity : 1 # Nodes per job
maxNodesPerJob : 1 # Nodes per job
tasksPerNode : 4 # Tasks per Node
jobQueue : "route" # Select queue
maxJobTime : "00:25:00" # Time requested per job
}
}
staging : "local" # Stage files from "local" system to Beagle
workDirectory : "/home/"${env.BLUES_USERNAME}"/swiftwork" # Location for intermediate files
maxParallelTasks : 101 # Maximum number of parallel tasks
initialParallelTasks: 100 # Maximum number of tasks at start
app.ALL { executable: "*" } # All tasks to be found from commandline
}
# Instructions for swan:
# Note: Do not use mpp on swan
site.swan {
execution {
type: "coaster"
URL: "swan.cray.com"
jobManager: "local:pbs"
options {
nodeGranularity: 4
maxNodesPerJob: 4
maxJobs: 1
tasksPerNode: 24
jobOptions {
pbs.aprun: true # Submit jobs via aprun mechanism
pbs.mpp : false # Mpp enabled
ppn: 12
}
}
}
staging: direct
workDirectory: "/tmp/"${env.USER}"/swiftwork"
maxParallelTasks: 101
initialParallelTasks: 100
app.ALL { executable: "*" }
}
TCPPortRange: "50000,51000" # TCP port range used by swift to communicate with remote sites
lazyErrors: false # Swift fails immediately upon encountering an error
executionRetries: 0 # Set number of retries upon task failures
keepSiteDir: true # Keep Site Dir (useful for debug)
providerStagingPinSwiftFiles: false # Pin staging files (useful for debug)
alwaysTransferWrapperLog: true # Transfer wrapper logs (useful for debug)