Skip to content

Commit

Permalink
references deep-compute#2, updated README and added some examples
Browse files Browse the repository at this point in the history
  • Loading branch information
prashanthellina committed Apr 10, 2016
1 parent 6f03a61 commit fa4442b
Show file tree
Hide file tree
Showing 7 changed files with 384 additions and 2 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,6 @@ target/

#Ipython Notebook
.ipynb_checkpoints

.*.swp
*.log
114 changes: 112 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,112 @@
# basescript
Basic infrastructure for writing scripts in Python
# Base Script

Python is an excellent language that makes writing scripts very straightforward. Over the course of writing many scripts, we realized that we were doing some things over and over like creating a logger and accepting command line arguments. Base script is a very simple abstraction that takes care of setting up logging and other basics so you can focus on your application specific logic.

Here are some facilities that Base Script offers:
- Logging
- Stats collection to StatsD/Graphite
- Accepting command-line arguments using argparse
- Employing gevent to enable cooperative multitasking for easy IO
parallelism

## Installation

``` bash
sudo pip install git+https://github.com/deep-compute/basescript.git
```

## Usage

Here is a simple example to get started:

helloworld.py
```python
from basescript import BaseScript


class HelloWorld(BaseScript):
    '''Smallest possible script: prints a greeting.'''

    def run(self):
        '''Print the greeting to standard output.'''
        print("Hello world")


if __name__ == '__main__':
    HelloWorld().run()
```

> NOTE: All examples showcased here are available under the `examples` directory.

Run the above by doing:

```bash
python helloworld.py
```

Run script with log level set to DEBUG

```bash
python helloworld.py --log-level DEBUG
```

Run script with custom log file

```bash
python helloworld.py --log-level DEBUG --log mylog
```

The following is a more involved example

adder.py
```python
from basescript import BaseScript


class Adder(BaseScript):
    # The script description. It is used as a part of the usage doc
    # that is shown when the script is run with the --help option.
    DESC = 'Adds numbers'

    def init(self):
        '''
        Script specific initialization goes here.
        This is preferred to overriding __init__
        '''
        self.a, self.b = 10, 20

    def define_args(self, parser):
        '''Register the positional integer argument `c`.'''
        parser.add_argument('c', type=int, help='Number to add')

    def run(self):
        '''Print the sum of `a`, `b` and the `c` argument.'''
        self.log.info("Starting run of script ...")

        total = self.a + self.b + self.args.c
        print(total)

        self.log.info("Script is done")


if __name__ == '__main__':
    Adder().run()
```

Run the script as follows and observe the usage information shown. Note how the
description appears along with the `c` argument.
```
python adder.py --help
usage: adder.py [-h] [--name NAME] [--statsd-server STATSD_SERVER] [--log LOG]
[--log-level LOG_LEVEL] [--quiet]
c
Adds numbers
positional arguments:
c Number to add
optional arguments:
-h, --help show this help message and exit
--name NAME Name to identify this instance
--statsd-server STATSD_SERVER
Location of StatsD server to send statistics. Format
is ip[:port]. Eg: localhost, localhost:8125
--log LOG Name of log file
--log-level LOG_LEVEL
Logging level as picked from the logging module
--quiet
```
1 change: 1 addition & 0 deletions basescript/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Explicit relative import: always resolves to the sibling `basescript`
# module of this package instead of depending on implicit relative imports.
from .basescript import BaseScript
214 changes: 214 additions & 0 deletions basescript/basescript.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
from gevent import monkey; monkey.patch_all()

import argparse
import gc
import logging
import logging.handlers
import os
import resource
import socket
import sys
import threading
import time
import traceback

import gevent
import statsd

MAX_LOG_FILE_SIZE = 100 * 1024 * 1024 # 100MB

class StatsCollector(object):
    '''
    Buffers counters and gauges in memory and flushes them to a StatsD
    server every STATS_FLUSH_INTERVAL seconds.

    When no server location is given the collector is disabled: nothing
    is buffered, nothing is sent and no background greenlet is spawned.
    '''

    # Seconds to sleep between two flushes of the buffered stats.
    STATS_FLUSH_INTERVAL = 1

    def __init__(self, prefix, stats_loc):
        '''
        @prefix is handed to the StatsD client as the key prefix.
        @stats_loc is the server location in the form `ip[:port]`.
            A false value (None, '') disables stats collection.
        raises: ValueError when the port part is not an integer.
        '''
        # key -> pending counter delta
        self.cache = {}
        # key -> (value, is_delta) pending gauge update
        self.gauge_cache = {}

        self.stats = None
        # Stays None while the collector is disabled.
        self.stats_thread = None
        if not stats_loc:
            return

        ip, _, port = stats_loc.partition(':')

        client_cls = statsd.StatsClient
        if port:
            self.stats = client_cls(ip, int(port), prefix)
        else:
            # No port given: let the client use its own default.
            self.stats = client_cls(ip, prefix=prefix)

        def flush_forever():
            while True:
                time.sleep(self.STATS_FLUSH_INTERVAL)
                self._collect_ramusage()
                self.send()

        self.stats_thread = gevent.spawn(flush_forever)

    def incr(self, key, n=1):
        '''Add @n to the buffered counter @key.'''
        if self.stats is None:
            return
        self.cache[key] = self.cache.get(key, 0) + n

    def decr(self, key, n=1):
        '''Subtract @n from the buffered counter @key.'''
        if self.stats is None:
            return
        self.cache[key] = self.cache.get(key, 0) - n

    def timing(self, key, ms):
        '''
        Report a timing of @ms for @key. Timings are not buffered; they
        are passed straight to the StatsD client.
        '''
        if self.stats is None:
            return
        return self.stats.timing(key, ms)

    def gauge(self, key, n, delta=False):
        '''
        Buffer a gauge update for @key.

        With delta=False the gauge is set to @n, replacing anything
        buffered for the key. With delta=True @n is added to whatever is
        already buffered; if that was an absolute value the result stays
        absolute, so "set to 100, then +5" is flushed as "set to 105"
        rather than as a relative change of 105.
        '''
        if self.stats is None:
            return

        if delta:
            prev, prev_is_delta = self.gauge_cache.get(key, (0, True))
            self.gauge_cache[key] = (prev + n, prev_is_delta)
        else:
            self.gauge_cache[key] = (n, False)

    def _collect_ramusage(self):
        '''Record this process's `ru_maxrss` (from getrusage) as a gauge.'''
        self.gauge('resource.maxrss',
                   resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

    def send(self):
        '''Flush all buffered counters and gauges in one pipeline.'''
        if self.stats is None:
            return

        p = self.stats.pipeline()

        for k, v in self.cache.items():
            p.incr(k, v)

        for k, (v, d) in self.gauge_cache.items():
            p.gauge(k, v, delta=d)

        p.send()

        # Start the next interval with empty buffers.
        self.cache = {}
        self.gauge_cache = {}


class BaseScript(object):
    '''
    Base class for command-line scripts.

    Instantiating it parses the command line, sets up logging and stats
    collection and finally calls `init`. Subclasses customize behaviour
    by overriding `define_args`, `init` and `run`.
    '''

    LOG_FORMATTER = logging.Formatter('%(asctime)s %(levelname)s %(message)s')

    # Description shown in the usage text printed by --help.
    DESC = 'Base script abstraction'

    def __init__(self):
        # argparse parser obj
        self.parser = argparse.ArgumentParser(description=self.DESC)
        self.define_baseargs(self.parser)
        self.define_args(self.parser)
        self.args = self.parser.parse_args()

        self.hostname = socket.gethostname()

        self.log = self.init_logger(self.args.log, self.args.log_level,
                                    quiet=self.args.quiet)

        self.stats = self.create_stats()
        self.log.debug('init: args=%r', self.args)

        self.init()

    @property
    def name(self):
        '''
        Name of this script instance: the script's file name without
        directory and extension, followed by the value of --name (if
        any), joined by a dot.
        '''
        # basename/splitext instead of splitting on the first '.', so that
        # './adder.py' or 'my.dir/adder.py' still yield 'adder'.
        script = os.path.splitext(os.path.basename(sys.argv[0]))[0]
        return '.'.join([x for x in (script, self.args.name) if x])

    def create_stats(self):
        '''
        Create the stats collector for this script. Keys are prefixed
        with `<hostname>.<name>`.
        '''
        stats_prefix = '.'.join([x for x in (self.hostname, self.name) if x])
        return StatsCollector(stats_prefix, self.args.statsd_server)

    def init_logger(self, fname, log_level, quiet=False):
        '''
        Configure the root logger and return it.

        @fname is the log file name; defaults to `<name>.log`.
        @log_level is the name of a level from the logging module
            (case-insensitive), eg: 'debug'.
        @quiet when true, nothing is logged to stderr; the log file is
            always written.
        '''
        if not fname:
            fname = '%s.log' % self.name

        log = logging.getLogger('')

        hdlrs = []
        if not quiet:
            hdlrs.append(logging.StreamHandler(sys.stderr))
        hdlrs.append(logging.handlers.RotatingFileHandler(
            fname, maxBytes=MAX_LOG_FILE_SIZE, backupCount=10))

        for hdlr in hdlrs:
            hdlr.setFormatter(self.LOG_FORMATTER)
            log.addHandler(hdlr)

        log.setLevel(getattr(logging, log_level.upper()))

        return log

    def dump_stacks(self):
        '''
        Dumps the stack of all threads and greenlets. This function
        is meant for debugging. Useful when a deadlock happens.
        borrowed from: http://blog.ziade.org/2012/05/25/zmq-and-gevent-debugging-nightmares/
        returns: the dump as a single string
        '''

        dump = []

        # threads
        threads = {th.ident: th.name for th in threading.enumerate()}

        for thread, frame in sys._current_frames().items():
            if thread not in threads:
                continue
            dump.append('Thread 0x%x (%s)\n' % (thread, threads[thread]))
            dump.append(''.join(traceback.format_stack(frame)))
            dump.append('\n')

        # greenlets
        try:
            from greenlet import greenlet
        except ImportError:
            return ''.join(dump)

        # if greenlet is present, let's dump each greenlet stack
        for ob in gc.get_objects():
            if not isinstance(ob, greenlet):
                continue
            if not ob:
                continue  # not running anymore or not started
            dump.append('Greenlet\n')
            dump.append(''.join(traceback.format_stack(ob.gr_frame)))
            dump.append('\n')

        return ''.join(dump)

    def define_baseargs(self, parser):
        '''
        Define basic command-line arguments required by the script.
        @parser is a parser object created using the `argparse` module.
        returns: None
        '''
        parser.add_argument('--name', default=None,
            help='Name to identify this instance')
        parser.add_argument('--statsd-server', default=None,
            help='Location of StatsD server to send statistics. '
                 'Format is ip[:port]. Eg: localhost, localhost:8125')
        parser.add_argument('--log', default=None,
            help='Name of log file')
        parser.add_argument('--log-level', default='WARNING',
            help='Logging level as picked from the logging module')
        parser.add_argument('--quiet', action='store_true')

    def define_args(self, parser):
        '''
        Define script specific command-line arguments.
        @parser is a parser object created using the `argparse` module.
        You can add arguments using the `add_argument` of the parser object.
        For more information, you can refer to the documentation of argparse
        module.
        returns: None
        '''
        pass

    def init(self):
        '''
        Override this method to put any initialization logic for your script.
        It is recommended that you use this instead of subclassing __init__.
        '''
        pass

    def run(self):
        '''
        Override this method to define the logic for the script's
        functionality. The base implementation does nothing.
        '''
        pass
27 changes: 27 additions & 0 deletions examples/adder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from basescript import BaseScript


class Adder(BaseScript):
    # The script description. It is used as a part of the usage doc
    # that is shown when the script is run with the --help option.
    DESC = 'Adds numbers'

    def init(self):
        '''
        Script specific initialization goes here.
        This is preferred to overriding __init__
        '''
        self.a, self.b = 10, 20

    def define_args(self, parser):
        '''Register the positional integer argument `c`.'''
        parser.add_argument('c', type=int, help='Number to add')

    def run(self):
        '''Print the sum of `a`, `b` and the `c` argument.'''
        self.log.info("Starting run of script ...")

        total = self.a + self.b + self.args.c
        print(total)

        self.log.info("Script is done")


if __name__ == '__main__':
    Adder().run()
8 changes: 8 additions & 0 deletions examples/helloworld.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from basescript import BaseScript


class HelloWorld(BaseScript):
    '''Smallest possible script: prints a greeting.'''

    def run(self):
        '''Print the greeting to standard output.'''
        print("Hello world")


if __name__ == '__main__':
    HelloWorld().run()
19 changes: 19 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from setuptools import find_packages, setup

# Distribution metadata for the basescript package.
setup(
    name='basescript',
    version='0.1',
    description='Basic infrastructure for writing scripts',
    keywords='basescript',
    author='Prashanth Ellina',
    author_email='Use the github issues',
    url='https://github.com/deep-compute/basescript',
    license='MIT License',
    # Third-party packages imported by basescript/basescript.py.
    install_requires=[
        'gevent',
        'statsd',
    ],
    package_dir={'basescript': 'basescript'},
    packages=find_packages('.'),
    include_package_data=True,
)

0 comments on commit fa4442b

Please sign in to comment.