Skip to content

Commit

Permalink
initial
Browse files Browse the repository at this point in the history
  • Loading branch information
espenfjo committed Nov 29, 2013
0 parents commit 033765f
Show file tree
Hide file tree
Showing 79 changed files with 15,264 additions and 0 deletions.
Empty file added SpidieFace/__init__.py
Empty file.
Binary file added SpidieFace/__init__.pyc
Binary file not shown.
394 changes: 394 additions & 0 deletions SpidieFace/prof.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,394 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2007, Dima Dogadaylo (www.mysoftparade.com)
# See also: http://www.mysoftparade.com/blog/django-profile-sql-performance/

import os
from os import path
import sys
from datetime import datetime
import re
import urlparse


# Name of the report column that holds the profiled url.  profile() stores the
# url under this key in every result row and moves the column to the front of
# each report table.
_PATH_COLUMN = 'Path'

def profile(operations):
    """Run each requested profile operation against every discovered url.

    ``operations`` is an iterable of profiler callables (see PROFILERS).
    Each one is called with a url and must return a dict of measurements;
    it must expose ``name`` and ``sort_key`` attributes and may expose
    ``reverse`` (treated as False when missing).  One report table per
    operation is printed through print_report().
    """
    debug('profile', options)
    import operator
    urls = get_urls(options.depth)
    print('Found %d urls.' % len(urls))
    info('urls', urls)
    if not urls:
        # Without at least one result row there are no column names to
        # build a report from.
        return

    for op in operations:
        results = []
        for url in urls:
            res = op(url)
            res[_PATH_COLUMN] = url
            results.append(res)
        results.sort(key=operator.itemgetter(op.sort_key),
                     reverse=getattr(op, 'reverse', False))
        # ensure that path column is first, the others keep their dict order
        keys = [_PATH_COLUMN]
        keys += [key for key in results[0] if key != _PATH_COLUMN]
        # build report table: header row followed by one row per url
        report = [keys] + [[line[key] for key in keys] for line in results]
        print_report(op.name, report)

def check_status_codes(depth=3, ignore_codes=(200,)):
    """Request every known page and report the ones that did not succeed.

    Returns a dict mapping each url whose status code is not listed in
    ``ignore_codes`` to ``{'code': <status>}``.  Status 0 is what
    _internal_request() reports when the request itself raised; for those
    entries the exception text is added under ``'error'``.
    """
    failures = {}
    for page in get_urls(depth):
        response = _internal_request(page)
        if response.status_code in ignore_codes:
            continue
        failures[page] = {'code': response.status_code}
        if response.status_code == 0:
            failures[page]['error'] = response.content
    return failures

def props(**kwargs):
    """Build a decorator that attaches ``kwargs`` as attributes of a function.

    The decorated function is wrapped in a pass-through callable that
    forwards all arguments and returns the result unchanged; every keyword
    given to props() is set as an attribute on that wrapper.  The wrapper
    keeps the original function's name and docstring.
    """
    import functools

    def wrapper(func):
        @functools.wraps(func)
        def executor(*args, **call_kwargs):
            return func(*args, **call_kwargs)
        # attributes are set after wraps() so they win over copied ones
        for key, value in kwargs.items():
            setattr(executor, key, value)
        return executor
    return wrapper

@props(name='SQL queries usage', sort_key='SQL', reverse=True)
def profile_sql(url):
    """Count the SQL queries executed while serving ``url``.

    settings.DEBUG is switched on for the duration of the request (Django
    only records connection.queries in debug mode) and is restored
    afterwards, even if something raises in between.

    Returns a dict with the number of queries ('SQL') and the response
    status code ('Status').  With verbosity > 0 a one-line summary is
    printed; with verbosity > 1 every query and its time is printed too.
    """
    if options.verbosity:
        # no newline: the summary below continues the same output line
        sys.stdout.write("profile_sql %s " % (url,))
    from django.conf import settings
    from django.db import connection
    old_debug = settings.DEBUG
    settings.DEBUG = True
    try:
        connection.queries = []
        response = _internal_request(url)
        queries = connection.queries
        if options.verbosity:
            print("%d SQL queries, status code: %s " %
                  (len(queries), response.status_code))
        if options.verbosity > 1:
            for query in queries:
                print("%s %s" % (query['sql'], query['time']))
    finally:
        settings.DEBUG = old_debug
    return {'SQL': len(queries), 'Status': response.status_code}

@props(name='Page size', sort_key='Size, b', reverse=True)
def profile_size(url):
    """Measure the size of the page served at ``url``.

    Returns a dict with the length of the response body ('Size, b'), the
    response status code ('Status') and the number of ``<img>`` and
    ``<link>`` tags found in the body.  The tag counts are only computed
    for status 200 and are 0 otherwise.
    """
    if options.verbosity:
        # no newline: the summary below continues the same output line
        sys.stdout.write("profile_size %s " % (url,))
    response = _internal_request(url)
    body = response.content
    size = len(body)
    img = link = 0
    if response.status_code == 200:
        img = len(re.findall(r'<img.*?>', body))
        link = len(re.findall(r'<link.*?>', body))
    if options.verbosity:
        # size is len(content), the same number reported in the
        # 'Size, b' column -- it is not kilobytes
        print(", size: %sb, status code: %s " %
              (size, response.status_code))
    return {'Size, b': size, 'Status': response.status_code,
            '<img>': img, '<link>': link}

def print_report(name, report):
    """Print ``report`` as a right-aligned text table under a ``name`` banner.

    ``report`` is a list of rows (profile() passes the column names as the
    first row).  The number of columns is taken from the first row and the
    other rows are expected to have the same length.  Every column is as
    wide as the longest ``str()`` of its cells.  An empty report prints
    nothing.
    """
    if not report:
        return
    col_total = len(report[0])
    # calculate max length of each column
    widths = [max([len(str(row[col])) for row in report])
              for col in range(col_total)]
    # columns are separated by a single space
    total_width = sum(widths) + col_total - 1
    row_format = " ".join(["%%%ds" % width for width in widths])
    banner = "*" * total_width
    print(banner)
    print(name)
    print(banner)
    for row in report:
        print(row_format % tuple(row))
    print("")

def get_urls(depth=3, apps=None):
    """Collect the list of urls to profile.

    The starting set is '/' plus the pages listed in the file named by
    options.read_urls.  ``depth`` then enables further sources:

      depth > 0 -- urls returned by get_model_urls()
      depth > 1 -- valid parent urls of everything found so far
      depth > 2 -- links found in the content of the pages; the crawl is
                   repeated ``depth - 2`` times or until no new url shows up

    ``apps`` is forwarded to get_model_urls(); None or empty means all apps.
    Unless options.all_urls is set, urls mapped to the same view are
    collapsed with remove_dublicated_views().
    """
    urls = set(['/'])
    if options.read_urls:
        urls.update(get_predefined_pages(options.read_urls))
        info("%s: current len of urls=%s" % (options.read_urls, len(urls)))
        debug("\n---predefined urls", urls)
    if depth > 0:
        debug("\n---urls before get_model_urls()", urls)
        urls.update(get_model_urls(apps or []))
    if depth > 1:
        urls.update(get_base_urls(urls))
    # the first crawl round starts from everything collected so far
    new_urls = urls
    while depth > 2 and new_urls:
        new_urls = get_urls_from_content(new_urls) - urls
        urls.update(new_urls)
        depth -= 1
        info("depth", depth, "new_urls", len(new_urls))
    if not options.all_urls:
        urls = remove_dublicated_views(urls)
    return list(urls)

def get_model_urls(apps=None):
    """Return one sample url per model that defines get_absolute_url().

    ``apps`` is an iterable of app labels; when it is empty or None the
    models of all apps returned by get_apps() are inspected.  For each
    model with at least one row, get_absolute_url() of the first row is
    collected.  A model whose url cannot be obtained is reported through
    error() and skipped.
    """
    from django.db.models import get_app, get_apps, get_models
    apps = apps or []
    debug("get_model_urls", apps)
    # convert app labels to app modules
    app_modules = [get_app(app_label) for app_label in apps] or get_apps()
    # all models of all profiled apps (works for an empty app list too)
    classes = [model for app in app_modules for model in get_models(app)]
    debug("all models:\n", classes)
    # remove classes without get_absolute_url()
    classes = [cls for cls in classes
               if callable(getattr(cls, 'get_absolute_url', None))]
    debug("models with get_absolute_url():\n", classes)
    urls = []
    for cls in classes:
        try:
            if cls._default_manager.count():
                url = cls._default_manager.filter()[0].get_absolute_url()
                debug(cls, " -> ", url)
                if url:
                    urls.append(url)
        except Exception as e:
            # best effort: one broken model must not stop the whole scan
            error("Can't obtain url for %s: %s" % (cls, e))
    debug("get_model_urls(): ", urls)
    return urls

def is_valid_url(url):
    """Return True if Django's ``resolve()`` accepts ``url``, else False."""
    from django.core.urlresolvers import resolve
    try:
        resolve(url)
    except Exception:
        # Any resolver failure means "not a valid url".  Exception (rather
        # than a bare except) lets KeyboardInterrupt/SystemExit through.
        return False
    return True

def get_base_urls(urls):
    """Return the valid parent urls of ``urls``.

    The last path segment of every url is cut off; the resulting
    candidates are de-duplicated and only those accepted by
    is_valid_url() are returned, as a list.
    """
    debug("get_base_urls(): ", urls)
    last_segment = re.compile(r"/[-\w\?=&%]+/?$")
    candidates = set()
    for url in urls:
        candidates.add(last_segment.sub(r"/", url))
    debug('base_urls', candidates)
    valid = [candidate for candidate in candidates
             if is_valid_url(candidate)]
    debug('valid base_urls', valid)
    return valid

def iter_page_urls(page, url):
    """ Parse page and generate embedded urls.

    Only links that can point to a page of this site are generated:
    empty hrefs, anchors, hrefs carrying a scheme (http:, ftp:, mailto:,
    javascript:, ...) and protocol-relative ``//host/...`` hrefs are
    skipped.  Relative hrefs are joined with ``url``.

    >>> lines = ('<a href="/abs/url/"> <a name="name">',
    ... '<A class="klass" href="href"> <a\\nhref="new_line"> <a hRef="caSe">',
    ... '<a href="http://ext"> <a href="#anchor"> <a href=""></a>')
    >>> page = "\\n".join(lines)
    >>> [u for u in iter_page_urls(page, '/dir/')]
    ['/abs/url/', '/dir/href', '/dir/new_line', '/dir/caSe']
    >>> list(iter_page_urls('<a href="mailto:me@example.com">', '/dir/'))
    []
    """
    try:
        from urlparse import urljoin
    except ImportError:
        from urllib.parse import urljoin
    anchor = re.compile(r'<a[^>]*?href="(?P<href>[^"]*?)".*?>', re.I)
    # leading "scheme:", "//host" or "#fragment" -- never a local page
    not_local = re.compile(r'[a-z][a-z0-9+.\-]*:|//|#', re.I)
    for match in anchor.finditer(page):
        href = match.group('href')
        # bypass external urls, anchors and empty string
        if not href or not_local.match(href):
            continue
        if not href.startswith('/'):
            href = urljoin(url, href)
        yield href

def get_urls_from_content(urls):
    """Return the set of links found inside the pages behind ``urls``.

    Every url is requested with _internal_request(); only responses with
    status 200 are scanned for links (see iter_page_urls()).
    """
    debug('\nget_urls_from_content', urls)
    found = set()
    for page_url in urls:
        response = _internal_request(page_url)
        if response.status_code != 200:
            continue
        found.update(iter_page_urls(response.content, page_url))
    debug('hrefs', found)
    return found

def remove_dublicated_views(hrefs):
    """Remove pages mapped to same view.

    Each url is resolved; urls that do not resolve are dropped.  Two urls
    count as the same page when they resolve to the same view with the
    same arguments, after every string argument that literally occurs in
    the url has been discarded.  The first url seen for each such
    combination is kept.  Returns a list.
    """
    from django.core.urlresolvers import resolve, Resolver404

    def taken_from_url(value, href):
        # only strings can be substrings of the url; anything else
        # (e.g. None) is an argument that did not come from the url text
        return isinstance(value, basestring) and value in href

    resolvers = []
    unique_urls = []
    for href in hrefs:
        try:
            r = resolve(href)
        except Resolver404:
            continue
        if not r:
            continue
        view, args, kwargs = r[0], list(r[1]), r[2]
        # resolve() don't return url mapping name, and when generic views
        # are used it's a problem, so we do this trick to find
        # "really" different generic views
        args = [arg for arg in args if not taken_from_url(arg, href)]
        kwargs = dict((k, v) for k, v in kwargs.items()
                      if not taken_from_url(v, href))
        signature = (view, args, kwargs)
        # signatures hold lists/dicts and are unhashable, hence a list scan
        if signature not in resolvers:
            resolvers.append(signature)
            unique_urls.append(href)
    return unique_urls

def get_predefined_pages(fname):
    """Read the urls listed one per line in the file ``fname``.

    Surrounding whitespace is stripped and blank lines are ignored.
    Returns an empty list when ``fname`` is not an existing regular file
    or cannot be read; a read failure is reported on stderr.
    """
    if not os.path.isfile(fname):
        return []
    try:
        with open(fname, 'r') as f:
            stripped = [line.strip() for line in f]
    except Exception as e:
        # best effort: a broken url list must not stop the profiling run
        sys.stderr.write("get_predefined_pages %s: %s\n" % (fname, e))
        return []
    return [page for page in stripped if page]

def _internal_request(url):
    """Fetch ``url`` through Django's test client and return the response.

    If the client raises, the error is reported via error() and a
    stand-in object with ``status_code`` 0 and the exception text as
    ``content`` is returned instead.  Responses with status 500 are
    written to the options.save_errors directory when that option is set.
    """
    from django.test.client import Client
    # many code assume request.META['REMOTE_ADDR'] and etc
    browser = Client(REMOTE_ADDR="127.0.0.1", HTTP_HOST="localhost")
    try:
        response = browser.get(url)
    except Exception as exc:
        error('url=%s error=%s' % (url, exc))
        stand_in = {'status_code': 0, 'content': str(exc)}
        response = type('object', (), stand_in)

    wants_saving = response.status_code in (500,) and options.save_errors
    if wants_saving:
        save_page(response.content, url, options.save_errors)
    return response


def save_page(page, url, dir):
    """Store ``page`` in directory ``dir`` under a file name derived from ``url``.

    The target directory is created first when it does not exist yet.
    """
    target = url2path(url, dir)
    folder = os.path.dirname(target)
    if not os.path.exists(folder):
        os.makedirs(folder)
    write_file(target, page)

def url2path(url, dir):
    """Map ``url`` to a flat file path inside directory ``dir``.

    Only the path component of the url is used.  An empty path or one
    ending in '/' gets 'index.html' appended, then every '/' is replaced
    by '_' so the result is a single file name.
    """
    url_path = urlparse.urlsplit(url)[2]
    if not url_path or url_path.endswith('/'):
        url_path += 'index.html'
    flat_name = url_path.replace('/', '_')
    return os.path.join(dir, flat_name)

def error(*args):
    """Write all arguments to stderr on one line, separated by spaces."""
    line = " ".join(["%s" % (arg,) for arg in args])
    sys.stderr.write(line + "\n")

def info(*args):
    """Print all arguments on one line when options.verbosity is above 0."""
    if options.verbosity > 0:
        print(" ".join(["%s" % (arg,) for arg in args]))

def debug(*args):
    """Print all arguments on one line when options.verbosity is above 1."""
    if options.verbosity > 1:
        print(" ".join(["%s" % (arg,) for arg in args]))

def write_file(path, content, mode="wb"):
    """Write ``content`` to the file at ``path``; return True on success.

    Any failure while opening, writing or closing the file is reported on
    stderr and signalled by returning False.
    """
    try:
        # the with-block also covers the close, so a failed flush on close
        # is reported as a failure too
        with open(path, mode) as f:
            f.write(content)
    except Exception as e:
        sys.stderr.write("write_file %s: %s\n" % (path, e))
        return False
    return True

def close_file(f):
    """Close ``f`` and report whether it was closed.

    Returns True after a successful close.  Returns False when ``f`` is
    falsy (e.g. None) or when closing raises; in the latter case the
    error is written to stderr.
    """
    try:
        if not f:
            return False
        f.close()
    except Exception as exc:
        sys.stderr.write("close_file %s: %s" % (f, exc))
        return False
    return True

# Profilers selectable by name from the command line.  Each value is a
# callable taking a url and returning a dict of measurements; the @props
# decorator supplies the name / sort_key / reverse attributes that
# profile() reads.
PROFILERS = {
    'sql': profile_sql,
    'size': profile_size,
}

# Option values used when the module is imported rather than run; the same
# dict seeds the OptionParser defaults in execute_from_command_line().
_default_options = {'verbosity': 0,
                    'read_urls': './profile-pages.txt',
                    'depth': 4, 'all_urls': False,
                    'test': False, 'save_errors': None}
# will be redefined if run from command line
# (a class is created here only to get attribute access to the defaults)
options = type('DefaultOptions', (), _default_options)

# NOTE(review): the positional arguments are looked up in PROFILERS
# ('sql', 'size') by execute_from_command_line(); they are not app names.
_usage = """%prog [options] [app_name ...]"""

def execute_from_command_line(argv):
    """Parse ``argv`` and run the requested action.

    ``argv`` is a full command line (``argv[0]`` is skipped).  Positional
    arguments select profilers by their PROFILERS key; with none given all
    profilers run.  The parsed options replace the module-level
    ``options`` object.

    Actions, in order of precedence:
      --test   run the module doctests and return
      --check  print every page whose status code is not 200 and return
      default  run the selected profilers through profile()

    When neither --settings nor --pythonpath is given, a ``settings``
    module is imported from the import path and handed to Django's
    setup_environ(), like manage.py does; the process exits with status 1
    if that import fails.
    """
    global options
    from optparse import OptionParser
    parser = OptionParser(version='0.1', usage=_usage)
    parser.set_defaults(**_default_options)
    parser.add_option('--settings',
        help='Python path to settings module, e.g. "myproject.settings.main". If this isn\'t provided, the DJANGO_SETTINGS_MODULE environment variable will be used.')
    parser.add_option('--pythonpath',
        help='Lets you manually add a directory the Python path, e.g. "/home/djangoprojects/myproject".')
    parser.add_option('--verbosity', action='store', dest='verbosity',
        type='choice', choices=['0', '1', '2'],
        help='Verbosity level; 0=minimal output, 1=normal output, 2=all output')
    parser.add_option('-a', '--all', action='store_true',
        dest='all_urls',
        help='Profile all found urls without view checking.')
    parser.add_option('--test', action='store_true',
        dest='test', help='Run django-profile doctests.')
    parser.add_option('--check', action='store_true',
        dest='check', help='Check status codes and report broken pages.')
    parser.add_option('--depth', action='store', dest='depth',
        help='Logical url searching depth; 0,1,2,3')
    parser.add_option('--read_urls',
        help='Optional file with paths of pages to profile. Default ./profile-pages.txt')
    parser.add_option('--save_errors',
        help='Directory to save pages with 500 status code and other errors. No default.')
    options, args = parser.parse_args(argv[1:])

    if options.test:
        import doctest
        doctest.testmod()
        return

    try:
        if args:
            profilers = [PROFILERS[ind] for ind in args]
        else:
            profilers = list(PROFILERS.values())
    except KeyError as e:
        # parser.error() prints the message and exits
        parser.error("Invalid indicators==%s:%s" % (args, e))
    # command-line values arrive as strings, the defaults are ints
    options.verbosity = int(options.verbosity)
    options.depth = int(options.depth)
    if options.settings:
        os.environ['DJANGO_SETTINGS_MODULE'] = options.settings
    if options.pythonpath:
        sys.path.insert(0, options.pythonpath)
    if not options.settings and not options.pythonpath:
        # behave like a manage.py
        try:
            import settings  # Assumed to be in the same directory.
        except ImportError:
            # NOTE: no local "import sys" here -- it would make ``sys`` a
            # local name for the whole function and break the
            # sys.path.insert() call above.
            sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r.\nYou'll have to run django-profile.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
            sys.exit(1)
        from django.core.management import setup_environ
        setup_environ(settings)

    if options.check:
        # honour --depth for the status check as well
        failed = check_status_codes(options.depth)
        for url, reason in failed.items():
            print("%s -> %s" % (url, reason))
        return
    profile(profilers)

# Script entry point: pass the raw command line to the option handling in
# execute_from_command_line().
if __name__ == '__main__':
    execute_from_command_line(sys.argv)
Loading

0 comments on commit 033765f

Please sign in to comment.