Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
rubinsztajn committed Nov 17, 2010
0 parents commit a952c8b
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 0 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# mods-search

A Web-based tool for searching MODS records created for the
W. E. B. Du Bois Digitization Project.

Uses the [web.py](http://webpy.org) framework.


98 changes: 98 additions & 0 deletions ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import apsw, os, glob, sys, fnmatch, time
from lxml import etree
from optparse import OptionParser

# Command-line interface: the first positional argument is used further down
# as the root directory handed to os.walk; the two flags select what the
# script is allowed to write to the database.
usage = "usage: %prog [options] arg1"
parser = OptionParser(usage=usage)

# Set options
# -a / --add-only sets options.add; -u / --add-and-update sets options.update.
# Both are plain boolean flags (store_true) and are None when not given.
parser.add_option("-a", "--add-only", action="store_true", dest="add")
parser.add_option("-u", "--add-and-update", action="store_true", dest="update")

(options, args) = parser.parse_args()

# Prefix-to-URI map passed as `namespaces=` to the XPath queries below.
ns = {'mods':'http://www.loc.gov/mods/v3'}

# Database connection and the single cursor used for every statement in this
# script.
# NOTE(review): the database path is hard-coded to drive r: -- confirm this is
# still the intended location before running elsewhere.
conn = apsw.Connection('r:/aaron/db/mods.db')
c = conn.cursor()
# Running totals printed at the end of the run:
# i counts inserted records, u counts updated records.
i = 0
u = 0

def name2text(names):
    """Build a display name from the first one or two items of *names*.

    names: a sized, indexable collection whose items expose a ``.text``
        attribute. The caller in this script passes an lxml element, so the
        items are that element's children.

    Returns the first item's text when there is exactly one item, the first
    two texts joined by ", " when there are two or more (any further items
    are ignored), and '' when there are none.

    An item whose text is None (for example an empty element) contributes an
    empty string, so the result is always a string and can safely be
    concatenated by the caller. Previously such an item either raised
    TypeError here or made this function return None.
    """
    # len() rather than truthiness: lxml elements do not support a reliable
    # boolean test, and the original logic is defined in terms of len().
    count = len(names)
    if count == 0:
        return ''

    first = names[0].text or ''
    if count == 1:
        return first

    return first + ', ' + (names[1].text or '')


# Log the run date so output captured from successive ingests can be told
# apart. (Single-argument print(...) behaves the same on Python 2 and 3.)
print(time.strftime("%Y-%m-%d"))

# Exit with the usage message instead of an IndexError when the directory
# argument is missing.
if not args:
    parser.error("no directory to ingest was given")

for path, dirs, files in os.walk(args[0]):
    for filename in files:
        # Only *.xml files are ingested, and names carrying 'tei' at
        # characters 18-20 are skipped (presumably TEI files stored beside
        # the MODS records -- confirm against the file naming scheme).
        if not fnmatch.fnmatch(filename, '*.xml') or filename[18:21] == 'tei':
            continue

        filepath = os.path.join(path, filename)
        # %M is minutes. The format used to repeat %m (month) in the minutes
        # position, so the stored stamp ignored the minute of modification.
        # Rows written with the old format will compare unequal once and be
        # refreshed on the next run with --add-and-update.
        modtime = time.strftime("%Y%m%d:%H:%M:%S",
                                time.localtime(os.stat(filepath).st_mtime))
        record_id = os.path.splitext(filename)[0]
        print("Checking: %s" % record_id)

        rows = c.execute(
            "select record_id, lastmod from fulltext where record_id match ?",
            (record_id,)).fetchall()
        exists = bool(rows)

        # Stored copy carries the same modification stamp: nothing to do.
        if exists and rows[0][1] == modtime:
            continue

        # Decide up front whether the selected options permit a write, so
        # files that would not be written are not parsed at all.
        # Existing rows are only rewritten with -u; new rows need -a or -u.
        if exists:
            allowed = options.update
        else:
            allowed = options.add or options.update
        if not allowed:
            continue

        tree = etree.parse(filepath)
        full = etree.tostring(tree, pretty_print=True)

        title = tree.xpath('//mods:titleInfo/mods:title', namespaces=ns)
        creator = tree.xpath('//mods:roleTerm[text()="creator"]', namespaces=ns)
        names = tree.xpath('//mods:name', namespaces=ns)
        abstract = tree.xpath('//mods:abstract', namespaces=ns)

        creatorname = ''
        if creator:
            # The name text is built from the grandparent of the matching
            # roleTerm element.
            creatorname = name2text(creator[0].getparent().getparent()) or ''

        # Every name in the record, each followed by a single space.
        othernames = ''.join((name2text(n) or '') + ' ' for n in names)

        # Empty elements have text None; store an empty string instead.
        t = (title[0].text or '') if title else ''
        a = (abstract[0].text or '') if abstract else ''

        if exists:
            # One statement for all columns instead of one per column.
            c.execute(
                "update fulltext set lastmod=?, title=?, creator=?, names=?, "
                "abs=?, full=? where record_id=?",
                (modtime, t, creatorname, othernames, a, full, record_id))
            print("Updated: %s" % record_id)
            u += 1
        else:
            c.execute('insert into fulltext values (?,?,?,?,?,?,?)',
                      (record_id, modtime, t, creatorname, othernames, a, full))
            print("Added: %s" % record_id)
            i += 1

print('')
print("%d total records added to the database" % i)
print("%d total records updated" % u)



45 changes: 45 additions & 0 deletions mods-search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import web, apsw

# URL routing table, given as alternating pattern / handler-class-name
# entries: the root path is handled by Index, and a path matching the second
# pattern is handled by Record (whose GET takes the captured group as its
# argument).
urls = (
'/', 'Index',
'/(.+?)', 'Record'
)

# Handler class names in `urls` are looked up in this module's globals().
app = web.application(urls, globals())
# Renderer for the templates stored under templates/ (used below as
# render.index and render.record).
render = web.template.render('templates/')
# Database connection and a module-level cursor used by the handlers below.
# NOTE(review): the database location is a hard-coded network path -- confirm
# it is reachable from wherever this application is deployed.
conn = apsw.Connection('//umalstf2/spec_collections/aaron/db/mods.db')
c = conn.cursor()

class Index:
    """Search page: renders the form and, when a query is given, its results."""

    # Columns of the fulltext table that may be searched. A column name
    # cannot be bound as a SQL parameter, so the one interpolated into the
    # query is only ever taken from this tuple, never from the request.
    SEARCH_FIELDS = ('full', 'title', 'creator', 'names', 'abs')

    def GET(self):
        """Run the requested search and render the index template.

        Reads two request parameters: ``q`` (the search text) and ``field``
        (which column to search). Without ``q`` the page is rendered with
        ``results`` set to None. A missing or unrecognised ``field`` falls
        back to 'full'; it used to leave the result rows unbound and fail
        the request with an unhandled exception.

        Returns the rendered index template, which receives the list of
        (record_id, title, creator) rows (or None) and the query text.
        """
        i = web.input()
        q = i.get('q')
        f = i.get('field')
        results = None

        if q:
            if f not in self.SEARCH_FIELDS:
                f = 'full'
            # Use a cursor of our own: starting a new query on the shared
            # module-level cursor would discard the pending rows of any
            # other request still reading from it.
            cur = conn.cursor()
            rows = cur.execute(
                "select record_id, title, creator from fulltext "
                "where %s match ?" % f, (q,))
            results = rows.fetchall()

        return render.index(results, q)

class Record:
    """Record page: serves the stored XML of a single record."""

    def GET(self, id):
        """Return the stored ``full`` column of the record matching *id*.

        id: the text captured from the request path by the URL pattern.

        Responds with HTTP 404 (web.notfound) when no row matches; this
        used to index into an empty result list and fail the request with
        an unhandled IndexError. When several rows match, the first one
        returned by the database is served, as before.
        """
        # Per-request cursor rather than the shared module-level one, so a
        # concurrent request cannot reset this query mid-read.
        cur = conn.cursor()
        result = cur.execute(
            "select full from fulltext where record_id match ?",
            (id,)).fetchall()
        if not result:
            raise web.notfound()

        # Only announce XML once there is a record to send.
        web.header('Content-Type', 'text/xml')
        return render.record(result[0][0])


# Start the web.py application only when this file is executed as a script.
if __name__ == "__main__":
    app.run()
Binary file added templates/images/Thumbs.db
Binary file not shown.
Binary file added templates/images/dubois_head_icon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
72 changes: 72 additions & 0 deletions templates/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
$def with (results, q)

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>Search MODS</title>
<link rel="stylesheet" type="text/css" href="http://people.umass.edu/arubinst/mods-search/js/tablesorter/themes/blue/style.css" />
<link rel="stylesheet" type="text/css" href="http://people.umass.edu/arubinst/mods-search/styles/screen.css" />
<script type="text/javascript" src="http://people.umass.edu/arubinst/mods-search/js/jquery.js"></script>
<script type="text/javascript" src="http://people.umass.edu/arubinst/mods-search/js/tablesorter/jquery.tablesorter.js"></script>
<script type="text/javascript">
$$(document).ready(function() {
$$("#resultlist").tablesorter( {sortList: [[0,0]]});
});
</script>

</head>
<body>

<div id="header">
<a href="http://localhost:8080"><img src="http://people.umass.edu/arubinst/mods-search/images/dubois_head_icon.png" style="float:left;" border="0" /></a>
<div id="searchbox">
<form name="input" method="GET">
<input class="search" type="text" name="q" /><br />
<select class="select" name="field">
<option value="full">Keyword</option>
<option value="title">Title</option>
<option value="abs">Abstract</option>
<option value="creator">Creator</option>
<option value="names">Names</option>
</select>
<p><input class="button" type="submit" value="Go" /></p>
</form>
</div>
</div>



$if q:
<div id="results">
$if results:

<p class="message" style="font-size:1.0em;">You searched for <span style="font-size:1.3em;">$q</span> and found <span style="font-size:1.3em;">$len(results)</span> records</p>

<table id="resultlist" class="tablesorter">
<thead>
<tr>
<th>Item Number</th>
<th>Title</th>
<th>Creator</th>
</tr>
</thead>
<tbody>
$for record in results:
<tr>
<td><a href="/$record[0]">$record[0]</a></td>
<td>$record[1]</td>
<td>$record[2]</td>
</tr>

</tbody>
</table>

$else:

<p class="message">Sorry, nothing doing... Try somethin' else</p>

</div>

</body>
</html>
2 changes: 2 additions & 0 deletions templates/record.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
$def with (record)
$:record

0 comments on commit a952c8b

Please sign in to comment.