Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check out our changes see if you like 'em. #2

Closed
wants to merge 33 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
7569a4b
Retry opens on 409
adam-vessey Sep 15, 2011
f0b113c
Insert a small pause...
adam-vessey Sep 19, 2011
606dcca
Get rid of setup script...
adam-vessey Sep 20, 2011
10ad1bf
Ignore .pyc files...
adam-vessey Sep 20, 2011
eb6d74a
Ignore .pyc files...
adam-vessey Sep 20, 2011
219285c
Changed the connection class to default to non-persistent. Also am c…
willtp87 Oct 3, 2011
8c7e1ae
Revert "Get rid of setup script..."
adam-vessey Oct 3, 2011
7acd747
Add a bit of logging to the error handling.
adam-vessey Oct 11, 2011
94228a0
Fix error
adam-vessey Oct 20, 2011
b854bc4
Change logic in getDatastreamProfile slightly, so as to avoid KeyError
adam-vessey Oct 20, 2011
3da82ac
Merge remote branch 'origin/master'
adam-vessey Oct 20, 2011
56e5a7e
Fixed bug with APIA auth
Nov 9, 2011
c903182
fixed getNextPid in fedora 3.5
willtp87 Mar 30, 2012
136ff20
added compatiblity with risearch, fedora now needs credentials, still…
willtp87 May 10, 2012
23009e9
tested pathc in Fedora 3.4
willtp87 May 11, 2012
6a0550d
unlimited queries in query tripples
willtp87 Jun 13, 2012
499d876
Merge pull request #1 from discoverygarden/unlimit_query_limit
willtp87 Jun 13, 2012
816a8e4
not trying to encode in ascii
willtp87 Jun 19, 2012
a4da8c3
unicode v1
willtp87 Jun 19, 2012
346ca4d
Merge pull request #2 from discoverygarden/unicode_url_params
willtp87 Jun 19, 2012
be8ac3e
no longer permanently changing the headers after an ri call
willtp87 Jul 11, 2012
53c8e20
Merge pull request #3 from willtp87/master
willtp87 Jul 11, 2012
d4db861
Fix checksumType property.
adam-vessey Sep 14, 2012
dda1d89
Avoid race condition
adam-vessey Sep 17, 2012
ca8b480
Merge branch 'master' of github.com:discoverygarden/fcrepo
adam-vessey Sep 17, 2012
bdd737d
Change logging level when listing DSs fails
adam-vessey Sep 17, 2012
1fc356c
Merge pull request #4 from adam-vessey/master
Sep 17, 2012
fc0700b
We now have a workaround when uploading zero length datasteams to Fed…
willtp87 Nov 23, 2012
9a9e7e6
Merge pull request #5 from willtp87/master
Nov 23, 2012
c55b45e
unicode support in RI queries
willtp87 Nov 26, 2012
bd8a908
Merge pull request #6 from willtp87/master
willtp87 Nov 26, 2012
cc45938
incompatibilities noted
willtp87 May 5, 2014
fc3b703
Merge pull request #9 from willtp87/master
jordandukart May 5, 2014
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.pyc
3 changes: 2 additions & 1 deletion src/fcrepo/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ complete REST API.
On top of that a more highlevel abstraction is written, which will be
demonstrated in this `doctest`_.
This package has been written for FedoraCommons 3.3 and 3.4, it has not been
tested with older versions. REST API documentation can be found in the
tested with older versions. It has also been tested with newer Fedora releases and does not
work with Fedora >= 3.6 due to WADL issues. REST API documentation can be found in the
`Fedora wiki`_.


Expand Down
86 changes: 56 additions & 30 deletions src/fcrepo/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import urllib
from collections import defaultdict

from copy import copy

from lxml import etree
from lxml.builder import ElementMaker

Expand All @@ -24,12 +26,21 @@ def getNextPID(self, namespace, numPIDs=1, format=u'text/xml'):
format=format)
xml = response.read()
response.close()
'''
The default namespace is used below so that this code works with both Fedora 3.4 and 3.5, where the namespacing is different.
'''
doc = etree.fromstring(xml)
ids = [id.decode('utf8') for id in doc.xpath('/pidList/pid/text()')]
fake_namespace_dictionary = {}
try:
fake_namespace_dictionary['default'] = doc.nsmap[None]
ids = [id.decode('utf8') for id in doc.xpath('/default:pidList/default:pid/text()', namespaces = fake_namespace_dictionary)]
except KeyError:
ids = [id.decode('utf8') for id in doc.xpath('/pidList/pid/text()')]

if len(ids) == 1:
return ids[0]
return ids

def createObject(self, pid, label, state=u'A'):
foxml = ElementMaker(namespace=NSMAP['foxml'], nsmap=NSMAP)
foxml_state = {'A': u'Active',
Expand Down Expand Up @@ -133,30 +144,33 @@ def getDatastreamProfile(self, pid, dsid):
response.close()
doc = etree.fromstring(xml)
result = {}
tags = {
'dsLabel': 'label',
'dsVerionId': 'versionId',
'dsCreateDate': 'createdDate',
'dsState': 'state',
'dsMIME': 'mimeType',
'dsFormatURI': 'formatURI',
'dsControlGroup': 'controlGroup',
'dsSize': 'size',
'dsVersionable': 'versionable',
'dsInfoType': 'infoType',
'dsLocation': 'location',
'dsLocationType': 'locationType',
'dsChecksum': 'checksum',
'dsChecksumType': 'checksumType'
}

for child in doc:
# rename elementnames to match property names in foxml
# the xml data is namespaced in 3.4, but not in 3.3, so strip out
# the namespace, to be compatible with both
name = {'dsLabel': 'label',
'dsVerionId': 'versionId',
'dsCreateDate': 'createdDate',
'dsState': 'state',
'dsMIME': 'mimeType',
'dsFormatURI': 'formatURI',
'dsControlGroup': 'controlGroup',
'dsSize': 'size',
'dsVersionable': 'versionable',
'dsInfoType': 'infoType',
'dsLocation': 'location',
'dsLocationType': 'locationType',
'dsChecksum': 'checksum',
'dsChecksumType': 'checksumType'}.get(child.tag.split('}')[-1])
if name is None or child.text is None:
continue
value = child.text
if not isinstance(value, unicode):
value = value.decode('utf8')
result[name] = value
name = tags.get(child.tag.rpartition('}')[2], None)
if name is not None:
value = child.text
if value and not isinstance(value, unicode):
value = value.decode('utf8')
result[name] = value
return result

def modifyDatastream(self, pid, dsid, body='', **params):
Expand Down Expand Up @@ -252,15 +266,27 @@ def searchObjects(self, query, fields, terms=False, maxResults=10):

def searchTriples(self, query, lang='sparql', format='Sparql',
limit=100, type='tuples', dt='on', flush=True):

flush = str(flush).lower()
url = u'/risearch?%s' % urllib.urlencode({'query':query,
'lang':lang,
'flush': flush,
'format':format,
'limit':limit,
'type':type,
'dt':dt})
headers = {'Accept:': 'text/xml'}
URL_pramaters = {'query':query,
'lang':lang,
'flush': flush,
'format':format,
'type':type,
'dt':dt}

# Conditionally set the limit if there is one (so it can be set to None to omit it).
if limit:
URL_pramaters['limit'] = limit

# Encode in utf8 to let unicode pass through urlencode
for key in URL_pramaters:
URL_pramaters[key] = URL_pramaters[key].encode('utf8')

url = u'/risearch?%s' % urllib.urlencode(URL_pramaters)
# Fedora started requiring authentication for RI in 3.5; this was tested in 3.4 as well.
headers = copy(self.api.connection.form_headers)
headers['Accept:'] = 'text/xml'
response = self.api.connection.open(url, '', headers, method='POST')
xml = response.read()
doc = etree.fromstring(xml)
Expand Down
55 changes: 45 additions & 10 deletions src/fcrepo/connection.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
# Copyright (c) 2010 Infrae / Technical University Delft. All rights reserved.
# See also LICENSE.txt
import StringIO
import socket
import httplib
import urlparse
import logging
import StringIO, socket, httplib, urlparse, logging
from time import sleep
from copy import copy

class APIException(Exception):
""" An exception in the general usage of the API """
Expand Down Expand Up @@ -33,7 +31,7 @@ class Connection(object):
"""
def __init__(self, url, debug=False,
username=None, password=None,
persistent=True):
persistent=False):
"""
url -- URI pointing to the Fedora server. eg.

Expand Down Expand Up @@ -65,18 +63,46 @@ def __init__(self, url, debug=False,
def close(self):
self.conn.close()

def open(self, url, body='', headers=None, method='GET',):
def open(self, url, body='', headers=None, method='GET'):
if headers is None:
headers = {}
http_headers = {}
else:
# Copy because in an edge case later we may alter the headers.
http_headers = copy(headers)
if url.startswith('/'):
url = url[1:]
url = '%s/%s' % (self.path, url)

# Fedora doesn't like a zero length message body when ingesting a datastream.
if body == '' and (method == 'PUT' or method == 'POST') and 'datastreams/' in url:
logging.debug('Body empty for HTTP request using'
' fake form for datastream ingest.')

# Establish the mime type and boundary.
parsed = urlparse.urlparse(url)
mime_type = urlparse.parse_qs(parsed.query)['mimeType']
if not mime_type:
mime_type = 'application/octet-stream'
boundary = '----------ThIs_Is_tHe_bouNdaRY_$'

# Build message body.
body = (('--{0}{1}Content-Disposition: form-data;'
' name="file"; filename="IGNORE THE HACK."{1}'
'Content-Type: {2}{1}{1}{1}--{0}--{1}').format(boundary, '\r\n', mime_type))

# Change headers for new content type.
http_headers.update({
'User-Agent': 'INSERT USERAGENTNAME',
'Content-Type': 'multipart/form-data; boundary=%s' % boundary
})

# Send out the request.
attempts = 3

while attempts:
try:
logging.debug('Trying %s on %s' % (method, url))
self.conn.request(method, url, body, headers)
# We can't have unicode characters floating around in the body.
self.conn.request(method, url, body, http_headers)
return check_response_status(self.conn.getresponse())
except (socket.error,
httplib.ImproperConnectionState,
Expand All @@ -89,6 +115,15 @@ def open(self, url, body='', headers=None, method='GET',):
attempts -= 1
if not attempts:
raise
except FedoraConnectionException as e:
attempts -= 1
if not attempts or e.httpcode not in [409]:
logging.exception('Got HTTP code %s in open... Failure.' % e.httpcode)
raise e
else:
logging.exception('Got HTTP code %s in open... Retrying...' % e.httpcode)
self._reconnect()
sleep(5)
if not self.persistent:
self.close()

Expand Down
7 changes: 3 additions & 4 deletions src/fcrepo/datastream.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,7 @@ def _setProperty(self, name, value):
state = property(lambda self: self._info['state'],
lambda self, value: self._setProperty('state',
value))
checksumType = property(lambda self: self._info['checksumType'],
lambda self, value: self._setProperty('checksumType',
value))

versionId = property(lambda self: self._info['versionId'],
lambda self, value: self._setProperty('versionId',
value))
Expand All @@ -97,7 +95,8 @@ def _setProperty(self, name, value):
createdDate = property(lambda self: self._info['createdDate'])
controlGroup = property(lambda self: self._info['controlGroup'])
size = typedproperty(lambda self: self._info['size'], pytype=int)
checksum = property(lambda self: self._info['formatURI'])
checksum = property(lambda self: self._info['checksum'])
checksumType = property(lambda self: self._info['checksumType'])



Expand Down
20 changes: 19 additions & 1 deletion src/fcrepo/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
# See also LICENSE.txt

from fcrepo.datastream import FedoraDatastream, RELSEXTDatastream, DCDatastream
from fcrepo.connection import FedoraConnectionException
import logging

logger = logging.getLogger('fcrepo.object.FedoraObject')
class FedoraObject(object):
def __init__(self, pid, client):
self.pid = pid
Expand Down Expand Up @@ -31,7 +34,22 @@ def _setProperty(self, name, value):

def datastreams(self):
if self._dsids is None:
self._dsids = self.client.listDatastreams(self.pid)
'''
XXX: Wrapped in try/except, as it can cause race condition issues when
attempting to use the 'in' operator, when trying to determine if an
object contains a certain datastream.

Race condition:
1. We get the object (profile) as a FedoraObject
2. Another system purges the object
3. We got the object, so we try to enumerate the DSs via the 'in'
operator, and fail.
'''
try:
self._dsids = self.client.listDatastreams(self.pid)
except FedoraConnectionException:
self._dsids = []
logger.debug('Unable to get the list of datastream! (perhaps the object has been purged?) Returning an empty list.')
return self._dsids

def __iter__(self):
Expand Down
8 changes: 7 additions & 1 deletion src/fcrepo/wadl.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ def submit(self, body='', **params):
for param, value in self.undocumented_params.items():
if not param in qs:
qs[param] = value

# Encode values as UTF-8 to let unicode pass through urlencode.
for key in qs:
qs[key] = qs[key].encode('utf8')

if qs:
self.url = '%s?%s' % (self.url, urllib.urlencode(qs))

Expand All @@ -82,7 +87,8 @@ def submit(self, body='', **params):
class API(object):
def __init__(self, connection):
self.connection = connection
fp = self.connection.open('/objects/application.wadl')
# hack for APIA auth
fp = self.connection.open('/objects/application.wadl', headers=self.connection.form_headers)
wadl_xml = fp.read()
fp.close()
self.doc = etree.fromstring(wadl_xml)
Expand Down