Skip to content

Commit

Permalink
Now works with XL OAI-PMH server
Browse files Browse the repository at this point in the history
  • Loading branch information
marma committed May 2, 2017
1 parent 1a3668f commit 5402801
Show file tree
Hide file tree
Showing 13 changed files with 445 additions and 112 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ etc/export.properties
etc/config.properties
tmp/
etc/config_xl.properties
.idea
/.nb-gradle/
29 changes: 23 additions & 6 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ archivesBaseName = "export"
group = 'se.kb.libris'
version = '3.0.0-alpha'

sourceCompatibility = 1.6
targetCompatibility = 1.6
sourceCompatibility = 1.8
targetCompatibility = 1.8

dependencies {
def groovyVersion=GroovySystem.getVersion()
Expand All @@ -17,6 +17,8 @@ dependencies {
compile 'com.github.libris:jmarctools:90edfc74bcc006c2d45674cd67fbdc2db0964a97'
compile 'com.github.libris:isbntools:e70a5fbedb88bb2349de3ed5ff98e4475e8f7e1a'
compile group: 'com.ibm.icu', name: 'icu4j', version: '4.8.1.1'
compile group: 'org.dspace', name: 'xoai', version: '3.2.10'
compile fileTree(dir: 'libs', include: '*.jar')
}

allprojects {
Expand All @@ -25,20 +27,20 @@ allprojects {
options.compilerArgs << "-Xlint:unchecked" << "-Xlint:deprecation"
}
}
}

repositories {
repositories {
mavenCentral()
maven { url "http://repository.codehaus.org/" }
maven { url "https://jitpack.io" }
}
maven { url "https://jitpack.io" }
}


sourceSets {
main {
java { srcDir 'src/main/java' }
groovy { srcDir 'src/main/groovy' }
}

test {
groovy { srcDir 'src/test/groovy/' }
}
Expand Down Expand Up @@ -71,3 +73,18 @@ task list_changes (dependsOn: 'classes', type: JavaExec) {
main = 'ListChanges'
classpath = sourceSets.main.runtimeClasspath
}

// Runs se.kb.libris.export.listener.OaiPmhListener through JavaExec.
// Command-line arguments are assembled from the optional project properties
// "options" (split on whitespace), "profile" and "config", in that order.
task listen (dependsOn: 'classes', type: JavaExec) {
    // Collected in a Set, so a value that occurs more than once is passed only once.
    Set listenerArgs = []

    if (project.hasProperty("options")) {
        listenerArgs.addAll(project.getProperty("options").split())
    }

    ["profile", "config"].each { propertyName ->
        if (project.hasProperty(propertyName)) {
            listenerArgs.add(project.getProperty(propertyName))
        }
    }

    main = 'se.kb.libris.export.listener.OaiPmhListener'
    classpath = sourceSets.main.runtimeClasspath
    standardInput = System.in

    args(listenerArgs)
}

3 changes: 3 additions & 0 deletions etc/config.properties.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
OaiPmhBaseUrl="http://data.libris.kb.se"
AuthBaseUrl="http://data.libris.kb.se/auth/oaipmh"
BibBaseUrl="http://data.libris.kb.se/bib/oaipmh"
HoldBaseUrl="http://data.libris.kb.se/hold/oaipmh"
User=""
Password=""
Binary file added libs/oaij-0.1.jar
Binary file not shown.
98 changes: 0 additions & 98 deletions pom.xml

This file was deleted.

1 change: 1 addition & 0 deletions src/main/groovy/GetRecords.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,4 @@ System.in.eachLine() { line ->
}

writer.close()

16 changes: 14 additions & 2 deletions src/main/groovy/GetRecords_xl.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@ def get(url) {
conn.setRequestProperty( "Authorization", "Basic ${authString}" )
}

//System.err.println("URL: " + url)

return conn.content.text
}

// Issues an OAI-PMH GetRecord request (metadataPrefix marcxml_includehold) for
// the given identifier and returns the GetRecord/record node of the parsed
// response. The identifier is URL-encoded as UTF-8 before it is put in the
// query string.
def getRecord(id) {
    def encodedId = java.net.URLEncoder.encode(id, "UTF-8")
    def url = "${config.OaiPmhBaseUrl}?verb=GetRecord&metadataPrefix=marcxml_includehold&identifier=${encodedId}"

    // Non-validating, non-namespace-aware parse of the raw response body.
    def response = new XmlSlurper(false, false).parseText(get(url))

    // NOTE(review): `xml` is not declared with `def`, so this assignment also
    // stores the node in the script binding - confirm whether that is intended.
    xml = response.GetRecord.record
    return xml
}
Expand All @@ -39,7 +42,6 @@ def getMerged(bib_id) {
return []
}

def auth_ids = record.about.authority.collect { x -> "${config.URIBase}" + x.@id.toString() }
def bib = MarcXmlRecordReader.fromXml(toXml(record.metadata.record))

// filter out license or e-record?
Expand All @@ -51,10 +53,20 @@ def getMerged(bib_id) {
}

// Step 2 - find and get authority records
// @TODO replace with oneliner ...
def auth_ids = []
record.metadata.record.datafield.subfield.each {
if (it.@code.text().equals("0") && (it.text().startsWith("https://id.kb.se/") || it.text().startsWith("https://libris.kb.se/"))) {
auth_ids.add(it.text())
}
}

def auths = new HashSet<MarcRecord>()
if (!profile.getProperty("authtype", "NONE").equalsIgnoreCase("NONE")) {
auth_ids.each { auth_id ->
auths.add(MarcXmlRecordReader.fromXml(toXml(getRecord(auth_id).metadata.record)))
getRecord(auth_id).metadata.record.each {
auths.add(MarcXmlRecordReader.fromXml(toXml(it)))
}
}
}

Expand Down
18 changes: 12 additions & 6 deletions src/main/groovy/ListChanges.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -160,14 +160,20 @@ def listHoldIdentifiers(from, until) {
return ret
}

// Returns the sorted, de-duplicated union of the identifiers reported by
// listBibIdentifiers, listAuthIdentifiers and listHoldIdentifiers for the
// given from/until interval.
def getChangedRecords(from, until) {
    def changed = new TreeSet<Integer>()

    // Query bib, auth and hold in that order, then merge everything into one set.
    [
        listBibIdentifiers(from, until),
        listAuthIdentifiers(from, until),
        listHoldIdentifiers(from, until)
    ].each { listed ->
        changed.addAll(listed)
    }

    return changed
}

def from = args[0], until = args.size()==2? args[1]:"2050-01-01T00:00:00Z"
def ids = new TreeSet<Integer>()

System.err.println "DEBUG: from:${from} until:${until}"

ids.addAll(listBibIdentifiers(from, until))
ids.addAll(listAuthIdentifiers(from, until))
ids.addAll(listHoldIdentifiers(from, until))

for (Integer id: ids)
for (Integer id: getChangedRecords(from, until))
println id

109 changes: 109 additions & 0 deletions src/main/java/se/kb/libris/export/listener/AuthThread.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package se.kb.libris.export.listener;

import java.io.*;
import java.util.*;
import java.util.concurrent.*;
import java.util.logging.*;
import oaij.client.*;

/**
 * Thread that repeatedly asks an OAI-PMH server for identifiers (metadata
 * prefix {@code marcxml}) changed since the last recorded datestamp and offers
 * each identifier to a shared deque.
 *
 * <p>The last datestamp is kept in memory and, when a timestamp file is
 * configured, also written to that file so that it can be read back later.
 *
 * <p>The loop ends when {@link #run} is set to {@code false} or when the thread
 * is interrupted.
 */
class AuthThread extends Thread {
    OaiPmhClient client = null;
    Properties configProperties, exportProperties;
    String timestampFile, latestDatestamp = null;
    BlockingDeque<String> bdq;
    // volatile so that clearing the flag from another thread is seen by the loop in run().
    volatile boolean run;

    /**
     * Creates the thread and its OAI-PMH client.
     *
     * @param configProperties connection settings; {@code BibBaseUrl} is read, and
     *                         {@code User}/{@code Password} when {@code User} is non-empty
     * @param exportProperties export settings (stored, not read by this class)
     * @param timestampFile    file holding the latest datestamp, or {@code null} for none
     * @param bdq              deque that harvested identifiers are offered to
     */
    AuthThread(Properties configProperties, Properties exportProperties, String timestampFile, BlockingDeque<String> bdq) {
        this.configProperties = configProperties;
        this.exportProperties = exportProperties;
        this.timestampFile = timestampFile;
        this.bdq = bdq;
        run = true;

        // NOTE(review): this class is named AuthThread but reads "BibBaseUrl";
        // confirm this is intended and not a leftover from a sibling class.
        client = new OaiPmhClient(configProperties.getProperty("BibBaseUrl"));
        if (!configProperties.getProperty("User", "").equals("")) {
            client.withCredentials(
                    configProperties.getProperty("User"),
                    configProperties.getProperty("Password"));
        }
    }

    /**
     * Returns the latest known datestamp.
     *
     * <p>A cached value is returned when present. Otherwise, with a timestamp
     * file configured, the value is read from that file if it exists and is not
     * empty; with no timestamp file configured, the response date of an Identify
     * request is stored and returned.
     *
     * @return the datestamp, or {@code null} when a timestamp file is configured
     *         but missing or empty
     * @throws IOException if storing the datestamp fails
     */
    public String getLatestDatestamp() throws IOException {
        if (latestDatestamp != null) return latestDatestamp;

        if (timestampFile != null) {
            File f = new File(timestampFile);

            if (f.exists()) {
                try (Scanner s = new Scanner(f)) {
                    // "\\Z" makes the scanner hand back the whole file as one token.
                    s.useDelimiter("\\Z");

                    // An empty file has no token; treat it like a missing file
                    // instead of throwing NoSuchElementException on every poll.
                    if (s.hasNext()) {
                        latestDatestamp = s.next();
                        Logger.getGlobal().log(Level.INFO, "Read latest datestamp from file (" + latestDatestamp + ")");
                    }
                } catch (FileNotFoundException e) {
                    throw new RuntimeException(e);
                }
            }
        } else {
            setLatestDatestamp(client.identify().getResponseDate().asString());
        }

        return latestDatestamp;
    }

    /**
     * Records a new latest datestamp, writing it to the timestamp file first
     * when one is configured.
     *
     * @param latestDatestamp the datestamp to store
     * @throws IOException if the timestamp file cannot be written
     */
    public void setLatestDatestamp(String latestDatestamp) throws IOException {
        if (timestampFile != null) {
            // try-with-resources closes the writer even when write() fails.
            try (FileWriter fw = new FileWriter(timestampFile)) {
                fw.write(latestDatestamp);
            }
        }

        this.latestDatestamp = latestDatestamp;
    }

    /**
     * Main loop: drains the current identifier listing into the deque and, once
     * it is exhausted, waits five seconds and requests a new listing starting at
     * the latest datestamp. Failures are retried with a doubling delay.
     */
    @Override
    public void run() {
        Logger.getGlobal().log(Level.INFO, "AuthThread enter");

        try {
            ListIdentifiers li = null;
            int sleep = 1000;
            while (run) {
                try {
                    if (li != null && li.hasNext()) {
                        Identifier id = li.next();

                        // mask out identifiers with datestamp >= responseDate
                        if (li.getResponseDate().compareTo(id.getDatestamp()) > 0) {
                            Logger.getGlobal().log(Level.INFO, "Adding URI " + id.getIdentifier());
                            bdq.offer(id.getIdentifier());
                        }
                    } else {
                        Thread.sleep(5000);
                        li = client.listIdentifiers("marcxml").withFrom(getLatestDatestamp());
                        setLatestDatestamp(li.getResponseDate().asString());
                    }

                    // A successful iteration resets the back-off delay.
                    sleep = 1000;
                } catch (InterruptedException e) {
                    // Interrupt is a stop request: leave the loop and keep the flag set.
                    run = false;
                    Thread.currentThread().interrupt();
                } catch (Exception e) {
                    if (run) {
                        // Double the delay first so the logged value is the actual wait.
                        if (sleep < 5*60*1000) sleep *= 2;
                        Logger.getGlobal().log(Level.WARNING, "Exception in main loop, retry in " + sleep + " msecs.", e);
                        try {
                            Thread.sleep(sleep);
                        } catch (InterruptedException ex) {
                            // Do not swallow the interrupt while backing off; stop instead.
                            run = false;
                            Thread.currentThread().interrupt();
                        }
                    }
                }
            }
        } finally {
            // Release the client even if something other than an Exception escapes the loop.
            Logger.getGlobal().log(Level.INFO, "AuthThread exit");

            client.close();
        }
    }
}
Loading

0 comments on commit 5402801

Please sign in to comment.