-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
mapping subjects #23
- Loading branch information
Showing
5 changed files
with
140 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Em dash inserted between the segments of a split subject heading.
SEPARATOR = '—'.freeze
|
||
# Builds the hierarchical subject/genre display strings for a MARC record.
#
# For every field matched by +fields+, the extracted subfield text is taken as
# one string, SEPARATOR (an em dash) is placed in front of each $v, $x, $y and
# $z value, and punctuation is trimmed from every resulting segment.
#
# @param record the MARC record handed to Traject::MarcExtractor
# @param fields [String] Traject extraction spec, e.g. '600|*0|abcdfklmnopqrtvxyz'
# @param vocabulary [Array<String>] optional whitelist of subfield $2 values.
#   When empty, every heading is kept. When non-empty, a heading is kept only
#   if the field has a $2 whose value is listed (the last $2 decides).
# @return [Array<String>] one SEPARATOR-joined heading per kept field
#
# NOTE: the split is text based: every " <value>" occurrence in the extracted
# string is rewritten, so a $v/$x/$y/$z value that also appears after a space
# inside another subfield is split there as well.
def process_hierarchy(record, fields, vocabulary = [])
  split_on_subfield = %w[v x y z]
  subjects = []
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, spec, extractor|
    subject = extractor.collect_subfields(field, spec).first
    next if subject.nil?

    # always include the subject if a vocabulary is not specified
    include_subject = vocabulary.empty?
    field.subfields.each do |s_field|
      # when specified, only include subject if it is part of the vocabulary
      include_subject = vocabulary.include?(s_field.value) if s_field.code == '2' && !vocabulary.empty?
      next unless split_on_subfield.include?(s_field.code)

      # Block form on purpose: a replacement *string* would expand backslash
      # sequences (\0, \1, \\) found in the subfield value as back-references.
      subject = subject.gsub(" #{s_field.value}") { "#{SEPARATOR}#{s_field.value}" }
    end

    segments = subject.split(SEPARATOR).map { |s| Traject::Macros::Marc21.trim_punctuation(s) }
    subjects << segments.join(SEPARATOR) if include_subject
  end
  subjects
end
|
||
# Builds the values of the split subject facet for a MARC record.
#
# For every field matched by +fields+, the extracted subfield text is taken as
# one string, SEPARATOR (an em dash) is placed in front of each $v, $x, $y and
# $z value, the string is split on SEPARATOR, and punctuation is trimmed from
# every segment. Each segment becomes its own facet value.
#
# @param record the MARC record handed to Traject::MarcExtractor
# @param fields [String] Traject extraction spec, e.g. '600|*0|abcdfklmnopqrtvxyz'
# @return [Array<String>] flat list of trimmed segments, in field order
#
# NOTE: the split is text based: every " <value>" occurrence in the extracted
# string is rewritten, so a $v/$x/$y/$z value that also appears after a space
# inside another subfield is split there as well.
def process_subject_topic_facet(record, fields)
  split_on_subfield = %w[v x y z]
  subjects = []
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, spec, extractor|
    subject = extractor.collect_subfields(field, spec).first
    next if subject.nil?

    field.subfields.each do |s_field|
      next unless split_on_subfield.include?(s_field.code)

      # Block form on purpose: a replacement *string* would expand backslash
      # sequences (\0, \1, \\) found in the subfield value as back-references.
      subject = subject.gsub(" #{s_field.value}") { "#{SEPARATOR}#{s_field.value}" }
    end

    subjects.concat(subject.split(SEPARATOR).map { |s| Traject::Macros::Marc21.trim_punctuation(s) })
  end
  subjects
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# frozen_string_literal: true | ||
|
||
require_relative '../../../lib/traject/psulib_marc' | ||
|
||
# Specs for the subject helpers loaded from lib/traject/psulib_marc:
# process_hierarchy and process_subject_topic_facet.
RSpec.describe 'From psulib_marc.rb' do
  describe 'process_hierarchy function' do
    before(:all) do
      spec = '600|*0|abcdfklmnopqrtvxyz:630|*0|adfgklmnoprstvxyz'

      # Both excluded fixtures use tags named in the spec, so only their
      # non-zero 2nd indicator can keep them out of the results.
      @s600_excluded = { '600' => { 'ind1' => '', 'ind2' => '5', 'subfields' => [{ 'a' => 'Exclude' }] } }
      @s630_excluded = { '630' => { 'ind1' => '', 'ind2' => '7', 'subfields' => [{ 'a' => 'Also Exclude' }] } }
      @s600 = { '600' => { 'ind1' => '', 'ind2' => '0', 'subfields' => [{ 'a' => 'John.' }, { 't' => 'Title.' }, { 'v' => 'split genre' }, { 'd' => '2015' }, { '2' => 'special' }] } }
      @s630 = { '630' => { 'ind1' => '', 'ind2' => '0', 'subfields' => [{ 'x' => 'Fiction' }, { 'y' => '1492' }, { 'z' => "don't ignore" }, { 't' => 'TITLE.' }] } }
      @sample_marc = MARC::Record.new_from_hash('fields' => [@s600_excluded, @s630_excluded, @s600, @s630])

      @subjects = process_hierarchy(@sample_marc, spec)
      @vocab_subjects = process_hierarchy(@sample_marc, spec, ['vocab'])
      @special_subjects = process_hierarchy(@sample_marc, spec, ['special'])
    end

    describe 'when an optional vocabulary limit is not provided' do
      it 'excludes subjects without 0 in the 2nd indicator' do
        expect(@subjects).not_to include('Exclude')
        expect(@subjects).not_to include('Also Exclude')
      end

      it 'only separates v,x,y,z with em dash, strips punctuation' do
        expect(@subjects).to include("John. Title#{SEPARATOR}split genre 2015")
        expect(@subjects).to include("Fiction#{SEPARATOR}1492#{SEPARATOR}don't ignore TITLE")
      end
    end

    describe 'when a vocabulary limit is provided' do
      it 'excludes headings missing a subfield 2 or part of a different vocab' do
        expect(@vocab_subjects).to eq []
      end

      it 'only includes the heading from a matching subfield 2 value' do
        expect(@special_subjects).to eq ["John. Title#{SEPARATOR}split genre 2015"]
      end
    end
  end

  describe 'process_subject_topic_facet function' do
    before(:all) do
      @s600 = { '600' => { 'ind1' => '', 'ind2' => '0', 'subfields' => [{ 'a' => 'John.' }, { 'x' => 'Join' }, { 't' => 'Title' }, { 'd' => '2015' }] } }
      @s630 = { '630' => { 'ind1' => '', 'ind2' => '0', 'subfields' => [{ 'x' => 'Fiction' }, { 'y' => '1492' }, { 'z' => "don't ignore" }, { 'v' => 'split genre' }] } }
      @sample_marc = MARC::Record.new_from_hash('fields' => [@s600, @s630])
      @subjects = process_subject_topic_facet(@sample_marc, '600|*0|abcdfklmnopqrtvxyz:630|*0|adfgklmnoprstvxyz')
    end

    it 'trims punctuation' do
      expect(@subjects).to include('John')
    end

    it 'includes subjects split along v, x, y and z' do
      expect(@subjects).to include('Join Title 2015')
      expect(@subjects).to include('1492')
      expect(@subjects).to include('split genre')
      expect(@subjects).to include('Fiction')
      expect(@subjects).to include("don't ignore")
    end
  end
end