Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import of collections from H2. #443

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions app/services/importers/collection.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# frozen_string_literal: true

module Importers
  # Imports a collection from a JSON export from H2.
  #
  # Deposit settings and participants are read from the exported JSON; the title and
  # modification time are read from the Cocina object looked up by druid.
  class Collection
    # Builds an importer with the given arguments and runs it.
    #
    # @return [::Collection] the existing or newly created collection
    # @raise [Importers::Error] if the collection cannot be roundtripped
    def self.call(...)
      new(...).call
    end

    # @param collection_json [Hash] a single collection from the H2 export, with string keys
    def initialize(collection_json:)
      @collection_json = collection_json
    end

    # Finds or creates the collection and verifies that it roundtrips.
    #
    # @return [::Collection] the existing or newly created collection
    # @raise [Importers::Error] if the collection cannot be roundtripped
    def call
      ::Collection.transaction do
        # Building the form needs the collection record, so the record is found or created
        # before validation. Raising inside the transaction means a record created here
        # is not kept when validation fails.
        roundtrippable = ToCollectionForm::RoundtripValidator.roundtrippable?(collection_form:, cocina_object:)
        raise Error, "Collection #{druid} cannot be roundtripped" unless roundtrippable

        collection
      end
    end

    private

    attr_reader :collection_json

    # Memoized lookup/creation of the collection for this druid. The block only
    # populates attributes when a new record is being created.
    def collection # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      @collection ||= ::Collection.find_or_create_by!(druid:) do |new_collection|
        new_collection.user = User.call(user_json: collection_json['creator'])
        new_collection.title = CocinaSupport.title_for(cocina_object:)
        new_collection.object_updated_at = cocina_object.modified
        new_collection.release_option = release_option
        new_collection.release_duration = collection_json['release_duration']
        new_collection.access = option_for(collection_json['access'])
        new_collection.doi_option = option_for(collection_json['doi_option'])
        new_collection.license_option = option_for(collection_json['license_option'])
        new_collection.license = license
        new_collection.custom_rights_statement_option = custom_rights_statement_option
        new_collection.provided_custom_rights_statement = collection_json['provided_custom_rights_statement']
        new_collection.custom_rights_statement_custom_instructions = collection_json['custom_rights_statement_custom_instructions'] # rubocop:disable Layout/LineLength
        new_collection.email_when_participants_changed = collection_json['email_when_participants_changed']
        new_collection.email_depositors_status_changed = collection_json['email_depositors_status_changed']
        new_collection.review_enabled = collection_json['review_enabled']
        new_collection.depositors = users_from('depositors')
        new_collection.reviewers = users_from('reviewed_by')
        new_collection.managers = users_from('managed_by')
      end
    end

    # The Cocina object for this druid, fetched once from the repository.
    def cocina_object
      @cocina_object ||= Sdr::Repository.find(druid:)
    end

    # The form built from the Cocina object and the collection record; used for
    # the roundtrip check.
    def collection_form
      @collection_form ||= ToCollectionForm::Mapper.call(cocina_object:, collection:)
    end

    # Translates the hyphenated H2 value 'depositor-selects' to 'depositor_selects';
    # every other value is passed through unchanged.
    def option_for(option)
      return 'depositor_selects' if option == 'depositor-selects'

      option
    end

    def release_option
      # There are a small number of H2 collections that have a delay release option.
      # This is being removed in H3, so they are being mapped to depositor_selects.
      return 'immediate' if collection_json['release_option'] == 'immediate'

      'depositor_selects'
    end

    # Uses the required license when the license option is 'required', otherwise
    # the default license.
    def license
      if collection_json['license_option'] == 'required'
        collection_json['required_license']
      else
        collection_json['default_license']
      end
    end

    def custom_rights_statement_option
      # Only an explicit false maps to 'none'; a nil or absent flag falls through
      # to the checks below.
      return 'none' if collection_json['allow_custom_rights_statement'] == false

      if collection_json['provided_custom_rights_statement'].present?
        'with_custom_rights_statement'
      else
        'with_instructions'
      end
    end

    # Imports every user listed under the given key of the export.
    #
    # @param field [String] 'depositors', 'reviewed_by' or 'managed_by'
    # @return [Array] the imported users; empty when the key is missing or nil
    def users_from(field)
      # Array() turns a missing/nil list into [] instead of failing on nil.map.
      Array(collection_json[field]).map { |user_json| User.call(user_json:) }
    end

    def druid
      collection_json['druid']
    end
  end
end
6 changes: 6 additions & 0 deletions app/services/importers/error.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# frozen_string_literal: true

module Importers
  # Base error raised by the importers, e.g. when a collection cannot be roundtripped.
  class Error < StandardError; end
end
25 changes: 25 additions & 0 deletions app/services/importers/user.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# frozen_string_literal: true

module Importers
  # Finds or creates the local user matching a user entry from an H2 JSON export.
  class User
    # Builds an importer with the given arguments and runs it.
    def self.call(...)
      new(...).call
    end

    # @param user_json [Hash] a user entry from the export; the 'email', 'name'
    #   and 'first_name' keys are read
    def initialize(user_json:)
      @user_json = user_json
    end

    # Looks the user up by email address; name and first name are only assigned
    # inside the creation block.
    #
    # @return [::User] the existing or newly created user
    def call
      ::User.find_or_create_by!(email_address: email) do |new_user|
        new_user.name = display_name
        new_user.first_name = user_json['first_name']
      end
    end

    private

    attr_reader :user_json

    def email
      user_json['email']
    end

    # The exported name, or the email address with ::User::EMAIL_SUFFIX removed
    # when the export has no name.
    def display_name
      user_json['name'] || email.delete_suffix(::User::EMAIL_SUFFIX)
    end
  end
end
13 changes: 13 additions & 0 deletions app/services/roundtrip_validator_support.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# frozen_string_literal: true

# Support methods for roundtrip validation.
class RoundtripValidatorSupport
  # Produces the form of a cocina object that roundtrip validators compare against.
  #
  # The object is rebuilt without its metadata, its cocinaVersion is set to the
  # current Cocina::Models::VERSION, and metadata is re-attached carrying only the
  # original lock.
  #
  # @param cocina_object the original cocina object (with metadata)
  # @return the normalized cocina object, with metadata
  def self.normalize_cocina_object(cocina_object:)
    # Capture the lock first; it is the only piece of metadata carried over.
    original_lock = cocina_object&.lock

    Cocina::Models
      .without_metadata(cocina_object)
      .new(cocinaVersion: Cocina::Models::VERSION)
      .then { |normalized| Cocina::Models.with_metadata(normalized, original_lock) }
  end
end
9 changes: 3 additions & 6 deletions app/services/to_collection_form/roundtrip_validator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,9 @@ def roundtripped_cocina_object
end

# Memoized, normalized copy of the original cocina object, used as the
# comparison target for the roundtrip check. Normalization is delegated to
# RoundtripValidatorSupport so the collection and work validators share it.
def normalized_original_cocina_object
@normalized_original_cocina_object ||= RoundtripValidatorSupport.normalize_cocina_object(
cocina_object: @original_cocina_object
)
end
end
end
9 changes: 3 additions & 6 deletions app/services/to_work_form/roundtrip_validator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,9 @@ def roundtripped_cocina_object
end

# Memoized, normalized copy of the original cocina object, used as the
# comparison target for the roundtrip check. Normalization is delegated to
# RoundtripValidatorSupport so the work and collection validators share it.
def normalized_original_cocina_object
@normalized_original_cocina_object ||= RoundtripValidatorSupport.normalize_cocina_object(
cocina_object: @original_cocina_object
)
end
end
end
20 changes: 20 additions & 0 deletions lib/tasks/import.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# frozen_string_literal: true

namespace :import do
  desc 'Import collections from json'
  # Usage: rake import:collections[path/to/file.json]
  # The filename argument is optional and defaults to 'collections.json'.
  #
  # The JSON file can be generated in H2 for some set of collections with:
  #   collections_json = collections.map {|collection| collection.as_json(include: [:creator, :depositors,
  #     :reviewed_by, :managed_by])}
  #   File.write('collections.json', JSON.pretty_generate(collections_json))
  #
  # Importing is idempotent, so you can run this multiple times.
  # It will raise an error if the collection cannot be roundtripped.
  task :collections, [:filename] => :environment do |_t, args|
    path = args[:filename] || 'collections.json'

    JSON.parse(File.read(path)).each do |collection_json|
      druid = collection_json['druid']
      # Entries without a druid are skipped.
      next unless druid

      puts "Importing collection #{druid}"
      Importers::Collection.call(collection_json:)
    end
  end
end
132 changes: 132 additions & 0 deletions spec/services/importers/collection_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# frozen_string_literal: true

require 'rails_helper'

RSpec.describe Importers::Collection do
  include CollectionMappingFixtures

  let(:druid) { collection_druid_fixture }

  # One collection entry shaped like the H2 export: settings plus the creator and
  # the three participant lists. Keys are stringified, as they would be after JSON.parse.
  let(:collection_json) do
    {
      druid:,
      release_option: 'depositor-selects',
      release_duration: '6 months',
      access: 'stanford',
      doi_option: 'yes',
      license_option: 'required',
      required_license: 'CC0-1.0',
      default_license: nil,
      allow_custom_rights_statement: true,
      provided_custom_rights_statement: nil,
      custom_rights_statement_custom_instructions: 'These are the instructions',
      email_when_participants_changed: true,
      email_depositors_status_changed: false,
      review_enabled: true,
      creator: {
        id: 932,
        email: '[email protected]',
        created_at: '2021-08-02T13:11:57.482Z',
        updated_at: '2024-02-24T02:30:22.262Z',
        name: 'Larry Fine',
        last_work_terms_agreement: nil,
        first_name: 'Larry'
      },
      depositors: [
        {
          id: 934,
          email: '[email protected]',
          created_at: '2021-08-02T13:11:57.491Z',
          updated_at: '2021-08-02T13:11:57.491Z',
          name: nil,
          last_work_terms_agreement: nil,
          first_name: nil
        }
      ],
      reviewed_by: [
        {
          id: 2721,
          email: '[email protected]',
          created_at: '2022-04-25T18:09:01.285Z',
          updated_at: '2022-05-05T21:34:04.318Z',
          name: 'Shemp Howard',
          last_work_terms_agreement: '2022-05-05T21:34:04.315Z',
          first_name: 'Shemp'
        }
      ],
      managed_by: [
        {
          id: 1523,
          email: '[email protected]',
          created_at: '2021-08-02T13:12:05.122Z',
          updated_at: '2021-09-07T17:46:19.326Z',
          name: 'Curly Howard',
          last_work_terms_agreement: nil,
          first_name: 'Jamie'
        }
      ]
    }.deep_stringify_keys
  end

  let(:modified) { Time.zone.iso8601('2024-12-31T14:00:00') }

  let(:cocina_object) do
    Cocina::Models.with_metadata(collection_fixture, lock_fixture, modified:)
  end

  # The importer's repository lookup is stubbed to return the fixture object.
  before do
    allow(Sdr::Repository).to receive(:find).with(druid:).and_return(cocina_object)
  end

  context 'when collection already exists' do
    let!(:collection) { create(:collection, druid:) }

    it 'does not create a new collection' do
      expect { described_class.call(collection_json:) }.not_to change(Collection, :count)
    end

    it 'returns collection' do
      expect(described_class.call(collection_json:)).to eq(collection)
    end
  end

  context 'when collection is roundtrippable' do
    it 'creates a new collection' do
      expect { described_class.call(collection_json:) }.to change(Collection, :count).by(1)
    end

    it 'populates collection attributes' do
      imported = described_class.call(collection_json:)

      # Values taken from the cocina object
      expect(imported.druid).to eq(druid)
      expect(imported.title).to eq(collection_title_fixture)
      expect(imported.object_updated_at).to eq(modified)

      # Settings taken from the export
      expect(imported.depositor_selects_release_option?).to be(true)
      expect(imported.six_months_release_duration?).to be(true)
      expect(imported.stanford_access?).to be(true)
      expect(imported.yes_doi_option?).to be(true)
      expect(imported.required_license_option?).to be(true)
      expect(imported.with_instructions_custom_rights_statement_option?).to be(true)
      expect(imported.custom_rights_statement_custom_instructions).to eq('These are the instructions')
      expect(imported.email_when_participants_changed).to be(true)
      expect(imported.email_depositors_status_changed).to be(false)
      expect(imported.review_enabled).to be(true)

      # Creator and participants
      expect(imported.user.email_address).to eq('[email protected]')
      expect(imported.depositors.first.email_address).to eq('[email protected]')
      expect(imported.reviewers.first.email_address).to eq('[email protected]')
      expect(imported.managers.first.email_address).to eq('[email protected]')
    end
  end

  context 'when collection is not roundtrippable' do
    let(:cocina_object) { collection_with_metadata_fixture.new(type: Cocina::Models::ObjectType.curated_collection) }

    it 'raises an error and does not create a new collection' do
      expect { described_class.call(collection_json:) }
        .to raise_error(Importers::Error, "Collection #{druid} cannot be roundtripped")
        .and(not_change(Collection, :count))
    end
  end
end
Loading