From ec46097f36a0471518a90cb096ee5d99a10a28c0 Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Wed, 13 Aug 2025 12:02:59 -0500 Subject: [PATCH] Stop writing legacy links from content files --- app/services/versioned_files_service.rb | 1 - .../stacks_link_action.rb | 59 ----- spec/requests/v1/publish_dro_spec.rb | 6 +- .../stacks_link_action_spec.rb | 210 ------------------ spec/services/versioned_files_service_spec.rb | 14 +- 5 files changed, 4 insertions(+), 286 deletions(-) delete mode 100644 app/services/versioned_files_service/stacks_link_action.rb delete mode 100644 spec/services/versioned_files_service/stacks_link_action_spec.rb diff --git a/app/services/versioned_files_service.rb b/app/services/versioned_files_service.rb index f0e78964..9338288b 100644 --- a/app/services/versioned_files_service.rb +++ b/app/services/versioned_files_service.rb @@ -30,7 +30,6 @@ def versioned_files? def update(version:, version_metadata:, cocina:, file_transfers: {}) VersionedFilesService::Lock.with_lock(@object) do UpdateAction.new(version:, version_metadata:, cocina:, file_transfers:, object: @object).call - StacksLinkAction.new(version:, object: @object).call end end diff --git a/app/services/versioned_files_service/stacks_link_action.rb b/app/services/versioned_files_service/stacks_link_action.rb deleted file mode 100644 index ed683d68..00000000 --- a/app/services/versioned_files_service/stacks_link_action.rb +++ /dev/null @@ -1,59 +0,0 @@ -class VersionedFilesService - # Creates symlinks in the Stacks filesystem for the given object. - class StacksLinkAction - # @param object [VersionedFilesService::Object] the object - # @param version [String] the version number - def initialize(object:, version:) - @version = version - @object = object - end - - def call - FileUtils.mkdir_p(stacks_object_path) - shelve_file_map.each do |filename, md5| - file_path = stacks_object_path.join(filename) - - if filename == druid || filename.start_with?("#{druid}/") || !file_path.to_s.starts_with?(stacks_object_path.to_s) - Honeybadger.notify("Skipping #{filename} because it would conflict with the versioned object directory or is otherwise outside the object directory") - - next - end - - LinkSupport.link(content_path_for(md5:), file_path) - end - recursive_cleanup(stacks_object_path) - end - - private - - attr_reader :cocina, :version - - delegate :stacks_object_path, :content_path_for, :cocina_path_for, :object_path, :druid, to: :@object - - def shelve_file_map - return {} if version.nil? - - @shelve_file_map ||= Cocina.new(hash: cocina_hash).shelve_file_map - end - - def cocina_hash - JSON.parse(cocina_path_for(version:).read).to_h - end - - def recursive_cleanup(path) - if path.directory? - return if path == object_path - - path.children.each do |child| - recursive_cleanup(child) - end - path.rmdir if path.empty? && path != stacks_object_path - else - relative_path = path.relative_path_from(stacks_object_path).to_s - return if shelve_file_map.key?(relative_path) - - path.unlink - end - end - end -end diff --git a/spec/requests/v1/publish_dro_spec.rb b/spec/requests/v1/publish_dro_spec.rb index 53aba63d..9de5f749 100644 --- a/spec/requests/v1/publish_dro_spec.rb +++ b/spec/requests/v1/publish_dro_spec.rb @@ -90,7 +90,6 @@ end context 'when the object does not already exist' do - # rubocop:disable RSpec/ExpectActual it 'creates the resource' do put "/v1/purls/#{druid}", params: request, @@ -98,10 +97,9 @@ expect(response).to be_created expect(File).to exist('tmp/stacks/bc/123/df/4567/bc123df4567/versions/cocina.1.json') expect(File).to exist('tmp/stacks/bc/123/df/4567/bc123df4567/versions/cocina.json') - expect('tmp/stacks/bc/123/df/4567/bc123df4567/content/3e25960a79dbc69b674cd4ec67a72c62').to link_to('tmp/stacks/bc/123/df/4567/file2.txt') - expect('tmp/stacks/bc/123/df/4567/bc123df4567/content/5997de4d5abb55f21f652aa61b8f3aaf').to link_to('tmp/stacks/bc/123/df/4567/files/file2.txt') + expect(File).to exist('tmp/stacks/bc/123/df/4567/bc123df4567/content/3e25960a79dbc69b674cd4ec67a72c62') + expect(File).to exist('tmp/stacks/bc/123/df/4567/bc123df4567/content/5997de4d5abb55f21f652aa61b8f3aaf') end - # rubocop:enable RSpec/ExpectActual end context 'when the object already exists' do diff --git a/spec/services/versioned_files_service/stacks_link_action_spec.rb b/spec/services/versioned_files_service/stacks_link_action_spec.rb deleted file mode 100644 index ee5d3020..00000000 --- a/spec/services/versioned_files_service/stacks_link_action_spec.rb +++ /dev/null @@ -1,210 +0,0 @@ -# frozen_string_literal: true - -require 'rails_helper' - -RSpec.describe VersionedFilesService::StacksLinkAction do - describe '#recursive_cleanup' do - let(:action) { described_class.new(version: 1, object:) } - - let(:druid) { 'druid:bc123df4567' } - let(:object) { instance_double(VersionedFilesService::Object, stacks_object_path: stacks_object_pathname, object_path: object_pathname) } - - let(:dro) { build(:dro_with_metadata, id: druid).new(structural:, access: { view: 'world', download: 'world' }) } - - let(:structural) do - Cocina::Models::DROStructural.new( - contains: [ - Cocina::Models::FileSet.new( - externalIdentifier: 'bc123df4567_2', - type: Cocina::Models::FileSetType.file, - label: 'text file', - version: 1, - structural: Cocina::Models::FileSetStructural.new( - contains: [ - Cocina::Models::File.new( - externalIdentifier: '1234', - type: Cocina::Models::ObjectType.file, - label: 'the regular file', - filename: 'file2.txt', - version: 1, - hasMessageDigests: [ - { type: 'md5', digest: '3e25960a79dbc69b674cd4ec67a72c62' } - ], - administrative: { - publish: true, - shelve: true - } - ), - Cocina::Models::File.new( - externalIdentifier: '1234', - type: Cocina::Models::ObjectType.file, - label: 'the hierarchical file', - filename: 'files/file2.txt', - version: 1, - hasMessageDigests: [ - { type: 'md5', digest: '5997de4d5abb55f21f652aa61b8f3aaf' } - ], - administrative: { - publish: true, - shelve: true - } - ) - ] - ) - ) - ] - ) - end - - let(:stacks_object_path) { 'tmp/purl_doc_cache/bc/123/df/4567' } - let(:stacks_object_pathname) { Pathname.new(stacks_object_path) } - let(:object_path) { "#{stacks_object_path}/bc123df4567" } - let(:object_pathname) { Pathname.new(object_path) } - - before do - FileUtils.mkdir_p(object_path) - # These are the expected files. - FileUtils.touch("#{stacks_object_path}/file2.txt") - FileUtils.mkdir_p("#{stacks_object_path}/files") - FileUtils.touch("#{stacks_object_path}/files/file2.txt") - # These are the extra files. - FileUtils.touch("#{stacks_object_path}/file1.txt") - FileUtils.mkdir_p("#{stacks_object_path}/files2") - FileUtils.touch("#{stacks_object_path}/files2/file1.txt") - FileUtils.mkdir_p("#{stacks_object_path}/files3") - - allow(action).to receive(:shelve_file_map).and_return({ - 'file2.txt' => '3e25960a79dbc69b674cd4ec67a72c62', - 'files/file2.txt' => '5997de4d5abb55f21f652aa61b8f3aaf' - }) - end - - after do - FileUtils.rm_rf(stacks_object_path) - end - - it 'deletes extra files and directories' do - action.send(:recursive_cleanup, stacks_object_pathname) - - expect(File.exist?("#{stacks_object_path}/file2.txt")).to be true - expect(File.exist?("#{stacks_object_path}/files/file2.txt")).to be true - - expect(File.exist?("#{stacks_object_path}/file1.txt")).to be false - expect(Dir.exist?("#{stacks_object_path}/files2")).to be false - expect(File.exist?("#{stacks_object_path}/files2/file1.txt")).to be false - expect(Dir.exist?("#{stacks_object_path}/files3")).to be false - - expect(Dir.exist?(object_path)).to be true - end - end - - describe '#call' do - let(:action) { described_class.new(version: 1, object:) } - - let(:druid) { "druid:#{bare_druid}" } - let(:bare_druid) { 'bc123df4567' } - let(:object) do - instance_double(VersionedFilesService::Object, druid: bare_druid, stacks_object_path: stacks_object_pathname, object_path: object_pathname, content_path_for: Pathname.new(dummy_file_path)) - end - - let(:dro) { build(:dro_with_metadata, id: druid).new(structural:, access: { view: 'world', download: 'world' }) } - - let(:structural) do - Cocina::Models::DROStructural.new( - contains: [ - Cocina::Models::FileSet.new( - externalIdentifier: 'bc123df4567_2', - type: Cocina::Models::FileSetType.file, - label: 'text file', - version: 1, - structural: Cocina::Models::FileSetStructural.new( - contains: [ - Cocina::Models::File.new( - externalIdentifier: '1234', - type: Cocina::Models::ObjectType.file, - label: 'the regular file', - filename: 'file2.txt', - version: 1, - hasMessageDigests: [ - { type: 'md5', digest: '3e25960a79dbc69b674cd4ec67a72c62' } - ], - administrative: { - publish: true, - shelve: true - } - ), - Cocina::Models::File.new( - externalIdentifier: '1234', - type: Cocina::Models::ObjectType.file, - label: 'the hierarchical file', - filename: 'files/file2.txt', - version: 1, - hasMessageDigests: [ - { type: 'md5', digest: '5997de4d5abb55f21f652aa61b8f3aaf' } - ], - administrative: { - publish: true, - shelve: true - } - ) - ] - ) - ) - ] - ) - end - - let(:stacks_object_path) { 'tmp/purl_doc_cache/bc/123/df/4567' } - let(:stacks_object_pathname) { Pathname.new(stacks_object_path) } - let(:object_path) { "#{stacks_object_path}/bc123df4567" } - let(:object_pathname) { Pathname.new(object_path) } - let(:dummy_file_path) { "#{object_pathname}/contents/dummy_file.txt" } - - before do - FileUtils.mkdir_p(object_path) - FileUtils.mkdir_p("#{object_pathname}/contents") - - FileUtils.touch(dummy_file_path) - - allow(action).to receive(:shelve_file_map).and_return({ - 'file2.txt' => '3e25960a79dbc69b674cd4ec67a72c62', - 'files/file2.txt' => '5997de4d5abb55f21f652aa61b8f3aaf', - 'bc123df4567/invalid.json' => '3e25960a79dbc69b674cd4ec67a72c62', - 'bc123df4567_1/valid.json' => '3e25960a79dbc69b674cd4ec67a72c62', - 'bc123df4567' => '3e25960a79dbc69b674cd4ec67a72c62', - 'bc123df4567_2' => '3e25960a79dbc69b674cd4ec67a72c62', - '/tmp/abc' => '3e25960a79dbc69b674cd4ec67a72c62' - }) - end - - after do - FileUtils.rm_rf(stacks_object_path) - FileUtils.rm_rf('/tmp/abc') - end - - it 'does not create files that conflict with the AWFL layout' do - allow(Honeybadger).to receive(:notify) - - action.call - - expect(File.exist?("#{stacks_object_path}/file2.txt")).to be true - expect(File.exist?("#{stacks_object_path}/files/file2.txt")).to be true - expect(File.exist?("#{stacks_object_path}/bc123df4567_1/valid.json")).to be true - expect(File.exist?("#{stacks_object_path}/bc123df4567_2")).to be true - - expect(File.exist?("#{stacks_object_path}/bc123df4567/invalid.json")).to be false - - expect(Honeybadger).to have_received(:notify).with(%r{Skipping bc123df4567/invalid.json}) - end - - it 'does not create files that cause directory traversal' do - allow(Honeybadger).to receive(:notify) - - action.call - - expect(File.exist?("/tmp/abc")).to be false - - expect(Honeybadger).to have_received(:notify).with(%r{Skipping /tmp/abc}) - end - end -end diff --git a/spec/services/versioned_files_service_spec.rb b/spec/services/versioned_files_service_spec.rb index b25bc2b8..9b5dd195 100644 --- a/spec/services/versioned_files_service_spec.rb +++ b/spec/services/versioned_files_service_spec.rb @@ -198,13 +198,9 @@ head: 1 ) - # Symlinks to stacks filesystem - expect("#{stacks_object_path}/file2.txt").to link_to("#{content_path}/3e25960a79dbc69b674cd4ec67a72c62") - expect("#{stacks_object_path}/files/file2.txt").to link_to("#{content_path}/5997de4d5abb55f21f652aa61b8f3aaf") - # Hardlinks to globus filesystem - expect(File).to exist("#{globus_object_path}/file2.txt") - expect(File).to exist("#{globus_object_path}/files/file2.txt") + expect("#{content_path}/3e25960a79dbc69b674cd4ec67a72c62").to link_to "#{globus_object_path}/file2.txt" + expect("#{content_path}/5997de4d5abb55f21f652aa61b8f3aaf").to link_to "#{globus_object_path}/files/file2.txt" end end @@ -379,12 +375,6 @@ }, head: 2 ) - - # Symlinks to stacks filesystem - expect(File.exist?("#{stacks_object_path}/file1.txt")).to be false - expect("#{stacks_object_path}/file2.txt").to link_to("#{content_path}/4f35960a79dbc69b674cd4ec67a72d73") - expect("#{stacks_object_path}/files/file2.txt").to link_to("#{content_path}/5997de4d5abb55f21f652aa61b8f3aaf") - expect("#{stacks_object_path}/file3.txt").to link_to("#{content_path}/6007de4d5abb55f21f652aa61b8f3bbg") end end