-
Notifications
You must be signed in to change notification settings - Fork 41
SRCH-5154 Bulk delete zombie records from Elastic Search #1743
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 10 commits
18a9d2f
7714aba
f597ccf
b58dcdb
502a5fb
a3bc228
8c00b6f
8497dde
139cfb4
7b4929d
5b1923a
d894950
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,40 +1,34 @@ | ||
# frozen_string_literal: true | ||
|
||
module Admin | ||
class BulkAffiliateStylesUploadController < AdminController | ||
def index | ||
@page_title = 'Bulk Affiliate Styles Upload' | ||
end | ||
|
||
def upload | ||
begin | ||
@file = params[:bulk_upload_affiliate_styles] | ||
BulkAffiliateStyles::FileValidator.new(@file).validate! | ||
enqueue_job | ||
flash[:success] = success_message(@file.original_filename) | ||
rescue BulkAffiliateStylesUploader::Error => e | ||
Rails.logger.error e | ||
flash[:error] = e.message | ||
end | ||
class Admin::BulkAffiliateStylesUploadController < Admin::BulkUploadController | ||
def upload | ||
handle_bulk_upload( | ||
params_key: :bulk_upload_affiliate_styles, | ||
validator_class: BulkAffiliateStyles::FileValidator, | ||
error_class: BulkAffiliateStylesUploader::Error, | ||
success_path: admin_bulk_affiliate_styles_upload_index_path, | ||
logger_message: 'Bulk Affiliate Styles upload failed' | ||
) | ||
end | ||
|
||
redirect_to admin_bulk_affiliate_styles_upload_index_path | ||
end | ||
private | ||
|
||
private | ||
def set_page_title | ||
@page_title = 'Bulk Affiliate Styles Upload' | ||
end | ||
|
||
def success_message(filename) | ||
<<~SUCCESS_MESSAGE | ||
Successfully uploaded #{filename} for processing. | ||
The results will be emailed to you. | ||
SUCCESS_MESSAGE | ||
end | ||
def success_message(filename) | ||
<<~SUCCESS_MESSAGE | ||
Successfully uploaded #{filename} for processing. | ||
The results will be emailed to you. | ||
SUCCESS_MESSAGE | ||
end | ||
|
||
def enqueue_job | ||
BulkAffiliateStylesUploaderJob.perform_later( | ||
current_user, | ||
@file.original_filename, | ||
@file.tempfile.path | ||
) | ||
end | ||
def enqueue_job | ||
BulkAffiliateStylesUploaderJob.perform_later( | ||
current_user, | ||
@file.original_filename, | ||
@file.tempfile.path | ||
) | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# frozen_string_literal: true | ||
|
||
class Admin::BulkUploadController < Admin::AdminController | ||
include BulkUploadHandler | ||
before_action :set_page_title | ||
|
||
def index; end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# frozen_string_literal: true | ||
|
||
class Admin::BulkZombieUrlUploadController < Admin::BulkUploadController | ||
def upload | ||
handle_bulk_upload( | ||
params_key: :bulk_upload_zombie_urls, | ||
validator_class: BulkZombieUrls::FileValidator, | ||
error_class: BulkZombieUrlUploader::Error, | ||
success_path: admin_bulk_zombie_url_upload_index_path, | ||
logger_message: 'Zombie Url upload failed' | ||
) | ||
end | ||
|
||
private | ||
|
||
def set_page_title | ||
@page_title = 'Bulk Zombie Url Upload' | ||
end | ||
|
||
def success_message(filename) | ||
<<~SUCCESS_MESSAGE | ||
Successfully uploaded #{filename} for processing. | ||
The results will be emailed to you. | ||
SUCCESS_MESSAGE | ||
end | ||
|
||
def enqueue_job | ||
BulkZombieUrlUploaderJob.perform_later( | ||
current_user, | ||
@file.original_filename, | ||
@file.tempfile.path | ||
) | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# frozen_string_literal: true | ||
|
||
module BulkUploadHandler | ||
def handle_bulk_upload(options) | ||
krbhavith marked this conversation as resolved.
Show resolved
Hide resolved
|
||
@file = params[options[:params_key]] | ||
validate_file(@file, options[:validator_class]) | ||
perform_upload(@file) | ||
rescue options[:error_class] => e | ||
handle_upload_error(e, options) | ||
ensure | ||
redirect_to options[:success_path] | ||
end | ||
|
||
private | ||
|
||
def validate_file(file, validator_class) | ||
validator_class.new(file).validate! | ||
end | ||
|
||
def perform_upload(file) | ||
enqueue_job | ||
flash[:success] = success_message(file.original_filename) | ||
end | ||
|
||
def handle_upload_error(error, options) | ||
krbhavith marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Rails.logger.error options[:logger_message], error | ||
flash[:error] = error.message | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# frozen_string_literal: true | ||
|
||
class BulkZombieUrlUploaderJob < ApplicationJob | ||
queue_as :searchgov | ||
|
||
def perform(user, filename, filepath) | ||
@user = user | ||
@filename = filename | ||
@uploader = BulkZombieUrlUploader.new(filename, filepath) | ||
@uploader.upload | ||
report_results | ||
end | ||
|
||
private | ||
|
||
def report_results | ||
log_results | ||
send_results_email | ||
end | ||
|
||
def log_results | ||
results = @uploader.results | ||
Rails.logger.info(BulkZombieUrlUploaderJob: results.file_name, total_urls: results.total_count, errors: results.error_count) | ||
end | ||
|
||
def send_results_email | ||
results = @uploader.results | ||
email = BulkZombieUrlUploadResultsMailer.with(user: @user, results:).results_email | ||
email.deliver_now! | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# frozen_string_literal: true | ||
|
||
class BulkZombieUrlUploadResultsMailer < ApplicationMailer | ||
def results_email | ||
@results = params[:results] | ||
mail(to: params[:user].email, subject: "Bulk Zombie URL upload results for #{@results.file_name}") | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# frozen_string_literal: true | ||
|
||
class BulkZombieUrlUploader | ||
attr_reader :results | ||
|
||
class Error < StandardError; end | ||
|
||
def initialize(filename, filepath) | ||
@file_name = filename | ||
@file_path = filepath | ||
@results = nil | ||
end | ||
|
||
def upload | ||
initialize_results | ||
process_upload | ||
rescue StandardError => e | ||
log_upload_error(e) | ||
ensure | ||
@results ||= BulkZombieUrls::Results.new(@file_name) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line is redundant since it was already initialized in the |
||
end | ||
|
||
private | ||
|
||
def initialize_results | ||
@results = BulkZombieUrls::Results.new(@file_name) | ||
raise Error, 'Results object not initialized' unless @results | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need to raise an error in here. Maybe it can fail if the file doesn't exist but if that's the case the error should say that. |
||
end | ||
|
||
def process_upload | ||
CSV.parse(File.read(@file_path), headers: true).each { |row| process_row(row) } | ||
end | ||
|
||
def process_row(row) | ||
raise Error, 'Results object not initialized' unless @results | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no need to raise |
||
|
||
url = row['URL']&.strip | ||
document_id = row['DOC_ID']&.strip | ||
|
||
return log_missing_document_id(row, url) if document_id.blank? | ||
|
||
handle_url_processing(url, document_id, row) | ||
end | ||
|
||
def handle_url_processing(url, document_id, row) | ||
process_url(url, document_id) | ||
update_results | ||
rescue StandardError => e | ||
handle_processing_error(e, url, document_id, row) | ||
end | ||
|
||
def update_results | ||
@results.delete_ok | ||
@results.increment_updated | ||
end | ||
|
||
def log_missing_document_id(row, url) | ||
@results.add_error('Document ID is missing', url || 'Unknown') | ||
Rails.logger.error("Skipping row: #{row.inspect}. Document ID is mandatory.") | ||
end | ||
|
||
def log_upload_error(error) | ||
error_message = "Failed to process bulk zombie URL document (file: #{@file_name})." | ||
Rails.logger.error(error_message, error) | ||
krbhavith marked this conversation as resolved.
Show resolved
Hide resolved
|
||
end | ||
|
||
def handle_processing_error(error, url, document_id, row) | ||
key = url.presence || document_id | ||
@results&.add_error(error.message, key) | ||
Rails.logger.error('Failure to process bulk upload zombie URL row:', error) | ||
krbhavith marked this conversation as resolved.
Show resolved
Hide resolved
|
||
end | ||
|
||
def process_url(url, document_id) | ||
if url.present? | ||
process_url_with_searchgov(url, document_id) | ||
else | ||
delete_document(document_id) | ||
end | ||
end | ||
|
||
def process_url_with_searchgov(url, document_id) | ||
searchgov_url = SearchgovUrl.find_by(url:) | ||
searchgov_url ? searchgov_url.destroy : delete_document(document_id) | ||
krbhavith marked this conversation as resolved.
Show resolved
Hide resolved
|
||
end | ||
|
||
def delete_document(document_id) | ||
I14yDocument.delete(handle: 'searchgov', document_id:) | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# frozen_string_literal: true | ||
|
||
class BulkZombieUrls::FileValidator | ||
MAXIMUM_FILE_SIZE = 4.megabytes | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where this this restriction comes from? I don't see it as part of the ticket. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not the requirement from the ticket, but I am following this as per the previous other upload processes. |
||
VALID_CONTENT_TYPES = %w[text/csv].freeze | ||
|
||
def initialize(uploaded_file) | ||
@uploaded_file = uploaded_file | ||
end | ||
|
||
def validate! | ||
ensure_present | ||
ensure_valid_content_type | ||
ensure_not_too_big | ||
end | ||
|
||
private | ||
|
||
def ensure_valid_content_type | ||
return if VALID_CONTENT_TYPES.include?(@uploaded_file.content_type) | ||
|
||
error_message = "Files of type #{@uploaded_file.content_type} are not supported." | ||
raise(BulkZombieUrlUploader::Error, error_message) | ||
end | ||
|
||
def ensure_present | ||
return if @uploaded_file.present? | ||
|
||
error_message = 'Please choose a file to upload.' | ||
raise(BulkZombieUrlUploader::Error, error_message) | ||
end | ||
|
||
def ensure_not_too_big | ||
return if @uploaded_file.size <= MAXIMUM_FILE_SIZE | ||
|
||
error_message = "#{@uploaded_file.original_filename} is too big; please split it." | ||
raise(BulkZombieUrlUploader::Error, error_message) | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# frozen_string_literal: true | ||
|
||
module BulkZombieUrls | ||
class Results | ||
attr_accessor :ok_count, :error_count, :file_name | ||
attr_reader :errors, :updated | ||
|
||
def initialize(filename) | ||
@file_name = filename | ||
@ok_count = 0 | ||
@error_count = 0 | ||
@updated = 0 | ||
@errors = Hash.new { |hash, key| hash[key] = [] } | ||
end | ||
|
||
def increment_updated | ||
@updated += 1 | ||
end | ||
|
||
def delete_ok | ||
@ok_count += 1 | ||
end | ||
|
||
def add_error(error_message, key) | ||
self.error_count += 1 | ||
@errors[key] << error_message | ||
end | ||
|
||
def total_count | ||
ok_count + error_count | ||
end | ||
end | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This approach won't work on staging or production. When the request comes in the tempfile gets created in the
app
servers but the workers will try to perform the upload from thecrawl
server.This file needs to be send to S3 and the job needs to read it from there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Existing similar functionality at https://github.com/GSA/search-gov/blob/main/app/controllers/admin/bulk_url_upload_controller.rb#L33 working fine now in production
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How is the worker picking up the tempfile?