Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ gem "bootsnap", require: false
# Use Active Storage variants [https://guides.rubyonrails.org/active_storage_overview.html#transforming-images]
# gem "image_processing", "~> 1.2"

gem 'mechanize'

group :development, :test do
# See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem
gem "debug", platforms: %i[ mri mswin mswin64 mingw x64_mingw ]
Expand Down
28 changes: 28 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,13 @@ GEM
irb (~> 1.10)
reline (>= 0.3.8)
docile (1.4.1)
domain_name (0.6.20240107)
drb (2.2.1)
erubi (1.13.0)
globalid (1.2.1)
activesupport (>= 6.1)
http-cookie (1.0.7)
domain_name (~> 0.5)
i18n (1.14.6)
concurrent-ruby (~> 1.0)
importmap-rails (2.0.1)
Expand Down Expand Up @@ -153,10 +156,30 @@ GEM
net-smtp
marcel (1.0.4)
matrix (0.4.2)
mechanize (2.12.2)
addressable (~> 2.8)
base64
domain_name (~> 0.5, >= 0.5.20190701)
http-cookie (~> 1.0, >= 1.0.3)
mime-types (~> 3.3)
net-http-digest_auth (~> 1.4, >= 1.4.1)
net-http-persistent (>= 2.5.2, < 5.0.dev)
nkf
nokogiri (~> 1.11, >= 1.11.2)
rubyntlm (~> 0.6, >= 0.6.3)
webrick (~> 1.7)
webrobots (~> 0.1.2)
mime-types (3.6.0)
logger
mime-types-data (~> 3.2015)
mime-types-data (3.2024.1105)
mini_mime (1.1.5)
minitest (5.25.1)
msgpack (1.7.2)
mutex_m (0.2.0)
net-http-digest_auth (1.4.1)
net-http-persistent (4.0.4)
connection_pool (~> 2.2)
net-imap (0.4.14)
date
net-protocol
Expand All @@ -167,6 +190,7 @@ GEM
net-smtp (0.5.0)
net-protocol
nio4r (2.7.3)
nkf (0.2.0)
nokogiri (1.16.7-aarch64-linux)
racc (~> 1.4)
nokogiri (1.16.7-arm-linux)
Expand Down Expand Up @@ -249,6 +273,8 @@ GEM
rubocop-ast (1.32.0)
parser (>= 3.3.1.0)
ruby-progressbar (1.13.0)
rubyntlm (0.6.5)
base64
rubyzip (2.3.2)
selenium-webdriver (4.23.0)
base64 (~> 0.2)
Expand Down Expand Up @@ -293,6 +319,7 @@ GEM
bindex (>= 0.4.0)
railties (>= 6.0.0)
webrick (1.8.2)
webrobots (0.1.2)
websocket (1.2.11)
websocket-driver (0.7.6)
websocket-extensions (>= 0.1.0)
Expand All @@ -318,6 +345,7 @@ DEPENDENCIES
debug
importmap-rails
jbuilder
mechanize
pg
puma (>= 5.0)
rails (~> 7.1.3, >= 7.1.3.4)
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ From `heroku run bash`
Running bash on ⬢ blade-ruby-lang... up, run.7782
~ $ ./bin/rails runner import.rb --list ruby-list --from 1001 --to 2000
```

From the web (fetches each message over HTTPS from blade.ruby-lang.org)

```
~ $ ./bin/rails runner import_from_web.rb --list ruby-list --from 1001 --to 2000
```
10 changes: 10 additions & 0 deletions app/models/message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
BLADE_BUCKET_NAME = 'blade.ruby-lang.org'

require 'kconv'
require 'mechanize'

class Message < ApplicationRecord
# Not really sure we will utilize this configuration,
Expand All @@ -19,6 +20,15 @@ def self.from_s3(list_name, list_seq, s3_client = Aws::S3::Client.new(region: BL
m
end

# Builds a message from the page served at
# https://blade.ruby-lang.org/<list_name>/<list_seq>.
#
# The response body is handed to from_string, then the resulting object is
# tagged with the id of the List named list_name and with list_seq.
# The message is returned without being saved; persisting it is up to the
# caller.
#
# list_name  - name of the list, used both in the URL and for the List lookup
# list_seq   - sequence number of the message within the list
# web_client - any object responding to get(url) whose result responds to
#              body; defaults to a fresh Mechanize agent
def self.from_web(list_name, list_seq, web_client = Mechanize.new)
  url = "https://blade.ruby-lang.org/#{list_name}/#{list_seq}"
  page = web_client.get(url)

  self.from_string(page.body).tap do |message|
    # The list is looked up only after the fetch and parse have succeeded.
    message.list_id = List.find_by_name(list_name).id
    message.list_seq = list_seq
  end
end

def self.from_string(str)
# There are a few hacks to import messages from blade.ruby-lang.org's
# S3 bucket.
Expand Down
24 changes: 24 additions & 0 deletions import_from_web.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Imports a range of mailing-list messages by fetching each one with
# Message.from_web and saving it.
#
# Usage:
#   ./bin/rails runner import_from_web.rb --list ruby-list --from 1001 --to 2000
#
# Failures for individual messages are reported on stderr and do not stop
# the run.
require 'optparse'

params = {}
OptionParser.new do |opts|
  opts.on('--list LIST')
  opts.on('--from FROM', Integer)
  opts.on('--to TO', Integer)
end.parse!(into: params)

# All three options are needed to build the URLs and the sequence range;
# without this check a missing --from/--to surfaces as an opaque TypeError
# from iterating a nil-bounded range.
missing = %i[list from to].reject { |key| params.key?(key) }
unless missing.empty?
  abort("missing required option(s): #{missing.map { |key| "--#{key}" }.join(', ')}")
end

list = params[:list]

(params[:from]..params[:to]).each do |seq|
  begin
    # save! (rather than save) so a validation failure is reported by the
    # generic handler below instead of being silently ignored.
    Message.from_web(list, seq).save!
  rescue ActiveRecord::RecordNotUnique
    $stderr.puts("#{list}:#{seq} already exists in Postgres")
  rescue Mechanize::ResponseCodeError => e
    # Message.from_web fetches with Mechanize (required by
    # app/models/message.rb), which reports unsuccessful HTTP statuses with
    # this error; only a 404 means the message is absent.
    if e.response_code.to_s == '404'
      $stderr.puts("#{list}:#{seq} doesn't exist in Web")
    else
      $stderr.puts("failed to import #{list}:#{seq}: #{e}")
    end
  rescue StandardError => e
    $stderr.puts("failed to import #{list}:#{seq}: #{e}")
  end
  # Pause between requests — presumably to avoid hammering the server.
  sleep 1
end