Skip to content

Commit 41705e6

Browse files
committed
Merge branch 'master' into trocco
2 parents 9f4febc + ee1b21d commit 41705e6

File tree

13 files changed

+139
-46
lines changed

13 files changed

+139
-46
lines changed

.github/workflows/check.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: Check
2+
on: [ pull_request, push ]
3+
jobs:
4+
check:
5+
runs-on: ubuntu-latest
6+
# push: always run.
7+
# pull_request: run only when the PR is submitted from a forked repository, not within this repository.
8+
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
9+
strategy:
10+
matrix:
11+
jruby_version:
12+
- 9.3.15.0
13+
- 9.4.8.0
14+
fail-fast: false
15+
steps:
16+
- uses: actions/checkout@v4
17+
- name: Set up OpenJDK 8
18+
uses: actions/setup-java@v4
19+
with:
20+
java-version: 8
21+
distribution: "temurin"
22+
- name: download jruby
23+
run: "curl -L -o jruby.jar https://repo1.maven.org/maven2/org/jruby/jruby-complete/${{ matrix.jruby_version }}/jruby-complete-${{ matrix.jruby_version }}.jar"
24+
#
25+
# To avoid permission-denied errors, install gems into the `gems` directory.
26+
#
27+
- name: bundle install
28+
run: "env GEM_HOME=gems java -jar jruby.jar -S bundle install"
29+
30+
- name: install embulk.jar
31+
run: "curl -L -o embulk.jar https://github.com/embulk/embulk/releases/download/v0.11.4/embulk-0.11.4.jar"
32+
- name: rake test
33+
run: 'env GEM_HOME=gems RUBYOPT="-r ./embulk.jar -r rubygems" java -jar jruby.jar -S bundle exec rake test'

.github/workflows/publish.yml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
name: Publish
2+
on:
3+
push:
4+
tags:
5+
- "v0.*"
6+
jobs:
7+
publish:
8+
runs-on: ubuntu-latest
9+
environment: maven-central-and-ruby-gems
10+
strategy:
11+
fail-fast: true
12+
steps:
13+
- uses: actions/checkout@v4
14+
- name: Set up Ruby
15+
uses: ruby/setup-ruby@v1
16+
with:
17+
ruby-version: 3.3.0
18+
# Get the tag name from the `github.ref_name` context (e.g. `v0.7.2`).
19+
#
20+
# References:
21+
# * https://docs.github.com/en/actions/learn-github-actions/contexts#github-context
22+
# * https://docs.github.com/en/actions/learn-github-actions/variables#default-environment-variables
23+
- name: extract gem version from tag
24+
id: vars
25+
run: echo version=${{ github.ref_name }} | sed -e 's/v0/0/' >> $GITHUB_OUTPUT
26+
#
27+
# From the `gem push` documentation:
28+
#
29+
# The push command will use ~/.gem/credentials to authenticate to a server,
30+
# but you can use the RubyGems environment variable GEM_HOST_API_KEY
31+
# to set the api key to authenticate.
32+
#
33+
# https://guides.rubygems.org/command-reference/#gem-push
34+
#
35+
- name: Publish
36+
run: |
37+
rake build
38+
gem push pkg/${EMBULK_PLUGIN_NAME}-${{ steps.vars.outputs.version }}.gem
39+
env:
40+
EMBULK_PLUGIN_NAME: embulk-output-bigquery
41+
GEM_HOST_API_KEY: "${{secrets.RUBYGEMS_API_KEY}}"

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
## 0.7.2 - 2024-07-21
2+
* [maintenance] Fix GitHub Actions #166
3+
* [maintenance] Fix gcs_client in order to load data using gcs_bucket parameter (Thanks to kashira202111) #164
4+
* [maintenance] Prevent creating unnecessary tables. (Thanks to kashira202111) #148
5+
6+
## 0.7.1 - 2024-03-04
7+
* [enhancement] Support description of columns and tables (Thanks to @kyoshidajp and @fagai ) #142
8+
* [maintenance] Add missing GitHub Actions environment setting. #160
9+
* [maintenance] Replace google-api-client with specific Google APIs (Thanks to @Nozomuts) #161
10+
* [maintenance] Update GitHub Actions to use checkout@v4 and setup-java@v4 #162
11+
12+
## 0.7.0 - 2024-02-01
13+
* [enhancement] Add support for Embulk 0.11.x
14+
115
## 0.6.9 - 2023-03-16
216
* [enhancement] Add SSLException to retry job (thanks to @mzumi)
317

Gemfile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
source 'https://rubygems.org/'
22

33
gemspec
4-
gem 'embulk', '< 0.10'
5-
gem 'liquid', '= 4.0.0' # the version included in embulk.jar
4+
gem 'embulk', '= 0.11.4'
65
gem 'embulk-parser-none'
76
gem 'embulk-parser-jsonl'
87
gem 'pry-nav'

README.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
# embulk-output-bigquery
22

3-
[![Build Status](https://secure.travis-ci.org/embulk/embulk-output-bigquery.png?branch=master)](http://travis-ci.org/embulk/embulk-output-bigquery)
4-
53
[Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/) using [direct insert](https://cloud.google.com/bigquery/loading-data-into-bigquery#loaddatapostrequest)
64

75
## Overview
@@ -14,6 +12,13 @@ https://developers.google.com/bigquery/loading-data-into-bigquery
1412
* **Cleanup supported**: no
1513
* **Dynamic table creating**: yes
1614

15+
### Supported Embulk
16+
17+
| gem version | Embulk version |
18+
|------------------|--------------------|
19+
| 0.7.0 and higher | v0.11.0 and higher |
20+
| 0.6.9 and lower | v0.9.X and lower |
21+
1722
### NOT IMPLEMENTED
1823
* insert data over streaming inserts
1924
* for continuous real-time insertions
@@ -55,6 +60,7 @@ OAuth flow for installed applications.
5560
| gcs_bucket | string | optional | nil | See [GCS Bucket](#gcs-bucket) |
5661
| auto_create_gcs_bucket | boolean | optional | false | See [GCS Bucket](#gcs-bucket) |
5762
| progress_log_interval | float | optional | nil (Disabled) | Progress log interval. The progress log is disabled by nil (default). NOTE: This option may be removed in the future because a filter plugin can achieve the same goal |
63+
| description | string | optional | nil | description of table |
5864

5965
Client or request options
6066

@@ -325,6 +331,7 @@ Column options are used to aid guessing BigQuery schema, or to define conversion
325331
- numeric: `STRING`
326332
- **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
327333
- **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
334+
- **description**: description of the column (string, default is `None`).
328335
- **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
329336
- **timezone**: timezone to convert into/from `timestamp`, `date` (string, default is `default_timezone`).
330337
- **description**: description for the column.

embulk-output-bigquery.gemspec

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Gem::Specification.new do |spec|
22
spec.name = "embulk-output-bigquery"
3-
spec.version = "0.6.9.trocco.0.0.3"
3+
spec.version = "0.7.2"
44
spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
55
spec.summary = "Google BigQuery output plugin for Embulk"
66
spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -14,18 +14,14 @@ Gem::Specification.new do |spec|
1414
spec.test_files = spec.files.grep(%r{^(test|spec)/})
1515
spec.require_paths = ["lib"]
1616

17-
# TODO
18-
# signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
19-
# Embulk 0.9 use JRuby 9.1.X.Y and it's compatible with Ruby 2.3.
20-
# So, force install signet < 0.12 and google-api-client < 0.33.0
21-
# Also, representable version >= 3.1.0 requires Ruby version >= 2.4
22-
spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
23-
spec.add_dependency 'google-api-client','< 0.33.0'
17+
# the latest version
18+
spec.add_dependency 'google-apis-storage_v1'
19+
spec.add_dependency 'google-apis-bigquery_v2'
2420
spec.add_dependency 'time_with_zone'
25-
spec.add_dependency "representable", ['~> 3.0.0', '< 3.1']
26-
# faraday 1.1.0 require >= Ruby 2.4.
27-
# googleauth 0.9.0 requires faraday ~> 0.12
28-
spec.add_dependency "faraday", '~> 0.12'
21+
spec.add_dependency 'thwait'
22+
# activesupport 7.0 and later require Ruby >= 2.7.0,
23+
# but JRuby 9.3.x is only MRI 2.6 compatible, so pin activesupport below 7.0.
24+
spec.add_dependency 'activesupport', "< 7.0"
2925

3026
spec.add_development_dependency 'bundler', ['>= 1.10.6']
3127
spec.add_development_dependency 'rake', ['>= 10.0']

lib/embulk/output/bigquery.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ def self.configure(config, schema, task_count)
6363
'payload_column' => config.param('payload_column', :string, :default => nil),
6464
'payload_column_index' => config.param('payload_column_index', :integer, :default => nil),
6565

66+
'description' => config.param('description', :string, :default => nil),
67+
6668
'open_timeout_sec' => config.param('open_timeout_sec', :integer, :default => nil),
6769
'timeout_sec' => config.param('timeout_sec', :integer, :default => nil), # google-api-ruby-client < v0.11.0
6870
'send_timeout_sec' => config.param('send_timeout_sec', :integer, :default => nil), # google-api-ruby-client >= v0.11.0

lib/embulk/output/bigquery/bigquery_client.rb

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def load_from_gcs(object_uris, table)
121121
opts = {}
122122

123123
Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
124-
response = with_network_retry { client.insert_job(@project, body, opts) }
124+
response = with_network_retry { client.insert_job(@project, body, **opts) }
125125
unless @task['is_skip_job_result_check']
126126
response = wait_load('Load', response)
127127
end
@@ -222,7 +222,7 @@ def load(path, table, write_disposition: 'WRITE_APPEND')
222222
# },
223223
}
224224
Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
225-
response = with_network_retry { client.insert_job(@project, body, opts) }
225+
response = with_network_retry { client.insert_job(@project, body, **opts) }
226226
if @task['is_skip_job_result_check']
227227
response
228228
else
@@ -278,7 +278,7 @@ def copy(source_table, destination_table, destination_dataset = nil, write_dispo
278278

279279
opts = {}
280280
Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
281-
response = with_network_retry { client.insert_job(@project, body, opts) }
281+
response = with_network_retry { client.insert_job(@project, body, **opts) }
282282
wait_load('Copy', response)
283283
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
284284
response = {status_code: e.status_code, message: e.message, error_class: e.class}
@@ -372,7 +372,7 @@ def create_dataset(dataset = nil, reference: nil)
372372
end
373373
opts = {}
374374
Embulk.logger.debug { "embulk-output-bigquery: insert_dataset(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
375-
with_network_retry { client.insert_dataset(@project, body, opts) }
375+
with_network_retry { client.insert_dataset(@project, body, **opts) }
376376
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
377377
if e.status_code == 409 && /Already Exists:/ =~ e.message
378378
# ignore 'Already Exists' error
@@ -420,6 +420,7 @@ def create_table_if_not_exists(table, dataset: nil, options: nil)
420420
table_reference: {
421421
table_id: table,
422422
},
423+
description: @task['description'],
423424
schema: {
424425
fields: fields,
425426
}
@@ -446,8 +447,8 @@ def create_table_if_not_exists(table, dataset: nil, options: nil)
446447
end
447448

448449
opts = {}
449-
Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
450-
with_network_retry { client.insert_table(@project, dataset, body, opts) }
450+
Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@destination_project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
451+
with_network_retry { client.insert_table(@destination_project, dataset, body, **opts) }
451452
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
452453
if e.status_code == 409 && /Already Exists:/ =~ e.message
453454
# ignore 'Already Exists' error
@@ -456,7 +457,7 @@ def create_table_if_not_exists(table, dataset: nil, options: nil)
456457

457458
response = {status_code: e.status_code, message: e.message, error_class: e.class}
458459
Embulk.logger.error {
459-
"embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
460+
"embulk-output-bigquery: insert_table(#{@destination_project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
460461
}
461462
raise Error, "failed to create table #{@destination_project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
462463
end

lib/embulk/output/bigquery/gcs_client.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def insert_temporary_bucket(bucket = nil)
4848
opts = {}
4949

5050
Embulk.logger.debug { "embulk-output-bigquery: insert_temporary_bucket(#{@project}, #{body}, #{opts})" }
51-
with_network_retry { client.insert_bucket(@project, body, opts) }
51+
with_network_retry { client.insert_bucket(@project, body, **opts) }
5252
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
5353
if e.status_code == 409 && /conflict:/ =~ e.message
5454
# ignore 'Already Exists' error
@@ -81,7 +81,7 @@ def insert_object(path, object: nil, bucket: nil)
8181

8282
Embulk.logger.debug { "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts})" }
8383
# memo: gcs is strongly consistent for insert (read-after-write). ref: https://cloud.google.com/storage/docs/consistency
84-
with_network_retry { client.insert_object(bucket, body, opts) }
84+
with_network_retry { client.insert_object(bucket, body, **opts) }
8585
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
8686
response = {status_code: e.status_code, message: e.message, error_class: e.class}
8787
Embulk.logger.error {
@@ -114,7 +114,7 @@ def delete_object(object, bucket: nil)
114114
opts = {}
115115

116116
Embulk.logger.debug { "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts})" }
117-
response = with_network_retry { client.delete_object(bucket, object, opts) }
117+
response = with_network_retry { client.delete_object(bucket, object, **opts) }
118118
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
119119
if e.status_code == 404 # ignore 'notFound' error
120120
return nil

lib/embulk/output/bigquery/helper.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@ def self.fields_from_embulk_schema(task, schema)
4646
embulk_type = column[:type]
4747
column_option = column_options_map[column_name] || {}
4848
{}.tap do |field|
49-
field[:name] = column_name
50-
field[:type] = (column_option['type'] || bq_type_from_embulk_type(embulk_type)).upcase
51-
field[:mode] = column_option['mode'] if column_option['mode']
52-
field[:fields] = deep_symbolize_keys(column_option['fields']) if column_option['fields']
49+
field[:name] = column_name
50+
field[:type] = (column_option['type'] || bq_type_from_embulk_type(embulk_type)).upcase
51+
field[:mode] = column_option['mode'] if column_option['mode']
52+
field[:fields] = deep_symbolize_keys(column_option['fields']) if column_option['fields']
5353
field[:description] = column_option['description'] if column_option['description']
5454
end
5555
end

0 commit comments

Comments
 (0)