package org.example;

import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;

import java.util.HashMap;
import java.util.Map;

public class ImportCustomersWithRentalsAndPayments {

    public static void main(String[] args) {
        // The MarkLogic admin password is assumed to be "admin" per the docker-compose.yml file. This is purely
        // for demonstration purposes and should never be used in a real application.
        final String markLogicAdminPassword = "admin";

        // Create a vanilla local Spark session.
        SparkSession session = SparkSession.builder()
            .master("local[*]")
            .getOrCreate();

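        // Connection details for Spark's built-in JDBC data source. This assumes a local Postgres instance
        // hosting the "dvdrental" sample database with the default postgres/postgres credentials, and that
        // the Postgres JDBC driver is on the classpath.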
        Map<String, String> jdbcOptions = new HashMap<>();
        jdbcOptions.put("driver", "org.postgresql.Driver");
        jdbcOptions.put("url", "jdbc:postgresql://localhost/dvdrental");
        jdbcOptions.put("user", "postgres");
        jdbcOptions.put("password", "postgres");

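        // Join customers to their rentals and payments, yielding one flat row per payment for a small slice
        // of customers. The two aggregations below fold these rows back into one nested record per customer.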
        String query =
            "select c.customer_id, c.last_name, r.rental_id, r.rental_date, p.payment_id, p.amount " +
            "from customer c " +
            "inner join rental r on c.customer_id = r.customer_id " +
            "inner join payment p on r.rental_id = p.rental_id " +
            "where c.customer_id >= 180 and c.customer_id < 190";

        session
            .read()
            // Use Spark's built-in JDBC support to read rows from Postgres.
            .format("jdbc").options(jdbcOptions)
            .option("query", query)
            .load()

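            // First level of nesting: group the flat rows by rental, collecting each rental's payments into
            // an array of {payment_id, amount} structs. customer_id and last_name are constant within a
            // rental, so first() simply carries them along.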
            .groupBy("rental_id")
            .agg(
                functions.first("customer_id").alias("customer_id"),
                functions.first("last_name").alias("last_name"),
                functions.collect_list(functions.struct("payment_id", "amount")).alias("payments")
            )
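
            // Second level of nesting: group the rental rows by customer, collecting each customer's rentals,
            // with their nested payments, into an array of {rental_id, payments} structs.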
            .groupBy("customer_id")
            .agg(
                functions.first("last_name").alias("last_name"),
                functions.collect_list(functions.struct("rental_id", "payments")).alias("Rentals")
            )
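
            // Each row now represents one customer; a sketch of the resulting document shape (field order
            // may vary):
            //   {"customer_id": ..., "last_name": "...", "Rentals": [
            //       {"rental_id": ..., "payments": [{"payment_id": ..., "amount": ...}]}
            //   ]}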

            // The remaining calls use the MarkLogic Spark connector to write customer rows, with nested rentals
            // and sub-nested payments, to the Documents database in MarkLogic.
            .write()
            .format("com.marklogic.spark")
            .option("spark.marklogic.client.host", "localhost")
            .option("spark.marklogic.client.port", "8000")
            .option("spark.marklogic.client.username", "admin")
            .option("spark.marklogic.client.password", markLogicAdminPassword)
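            // Derive each document's URI from the customer_id column, e.g. /customerWithDoubleNesting/180.json.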
            .option("spark.marklogic.write.uriTemplate", "/customerWithDoubleNesting/{customer_id}.json")
            .option("spark.marklogic.write.collections", "CustomerRentalPayments")
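            // Permissions are a comma-delimited list of role,capability pairs: rest-reader can read the
            // documents and rest-writer can update them.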
            .option("spark.marklogic.write.permissions", "rest-reader,read,rest-writer,update")
            .mode(SaveMode.Append)
            .save();
    }
}