diff --git a/docs/getting-started/example-datasets/nyc-taxi.md b/docs/getting-started/example-datasets/nyc-taxi.md index 60b5175c7f7..7119e087ada 100644 --- a/docs/getting-started/example-datasets/nyc-taxi.md +++ b/docs/getting-started/example-datasets/nyc-taxi.md @@ -10,17 +10,30 @@ title: 'New York Taxi Data' import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -The New York taxi data consists of 3+ billion taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009. The dataset can be obtained in a couple of ways: +The New York taxi data sample consists of 3+ billion taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009. This getting started guide uses a 3m row sample. + +The full dataset can be obtained in a couple of ways: - insert the data directly into ClickHouse Cloud from S3 or GCS - download prepared partitions +- Alternatively users can query the full dataset in our demo environment at [sql.clickhouse.com](https://sql.clickhouse.com/?query=U0VMRUNUIGNvdW50KCkgRlJPTSBueWNfdGF4aS50cmlwcw&chart=eyJ0eXBlIjoibGluZSIsImNvbmZpZyI6eyJ0aXRsZSI6IlRlbXBlcmF0dXJlIGJ5IGNvdW50cnkgYW5kIHllYXIiLCJ4YXhpcyI6InllYXIiLCJ5YXhpcyI6ImNvdW50KCkiLCJzZXJpZXMiOiJDQVNUKHBhc3Nlbmdlcl9jb3VudCwgJ1N0cmluZycpIn19). + + +:::note +The example queries below were executed on a **Production** instance of ClickHouse Cloud. For more information see +["Playground specifications"](/getting-started/playground#specifications). +::: + ## Create the table trips {#create-the-table-trips} Start by creating a table for the taxi rides: ```sql -CREATE TABLE trips ( + +CREATE DATABASE nyc_taxi; + +CREATE TABLE nyc_taxi.trips_small ( trip_id UInt32, pickup_datetime DateTime, dropoff_datetime DateTime, @@ -45,7 +58,7 @@ PRIMARY KEY (pickup_datetime, dropoff_datetime); ## Load the Data directly from Object Storage {#load-the-data-directly-from-object-storage} -Let's grab a small subset of the data for getting familiar with it. The data is in TSV files in object storage, which is easily streamed into +Users' can grab a small subset of the data (3 million rows) for getting familiar with it. The data is in TSV files in object storage, which is easily streamed into ClickHouse Cloud using the `s3` table function. The same data is stored in both S3 and GCS; choose either tab. @@ -56,7 +69,7 @@ The same data is stored in both S3 and GCS; choose either tab. The following command streams three files from a GCS bucket into the `trips` table (the `{0..2}` syntax is a wildcard for the values 0, 1, and 2): ```sql -INSERT INTO trips +INSERT INTO nyc_taxi.trips_small SELECT trip_id, pickup_datetime, @@ -84,10 +97,10 @@ FROM gcs( -The following command streams three files from an S3 bucket into the `trips` table (the `{0..2}` syntax is a wildcard for the values 0, 1, and 2): +The following command streams three files from an S3 bucket into the `trips_small` table (the `{0..2}` syntax is a wildcard for the values 0, 1, and 2): ```sql -INSERT INTO trips +INSERT INTO nyc_taxi.trips_small SELECT trip_id, pickup_datetime, @@ -117,122 +130,59 @@ FROM s3( ## Sample Queries {#sample-queries} +The following queries are executed on the sample described above. Users can run the sample queries on the full dataset in [sql.clickhouse.com](https://sql.clickhouse.com/?query=U0VMRUNUIGNvdW50KCkgRlJPTSBueWNfdGF4aS50cmlwcw&chart=eyJ0eXBlIjoibGluZSIsImNvbmZpZyI6eyJ0aXRsZSI6IlRlbXBlcmF0dXJlIGJ5IGNvdW50cnkgYW5kIHllYXIiLCJ4YXhpcyI6InllYXIiLCJ5YXhpcyI6ImNvdW50KCkiLCJzZXJpZXMiOiJDQVNUKHBhc3Nlbmdlcl9jb3VudCwgJ1N0cmluZycpIn19), modifying the queries below to use the table `nyc_taxi.trips`. + Let's see how many rows were inserted: -```sql +```sql runnable SELECT count() -FROM trips; +FROM nyc_taxi.trips_small; ``` Each TSV file has about 1M rows, and the three files have 3,000,317 rows. Let's look at a few rows: -```sql +```sql runnable SELECT * -FROM trips +FROM nyc_taxi.trips_small LIMIT 10; ``` -Notice there are columns for the pickup and dropoff dates, geo coordinates, fare details, New York neighborhoods, and more: - -```response -┌────trip_id─┬─────pickup_datetime─┬────dropoff_datetime─┬───pickup_longitude─┬────pickup_latitude─┬──dropoff_longitude─┬───dropoff_latitude─┬─passenger_count─┬─trip_distance─┬─fare_amount─┬─extra─┬─tip_amount─┬─tolls_amount─┬─total_amount─┬─payment_type─┬─pickup_ntaname─────────────────────────────┬─dropoff_ntaname────────────────────────────┐ -│ 1200864931 │ 2015-07-01 00:00:13 │ 2015-07-01 00:14:41 │ -73.99046325683594 │ 40.746116638183594 │ -73.97918701171875 │ 40.78467559814453 │ 5 │ 3.54 │ 13.5 │ 0.5 │ 1 │ 0 │ 15.8 │ CSH │ Midtown-Midtown South │ Upper West Side │ -│ 1200018648 │ 2015-07-01 00:00:16 │ 2015-07-01 00:02:57 │ -73.78358459472656 │ 40.648677825927734 │ -73.80242919921875 │ 40.64767837524414 │ 1 │ 1.45 │ 6 │ 0.5 │ 0 │ 0 │ 7.3 │ CRE │ Airport │ Airport │ -│ 1201452450 │ 2015-07-01 00:00:20 │ 2015-07-01 00:11:07 │ -73.98579406738281 │ 40.72777557373047 │ -74.00482177734375 │ 40.73748779296875 │ 5 │ 1.56 │ 8.5 │ 0.5 │ 1.96 │ 0 │ 11.76 │ CSH │ East Village │ West Village │ -│ 1202368372 │ 2015-07-01 00:00:40 │ 2015-07-01 00:05:46 │ -74.00206756591797 │ 40.73833084106445 │ -74.00658416748047 │ 40.74875259399414 │ 2 │ 1 │ 6 │ 0.5 │ 0 │ 0 │ 7.3 │ CRE │ West Village │ Hudson Yards-Chelsea-Flatiron-Union Square │ -│ 1200831168 │ 2015-07-01 00:01:06 │ 2015-07-01 00:09:23 │ -73.98748016357422 │ 40.74344253540039 │ -74.00575256347656 │ 40.716793060302734 │ 1 │ 2.3 │ 9 │ 0.5 │ 2 │ 0 │ 12.3 │ CSH │ Hudson Yards-Chelsea-Flatiron-Union Square │ SoHo-TriBeCa-Civic Center-Little Italy │ -│ 1201362116 │ 2015-07-01 00:01:07 │ 2015-07-01 00:03:31 │ -73.9926986694336 │ 40.75826644897461 │ -73.98628997802734 │ 40.76075744628906 │ 1 │ 0.6 │ 4 │ 0.5 │ 0 │ 0 │ 5.3 │ CRE │ Clinton │ Midtown-Midtown South │ -│ 1200639419 │ 2015-07-01 00:01:13 │ 2015-07-01 00:03:56 │ -74.00382995605469 │ 40.741981506347656 │ -73.99711608886719 │ 40.742271423339844 │ 1 │ 0.49 │ 4 │ 0.5 │ 0 │ 0 │ 5.3 │ CRE │ Hudson Yards-Chelsea-Flatiron-Union Square │ Hudson Yards-Chelsea-Flatiron-Union Square │ -│ 1201181622 │ 2015-07-01 00:01:17 │ 2015-07-01 00:05:12 │ -73.9512710571289 │ 40.78261947631836 │ -73.95230865478516 │ 40.77476119995117 │ 4 │ 0.97 │ 5 │ 0.5 │ 1 │ 0 │ 7.3 │ CSH │ Upper East Side-Carnegie Hill │ Yorkville │ -│ 1200978273 │ 2015-07-01 00:01:28 │ 2015-07-01 00:09:46 │ -74.00822448730469 │ 40.72113037109375 │ -74.00422668457031 │ 40.70782470703125 │ 1 │ 1.71 │ 8.5 │ 0.5 │ 1.96 │ 0 │ 11.76 │ CSH │ SoHo-TriBeCa-Civic Center-Little Italy │ Battery Park City-Lower Manhattan │ -│ 1203283366 │ 2015-07-01 00:01:47 │ 2015-07-01 00:24:26 │ -73.98199462890625 │ 40.77289962768555 │ -73.91968536376953 │ 40.766082763671875 │ 3 │ 5.26 │ 19.5 │ 0.5 │ 5.2 │ 0 │ 26 │ CSH │ Lincoln Square │ Astoria │ -└────────────┴─────────────────────┴─────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴─────────────────┴───────────────┴─────────────┴───────┴────────────┴──────────────┴──────────────┴──────────────┴────────────────────────────────────────────┴────────────────────────────────────────────┘ -``` +Notice there are columns for the pickup and dropoff dates, geo coordinates, fare details, New York neighborhoods, and more. + Let's run a few queries. This query shows us the top 10 neighborhoods that have the most frequent pickups: -```sql +```sql runnable SELECT pickup_ntaname, count(*) AS count -FROM trips +FROM nyc_taxi.trips_small WHERE pickup_ntaname != '' GROUP BY pickup_ntaname ORDER BY count DESC LIMIT 10; ``` -The result is: - -```response -┌─pickup_ntaname─────────────────────────────┬──count─┐ -│ Midtown-Midtown South │ 526864 │ -│ Hudson Yards-Chelsea-Flatiron-Union Square │ 288797 │ -│ West Village │ 210436 │ -│ Turtle Bay-East Midtown │ 197111 │ -│ Upper East Side-Carnegie Hill │ 184327 │ -│ Airport │ 151343 │ -│ SoHo-TriBeCa-Civic Center-Little Italy │ 144967 │ -│ Murray Hill-Kips Bay │ 138599 │ -│ Upper West Side │ 135469 │ -│ Clinton │ 130002 │ -└────────────────────────────────────────────┴────────┘ -``` - This query shows the average fare based on the number of passengers: -```sql +```sql runnable view='chart' chart_config='eyJ0eXBlIjoiYmFyIiwiY29uZmlnIjp7InhheGlzIjoicGFzc2VuZ2VyX2NvdW50IiwieWF4aXMiOiJhdmcodG90YWxfYW1vdW50KSIsInRpdGxlIjoiQXZlcmFnZSBmYXJlIGJ5IHBhc3NlbmdlciBjb3VudCJ9fQ' SELECT passenger_count, avg(total_amount) -FROM trips +FROM nyc_taxi.trips_small +WHERE passenger_count < 10 GROUP BY passenger_count; ``` -```response -┌─passenger_count─┬──avg(total_amount)─┐ -│ 0 │ 25.226335263065018 │ -│ 1 │ 15.961279340656672 │ -│ 2 │ 17.146174183960667 │ -│ 3 │ 17.65380033178517 │ -│ 4 │ 17.248804201047456 │ -│ 5 │ 16.353501285179135 │ -│ 6 │ 15.995094439202836 │ -│ 7 │ 62.077143805367605 │ -│ 8 │ 26.120000791549682 │ -│ 9 │ 10.300000190734863 │ -└─────────────────┴────────────────────┘ -``` - Here's a correlation between the number of passengers and the distance of the trip: -```sql +```sql runnable chart_config='eyJ0eXBlIjoiaG9yaXpvbnRhbCBiYXIiLCJjb25maWciOnsidGl0bGUiOiJEaXN0YW5jZSBieSBwYXNzZW5nZXIgY291bnQiLCJ4YXhpcyI6InBhc3Nlbmdlcl9jb3VudCIsInlheGlzIjoiZGlzdGFuY2UiLCJ6YXhpcyI6ImNvdW50KCkifX0' SELECT passenger_count, - toYear(pickup_datetime) AS year, round(trip_distance) AS distance, count(*) -FROM trips -GROUP BY passenger_count, year, distance -ORDER BY year, count(*) DESC; -``` - -The first part of the result is: - -```response -┌─passenger_count─┬─year─┬─distance─┬─count()─┐ -│ 1 │ 2015 │ 1 │ 748644 │ -│ 1 │ 2015 │ 2 │ 521602 │ -│ 1 │ 2015 │ 3 │ 225077 │ -│ 2 │ 2015 │ 1 │ 144990 │ -│ 1 │ 2015 │ 4 │ 134782 │ -│ 1 │ 2015 │ 0 │ 127284 │ -│ 2 │ 2015 │ 2 │ 106411 │ -│ 1 │ 2015 │ 5 │ 72725 │ -│ 5 │ 2015 │ 1 │ 59343 │ -│ 1 │ 2015 │ 6 │ 53447 │ -│ 2 │ 2015 │ 3 │ 48019 │ -│ 3 │ 2015 │ 1 │ 44865 │ -│ 6 │ 2015 │ 1 │ 39409 │ +FROM nyc_taxi.trips_small +GROUP BY passenger_count, distance +ORDER BY passenger_count ASC, count(*) DESC; ``` ## Download of Prepared Partitions {#download-of-prepared-partitions} diff --git a/sidebars.js b/sidebars.js index cddf0cd477e..433e388608c 100644 --- a/sidebars.js +++ b/sidebars.js @@ -196,6 +196,7 @@ const sidebars = { collapsible: true, link: { type: "doc", id: "getting-started/index" }, items: [ + "getting-started/example-datasets/nyc-taxi", "getting-started/example-datasets/amazon-reviews", "getting-started/example-datasets/amplab-benchmark", "getting-started/example-datasets/brown-benchmark", @@ -209,7 +210,6 @@ const sidebars = { "getting-started/example-datasets/menus", "getting-started/example-datasets/metrica", "getting-started/example-datasets/noaa", - "getting-started/example-datasets/nyc-taxi", "getting-started/example-datasets/nypd_complaint_data", "getting-started/example-datasets/ontime", "getting-started/example-datasets/opensky", diff --git a/src/components/CodeViewer/CodeInterpreter.tsx b/src/components/CodeViewer/CodeInterpreter.tsx index c76e7fe55a9..a820679cf64 100644 --- a/src/components/CodeViewer/CodeInterpreter.tsx +++ b/src/components/CodeViewer/CodeInterpreter.tsx @@ -175,6 +175,7 @@ function CodeInterpreter({ @@ -186,6 +187,7 @@ function CodeInterpreter({ @@ -195,15 +197,15 @@ function CodeInterpreter({ ) return ( -
+
{show_results} {chart && ( -
+
{ setCurrentView(DefaultView.Table) }} @@ -230,14 +232,14 @@ function CodeInterpreter({ return (
-
{hideTableResultButton()}
-
- {show_statistics && results?.response?.statistics && ( -
- {`${runBy()} Read ${formatReadableRows(results.response.statistics.rows_read)} rows and ${formatBytes(results.response.statistics.bytes_read)} in ${roundToDynamicPrecision(results.response.statistics.elapsed)} seconds`} +
{hideTableResultButton()}
+
+ {show_statistics && results?.response?.statistics && ( +
+ {`Read ${formatReadableRows(results.response.statistics.rows_read)} rows and ${formatBytes(results.response.statistics.bytes_read)} in ${roundToDynamicPrecision(results.response.statistics.elapsed)} secs`} +
+ )}
- )} -
diff --git a/src/components/CodeViewer/index.tsx b/src/components/CodeViewer/index.tsx index 51b00245553..fd1d25c7ed8 100644 --- a/src/components/CodeViewer/index.tsx +++ b/src/components/CodeViewer/index.tsx @@ -1,4 +1,4 @@ -import { CodeBlock, ClickUIProvider, Text, Separator } from '@clickhouse/click-ui/bundled' +import { CodeBlock, ClickUIProvider, Text } from '@clickhouse/click-ui/bundled' import CodeInterpreter from './CodeInterpreter' import { DefaultView } from './CodeResults' import { ChartConfig, ChartType } from './types'