|
#!/bin/bash
#
# Export Databend system tables and history logs for one day, download the
# exported files and bundle them into a tar.gz archive.
# Run with -h / --help for the full usage text.

# ---- Defaults --------------------------------------------------------------
# Directory used when --output_dir is not given.
DEFAULT_OUTPUT_DIR="."
# Log verbosity: 0 = normal, 1 = verbose, 2 = debug.
VERBOSITY=0

# ---- Run-time state (filled in while parsing the command line) --------------
# Connection string.
DSN=""
# Raw positional date argument (YYYYMMDD).
DATE=""
# Destination directory for downloaded files.
OUTPUT_DIR=""
# DATE rewritten as YYYY-MM-DD.
FORMATTED_DATE=""
| 12 | + |
#######################################
# Write a levelled message to stderr.
# Globals:
#   VERBOSITY (read) - 0 = normal, 1 = verbose, 2 = debug
# Arguments:
#   $1 - level: DEBUG, INFO, WARN or ERROR
#   $2 - message text
# Outputs:
#   "[LEVEL] message" on stderr when the level passes the verbosity filter:
#   DEBUG needs VERBOSITY >= 2, INFO needs VERBOSITY >= 1, WARN and ERROR
#   are always printed. Unknown levels print nothing.
# Returns:
#   0 always, so a filtered-out message never looks like a failure to the
#   caller.
#######################################
log() {
  local level="$1"
  local message="$2"
  local threshold

  case "$level" in
    DEBUG) threshold=2 ;;
    INFO) threshold=1 ;;
    WARN | ERROR) threshold=0 ;;
    *) return 0 ;;
  esac

  if (( ${VERBOSITY:-0} >= threshold )); then
    printf '[%s] %s\n' "$level" "$message" >&2
  fi
  return 0
}
| 30 | + |
#######################################
# Print the usage text.
# Arguments:
#   none
# Outputs:
#   Usage text on stdout. $0 is expanded, so the usage line and the examples
#   show the name the script was invoked with.
#######################################
show_help() {
  local self="$0"

  cat <<EOF
Usage: ${self} [OPTIONS] YYYYMMDD

Download Databend system data for the specified date.

OPTIONS:
  --dsn DSN             Database connection string (overrides BENDSQL_DSN env var)
  --output_dir PATH     Output directory (default: current directory)
  -v                    Verbose output
  -vv                   Debug output (very verbose)
  -h, --help            Show this help message

ARGUMENTS:
  YYYYMMDD              Date in YYYYMMDD format (e.g., 20250701)

ENVIRONMENT VARIABLES:
  BENDSQL_DSN           Default database connection string

EXAMPLES:
  # Use environment variable for DSN
  export BENDSQL_DSN="http://username:password@localhost:8000/database?sslmode=enable"
  ${self} 20250701

  # Override DSN with command line
  ${self} --dsn "http://username:password@localhost:8000/database?sslmode=enable" 20250701

  # Specify custom output directory
  ${self} --output_dir /tmp/databend_export 20250701

  # Enable verbose logging
  ${self} -v 20250701
EOF
}
| 66 | + |
#######################################
# Rewrite a YYYYMMDD string as YYYY-MM-DD.
# Globals:
#   FORMATTED_DATE (written)
# Arguments:
#   $1 - date string; it is sliced at fixed offsets (4, 2, 2 characters)
#        and is not validated here
# Outputs:
#   A DEBUG log line carrying the result.
#######################################
format_date() {
  local raw="$1"
  local year="${raw:0:4}"
  local month="${raw:4:2}"
  local day="${raw:6:2}"

  printf -v FORMATTED_DATE '%s-%s-%s' "$year" "$month" "$day"
  log DEBUG "Formatted date: $FORMATTED_DATE"
}
| 73 | + |
#######################################
# Parse the command line into the script's global settings.
# Globals:
#   DSN, OUTPUT_DIR, VERBOSITY, DATE (written)
#   FORMATTED_DATE (written through format_date when a date was given)
#   DEFAULT_OUTPUT_DIR, BENDSQL_DSN (read)
# Arguments:
#   "$@" - the script's command-line arguments
# Outputs:
#   Usage text for -h/--help; error messages on stderr.
# Exits:
#   0 after printing help; 1 on an option without its value, an unknown
#   option, or a second positional argument.
#######################################
parse_arguments() {
  while (( $# > 0 )); do
    case "$1" in
      -h | --help)
        show_help
        exit 0
        ;;
      --dsn | --output_dir)
        # `shift 2` with only one argument left fails WITHOUT shifting, which
        # would leave "$1" in place and spin this loop forever.
        if (( $# < 2 )); then
          log ERROR "Option $1 requires a value"
          echo "Use -h or --help for usage information." >&2
          exit 1
        fi
        if [[ "$1" == "--dsn" ]]; then
          DSN="$2"
        else
          OUTPUT_DIR="$2"
        fi
        shift 2
        ;;
      -v)
        VERBOSITY=1
        shift
        ;;
      -vv)
        VERBOSITY=2
        shift
        ;;
      -*)
        # Reject mistyped options here instead of treating them as the date.
        log ERROR "Unknown option: $1"
        echo "Use -h or --help for usage information." >&2
        exit 1
        ;;
      *)
        if [[ -n "$DATE" ]]; then
          log ERROR "Unexpected argument: $1"
          echo "Use -h or --help for usage information." >&2
          exit 1
        fi
        DATE="$1"
        shift
        ;;
    esac
  done

  # The command line wins; fall back to the environment only when --dsn was
  # not given (or was given empty).
  if [[ -z "$DSN" && -n "${BENDSQL_DSN:-}" ]]; then
    DSN="$BENDSQL_DSN"
    log DEBUG "Using DSN from environment variable BENDSQL_DSN"
  fi

  OUTPUT_DIR="${OUTPUT_DIR:-$DEFAULT_OUTPUT_DIR}"

  if [[ -n "$DATE" ]]; then
    format_date "$DATE"
  fi
}
| 122 | + |
# Log a usage error, print the help hint on stderr and terminate with status 1.
_usage_failure() {
  log ERROR "$1"
  echo "Use -h or --help for usage information." >&2
  exit 1
}

#######################################
# Check that the parsed settings are usable before any query is sent.
# Globals:
#   DATE, DSN (read)
# Arguments:
#   none
# Outputs:
#   An error message plus a help hint on stderr when a check fails.
# Returns:
#   0 when everything is valid.
# Exits:
#   1 when DATE is missing, is not eight digits, is not a real calendar
#   date, or when no DSN is available.
#######################################
validate_arguments() {
  local -r shape_re='^[0-9]{8}$'
  local -a days_in_month=(0 31 28 31 30 31 30 31 31 30 31 30 31)
  local year month day

  [[ -n "$DATE" ]] || _usage_failure "Missing required date parameter"
  [[ "$DATE" =~ $shape_re ]] \
    || _usage_failure "Invalid date format: $DATE (expected YYYYMMDD)"

  # The date ends up inside SQL text, so reject values such as 20251399 here.
  # 10# forces base 10: "08" and "09" would otherwise be read as bad octal.
  year=$((10#${DATE:0:4}))
  month=$((10#${DATE:4:2}))
  day=$((10#${DATE:6:2}))
  if (( (year % 4 == 0 && year % 100 != 0) || year % 400 == 0 )); then
    days_in_month[2]=29
  fi
  if (( month < 1 || month > 12 )) || (( day < 1 || day > days_in_month[month] )); then
    _usage_failure "Invalid date: $DATE (not a calendar date)"
  fi

  [[ -n "$DSN" ]] \
    || _usage_failure "No DSN provided. Set BENDSQL_DSN environment variable or use --dsn option."

  return 0
}
| 142 | + |
#######################################
# Build the bendsql command prefix used for every query.
# Globals:
#   DSN (read)
#   BASE_CMD (written) - a command STRING meant to be passed through `eval`
# Arguments:
#   none
# Outputs:
#   An INFO log line showing the command with the DSN password masked.
#######################################
build_base_command() {
  local -r userinfo_re='^([A-Za-z0-9+.-]+://[^:/@]*):[^/@]*@(.*)$'
  local quoted_dsn
  local shown_dsn="$DSN"
  local shown_cmd="bendsql"

  BASE_CMD="bendsql"
  if [[ -n "$DSN" ]]; then
    # %q escapes the value so that eval yields it back as exactly one word.
    # Wrapping it in literal double quotes would let eval expand any `$` or
    # backtick found in the password.
    printf -v quoted_dsn '%q' "$DSN"
    BASE_CMD+=" --dsn ${quoted_dsn}"

    # Keep the credentials out of the log: scheme://user:PASSWORD@rest
    if [[ "$DSN" =~ $userinfo_re ]]; then
      shown_dsn="${BASH_REMATCH[1]}:***@${BASH_REMATCH[2]}"
    fi
    shown_cmd+=" --dsn \"${shown_dsn}\""
  fi

  log INFO "Using command: $shown_cmd"
}
| 148 | + |
#######################################
# Run one SQL statement through the prepared bendsql command.
# Globals:
#   BASE_CMD (read) - command prefix as a pre-quoted string
# Arguments:
#   $1 - SQL text
# Outputs:
#   Whatever the client prints; a DEBUG line with the first 60 characters
#   of the statement; an ERROR line when the client fails.
# Returns:
#   0 on success.
# Exits:
#   With the client's status when the query fails. Inside $(...) or a
#   pipeline this only ends that subshell, so such callers see empty output.
#######################################
execute_query() {
  local sql="$1"
  local status=0

  log DEBUG "Executing: ${sql:0:60}..."

  # BASE_CMD is a command string, so it has to go through eval. The rest is
  # single-quoted here so that eval itself expands "$sql" as one word; the
  # SQL text is never re-parsed as shell code, whatever quotes, `$` or
  # backticks it contains.
  eval "$BASE_CMD"' --quote-style never --query="$sql"' || status=$?

  if (( status != 0 )); then
    log ERROR "Query failed"
    exit "$status"
  fi
  return 0
}
| 158 | + |
#######################################
# Download one staged file through a presigned URL.
# Arguments:
#   $1 - file name inside the stage
#   $2 - local directory that receives the file
#   $3 - stage name (optional; defaults to the script's temporary stage)
# Outputs:
#   INFO/DEBUG/ERROR log lines. Note that the DEBUG line contains the
#   presigned URL.
# Returns:
#   0 when the file was downloaded; 1 when no usable URL was returned or the
#   transfer failed (a partial file is removed).
#######################################
download_file() {
  local filename="$1"
  local download_dir="$2"
  local stage="${3:-a5c7667401c0c728c2ef9703bdaea66d9ae2d906}"
  local target="${download_dir}/${filename}"
  local presign_result presign_url

  log INFO "Processing file: $filename"

  presign_result=$(execute_query "PRESIGN DOWNLOAD @${stage}/${filename}")
  # The URL is the third whitespace-separated column. Take it from the first
  # row that has one, so blank or extra rows cannot produce a multi-line URL.
  presign_url=$(awk 'NF >= 3 { print $3; exit }' <<<"$presign_result")

  if [[ -z "$presign_url" ]]; then
    log ERROR "Empty presigned URL"
    return 1
  fi
  if [[ ! "$presign_url" =~ ^https?:// ]]; then
    log ERROR "Invalid URL format"
    return 1
  fi

  log DEBUG "Downloading from: $presign_url"
  if curl -L -s -S -f -o "$target" "$presign_url"; then
    log INFO "Downloaded: $filename ($(du -h "$target" | cut -f1))"
    return 0
  fi

  log ERROR "Download failed"
  rm -f -- "$target" 2>/dev/null
  return 1
}
| 181 | + |
#######################################
# Pack every regular file below the given directories into
# data_<date>.tar.gz (created in the current working directory), verify the
# archive can be listed, then delete the files that were packed.
# Arguments:
#   $1  - date string used in the archive name
#   $2… - directories to archive (at least one)
# Outputs:
#   INFO/DEBUG/ERROR log lines; warnings and tar's error output on stderr.
# Returns:
#   0 on success; 1 when no directory was given, a directory is missing, a
#   temporary file cannot be created, or tar fails to write or list the
#   archive (the incomplete archive is removed).
#######################################
create_archive() {
  local date_str="$1"
  shift
  local dirs=("$@")
  local archive_name="data_${date_str}.tar.gz"
  local missing_dirs=()
  local deleted_files=0
  local dir file temp_log file_list

  # With no paths `find` would scan the current directory, and everything it
  # listed would be deleted after archiving.
  if (( ${#dirs[@]} == 0 )); then
    log ERROR "No directories to archive"
    return 1
  fi

  for dir in "${dirs[@]}"; do
    if [[ ! -d "$dir" ]]; then
      missing_dirs+=("$dir")
    elif [[ ! -r "$dir" ]]; then
      # Written directly so the warning shows at every verbosity level.
      printf '[WARN] Directory exists but not readable: %s\n' "$dir" >&2
    fi
  done

  if (( ${#missing_dirs[@]} > 0 )); then
    log ERROR "Missing directories: ${missing_dirs[*]}"
    return 1
  fi

  # Temp files are created only after the early returns above, and removed on
  # every path below.
  if ! temp_log=$(mktemp); then
    log ERROR "Cannot create temporary file"
    return 1
  fi
  if ! file_list=$(mktemp); then
    log ERROR "Cannot create temporary file"
    rm -f -- "$temp_log"
    return 1
  fi

  find "${dirs[@]}" -type f -print >"$file_list" 2>/dev/null

  log INFO "Creating archive $archive_name from ${#dirs[@]} directories..."
  if ! tar -czf "$archive_name" --files-from="$file_list" 2>>"$temp_log"; then
    log ERROR "Create archive failure, for details:"
    cat "$temp_log" >&2
    rm -f -- "$file_list" "$temp_log" "$archive_name"
    return 1
  fi

  # Only delete the sources once the archive is known to be readable.
  if ! tar -tzf "$archive_name" &>/dev/null; then
    log ERROR "Create archive failure"
    rm -f -- "$file_list" "$temp_log" "$archive_name"
    return 1
  fi

  log INFO "Clean temp files..."
  while IFS= read -r file; do
    if rm -f -- "$file" 2>>"$temp_log"; then
      deleted_files=$((deleted_files + 1))
    else
      printf '[WARN] remove file failure: %s\n' "$file" >&2
    fi
  done <"$file_list"
  log DEBUG "Removed $deleted_files archived files"

  rm -f -- "$file_list" "$temp_log"
  return 0
}
| 235 | + |
#######################################
# Filter: print the first whitespace-separated column of every non-blank
# input line, with double quotes removed.
# Arguments:
#   none (reads stdin)
# Outputs:
#   One value per non-blank input line on stdout.
#######################################
extract_first_column() {
  # A real tab character is spliced into the sed script: `\t` in the
  # replacement part of s/// is not portable (POSIX leaves it unspecified).
  local tab=$'\t'

  # The sed expressions run in order on each line: collapse whitespace runs
  # to one tab, trim both ends, drop double quotes.
  sed -e "s/[[:space:]]\{1,\}/${tab}/g" \
      -e 's/^[[:space:]]*//' \
      -e 's/[[:space:]]*$//' \
      -e 's/"//g' \
    | awk -F '\t' 'NF>0 {print $1}'
}
| 243 | + |
#######################################
# Entry point: export each data set into a temporary stage, download the
# staged files into its own sub-directory of OUTPUT_DIR, print a summary and
# pack everything into an archive.
# Globals:
#   OUTPUT_DIR, FORMATTED_DATE (read; set by parse_arguments)
# Arguments:
#   "$@" - the script's command-line arguments
# Outputs:
#   A download summary and the final status line on stdout; log lines on
#   stderr.
# Exits:
#   0 when at least one file was downloaded and the archive was created;
#   1 when the output directory cannot be created, a stage listing is empty,
#   nothing was downloaded, or archiving failed. A failing query ends the
#   script from inside execute_query.
#######################################
main() {
  # Must match the default stage name used by download_file.
  local -r stage="a5c7667401c0c728c2ef9703bdaea66d9ae2d906"
  local -a labels subdirs sources archive_dirs=()
  local total=0
  local success=0
  local i file_list filename target_dir

  parse_arguments "$@"
  validate_arguments
  build_base_command

  if ! mkdir -p "$OUTPUT_DIR"; then
    log ERROR "Cannot create output directory: $OUTPUT_DIR"
    exit 1
  fi
  log INFO "Output directory: $OUTPUT_DIR"

  # One entry per data set, same index in all three arrays. Filled in only
  # now because the history queries embed FORMATTED_DATE.
  labels=(
    "Fetch columns info..."
    "Fetch Databend user functions..."
    "Fetch Databend query logs..."
    "Fetch Databend query raw logs..."
    "Fetch Databend query profile logs..."
  )
  subdirs=(
    "columns"
    "user_functions"
    "query_logs"
    "query_raw_logs"
    "query_profile_logs"
  )
  sources=(
    "system.columns"
    "(SELECT * FROM system.user_functions)"
    "(SELECT * FROM system_history.query_history WHERE event_date = '$FORMATTED_DATE')"
    "(SELECT * FROM system_history.log_history WHERE to_date(timestamp) = '$FORMATTED_DATE')"
    "(SELECT * FROM system_history.profile_history WHERE to_date(timestamp) = '$FORMATTED_DATE')"
  )

  execute_query "DROP STAGE IF EXISTS ${stage};"
  execute_query "CREATE STAGE ${stage};"

  for i in "${!subdirs[@]}"; do
    log INFO "${labels[i]}"
    # The stage is reused: clear the previous data set's files first.
    if (( i > 0 )); then
      execute_query "REMOVE @${stage};"
    fi
    execute_query "COPY INTO @${stage} FROM ${sources[i]};"

    file_list=$(execute_query "list @${stage};" | awk '{print $1}')
    if [[ -z "$file_list" ]]; then
      log ERROR "No files found"
      exit 1
    fi

    target_dir="${OUTPUT_DIR}/${subdirs[i]}"
    mkdir -p "$target_dir"
    archive_dirs+=("$target_dir")

    while IFS= read -r filename; do
      # A blank row in the listing is not a file.
      [[ -n "$filename" ]] || continue
      total=$((total + 1))
      if download_file "$filename" "$target_dir"; then
        success=$((success + 1))
      fi
    done <<<"$file_list"
  done

  # Best effort: do not leave the exported data in the stage. Run in a
  # subshell because execute_query exits on failure, and a failed cleanup
  # must not stop the archive from being written.
  ( execute_query "DROP STAGE IF EXISTS ${stage};" ) || true

  echo "Summary:"
  echo "Files processed: $total"
  echo "Successfully downloaded: $success"
  echo "Failed: $((total - success))"

  if (( success == 0 )); then
    exit 1
  fi
  if ! create_archive "$FORMATTED_DATE" "${archive_dirs[@]}"; then
    exit 1
  fi
  echo "Operation completed successfully"
  exit 0
}
| 342 | + |
| 343 | +main "$@" |
0 commit comments