Skip to content

Commit 215aa17

Browse files
zhang2014 and BohuTANG authored
chore(query): add fetch_log.sh script for self host (#18248)
* chore(query): add fetch_log.sh script for self host
* chore(query): add fetch_log.sh script for self host
* chore(query): add fetch_log.sh script for self host
* Update fetch_log.sh: add -h
* Update fetch_log.sh

Co-authored-by: Bohu <overred.shuttler@gmail.com>
1 parent 2db11df commit 215aa17

File tree

1 file changed

+343
-0
lines changed

1 file changed

+343
-0
lines changed

scripts/selfhost/fetch_log.sh

Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
#!/bin/bash

# ---- Defaults ---------------------------------------------------------------
# Directory used when --output_dir is not given on the command line.
DEFAULT_OUTPUT_DIR='.'
# Log verbosity: 0=normal, 1=verbose (-v), 2=debug (-vv).
VERBOSITY=0

# ---- Run-time state (populated by parse_arguments) --------------------------
DSN=''            # connection string, from --dsn or BENDSQL_DSN
DATE=''           # requested date exactly as typed (YYYYMMDD)
OUTPUT_DIR=''     # resolved output directory
FORMATTED_DATE='' # DATE rewritten as YYYY-MM-DD by format_date
12+
13+
# Logging functions

#######################################
# Write a level-prefixed message to stderr, filtered by verbosity.
# Globals:
#   VERBOSITY (read) - 0 shows WARN/ERROR, 1 adds INFO, 2 adds DEBUG
# Arguments:
#   $1 - level: DEBUG, INFO, WARN or ERROR (any other level is ignored)
#   $2 - message text
# Outputs:
#   "[LEVEL] message" on stderr when the level passes the filter
# Returns:
#   Always 0, so a filtered-out message never makes the caller look failed.
#######################################
log() {
  local level="$1"
  local message="$2"
  local threshold

  case "$level" in
    DEBUG) threshold=2 ;;
    INFO) threshold=1 ;;
    # Warnings and errors are shown at every verbosity level.
    WARN | ERROR) threshold=0 ;;
    *) return 0 ;;
  esac

  if [[ "${VERBOSITY:-0}" -ge "$threshold" ]]; then
    printf '[%s] %s\n' "$level" "$message" >&2
  fi
  return 0
}
30+
31+
# Show help information

#######################################
# Print the usage text to stdout.
# The script's invocation name ($0) is substituted into the usage line and
# into every example command.
# Arguments:
#   None
# Outputs:
#   Usage, options, arguments, environment variables and examples on stdout
#######################################
show_help() {
  local self="$0"

  cat <<USAGE
Usage: ${self} [OPTIONS] YYYYMMDD

Download Databend system data for the specified date.

OPTIONS:
--dsn DSN Database connection string (overrides BENDSQL_DSN env var)
--output_dir PATH Output directory (default: current directory)
-v Verbose output
-vv Debug output (very verbose)
-h, --help Show this help message

ARGUMENTS:
YYYYMMDD Date in YYYYMMDD format (e.g., 20250701)

ENVIRONMENT VARIABLES:
BENDSQL_DSN Default database connection string

EXAMPLES:
# Use environment variable for DSN
export BENDSQL_DSN="http://username:password@localhost:8000/database?sslmode=enable"
${self} 20250701

# Override DSN with command line
${self} --dsn "http://username:password@localhost:8000/database?sslmode=enable" 20250701

# Specify custom output directory
${self} --output_dir /tmp/databend_export 20250701

# Enable verbose logging
${self} -v 20250701
USAGE
}
66+
67+
# Convert YYYYMMDD to YYYY-MM-DD

#######################################
# Re-shape a compact date by slicing it at fixed offsets.
# No validation happens here; the input is cut positionally
# (chars 0-3, 4-5 and 6-7) whatever it contains.
# Globals:
#   FORMATTED_DATE (written)
# Arguments:
#   $1 - date string, expected as YYYYMMDD
#######################################
format_date() {
  local raw="$1"
  local year="${raw:0:4}"
  local month="${raw:4:2}"
  local day="${raw:6:2}"

  FORMATTED_DATE="${year}-${month}-${day}"
  log DEBUG "Formatted date: $FORMATTED_DATE"
}
73+
74+
#######################################
# Parse command-line options and the positional date.
# Globals:
#   DSN, OUTPUT_DIR, VERBOSITY, DATE (written)
#   FORMATTED_DATE (written indirectly through format_date)
#   BENDSQL_DSN, DEFAULT_OUTPUT_DIR (read)
# Arguments:
#   The script's command-line arguments ("$@")
# Exits:
#   0 after printing help for -h/--help
#   1 on a missing option value, an unknown option, or a second positional
#     argument
#######################################
parse_arguments() {
  local usage_hint="Use -h or --help for usage information."

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h | --help)
        show_help
        exit 0
        ;;
      --dsn | --output_dir)
        # Without this guard "shift 2" fails when the option is the last
        # argument, nothing is shifted, and the loop never terminates.
        if [[ $# -lt 2 ]]; then
          log ERROR "Option $1 requires a value"
          echo "$usage_hint" >&2
          exit 1
        fi
        if [[ "$1" == "--dsn" ]]; then
          DSN="$2"
        else
          OUTPUT_DIR="$2"
        fi
        shift 2
        ;;
      -v)
        VERBOSITY=1
        shift
        ;;
      -vv)
        VERBOSITY=2
        shift
        ;;
      -*)
        # Report mistyped options as such instead of treating them as the date.
        log ERROR "Unknown option: $1"
        echo "$usage_hint" >&2
        exit 1
        ;;
      *)
        if [[ -z "$DATE" ]]; then
          DATE="$1"
          shift
        else
          log ERROR "Unexpected argument: $1"
          echo "$usage_hint" >&2
          exit 1
        fi
        ;;
    esac
  done

  # Set DSN from environment variable if not provided via command line
  if [[ -z "$DSN" && -n "${BENDSQL_DSN:-}" ]]; then
    DSN="$BENDSQL_DSN"
    log DEBUG "Using DSN from environment variable BENDSQL_DSN"
  fi

  OUTPUT_DIR="${OUTPUT_DIR:-$DEFAULT_OUTPUT_DIR}"

  if [[ -n "$DATE" ]]; then
    format_date "$DATE"
  fi
}
122+
123+
#######################################
# Check that the parsed arguments are usable before any query is sent.
# Globals:
#   DATE (read) - must be eight digits forming a plausible YYYYMMDD
#   DSN  (read) - must be non-empty
# Exits:
#   1 with an error message when a check fails
# Returns:
#   0 when every check passes
#######################################
validate_arguments() {
  local usage_hint="Use -h or --help for usage information."
  local month day

  if [[ -z "$DATE" ]]; then
    log ERROR "Missing required date parameter"
    echo "$usage_hint" >&2
    exit 1
  fi

  if [[ ! "$DATE" =~ ^[0-9]{8}$ ]]; then
    log ERROR "Invalid date format: $DATE (expected YYYYMMDD)"
    echo "$usage_hint" >&2
    exit 1
  fi

  # Eight digits alone also admits values such as 20251340, so range-check
  # month and day. The 10# prefix keeps "08"/"09" from being read as octal.
  month=$((10#${DATE:4:2}))
  day=$((10#${DATE:6:2}))
  if ((month < 1 || month > 12 || day < 1 || day > 31)); then
    log ERROR "Invalid date: $DATE (month must be 01-12 and day 01-31)"
    echo "$usage_hint" >&2
    exit 1
  fi

  if [[ -z "$DSN" ]]; then
    log ERROR "No DSN provided. Set BENDSQL_DSN environment variable or use --dsn option."
    echo "$usage_hint" >&2
    exit 1
  fi

  return 0
}
142+
143+
#######################################
# Build the bendsql command prefix used for every query.
# BASE_CMD is a shell-quoted string that is later passed through eval, so the
# DSN is escaped with printf %q. Wrapping it in literal double quotes would
# let quotes, "$" or backticks inside the DSN be re-interpreted by eval.
# Globals:
#   DSN      (read)
#   BASE_CMD (written)
# Returns:
#   Always 0
#######################################
build_base_command() {
  local quoted_dsn

  BASE_CMD="bendsql"
  if [[ -n "$DSN" ]]; then
    printf -v quoted_dsn '%q' "$DSN"
    BASE_CMD+=" --dsn ${quoted_dsn}"
  fi

  log INFO "Using command: $BASE_CMD"
  return 0
}
148+
149+
#######################################
# Run one SQL statement through the prepared bendsql command.
# Globals:
#   BASE_CMD (read) - shell-quoted command prefix, evaluated with eval
# Arguments:
#   $1 - SQL text
# Outputs:
#   Whatever the command prints, on stdout
# Exits:
#   With the command's status when the query fails. Inside a command
#   substitution or pipeline this only ends that subshell.
# Returns:
#   0 on success
#######################################
execute_query() {
  local sql="$1"
  local retval=0

  log DEBUG "Executing: ${sql:0:60}..."

  # Only BASE_CMD is expanded before eval. The single-quoted tail reaches eval
  # verbatim, so "$sql" is expanded exactly once, as one argument, and quotes,
  # "$" or backticks inside the SQL are never executed by the shell.
  eval "$BASE_CMD"' --quote-style never --query="$sql"' || retval=$?

  if [[ $retval -ne 0 ]]; then
    log ERROR "Query failed"
    exit "$retval"
  fi
  return 0
}
158+
159+
#######################################
# Download one staged file through a presigned URL.
# Arguments:
#   $1 - file name inside the stage (may contain a sub-path)
#   $2 - local directory to write into
# Returns:
#   0 when the file was downloaded, 1 on any failure (empty name, no usable
#   URL, directory or transfer error). A partial download is removed.
#######################################
download_file() {
  local filename="$1"
  local download_dir="$2"
  local presign_result presign_url target

  if [[ -z "$filename" ]]; then
    log ERROR "Empty file name"
    return 1
  fi

  log INFO "Processing file: $filename"

  presign_result=$(execute_query "PRESIGN DOWNLOAD @a5c7667401c0c728c2ef9703bdaea66d9ae2d906/$filename")
  # NOTE(review): assumes the URL is the third whitespace-separated column of
  # the PRESIGN result - confirm against bendsql output. Only the first row
  # with at least three columns is used.
  presign_url=$(awk 'NF >= 3 { print $3; exit }' <<< "$presign_result")

  [[ -z "$presign_url" ]] && { log ERROR "Empty presigned URL"; return 1; }
  [[ "$presign_url" =~ ^https?:// ]] || { log ERROR "Invalid URL format"; return 1; }

  target="$download_dir/$filename"
  # A staged name with a sub-path needs its parent directory to exist before
  # curl can open the output file.
  if ! mkdir -p -- "${target%/*}"; then
    log ERROR "Cannot create directory for: $target"
    return 1
  fi

  log DEBUG "Downloading from: $presign_url"
  if curl -L -s -S -f -o "$target" "$presign_url"; then
    log INFO "Downloaded: $filename ($(du -h "$target" | cut -f1))"
    return 0
  else
    log ERROR "Download failed"
    rm -f -- "$target" 2>/dev/null
    return 1
  fi
}
181+
182+
#######################################
# Pack the downloaded files into data_<date>.tar.gz and remove the originals.
# The archive name is relative, so it is created in the current working
# directory. Files are deleted only after the archive was written and could
# be listed back.
# Arguments:
#   $1     - date string used in the archive name
#   $2...  - directories whose regular files are archived
# Returns:
#   0 on success; 1 when no directory was given, a directory is missing, or
#   creating/verifying the archive failed (a broken archive is removed).
#######################################
create_archive() {
  local date_str="$1"
  shift
  local dirs=("$@")
  local archive_name="data_${date_str}.tar.gz"
  local missing_dirs=()
  local deleted_files=0
  local status=0
  local dir file file_list temp_log

  # With no start directory find would scan ".", and the cleanup loop below
  # would then delete every file it listed.
  if [[ ${#dirs[@]} -eq 0 ]]; then
    log ERROR "No directories to archive"
    return 1
  fi

  for dir in "${dirs[@]}"; do
    if [[ ! -d "$dir" ]]; then
      missing_dirs+=("$dir")
    elif [[ ! -r "$dir" ]]; then
      # Written straight to stderr so it is shown at every verbosity level.
      printf '[WARN] %s\n' "Directory exists but not readable: $dir" >&2
    fi
  done

  if [[ ${#missing_dirs[@]} -gt 0 ]]; then
    log ERROR "Missing directories: ${missing_dirs[*]}"
    return 1
  fi

  # Temp files are created only after the early returns above, and every path
  # below falls through to the single cleanup at the end.
  file_list=$(mktemp) || {
    log ERROR "Cannot create temporary file"
    return 1
  }
  temp_log=$(mktemp) || {
    rm -f -- "$file_list"
    log ERROR "Cannot create temporary file"
    return 1
  }

  find "${dirs[@]}" -type f -print > "$file_list" 2>/dev/null

  log INFO "Creating archive $archive_name from ${#dirs[@]} directories..."
  if ! tar -czf "$archive_name" --files-from="$file_list" 2>>"$temp_log"; then
    log ERROR "Create archive failure, for details:"
    cat "$temp_log" >&2
    rm -f -- "$archive_name"
    status=1
  elif ! tar -tzf "$archive_name" &>/dev/null; then
    # The archive was written but cannot be listed back: treat it as corrupt.
    log ERROR "Create archive failure"
    rm -f -- "$archive_name"
    status=1
  else
    log INFO "Clean temp files..."
    while IFS= read -r file; do
      if rm -f -- "$file" 2>>"$temp_log"; then
        deleted_files=$((deleted_files + 1))
      else
        printf '[WARN] %s\n' "remove file failure: $file" >&2
      fi
    done < "$file_list"
    log DEBUG "Removed $deleted_files archived files"
  fi

  rm -f -- "$file_list" "$temp_log"
  return "$status"
}
235+
236+
#######################################
# Filter: print the first whitespace-separated column of each stdin line.
# Runs of whitespace become a single tab, leading and trailing whitespace is
# trimmed, double quotes are removed, and lines left empty are dropped.
# Inputs:
#   stdin  - text lines
# Outputs:
#   stdout - first column of every non-empty line
#######################################
extract_first_column() {
  # "\t" inside a sed replacement is a GNU extension (BSD sed emits a literal
  # "t"), so a real tab character is spliced into the expression instead.
  local tab=$'\t'

  sed -e "s/[[:space:]]\{1,\}/${tab}/g" \
    -e 's/^[[:space:]]*//' \
    -e 's/[[:space:]]*$//' \
    -e 's/"//g' |
    awk -F '\t' 'NF>0 {print $1}'
}
243+
244+
#######################################
# Helper: run one COPY INTO the scratch stage, then download every file the
# stage lists.
# Globals:
#   total, success (updated; these are main's counters, reached through
#   bash dynamic scoping)
# Arguments:
#   $1 - stage name (without the leading "@")
#   $2 - COPY INTO statement that fills the stage
#   $3 - local directory for the downloaded files
# Exits:
#   1 when the stage lists no files after the copy.
#   NOTE(review): this aborts the whole run when a source table is simply
#   empty - kept as in the original flow; confirm that is intended.
#######################################
_fetch_stage_files() {
  local stage="$1"
  local copy_sql="$2"
  local dest_dir="$3"
  local listing name

  execute_query "$copy_sql"

  listing=$(execute_query "list @${stage};" | awk '{print $1}')
  if [[ -z "$listing" ]]; then
    log ERROR "No files found"
    exit 1
  fi

  mkdir -p "$dest_dir"
  while IFS= read -r name; do
    # Blank rows in the listing are not files; do not count them as failures.
    [[ -n "$name" ]] || continue
    total=$((total + 1))
    if download_file "$name" "$dest_dir"; then
      success=$((success + 1))
    fi
  done <<< "$listing"
}

#######################################
# Entry point: export system.columns, system.user_functions and the
# query/log/profile history for the requested date through a scratch stage,
# download the staged files and pack them into one archive.
# Globals:
#   OUTPUT_DIR, FORMATTED_DATE (read; set by parse_arguments)
# Arguments:
#   The script's command-line arguments ("$@")
# Outputs:
#   A download summary on stdout
# Exits:
#   0 when at least one file was downloaded and the archive was created,
#   1 otherwise
#######################################
main() {
  # Fixed scratch stage name; it is dropped and re-created on every run.
  local stage="a5c7667401c0c728c2ef9703bdaea66d9ae2d906"
  local total=0
  local success=0

  parse_arguments "$@"
  validate_arguments
  build_base_command

  if ! mkdir -p "$OUTPUT_DIR"; then
    log ERROR "Cannot create output directory: $OUTPUT_DIR"
    exit 1
  fi
  log INFO "Output directory: $OUTPUT_DIR"

  execute_query "DROP STAGE IF EXISTS ${stage};"
  execute_query "CREATE STAGE ${stage};"

  log INFO "Fetch columns info..."
  _fetch_stage_files "$stage" \
    "COPY INTO @${stage} FROM system.columns;" \
    "$OUTPUT_DIR/columns"

  log INFO "Fetch Databend user functions..."
  execute_query "REMOVE @${stage};"
  _fetch_stage_files "$stage" \
    "COPY INTO @${stage} FROM (SELECT * FROM system.user_functions);" \
    "$OUTPUT_DIR/user_functions"

  log INFO "Fetch Databend query logs..."
  execute_query "REMOVE @${stage};"
  _fetch_stage_files "$stage" \
    "COPY INTO @${stage} FROM (SELECT * FROM system_history.query_history WHERE event_date = '$FORMATTED_DATE');" \
    "$OUTPUT_DIR/query_logs"

  log INFO "Fetch Databend query raw logs..."
  execute_query "REMOVE @${stage};"
  _fetch_stage_files "$stage" \
    "COPY INTO @${stage} FROM (SELECT * FROM system_history.log_history WHERE to_date(timestamp) = '$FORMATTED_DATE');" \
    "$OUTPUT_DIR/query_raw_logs"

  log INFO "Fetch Databend query profile logs..."
  execute_query "REMOVE @${stage};"
  _fetch_stage_files "$stage" \
    "COPY INTO @${stage} FROM (SELECT * FROM system_history.profile_history WHERE to_date(timestamp) = '$FORMATTED_DATE');" \
    "$OUTPUT_DIR/query_profile_logs"

  # Everything is downloaded, so the scratch stage is no longer needed.
  # Run in a subshell and ignore the status: a failed cleanup must not throw
  # away the files that were already fetched.
  (execute_query "DROP STAGE IF EXISTS ${stage};")

  echo "Summary:"
  echo "Files processed: $total"
  echo "Successfully downloaded: $success"
  echo "Failed: $((total - success))"

  if [[ $success -gt 0 ]]; then
    if create_archive "$FORMATTED_DATE" "$OUTPUT_DIR/columns" "$OUTPUT_DIR/user_functions" "$OUTPUT_DIR/query_logs" "$OUTPUT_DIR/query_raw_logs" "$OUTPUT_DIR/query_profile_logs"; then
      echo "Operation completed successfully"
      exit 0
    else
      exit 1
    fi
  else
    exit 1
  fi
}
342+
343+
# Script entry point: hand every command-line argument to main unchanged.
main "$@"

0 commit comments

Comments
 (0)