From 19c0a40a40859e7b0b52b8e37eaef8d34208845d Mon Sep 17 00:00:00 2001 From: Charlie Sharpsteen Date: Thu, 13 Feb 2025 10:35:49 -0500 Subject: [PATCH] (PE-40043) Remove faulty index bloat statistics This commit removes index bloat estimates added in SUP-2115 from `psql_metrics`. Collection of this data can result in some amount of temporary files written to disk every 5 minutes due to the nested group and sort operations involved. The bloat values generated were also of questionable quality producing both "reasonable" numbers like 50% bloat and absolutely unusable numbers like -230% bloat. The table bloat estimate is retained as the query there is less complicated, operates directly on postgres page counts and fill percentages, and does not produce the same negative estimates that the index bloat query is generating. This partially reverts commit 7497949fe8b28f79eca1efa469f239123050ae9e. --- files/psql_metrics | 106 +-------------------------------------------- 1 file changed, 1 insertion(+), 105 deletions(-) diff --git a/files/psql_metrics b/files/psql_metrics index fe56e904..ee2fba08 100755 --- a/files/psql_metrics +++ b/files/psql_metrics @@ -540,9 +540,7 @@ SELECT json_object_agg( 'idx_tup_read', idx_tup_read, 'idx_tup_fetch', idx_tup_fetch, 'idx_blks_read', idx_blks_read, - 'idx_blks_hit', idx_blks_hit, - 'bloat_size', bloat_size, - 'bloat_percent', bloat_pct + 'idx_blks_hit', idx_blks_hit ) ) FROM @@ -553,108 +551,6 @@ JOIN pg_catalog.pg_stat_all_indexes AS s ON c.oid = s.indexrelid JOIN pg_catalog.pg_statio_all_indexes AS si ON c.oid = si.indexrelid -JOIN -( - SELECT nspname AS schemaname, tblname, idxname, bs*(relpages)::bigint AS real_size, - bs*(relpages-est_pages)::bigint AS extra_size, - 100 * (relpages-est_pages)::float / relpages AS extra_pct, - fillfactor, - idxoid, - CASE WHEN relpages > est_pages_ff - THEN bs*(relpages-est_pages_ff) - ELSE 0 - END AS bloat_size, - 100 * (relpages-est_pages_ff)::float / relpages AS bloat_pct, - is_na - -- , 100-(pst).avg_leaf_density AS pst_avg_bloat, est_pages, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples, relpages -- (DEBUG INFO) -FROM ( - SELECT coalesce(1 + - ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0 -- ItemIdData size + computed avg size of a tuple (nulldatahdrwidth) - ) AS est_pages, - coalesce(1 + - ceil(reltuples/floor((bs-pageopqdata-pagehdr)*fillfactor/(100*(4+nulldatahdrwidth)::float))), 0 - ) AS est_pages_ff, - bs, nspname, tblname, idxname, idxoid, relpages, fillfactor, is_na - -- , pgstatindex(idxoid) AS pst, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples -- (DEBUG INFO) - FROM ( - SELECT maxalign, bs, nspname, tblname, idxname, reltuples, relpages, idxoid, fillfactor, - ( index_tuple_hdr_bm + - maxalign - CASE -- Add padding to the index tuple header to align on MAXALIGN - WHEN index_tuple_hdr_bm%maxalign = 0 THEN maxalign - ELSE index_tuple_hdr_bm%maxalign - END - + nulldatawidth + maxalign - CASE -- Add padding to the data to align on MAXALIGN - WHEN nulldatawidth = 0 THEN 0 - WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign - ELSE nulldatawidth::integer%maxalign - END - )::numeric AS nulldatahdrwidth, pagehdr, pageopqdata, is_na - -- , index_tuple_hdr_bm, nulldatawidth -- (DEBUG INFO) - FROM ( - SELECT n.nspname, i.tblname, i.idxname, i.reltuples, i.relpages, - i.idxoid, i.fillfactor, current_setting('block_size')::numeric AS bs, - CASE -- MAXALIGN: 4 on 32bits, 8 on 64bits (and mingw32 ?) - WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' THEN 8 - ELSE 4 - END AS maxalign, - /* per page header, fixed size: 20 for 7.X, 24 for others */ - 24 AS pagehdr, - /* per page btree opaque data */ - 16 AS pageopqdata, - /* per tuple header: add IndexAttributeBitMapData if some cols are null-able */ - CASE WHEN max(coalesce(s.null_frac,0)) = 0 - THEN 8 -- IndexTupleData size - ELSE 8 + (( 32 + 8 - 1 ) / 8) -- IndexTupleData size + IndexAttributeBitMapData size ( max num filed per index + 8 - 1 /8) - END AS index_tuple_hdr_bm, - /* data len: we remove null values save space using it fractionnal part from stats */ - sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS nulldatawidth, - max( CASE WHEN i.atttypid = 'pg_catalog.name'::regtype THEN 1 ELSE 0 END ) > 0 AS is_na - FROM ( - SELECT ct.relname AS tblname, ct.relnamespace, ic.idxname, ic.attpos, ic.indkey, ic.indkey[ic.attpos], ic.reltuples, ic.relpages, ic.tbloid, ic.idxoid, ic.fillfactor, - coalesce(a1.attnum, a2.attnum) AS attnum, coalesce(a1.attname, a2.attname) AS attname, coalesce(a1.atttypid, a2.atttypid) AS atttypid, - CASE WHEN a1.attnum IS NULL - THEN ic.idxname - ELSE ct.relname - END AS attrelname - FROM ( - SELECT idxname, reltuples, relpages, tbloid, idxoid, fillfactor, indkey, - pg_catalog.generate_series(1,indnatts) AS attpos - FROM ( - SELECT ci.relname AS idxname, ci.reltuples, ci.relpages, i.indrelid AS tbloid, - i.indexrelid AS idxoid, - coalesce(substring( - array_to_string(ci.reloptions, ' ') - from 'fillfactor=([0-9]+)')::smallint, 90) AS fillfactor, - i.indnatts, - pg_catalog.string_to_array(pg_catalog.textin( - pg_catalog.int2vectorout(i.indkey)),' ')::int[] AS indkey - FROM pg_catalog.pg_index i - JOIN pg_catalog.pg_class ci ON ci.oid = i.indexrelid - WHERE ci.relam=(SELECT oid FROM pg_am WHERE amname = 'btree') - AND ci.relpages > 0 - ) AS idx_data - ) AS ic - JOIN pg_catalog.pg_class ct ON ct.oid = ic.tbloid - LEFT JOIN pg_catalog.pg_attribute a1 ON - ic.indkey[ic.attpos] <> 0 - AND a1.attrelid = ic.tbloid - AND a1.attnum = ic.indkey[ic.attpos] - LEFT JOIN pg_catalog.pg_attribute a2 ON - ic.indkey[ic.attpos] = 0 - AND a2.attrelid = ic.idxoid - AND a2.attnum = ic.attpos - ) i - JOIN pg_catalog.pg_namespace n ON n.oid = i.relnamespace - JOIN pg_catalog.pg_stats s ON s.schemaname = n.nspname - AND s.tablename = i.attrelname - AND s.attname = i.attname - GROUP BY 1,2,3,4,5,6,7,8,9,10,11 - ) AS rows_data_stats - ) AS rows_hdr_pdg_stats -) AS relation_stats -ORDER BY nspname, tblname, idxname -) a on -a.idxoid = s.indexrelid WHERE n.nspname NOT IN ('pg_catalog', 'information_schema') AND c.relkind = 'i'