|
| 1 | +package sqlite3 |
| 2 | + |
| 3 | +// https://www.sqlite.org/fileformat.html |
| 4 | +// https://sqlite.org/schematab.html |
| 5 | + |
| 6 | +// TODO: page overflow |
| 7 | +// TODO: format version |
| 8 | +// TODO: text encoding |
| 9 | +// TODO: table/column names |
| 10 | +// TODO: assert version and schema version? |
| 11 | +// TODO: ptrmap |
| 12 | +// TODO: how to represent NULL serials |
| 13 | + |
| 14 | +// CREATE TABLE sqlite_schema( |
| 15 | +// type text, |
| 16 | +// name text, |
| 17 | +// tbl_name text, |
| 18 | +// rootpage integer, |
| 19 | +// sql text |
| 20 | +// ); |
| 21 | +// > A table with the name "sqlite_sequence" that is used to keep track of the maximum historical INTEGER PRIMARY KEY for a table using AUTOINCREMENT. |
| 22 | +// CREATE TABLE sqlite_sequence(name,seq); |
| 23 | +// > Tables with names of the form "sqlite_statN" where N is an integer. Such tables store database statistics gathered by the ANALYZE command and used by the query planner to help determine the best algorithm to use for each query. |
| 24 | +// CREATE TABLE sqlite_stat1(tbl,idx,stat); |
| 25 | +// Only if compiled with SQLITE_ENABLE_STAT2: |
| 26 | +// CREATE TABLE sqlite_stat2(tbl,idx,sampleno,sample); |
| 27 | +// Only if compiled with SQLITE_ENABLE_STAT3: |
| 28 | +// CREATE TABLE sqlite_stat3(tbl,idx,nEq,nLt,nDLt,sample); |
| 29 | +// Only if compiled with SQLITE_ENABLE_STAT4: |
| 30 | +// CREATE TABLE sqlite_stat4(tbl,idx,nEq,nLt,nDLt,sample); |
| 31 | +// TODO: sqlite_autoindex_TABLE_N index |
| 32 | + |
| 33 | +import ( |
| 34 | + "embed" |
| 35 | + |
| 36 | + "github.com/wader/fq/format" |
| 37 | + "github.com/wader/fq/format/registry" |
| 38 | + "github.com/wader/fq/internal/num" |
| 39 | + "github.com/wader/fq/pkg/decode" |
| 40 | + "github.com/wader/fq/pkg/scalar" |
| 41 | +) |
| 42 | + |
| 43 | +//go:embed *.jq |
| 44 | +var sqlite3FS embed.FS |
| 45 | + |
| 46 | +func init() { |
| 47 | + registry.MustRegister(decode.Format{ |
| 48 | + Name: format.SQLITE3, |
| 49 | + Description: "SQLite v3 database", |
| 50 | + Groups: []string{format.PROBE}, |
| 51 | + DecodeFn: sqlite3Decode, |
| 52 | + Files: sqlite3FS, |
| 53 | + }) |
| 54 | +} |
| 55 | + |
| 56 | +const ( |
| 57 | + bTreeIndexInterior = 0x02 |
| 58 | + bTreeTableInterior = 0x05 |
| 59 | + bTreeIndexLeaf = 0x0a |
| 60 | + bTreeTableLeaf = 0x0d |
| 61 | +) |
| 62 | + |
| 63 | +var bTreeTypeMap = scalar.UToScalar{ |
| 64 | + bTreeIndexInterior: scalar.S{Sym: "index_interior", Description: "Index interior b-tree page"}, |
| 65 | + bTreeTableInterior: scalar.S{Sym: "table_interior", Description: "Table interior b-tree page"}, |
| 66 | + bTreeIndexLeaf: scalar.S{Sym: "index_leaf", Description: "Index leaf b-tree page"}, |
| 67 | + bTreeTableLeaf: scalar.S{Sym: "table_leaf", Description: "Table leaf b-tree page"}, |
| 68 | +} |
| 69 | + |
| 70 | +const ( |
| 71 | + textEncodingUTF8 = 1 |
| 72 | + textEncodingUTF16LE = 2 |
| 73 | + textEncodingUTF16BE = 3 |
| 74 | +) |
| 75 | + |
| 76 | +var textEncodingMap = scalar.UToSymStr{ |
| 77 | + textEncodingUTF8: "utf8", |
| 78 | + textEncodingUTF16LE: "utf16le", |
| 79 | + textEncodingUTF16BE: "utf16be", |
| 80 | +} |
| 81 | + |
| 82 | +var versionMap = scalar.UToSymStr{ |
| 83 | + 1: "legacy", |
| 84 | + 2: "wal", |
| 85 | +} |
| 86 | + |
| 87 | +// TODO: all bits if nine bytes? |
| 88 | +// TODO: two complement on bit read count |
| 89 | +func varintDecode(d *decode.D) int64 { |
| 90 | + var n uint64 |
| 91 | + for i := 0; i < 9; i++ { |
| 92 | + v := d.U8() |
| 93 | + n = n<<7 | v&0b0111_1111 |
| 94 | + if v&0b1000_0000 == 0 { |
| 95 | + break |
| 96 | + } |
| 97 | + } |
| 98 | + return num.TwosComplement(64, n) |
| 99 | +} |
| 100 | + |
| 101 | +func sqlite3DecodeSerialType(d *decode.D, typ int64) { |
| 102 | + switch typ { |
| 103 | + case 0: |
| 104 | + d.FieldValueStr("value", "NULL", scalar.Description("null")) |
| 105 | + case 1: |
| 106 | + d.FieldS8("value", scalar.Description("8-bit integer")) |
| 107 | + case 2: |
| 108 | + d.FieldS16("value", scalar.Description("16-bit integer")) |
| 109 | + case 3: |
| 110 | + d.FieldS24("value", scalar.Description("24-bit integer")) |
| 111 | + case 4: |
| 112 | + d.FieldS32("value", scalar.Description("32-bit integer")) |
| 113 | + case 5: |
| 114 | + d.FieldS48("value", scalar.Description("48-bit integer")) |
| 115 | + case 6: |
| 116 | + d.FieldS64("value", scalar.Description("64-bit integer")) |
| 117 | + case 7: |
| 118 | + d.FieldF64("value", scalar.Description("64-bit float")) |
| 119 | + case 8: |
| 120 | + d.FieldValueU("value", 0, scalar.Description("constant 0")) |
| 121 | + case 9: |
| 122 | + d.FieldValueU("value", 1, scalar.Description("constant 1")) |
| 123 | + case 10, 11: |
| 124 | + default: |
| 125 | + if typ%2 == 0 { |
| 126 | + // N => 12 and even: (N-12)/2 bytes blob. |
| 127 | + d.FieldRawLen("value", (typ-12)/2*8, scalar.Description("blob")) |
| 128 | + } else { |
| 129 | + // N => 13 and odd: (N-13)/2 bytes text |
| 130 | + d.FieldUTF8("value", int(typ-13)/2, scalar.Description("text")) |
| 131 | + } |
| 132 | + } |
| 133 | +} |
| 134 | + |
| 135 | +func sqlite3CellFreeblockDecode(d *decode.D) uint64 { |
| 136 | + nextOffset := d.FieldU16("next_offset") |
| 137 | + if nextOffset == 0 { |
| 138 | + return 0 |
| 139 | + } |
| 140 | + // TODO: "header" is size bytes or offset+size? seems to be just size |
| 141 | + // "size of the freeblock in bytes, including the 4-byte header" |
| 142 | + size := d.FieldU16("size") |
| 143 | + d.FieldRawLen("space", int64(size-4)*8) |
| 144 | + return nextOffset |
| 145 | +} |
| 146 | + |
| 147 | +func sqlite3CellPayloadDecode(d *decode.D) { |
| 148 | + lengthStart := d.Pos() |
| 149 | + length := d.FieldSFn("length", varintDecode) |
| 150 | + lengtbBits := d.Pos() - lengthStart |
| 151 | + var serialTypes []int64 |
| 152 | + d.LenFn((length)*8-lengtbBits, func(d *decode.D) { |
| 153 | + d.FieldArray("serials", func(d *decode.D) { |
| 154 | + for !d.End() { |
| 155 | + serialTypes = append(serialTypes, d.FieldSFn("serial", varintDecode)) |
| 156 | + } |
| 157 | + }) |
| 158 | + }) |
| 159 | + d.FieldArray("contents", func(d *decode.D) { |
| 160 | + for _, s := range serialTypes { |
| 161 | + sqlite3DecodeSerialType(d, s) |
| 162 | + } |
| 163 | + }) |
| 164 | +} |
| 165 | + |
| 166 | +func sqlite3Decode(d *decode.D, in interface{}) interface{} { |
| 167 | + var pageSizeS *scalar.S |
| 168 | + var databaseSizePages uint64 |
| 169 | + |
| 170 | + d.FieldStruct("header", func(d *decode.D) { |
| 171 | + d.FieldUTF8("magic", 16, d.AssertStr("SQLite format 3\x00")) |
| 172 | + pageSizeS = d.FieldScalarU16("page_size", scalar.UToSymU{1: 65536}) // in bytes. Must be a power of two between 512 and 32768 inclusive, or the value 1 representing a page size of 65536. |
| 173 | + d.FieldU8("write_version", versionMap) // 1 for legacy; 2 for WAL. |
| 174 | + d.FieldU8("read_version", versionMap) // . 1 for legacy; 2 for WAL. |
| 175 | + d.FieldU8("unused_space") // at the end of each page. Usually 0. |
| 176 | + d.FieldU8("maximum_embedded_payload_fraction") // . Must be 64. |
| 177 | + d.FieldU8("minimum_embedded_payload_fraction") // . Must be 32. |
| 178 | + d.FieldU8("leaf_payload_fraction") // . Must be 32. |
| 179 | + d.FieldU32("file_change_counter") // |
| 180 | + databaseSizePages = d.FieldU32("database_size_pages") // . The "in-header database size". |
| 181 | + d.FieldU32("page_number_freelist") // of the first freelist trunk page. |
| 182 | + d.FieldU32("total_number_freelist") // pages. |
| 183 | + d.FieldU32("schema_cookie") // . |
| 184 | + d.FieldU32("schema_format_number") // . Supported schema formats are 1, 2, 3, and 4. |
| 185 | + d.FieldU32("default_page_cache_size") // . |
| 186 | + d.FieldU32("page_number_largest_root_btree") // page when in auto-vacuum or incremental-vacuum modes, or zero otherwise. |
| 187 | + d.FieldU32("text_encoding", textEncodingMap) |
| 188 | + d.FieldU32("user_version") // " as read and set by the user_version pragma. |
| 189 | + d.FieldU32("incremental_vacuum_mode") // False (zero) otherwise. |
| 190 | + d.FieldU32("application_id") // " set by PRAGMA application_id. |
| 191 | + d.FieldRawLen("reserved", 160, d.BitBufIsZero()) // for expansion. Must be zero. |
| 192 | + d.FieldU32("version_valid_for") // number. |
| 193 | + d.FieldU32("sqlite_version_number") // |
| 194 | + }) |
| 195 | + |
| 196 | + // TODO: nicer API for fallback? |
| 197 | + pageSize := pageSizeS.ActualU() |
| 198 | + if pageSizeS.Sym != nil { |
| 199 | + pageSize = pageSizeS.SymU() |
| 200 | + } |
| 201 | + |
| 202 | + d.FieldArray("pages", func(d *decode.D) { |
| 203 | + for i := uint64(0); i < databaseSizePages; i++ { |
| 204 | + pageOffset := int64(pageSize) * int64(i) |
| 205 | + d.SeekAbs(pageOffset * 8) |
| 206 | + // skip header for first page |
| 207 | + if i == 0 { |
| 208 | + d.SeekRel(100 * 8) |
| 209 | + } |
| 210 | + |
| 211 | + d.FieldStruct("page", func(d *decode.D) { |
| 212 | + typ := d.FieldU8("type", bTreeTypeMap) |
| 213 | + startFreeblocks := d.FieldU16("start_freeblocks") // The two-byte integer at offset 1 gives the start of the first freeblock on the page, or is zero if there are no freeblocks. |
| 214 | + pageCells := d.FieldU16("page_cells") // The two-byte integer at offset 3 gives the number of cells on the page. |
| 215 | + d.FieldU16("cell_start") // sThe two-byte integer at offset 5 designates the start of the cell content area. A zero value for this integer is interpreted as 65536. |
| 216 | + d.FieldU8("cell_fragments") // The one-byte integer at offset 7 gives the number of fragmented free bytes within the cell content area. |
| 217 | + switch typ { |
| 218 | + case bTreeIndexInterior, |
| 219 | + bTreeTableInterior: |
| 220 | + d.FieldU32("right_pointer") // The four-byte page number at offset 8 is the right-most pointer. This value appears in the header of interior b-tree pages only and is omitted from all other pages. |
| 221 | + } |
| 222 | + var cellPointers []uint64 |
| 223 | + d.FieldArray("cells_pointers", func(d *decode.D) { |
| 224 | + for i := uint64(0); i < pageCells; i++ { |
| 225 | + cellPointers = append(cellPointers, d.FieldU16("pointer")) |
| 226 | + } |
| 227 | + }) |
| 228 | + if startFreeblocks != 0 { |
| 229 | + d.FieldArray("freeblocks", func(d *decode.D) { |
| 230 | + nextOffset := startFreeblocks |
| 231 | + for nextOffset != 0 { |
| 232 | + d.SeekAbs((pageOffset + int64(nextOffset)) * 8) |
| 233 | + d.FieldStruct("freeblock", func(d *decode.D) { |
| 234 | + nextOffset = sqlite3CellFreeblockDecode(d) |
| 235 | + }) |
| 236 | + } |
| 237 | + }) |
| 238 | + } |
| 239 | + d.FieldArray("cells", func(d *decode.D) { |
| 240 | + for _, p := range cellPointers { |
| 241 | + d.FieldStruct("cell", func(d *decode.D) { |
| 242 | + // TODO: SeekAbs with fn later? |
| 243 | + d.SeekAbs((pageOffset + int64(p)) * 8) |
| 244 | + switch typ { |
| 245 | + case bTreeIndexInterior: |
| 246 | + d.FieldU32("left_child") |
| 247 | + payLoadLen := d.FieldSFn("payload_len", varintDecode) |
| 248 | + d.LenFn(payLoadLen*8, func(d *decode.D) { |
| 249 | + d.FieldStruct("payload", sqlite3CellPayloadDecode) |
| 250 | + }) |
| 251 | + case bTreeTableInterior: |
| 252 | + d.FieldU32("left_child") |
| 253 | + d.FieldSFn("rowid", varintDecode) |
| 254 | + case bTreeIndexLeaf: |
| 255 | + payLoadLen := d.FieldSFn("payload_len", varintDecode) |
| 256 | + d.LenFn(payLoadLen*8, func(d *decode.D) { |
| 257 | + d.FieldStruct("payload", sqlite3CellPayloadDecode) |
| 258 | + }) |
| 259 | + case bTreeTableLeaf: |
| 260 | + payLoadLen := d.FieldSFn("payload_len", varintDecode) |
| 261 | + d.FieldSFn("rowid", varintDecode) |
| 262 | + d.LenFn(payLoadLen*8, func(d *decode.D) { |
| 263 | + d.FieldStruct("payload", sqlite3CellPayloadDecode) |
| 264 | + }) |
| 265 | + } |
| 266 | + }) |
| 267 | + } |
| 268 | + }) |
| 269 | + }) |
| 270 | + } |
| 271 | + }) |
| 272 | + |
| 273 | + return nil |
| 274 | +} |
0 commit comments