6
6
Utilities for working directly with the wallet's BDB database file
7
7
8
8
This is specific to the configuration of BDB used in this project:
    - Outer database contains single subdatabase named 'main'
    - btree
    - btree internal, leaf and overflow pages

Each key-value pair is two entries in a btree leaf, each of which optionally refers to overflow
pages if the data doesn't fit into a single page. The first entry is the key, the one that follows
is the value. And so on. Note that the entry data is itself not in the correct order. Instead
entry offsets are stored in the correct order and those offsets are needed to then retrieve
the data itself. Note that this implementation currently only supports reading databases that
are in the same endianness as the host.

Page format can be found in BDB source code dbinc/db_page.h

`db_dump -da wallet.dat` is useful to see the data in a wallet.dat BDB file
25
23
"""
26
24
27
25
import struct
28
26
29
27
# Important constants
PAGE_HEADER_SIZE = 26  # number of bytes in the header at the start of each page
OUTER_META_PAGE = 0  # index of the outer database's metadata page

# Page type values
BTREE_INTERNAL = 3
BTREE_LEAF = 5
OVERFLOW_DATA = 7
BTREE_META = 9

# Record type values
RECORD_KEYDATA = 1
RECORD_OVERFLOW_DATA = 3

# Some magic numbers for sanity checking
BTREE_MAGIC = 0x053162
DB_VERSION = 9
SUBDATABASE_NAME = b'main'  # expected key of the single entry pair in the outer database's root page
45
+
46
+ # Deserializes an internal, leaf or overflow page into a dict.
47
+ # In addition to the common page header fields, the result contains an 'entries'
48
+ # array of dicts with the following fields, depending on the page type:
49
+ # internal page [BTREE_INTERNAL]:
50
+ # - 'page_num': referenced page number (used to find further pages to process)
51
+ # leaf page [BTREE_LEAF]:
52
+ # - 'record_type': record type, must be RECORD_KEYDATA or RECORD_OVERFLOW_DATA
53
+ # - 'data': binary data (key or value payload), if record type is RECORD_KEYDATA
54
+ # - 'page_num': referenced overflow page number, if record type is RECORD_OVERFLOW_DATA
55
+ # overflow page [OVERFLOW_DATA]:
56
+ # - 'data': binary data (part of key or value payload)
57
+ def dump_page (data ):
47
58
page_info = {}
48
59
page_header = data [0 :26 ]
49
60
_ , pgno , prev_pgno , next_pgno , entries , hf_offset , level , pg_type = struct .unpack ('QIIIHHBB' , page_header )
@@ -56,20 +67,35 @@ def dump_leaf_page(data):
56
67
page_info ['entry_offsets' ] = struct .unpack ('{}H' .format (entries ), data [26 :26 + entries * 2 ])
57
68
page_info ['entries' ] = []
58
69
59
- if pg_type == BTREE_INTERNAL :
60
- # Skip internal pages. These are the internal nodes of the btree and don't contain anything relevant to us
61
- return None
70
+ assert pg_type in (BTREE_INTERNAL , BTREE_LEAF , OVERFLOW_DATA )
62
71
63
- assert pg_type == BTREE_LEAF , 'A non-btree leaf page has been encountered while dumping leaves'
72
+ if pg_type == OVERFLOW_DATA :
73
+ assert entries == 1
74
+ page_info ['entries' ].append ({'data' : data [26 :26 + hf_offset ]})
75
+ return page_info
64
76
65
77
for i in range (0 , entries ):
78
+ entry = {}
66
79
offset = page_info ['entry_offsets' ][i ]
67
- entry = {'offset' : offset }
68
- page_data_header = data [offset :offset + 3 ]
69
- e_len , pg_type = struct .unpack ('HB' , page_data_header )
70
- entry ['len' ] = e_len
71
- entry ['pg_type' ] = pg_type
72
- entry ['data' ] = data [offset + 3 :offset + 3 + e_len ]
80
+ record_header = data [offset :offset + 3 ]
81
+ offset += 3
82
+ e_len , record_type = struct .unpack ('HB' , record_header )
83
+
84
+ if pg_type == BTREE_INTERNAL :
85
+ assert record_type == RECORD_KEYDATA
86
+ internal_record_data = data [offset :offset + 9 ]
87
+ _ , page_num , _ = struct .unpack ('=BII' , internal_record_data )
88
+ entry ['page_num' ] = page_num
89
+ elif pg_type == BTREE_LEAF :
90
+ assert record_type in (RECORD_KEYDATA , RECORD_OVERFLOW_DATA )
91
+ entry ['record_type' ] = record_type
92
+ if record_type == RECORD_KEYDATA :
93
+ entry ['data' ] = data [offset :offset + e_len ]
94
+ elif record_type == RECORD_OVERFLOW_DATA :
95
+ overflow_record_data = data [offset :offset + 9 ]
96
+ _ , page_num , _ = struct .unpack ('=BII' , overflow_record_data )
97
+ entry ['page_num' ] = page_num
98
+
73
99
page_info ['entries' ].append (entry )
74
100
75
101
return page_info
@@ -115,37 +141,70 @@ def dump_meta_page(page):
115
141
return metadata
116
142
117
143
# Given the dict from dump_page for a btree leaf page, get the key-value pairs and put them into a dict.
# 'pages' is the list of raw pages of the whole file, indexed by page number; it is needed to
# follow RECORD_OVERFLOW_DATA entries, whose payload is spread over a chain of overflow pages
# linked via 'next_pgno' (a next page number of 0 terminates the chain).
# Returns a dict mapping key bytes to value bytes. A key without a following value maps to b''.
def extract_kv_pairs(page_data, pages):
    out = {}
    last_key = None
    for i, entry in enumerate(page_data['entries']):
        if entry['record_type'] == RECORD_KEYDATA:
            # Payload is stored inline in the leaf page
            data = entry['data']
        elif entry['record_type'] == RECORD_OVERFLOW_DATA:
            # Payload lives in overflow pages: walk the chain and concatenate the pieces
            chunks = []
            next_page = entry['page_num']
            while next_page != 0:
                opage_info = dump_page(pages[next_page])
                chunks.append(opage_info['entries'][0]['data'])
                next_page = opage_info['next_pgno']
            data = b''.join(chunks)
        else:
            data = b''

        # By virtue of these all being pairs, even number entries are keys, and odd are values
        if i % 2 == 0:
            # Use the resolved payload as key: overflow entries carry no 'data' field,
            # so indexing the entry directly would fail for keys stored in overflow pages.
            out[data] = b''
            last_key = data
        else:
            out[last_key] = data
    return out
129
166
130
167
# Extract the key-value pairs of the BDB file given in filename
# The page size is read from the header of the first page, then the whole file is split into
# pages. Starting at the root page of the 'main' subdatabase, btree internal pages are followed
# down to the leaf pages, whose key-value pairs are merged into a single dict that is returned.
# Malformed files trip one of the sanity-check asserts.
def dump_bdb_kv(filename):
    # Read in the BDB file and start deserializing it
    pages = []
    with open(filename, 'rb') as f:
        # Determine pagesize first (stored at bytes 20..23 of the first page, host endianness)
        data = f.read(PAGE_HEADER_SIZE)
        pagesize = struct.unpack('I', data[20:24])[0]
        assert pagesize in (512, 1024, 2048, 4096, 8192, 16384, 32768, 65536)

        # Read rest of first page
        data += f.read(pagesize - PAGE_HEADER_SIZE)
        assert len(data) == pagesize

        # Read all remaining pages
        while data:
            pages.append(data)
            data = f.read(pagesize)

    # Sanity check the meta pages, read root page
    outer_meta_info = dump_meta_page(pages[OUTER_META_PAGE])
    root_page_info = dump_page(pages[outer_meta_info['root']])
    assert root_page_info['pg_type'] == BTREE_LEAF
    # The outer root page must hold exactly one pair: subdatabase name -> 4-byte meta page number
    assert len(root_page_info['entries']) == 2
    assert root_page_info['entries'][0]['data'] == SUBDATABASE_NAME
    assert len(root_page_info['entries'][1]['data']) == 4
    inner_meta_page = int.from_bytes(root_page_info['entries'][1]['data'], 'big')
    inner_meta_info = dump_meta_page(pages[inner_meta_page])

    # Fetch the kv pairs from the pages
    kv = {}
    pages_to_process = [inner_meta_info['root']]
    while pages_to_process:
        curr_page_no = pages_to_process.pop()
        assert curr_page_no <= outer_meta_info['last_pgno']
        info = dump_page(pages[curr_page_no])
        assert info['pg_type'] in (BTREE_INTERNAL, BTREE_LEAF)
        if info['pg_type'] == BTREE_INTERNAL:
            # Internal pages only reference further pages to visit
            pages_to_process.extend(entry['page_num'] for entry in info['entries'])
        elif info['pg_type'] == BTREE_LEAF:
            # Merge in place instead of rebuilding the whole dict for every leaf page
            kv.update(extract_kv_pairs(info, pages))
    return kv
0 commit comments