Skip to content

Commit 7ad606c

Browse files
authored
Feature/fix (#71)
* refa: refactor auto * refa: refactor the cli:main * doc * refa: 4294967295 -> const.FFFFFFFF * refa: show undo hist * feat: infer datadir when not specified * bump 0.0.19
1 parent ac824e3 commit 7ad606c

File tree

16 files changed

+129
-88
lines changed

16 files changed

+129
-88
lines changed

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,26 @@ $ ./pyinnodb.sh --help
2424
### dump the ddl from ibd file
2525

2626
```bash
27-
./pyinnodb.sh ${your_ibd_path} tosql --mode ddl
27+
./pyinnodb.sh --fn ${your_ibd_path} tosql --mode ddl
2828
```
2929

3030
### dump sql script to insert data
3131

3232
```bash
33-
./pyinnodb.sh ${your_ibd_path} tosql --mode sql
33+
./pyinnodb.sh --fn ${your_ibd_path} tosql --mode sql
3434
```
3535

3636
### search data by primary key (only int primary keys are supported for now)
3737

3838
```bash
39-
./pyinnodb.sh ${your_ibd_path} search --primary-key 42
39+
./pyinnodb.sh --fn ${your_ibd_path} search --primary-key 42
4040
```
4141

4242
## Mysql 5.7
4343

4444
### view data in ibd file (requires the .frm file as well)
4545

4646
```bash
47-
./pyinnodb.sh ${your_ibd_path} frm ${your_frm_path}
47+
./pyinnodb.sh --fn ${your_ibd_path} frm ${your_frm_path}
4848
```
4949

README_zh.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ python 3.8 以上
3434

3535
#### 1. 验证.ibd文件
3636
```bash
37-
$ ./pyinnodb.sh datadir/test/all_type.ibd validate
37+
$ ./pyinnodb.sh --fn datadir/test/all_type.ibd validate
3838

3939
page[1], fil.checksum[0x20fa5081], calculate checksum[0x20fa5081], eq[True]
4040
page[2], fil.checksum[0x18395c50], calculate checksum[0x18395c50], eq[True]
@@ -45,27 +45,27 @@ page[3], fil.checksum[0x1493810c], calculate checksum[0x1493810c], eq[True]
4545

4646
#### 2. 输出表结构DDL语句
4747
```bash
48-
$ ./pyinnodb.sh datadir/test/all_type.ibd tosql --mode ddl
48+
$ ./pyinnodb.sh --fn datadir/test/all_type.ibd tosql --mode ddl
4949
```
5050

5151
#### 3. 查看sdi
5252
8.0之后, mysql新增了一种page用于存储表结构数据,将表结构存储在.ibd文件中,一般
5353
称为SDI,通过以下命令查看表结构的sdi数据
5454

5555
```bash
56-
$ ./pyinnodb.sh datadir/test/all_type.ibd tosql --mode sdi
56+
$ ./pyinnodb.sh --fn datadir/test/all_type.ibd tosql --mode sdi
5757
```
5858
SDI页中每一条记录都是一个JSON串, 可以通过 ` | jnv ` 交互式查看json数据
5959

6060
#### 4. 导出ibd文件中的数据
6161
```bash
62-
$ ./pyinnodb.sh datadir/test/all_type.ibd tosql --mode dump
62+
$ ./pyinnodb.sh --fn datadir/test/all_type.ibd tosql --mode dump
6363
```
6464
命令会将ibd文件中每一条记录导出成SQL语句, 通过 ` > data.sql`
6565

6666
#### 5. 搜索指定主键的记录
6767
```bash
68-
$ ./pyinnodb.sh datadir/test/all_type.ibd search --primary-key 1
68+
$ ./pyinnodb.sh --fn datadir/test/all_type.ibd search --primary-key 1
6969
```
7070
<details>
7171
<summary>展开输出以及解释</summary>
@@ -77,7 +77,7 @@ search命令通过--primary-key选项指定主键的值, 将会在ibd文件中
7777

7878
此外,search命令还包括--hidden-col, 指定后将会解析,记录的隐藏字段, 如:
7979
```bash
80-
$ ./pyinnodb.sh datadir/test/all_type.ibd search --primary-key 2 --hidden-col
80+
$ ./pyinnodb.sh --fn datadir/test/all_type.ibd search --primary-key 2 --hidden-col
8181
```
8282

8383
<details>
@@ -88,7 +88,7 @@ $ ./pyinnodb.sh datadir/test/all_type.ibd search --primary-key 2 --hidden-col
8888

8989
如果进一步查看数据的修改记录, 可以指定 --with-hist 以及--datadir指定mysql的数据目录来查看, 如:
9090
```bash
91-
$ ./pyinnodb.sh datadir/test/all_type.ibd search --primary-key 2 --hidden-col --with-hist --datadir datadir
91+
$ ./pyinnodb.sh --fn datadir/test/all_type.ibd search --primary-key 2 --hidden-col --with-hist --datadir datadir
9292
```
9393

9494
<details>
@@ -110,5 +110,5 @@ $ ./pyinnodb.sh datadir/test/all_type.ibd search --primary-key 2 --hidden-col --
110110
mysql 5.7的文件组织方式与mysql8.0不同,表结构存储在.frm文件,而数据存储在.ibd,对ibd文件的解析需要使用:
111111

112112
```
113-
./pyinnodb.sh datadir/test/all_type.ibd frm datadir/test/all_type.frm
113+
./pyinnodb.sh --fn datadir/test/all_type.ibd frm datadir/test/all_type.frm
114114
```

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "pyinnodb"
3-
version = "0.0.18"
3+
version = "0.0.19"
44
description = "A parser for InnoDB file formats, in Python"
55
authors = [
66
{ name = "WinChua", email = "winchua@foxmail.com" }

src/pyinnodb/cli/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from .main import *
1+
from .main import * # noqa: F403
22

3-
from . import frm, iter_record, parse, sdi, sql, static_usage, systab, undo, validate
3+
from . import frm, iter_record, parse, sdi, sql, static_usage, systab, undo, validate # noqa: F401

src/pyinnodb/cli/iter_record.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,12 @@
33
from pyinnodb.disk_struct.index import MSDIPage
44
from pyinnodb.disk_struct.record import MRecordHeader
55
from pyinnodb.sdi.table import Table
6+
from pyinnodb.disk_struct.rollback import History
7+
from pathlib import Path
8+
import os
9+
import typing as t
610

7-
from . import *
11+
from . import * # noqa: F403
812

913

1014
@main.command()
@@ -32,7 +36,7 @@ def list_first_page(ctx, pageno):
3236
@click.option("--datadir", type=click.Path(exists=False), default=None)
3337
def search(ctx, primary_key, pageno, hidden_col, with_hist, datadir):
3438
"""search the primary-key(int support only now)"""
35-
f = ctx.obj["fn"]
39+
f: t.IO[t.Any] = ctx.obj["fn"]
3640
# print("search start cost:", time.time() - ctx.obj["start_time"])
3741
fsp_page: MFspPage = ctx.obj["fsp_page"]
3842
f.seek(fsp_page.sdi_page_no * const.PAGE_SIZE)
@@ -58,26 +62,23 @@ def search(ctx, primary_key, pageno, hidden_col, with_hist, datadir):
5862
return
5963

6064
if datadir is None:
61-
print("--datadir should be specified to view the history")
62-
return
65+
fpath = Path(f.name)
66+
if not (fpath.parent.parent/"mysql.ibd").exists():
67+
print("--datadir should be specified to view the history")
68+
return
69+
datadir = fpath.parent.parent
70+
6371
if not os.path.exists(datadir):
6472
print(f"--datadir {datadir} not exists")
6573
return
6674

67-
rptr = result.DB_ROLL_PTR
6875
primary_key_col = dd_object.get_primary_key_col()
6976
disk_data_layout = dd_object.get_disk_data_layout()
7077
undo_map = const.util.get_undo_tablespacefile(f"{datadir}/mysql.ibd")
71-
history = []
72-
while rptr is not None:
73-
hist, rptr = rptr.last_version(
74-
undo_map,
75-
primary_key_col,
76-
disk_data_layout,
77-
)
78-
history.append(hist)
79-
for h in history:
80-
print(h)
78+
79+
history = History(result)
80+
history.parse(primary_key_col, disk_data_layout, undo_map)
81+
history.show()
8182

8283
return
8384

src/pyinnodb/cli/main.py

Lines changed: 37 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
22
import sys
3+
import typing as t
34
from importlib_metadata import version as meta_version
45

56
import click
@@ -11,8 +12,24 @@
1112
logger = logging.getLogger(__name__)
1213

1314

14-
@click.group()
15-
@click.argument("fn", type=click.File("rb"))
15+
def validate_ibd(fsp_page: MFspPage, fn: t.IO[t.Any]):
16+
for pn in range(fsp_page.fsp_header.highest_page_number):
17+
fn.seek(const.PAGE_SIZE * pn)
18+
page_data = fn.read(const.PAGE_SIZE)
19+
fil = MFil.parse(page_data)
20+
if fil.page_type == const.FIL_PAGE_TYPE_ALLOCATED:
21+
continue
22+
checksum = const.page_checksum_crc32c(page_data)
23+
if checksum != fil.checksum:
24+
print(
25+
f"PAGE {pn}'s checksum is invalid, stored[{hex(fil.checksum)}] != calculate[{hex(checksum)}]"
26+
)
27+
print("use validate to get a more detail output of the validation")
28+
return False
29+
return True
30+
31+
@click.group(invoke_without_command=True)
32+
@click.option("--fn", type=click.File("rb"), default=None)
1633
@click.option(
1734
"--log-level", type=click.Choice(["DEBUG", "ERROR", "INFO"]), default="ERROR"
1835
)
@@ -22,45 +39,38 @@
2239
def main(ctx, fn, log_level, validate_first, version):
2340
"""A ibd file parser for MySQL 8.0 above, help you to know innodb better.
2441
42+
\b
2543
It offers several functions below:
26-
a) validate the checksum of your ibd file;
27-
b) output the DDL of table;
28-
c) dump the data in ibd file as INSERT statments;
29-
d) search record by primary key;
30-
e) show the undo log history
44+
a) validate the checksum of your ibd file;
45+
b) output the DDL of table;
46+
c) dump the data in ibd file as INSERT statements;
47+
d) search record by primary key;
48+
e) show the undo log history
3149
3250
and many other functions to explore your ibd file
3351
3452
"""
35-
if version:
53+
54+
if version and not ctx.invoked_subcommand:
3655
print(meta_version("pyinnodb"))
3756
sys.exit(0)
38-
# pid = os.getpid()
39-
# start_time = os.stat(f"/proc/{pid}").st_ctime
40-
# print("cost to startup:", time.time() - start_time)
41-
# ctx.obj["start_time"] = start_time
57+
if fn is None:
58+
print("use --fn to specify ibd file")
59+
sys.exit(0)
60+
4261
logging.basicConfig(
4362
format="[%(levelname)s]-[%(filename)s:%(lineno)d] %(message)s", level=log_level
4463
)
4564
ctx.ensure_object(dict)
4665
ctx.obj["fn"] = fn
66+
4767
try:
4868
fsp_page = MFspPage.parse_stream(fn)
49-
ctx.obj["fsp_page"] = fsp_page
50-
if validate_first:
51-
for pn in range(fsp_page.fsp_header.highest_page_number):
52-
fn.seek(const.PAGE_SIZE * pn)
53-
page_data = fn.read(const.PAGE_SIZE)
54-
fil = MFil.parse(page_data)
55-
if fil.page_type == const.FIL_PAGE_TYPE_ALLOCATED:
56-
continue
57-
checksum = const.page_checksum_crc32c(page_data)
58-
if checksum != fil.checksum:
59-
print(
60-
f"PAGE {pn}'s checksum is invalid, stored[{hex(fil.checksum)}] != calculate[{hex(checksum)}]"
61-
)
62-
print("use validate to get a more detail output of the validation")
63-
sys.exit(1)
6469
except Exception as e:
6570
print(e)
6671
print("the file parse faile")
72+
sys.exit(1)
73+
74+
ctx.obj["fsp_page"] = fsp_page
75+
if validate_first and not validate_ibd(fsp_page, fn):
76+
sys.exit(1)

src/pyinnodb/cli/sql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def dump_ibd(table_object, f, oneline=True):
5555
)
5656

5757
values = []
58-
while first_leaf_page_no != 4294967295:
58+
while first_leaf_page_no != const.FFFFFFFF:
5959
f.seek(first_leaf_page_no * const.PAGE_SIZE)
6060
index_page = MIndexPage.parse_stream(f)
6161
values.extend(

src/pyinnodb/cli/undo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,6 @@ def rseg_array(ctx, pageno):
8484
for pageno in page.header.pagenos:
8585
f.seek(pageno * const.PAGE_SIZE)
8686
rseg_page = MRSEGPage.parse_stream(f)
87-
pages = [f for f in rseg_page.slots if f != 4294967295]
87+
pages = [f for f in rseg_page.slots if f != const.FFFFFFFF]
8888
if 150 in pages:
8989
print(rseg_page, pageno)

src/pyinnodb/const/define.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
PAGE_SIZE = 16 * 1024
2+
FFFFFFFF = 0xFFFFFFFF
23

34
FIL_PAGE_INDEX = 17855 # B-tree node */
45
FIL_PAGE_RTREE = 17854 # R-tree node */

src/pyinnodb/disk_struct/data.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
from datetime import timedelta, datetime, date
1+
from datetime import datetime, date
22
from ..mconstruct import cs, cfield, CC
33

44
try:
55
from datetime import UTC
6-
except:
6+
except ImportError:
77
from datetime import timezone
8-
98
UTC = timezone.utc
109

1110
TIMEF_INT_OFS = 0x800000

src/pyinnodb/disk_struct/first_page.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def get_data(self, stream):
6868
stream.seek(ie.page_no * const.PAGE_SIZE)
6969
dp = MDataPage.parse_stream(stream)
7070
data += stream.read(dp.data_len)
71-
if ie.node.next.page_number == 4294967295:
71+
if ie.node.next.page_number == const.FFFFFFFF:
7272
break
7373
stream.seek(ie.node.next.seek_loc())
7474
ie = MIndexEntryNode.parse_stream(stream)

src/pyinnodb/disk_struct/index.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class MFsegHeader(CC):
5050
# should not use this way to determine the first leaf page number
5151
# as off-page may allocate first
5252
# def get_first_leaf_page(self, f):
53-
# if self.leaf_pointer.page_number != 4294967295:
53+
#     if self.leaf_pointer.page_number != const.FFFFFFFF:
5454
# f.seek(self.leaf_pointer.seek_loc())
5555
# inode_entry = MInodeEntry.parse_stream(f)
5656
# fp = inode_entry.first_page()
@@ -163,7 +163,7 @@ def value_parser(rh: MRecordHeader, f):
163163
nullable_cols = [
164164
d[0]
165165
for d in cols_disk_layout
166-
if d[1] == 4294967295 and d[0].is_nullable
166+
if d[1] == const.FFFFFFFF and d[0].is_nullable
167167
]
168168

169169
logger.debug(
@@ -256,16 +256,15 @@ def value_parser(rh: MRecordHeader, f):
256256
disk_data_parsed[col.name] = col_value
257257

258258
for col in dd_object.columns:
259-
if (
260-
col.name in ["DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR"]
261-
and not hidden_col
262-
) or col.private_data.get("version_dropped", 0) != 0 or col.is_hidden_from_user:
259+
if col.name in ["DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR"]:
260+
if not hidden_col and col.name in disk_data_parsed:
261+
disk_data_parsed.pop(col.name)
262+
elif col.private_data.get("version_dropped", 0) != 0 or col.is_hidden_from_user:
263263
if col.name in disk_data_parsed:
264264
disk_data_parsed.pop(col.name)
265+
elif col.is_virtual or col.generation_expression_utf8 != "":
265266
continue
266-
if col.is_virtual or col.generation_expression_utf8 != "":
267-
continue
268-
if col.name not in disk_data_parsed:
267+
elif col.name not in disk_data_parsed:
269268
disk_data_parsed[col.name] = col.get_instant_default()
270269

271270
klass = dd_object.DataClassHiddenCol if hidden_col else dd_object.DataClass
@@ -407,7 +406,7 @@ def iterate_sdi_record(self, stream):
407406
stream.seek(-8 + infimum.next_record_offset + 12, 1)
408407
cur_page_num = int.from_bytes(stream.read(4), byteorder="big")
409408

410-
while cur_page_num != 4294967295:
409+
while cur_page_num != const.FFFFFFFF:
411410
stream.seek(cur_page_num * const.PAGE_SIZE)
412411
sdi_page = MSDIPage.parse_stream(stream)
413412
stream.seek(

0 commit comments

Comments
 (0)