Skip to content

Checksums in default VFS. #177

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package sqlite3

import (
"context"
"strconv"

"github.com/tetratelabs/wazero/api"

Expand Down Expand Up @@ -327,3 +328,46 @@ func (c *Conn) SoftHeapLimit(n int64) int64 {
func (c *Conn) HardHeapLimit(n int64) int64 {
// Forward n to sqlite3_hard_heap_limit64 and hand back its result as an int64.
return int64(c.call("sqlite3_hard_heap_limit64", uint64(n)))
}

// EnableChecksums enables checksums on the database attached as schema.
//
// It inspects the database's reserve-bytes setting: a value of 8 means
// checksums are already on and nothing else is done; the default of 0 is
// changed to 8; any other value is reported as an error.
// After switching the setting on, the database is vacuumed and its WAL is
// checkpointed.
//
// https://sqlite.org/cksumvfs.html
func (c *Conn) EnableChecksums(schema string) error {
	reserved, err := c.FileControl(schema, FCNTL_RESERVE_BYTES)
	if err != nil {
		return err
	}

	switch reserved {
	case 8:
		// Already the value checksums need: enabled, nothing to do.
		return nil
	case 0:
		// Still the default: ask for 8 bytes, then read the setting back.
		if _, err = c.FileControl(schema, FCNTL_RESERVE_BYTES, 8); err != nil {
			return err
		}
		if reserved, err = c.FileControl(schema, FCNTL_RESERVE_BYTES); err != nil {
			return err
		}
	}
	if reserved != 8 {
		// Either a different reserve size was already in use,
		// or the request above did not take effect.
		return util.ErrorString("sqlite3: reserve bytes must be 8, is: " + strconv.Itoa(reserved.(int)))
	}

	// VACUUM the database (naming the schema only when one was given).
	stmt := `VACUUM`
	if schema != "" {
		stmt = `VACUUM ` + QuoteIdentifier(schema)
	}
	if err = c.Exec(stmt); err != nil {
		return err
	}

	// Checkpoint the WAL.
	_, _, err = c.WALCheckpoint(schema, CHECKPOINT_RESTART)
	return err
}
75 changes: 75 additions & 0 deletions tests/cksm_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package tests

import (
_ "embed"
"strings"
"testing"

"github.com/ncruces/go-sqlite3"
"github.com/ncruces/go-sqlite3/driver"
_ "github.com/ncruces/go-sqlite3/embed"
_ "github.com/ncruces/go-sqlite3/internal/testcfg"
"github.com/ncruces/go-sqlite3/util/ioutil"
"github.com/ncruces/go-sqlite3/vfs/memdb"
"github.com/ncruces/go-sqlite3/vfs/readervfs"
)

// cksmDB holds the contents of testdata/cksm.db, embedded at build time.
// Test_fileformat serves it through the reader VFS and expects checksum
// verification to be reported as enabled for it.
//
//go:embed testdata/cksm.db
var cksmDB string

func Test_fileformat(t *testing.T) {
t.Parallel()

readervfs.Create("test.db", ioutil.NewSizeReaderAt(strings.NewReader(cksmDB)))

db, err := driver.Open("file:test.db?vfs=reader")
if err != nil {
t.Fatal(err)
}
defer db.Close()

var enabled bool
err = db.QueryRow(`PRAGMA checksum_verification`).Scan(&enabled)
if err != nil {
t.Fatal(err)
}
if !enabled {
t.Error("want true")
}

db.SetMaxIdleConns(0) // Clears the page cache.

_, err = db.Exec(`PRAGMA integrity_check`)
if err != nil {
t.Fatal(err)
}
}

func Test_enable(t *testing.T) {
t.Parallel()

db, err := driver.Open(memdb.TestDB(t),
func(db *sqlite3.Conn) error {
return db.EnableChecksums("main")
})
if err != nil {
t.Fatal(err)
}
defer db.Close()

var enabled bool
err = db.QueryRow(`PRAGMA checksum_verification`).Scan(&enabled)
if err != nil {
t.Fatal(err)
}
if !enabled {
t.Error("want true")
}

db.SetMaxIdleConns(0) // Clears the page cache.

_, err = db.Exec(`PRAGMA integrity_check`)
if err != nil {
t.Fatal(err)
}
}
File renamed without changes.
23 changes: 18 additions & 5 deletions vfs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ The main differences are [file locking](#file-locking) and [WAL mode](#write-ahe
POSIX advisory locks, which SQLite uses on Unix, are
[broken by design](https://github.com/sqlite/sqlite/blob/b74eb0/src/os_unix.c#L1073-L1161).

On Linux and macOS, this module uses
On Linux and macOS, this package uses
[OFD locks](https://www.gnu.org/software/libc/manual/html_node/Open-File-Description-Locks.html)
to synchronize access to database files.
OFD locks are fully compatible with POSIX advisory locks.

This module can also use
This package can also use
[BSD locks](https://man.freebsd.org/cgi/man.cgi?query=flock&sektion=2),
albeit with reduced concurrency (`BEGIN IMMEDIATE` behaves like `BEGIN EXCLUSIVE`).
On BSD, macOS, and illumos, BSD locks are fully compatible with POSIX advisory locks;
Expand All @@ -30,7 +30,7 @@ elsewhere, they are very likely broken.
BSD locks are the default on BSD and illumos,
but you can opt into them with the `sqlite3_flock` build tag.

On Windows, this module uses `LockFileEx` and `UnlockFileEx`,
On Windows, this package uses `LockFileEx` and `UnlockFileEx`,
like SQLite.

Otherwise, file locking is not supported, and you must use
Expand All @@ -46,7 +46,7 @@ to check if your build supports file locking.

### Write-Ahead Logging

On little-endian Unix, this module uses `mmap` to implement
On little-endian Unix, this package uses `mmap` to implement
[shared-memory for the WAL-index](https://sqlite.org/wal.html#implementation_of_shared_memory_for_the_wal_index),
like SQLite.

Expand All @@ -67,9 +67,22 @@ to check if your build supports shared memory.

### Batch-Atomic Write

On 64-bit Linux, this module supports [batch-atomic writes](https://sqlite.org/cgi/src/technote/714)
On 64-bit Linux, this package supports
[batch-atomic writes](https://sqlite.org/cgi/src/technote/714)
on the F2FS filesystem.

### Checksums

This package can be [configured](https://pkg.go.dev/github.com/ncruces/go-sqlite3#Conn.EnableChecksums)
to add an 8-byte checksum to the end of every page in an SQLite database.
The checksum is added as each page is written
and verified as each page is read.\
The checksum is intended to help detect database corruption
caused by random bit-flips in the mass storage device.

The implementation is compatible with SQLite's
[Checksum VFS Shim](https://sqlite.org/cksumvfs.html).

### Build Tags

The VFS can be customized with a few build tags:
Expand Down
2 changes: 2 additions & 0 deletions vfs/adiantum/adiantum_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
var testDB string

func Test_fileformat(t *testing.T) {
t.Parallel()

readervfs.Create("test.db", ioutil.NewSizeReaderAt(strings.NewReader(testDB)))
vfs.Register("radiantum", adiantum.Wrap(vfs.Find("reader"), nil))

Expand Down
5 changes: 5 additions & 0 deletions vfs/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,8 @@ type blockingSharedMemory interface {
SharedMemory
shmEnableBlocking(block bool)
}

// fileControl is a File that also exposes a fileControl method for handling
// file-control opcodes. In this change it is implemented by cksmFile.
// NOTE(review): the code that type-asserts for this interface is not visible
// here; presumably the VFS file-control entry point prefers it over the
// default handling. Confirm against the caller.
type fileControl interface {
File
// fileControl handles the file-control opcode op and returns a result code.
// NOTE(review): pArg looks like an address in the module's memory holding
// the opcode's argument. Confirm against vfsFileControlImpl.
fileControl(ctx context.Context, mod api.Module, op _FcntlOpcode, pArg uint32) _ErrorCode
}
149 changes: 149 additions & 0 deletions vfs/cksm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
package vfs

import (
"bytes"
"context"
_ "embed"
"encoding/binary"
"strconv"

"github.com/tetratelabs/wazero/api"

"github.com/ncruces/go-sqlite3/internal/util"
"github.com/ncruces/go-sqlite3/util/sql3util"
)

// cksmWrapFile wraps file in a cksmFile when it is a main database or a WAL,
// and returns every other kind of file unchanged.
//
// A main database gets a fresh set of checksum flags. A WAL shares the flags
// of its main database file, found through name.DatabaseFile(), so both files
// agree on whether checksums are computed and verified.
// If the main database file is not a cksmFile (or carries no flags), there
// are no settings to share and the WAL is returned unwrapped: wrapping it
// with nil flags would make the first read, write or pragma on it
// dereference a nil pointer.
func cksmWrapFile(name *Filename, flags OpenFlag, file File) File {
	// Checksum only main databases and WALs.
	if flags&(OPEN_MAIN_DB|OPEN_WAL) == 0 {
		return file
	}

	if flags&OPEN_WAL != 0 {
		main, ok := name.DatabaseFile().(cksmFile)
		if !ok || main.cksmFlags == nil {
			return file
		}
		return cksmFile{File: file, cksmFlags: main.cksmFlags}
	}

	return cksmFile{File: file, cksmFlags: new(cksmFlags), isDB: true}
}

// cksmFile wraps a File, writing a checksum into the last 8 bytes of each
// page on WriteAt and checking it on ReadAt, as directed by its cksmFlags.
// Its methods use value receivers; state changes go through the embedded
// *cksmFlags pointer.
type cksmFile struct {
File
// Checksum settings. cksmWrapFile gives a WAL the same *cksmFlags as its
// main database file, so the two share these settings.
*cksmFlags
// isDB is set by cksmWrapFile for main database files; only those have
// their header inspected by ReadAt and WriteAt.
isDB bool
}

// cksmFlags is the checksum state of a database,
// filled in from the database header by init.
type cksmFlags struct {
// computeCksm makes WriteAt store a checksum in each page;
// init sets it when header byte 20 equals 8.
computeCksm bool
// verifyCksm makes ReadAt check the checksum of each page; init sets it
// together with computeCksm, and the checksum_verification pragma toggles it.
verifyCksm bool
// inCkpt is set between the _FCNTL_CKPT_START and _FCNTL_CKPT_DONE file
// controls; while set, checksums are neither computed nor verified.
inCkpt bool
// pageSize is the page size decoded from the header by init; only reads
// and writes of exactly this many bytes are treated as pages.
pageSize int
}

// ReadAt reads from the wrapped file and, when verification is enabled,
// checks the checksum stored in the last 8 bytes of each page read.
//
// Only the n bytes actually returned by the wrapped file are inspected:
// after a short read the rest of p holds whatever the caller left there,
// so neither header detection nor checksum verification may look at it.
// A short read therefore reports the wrapped file's own result instead of
// a spurious checksum failure.
//
// If a page fails verification, ReadAt returns 0 and _IOERR_DATA.
// Otherwise it returns the wrapped file's n and err unchanged.
func (c cksmFile) ReadAt(p []byte, off int64) (n int, err error) {
	n, err = c.File.ReadAt(p, off)
	// Restrict every check below to the bytes that were really read.
	p = p[:n]

	// SQLite is reading the header of a database file.
	if c.isDB && off == 0 && len(p) >= 100 &&
		bytes.HasPrefix(p, []byte("SQLite format 3\000")) {
		c.init(p)
	}

	// Verify checksums, but only on reads of exactly one page, and not while
	// a checkpoint is in progress. The pageSize > 0 guard keeps an empty read
	// from being mistaken for a page when the header carried a zero size.
	if c.verifyCksm && !c.inCkpt && c.pageSize > 0 && len(p) == c.pageSize {
		cksm1 := cksmCompute(p[:len(p)-8])
		cksm2 := *(*[8]byte)(p[len(p)-8:])
		if cksm1 != cksm2 {
			return 0, _IOERR_DATA
		}
	}
	return n, err
}

// WriteAt stores a checksum in the last 8 bytes of p when p is exactly one
// page, checksums are being computed and no checkpoint is in progress, and
// then writes p to the wrapped file. The checksum is written into the
// caller's buffer in place.
func (c cksmFile) WriteAt(p []byte, off int64) (n int, err error) {
	// SQLite is writing the first page of a database file:
	// refresh the checksum settings from its header.
	atHeader := c.isDB && off == 0 && len(p) >= 100
	if atHeader && bytes.HasPrefix(p, []byte("SQLite format 3\000")) {
		c.init(p)
	}

	// Compute checksums over everything but the trailing 8 bytes,
	// which receive the result.
	if size := len(p); c.computeCksm && !c.inCkpt && size == c.pageSize {
		body, tail := p[:size-8], p[size-8:]
		sum := cksmCompute(body)
		copy(tail, sum[:])
	}

	return c.File.WriteAt(p, off)
}

// Pragma handles the pragmas that concern checksums and reports _NOTFOUND
// for everything else.
//
// "checksum_verification" optionally switches verification on or off when
// value parses as a boolean (it can only be on while checksums are being
// computed) and returns the resulting state as "1" or "0".
// "page_size" returns the current page size while checksums are being
// computed.
func (c cksmFile) Pragma(name string, value string) (string, error) {
	if name == "checksum_verification" {
		if enable, ok := sql3util.ParseBool(value); ok {
			c.verifyCksm = enable && c.computeCksm
		}
		if c.verifyCksm {
			return "1", nil
		}
		return "0", nil
	}

	if name == "page_size" && c.computeCksm {
		// Do not allow page size changes on a checksum database.
		return strconv.Itoa(c.pageSize), nil
	}

	return "", _NOTFOUND
}

// fileControl tracks whether a checkpoint is in progress, then dispatches op
// through vfsFileControlImpl: first against the checksum wrapper itself and,
// only if that reports _NOTFOUND, against the wrapped file.
func (c cksmFile) fileControl(ctx context.Context, mod api.Module, op _FcntlOpcode, pArg uint32) _ErrorCode {
	// Checksums are neither computed nor verified between these two opcodes.
	if op == _FCNTL_CKPT_START || op == _FCNTL_CKPT_DONE {
		c.inCkpt = op == _FCNTL_CKPT_START
	}

	rc := vfsFileControlImpl(ctx, mod, c, op, pArg)
	if rc == _NOTFOUND {
		rc = vfsFileControlImpl(ctx, mod, c.File, op, pArg)
	}
	return rc
}

// init refreshes the checksum settings from a database header.
//
// The SQLite header stores the page size as a big-endian 16-bit value at
// offset 16. Reading it little-endian and multiplying by 256 gives the same
// number for every valid size, including 65536, which is stored as 1.
// Checksums are in use when the reserved-bytes field at offset 20 is 8.
// Verification is only reset when that state changes, so a verification
// setting chosen through the pragma survives re-reading an unchanged header.
func (f *cksmFlags) init(header []byte) {
	units := binary.LittleEndian.Uint16(header[16:18])
	f.pageSize = int(units) * 256

	enabled := header[20] == 8
	if enabled == f.computeCksm {
		return
	}
	f.computeCksm = enabled
	f.verifyCksm = enabled
}

// cksmCompute returns the 8-byte checksum of a.
//
// The input is consumed as pairs of little-endian 32-bit words feeding two
// running sums, each of which also absorbs the other; arithmetic wraps at
// 32 bits. The result is the first sum followed by the second, both
// little-endian. The length of a must be a multiple of 8, otherwise
// cksmCompute panics.
func cksmCompute(a []byte) (cksm [8]byte) {
	var lo, hi uint32
	for i := 0; i+8 <= len(a); i += 8 {
		lo += binary.LittleEndian.Uint32(a[i:]) + hi
		hi += binary.LittleEndian.Uint32(a[i+4:]) + lo
	}
	if len(a)%8 != 0 {
		panic(util.AssertErr())
	}
	binary.LittleEndian.PutUint32(cksm[:4], lo)
	binary.LittleEndian.PutUint32(cksm[4:], hi)
	return cksm
}

// SharedMemory returns the shared memory of the wrapped file,
// or nil when the wrapped file does not implement FileSharedMemory.
func (c cksmFile) SharedMemory() SharedMemory {
	inner, ok := c.File.(FileSharedMemory)
	if !ok {
		return nil
	}
	return inner.SharedMemory()
}

// Unwrap returns the File that this checksum wrapper was built around.
func (c cksmFile) Unwrap() File {
return c.File
}
20 changes: 0 additions & 20 deletions vfs/cksmvfs/README.md

This file was deleted.

Loading