Skip to content

Commit 86b25c4

Browse files
committed
add macos dedup detect support
1 parent 1cc096d commit 86b25c4

File tree

4 files changed

+133
-6
lines changed

4 files changed

+133
-6
lines changed

cmd/chkbit/help.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ Performance:
4141
- chkbit uses 5 workers by default. To speed it up tune it with the --workers flag.
4242
- Note: slow/spinning disks work best with just 1 worker!
4343
44-
Deduplication:
45-
- requires a supported OS, currently Linux with a CoW filesystem like btrfs
44+
Deduplication requires a supported OS:
45+
- Linux with a CoW filesystem like btrfs (full support)
46+
- macOS with a APFS (detect supported)
4647
- for details see https://laktak.github.io/chkbit/dedup/
4748
`

cmd/chkbit/main.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,12 @@ type CLI struct {
9595
Util struct {
9696
Fileext struct {
9797
Paths []string `arg:"" name:"paths" help:"files to check"`
98-
} `cmd:"" help:"check if the given files occupy the same block on disk; Linux only"`
98+
} `cmd:"" help:"check if the given files occupy the same block on disk"`
9999

100100
Filededup struct {
101101
Paths []string `arg:"" name:"paths" help:"files to dedup"`
102-
} `cmd:"" help:"run deduplication for the given files, makes all duplicate file blocks point to the same space; requires supported OS & filesystem (see tips)"`
103-
} `cmd:"" help:"Utility functions"`
102+
} `cmd:"" help:"run deduplication for the given files, makes all duplicate file blocks point to the same space"`
103+
} `cmd:"" help:"Utility functions; requires supported OS & filesystem (see tips)"`
104104

105105
ShowIgnored struct {
106106
Paths []string `arg:"" name:"paths" help:"directories to list"`

os.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build !linux
1+
//go:build !linux && !darwin
22

33
package chkbit
44

os_darwin.go

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
package chkbit
2+
3+
import (
	"errors"
	"fmt"
	"os"
	"strings"
	"syscall"
	"unsafe"

	"golang.org/x/sys/unix"
)
12+
13+
// https://www.unix.com/man_page/osx/2/fcntl/
14+
15+
type (
	// fiemapExtent describes one contiguous run of a file and where it
	// lives on the underlying device.
	fiemapExtent struct {
		// Logical is the byte offset of the extent in the file.
		Logical uint64
		// Physical is the byte offset of the extent on disk.
		Physical uint64
		// Length is the length of this extent in bytes.
		Length uint64
	}

	// FileExtentList is the sequence of extents collected for a file.
	FileExtentList []fiemapExtent
)
22+
23+
func (fe *fiemapExtent) matches(o *fiemapExtent) bool {
24+
return fe.Logical == o.Logical && fe.Physical == o.Physical && fe.Length == o.Length
25+
}
26+
27+
func (fe FileExtentList) find(offs uint64) *fiemapExtent {
28+
for _, o := range fe {
29+
if o.Logical == offs {
30+
return &o
31+
}
32+
}
33+
return nil
34+
}
35+
36+
func getFileExtentsFp(file *os.File) (FileExtentList, os.FileInfo, error) {
37+
38+
fileInfo, err := file.Stat()
39+
if err != nil {
40+
return nil, nil, err
41+
}
42+
43+
var all []fiemapExtent
44+
start := uint64(0)
45+
size := uint64(fileInfo.Size())
46+
maxReq := uint64(100 * 1024 * 1024)
47+
48+
// don't use syscall.Log2phys_t as it's alignment is incorrect
49+
50+
type Log2phys_t2 struct {
51+
// IN: number of bytes to be queried; OUT: number of contiguous bytes allocated at this position
52+
Contigbytes uint64
53+
// IN: bytes into file; OUT: bytes into device
54+
Devoffset uint64
55+
}
56+
57+
buf := make([]byte, 8*3)
58+
for {
59+
60+
// skip flags
61+
lp := (*Log2phys_t2)(unsafe.Pointer(&buf[4]))
62+
lp.Contigbytes = maxReq
63+
lp.Devoffset = start
64+
65+
rc, err := unix.FcntlInt(file.Fd(), syscall.F_LOG2PHYS_EXT, int(uintptr(unsafe.Pointer(&buf[0]))))
66+
if err != nil {
67+
return nil, nil, err
68+
}
69+
if rc < 0 {
70+
return nil, nil, errors.New("log2phys failed")
71+
}
72+
73+
all = append(all,
74+
fiemapExtent{
75+
Logical: start,
76+
Physical: lp.Devoffset,
77+
Length: lp.Contigbytes,
78+
})
79+
80+
start += lp.Contigbytes
81+
if start >= size {
82+
return all, fileInfo, nil
83+
}
84+
}
85+
}
86+
87+
func GetFileExtents(filePath string) (FileExtentList, error) {
88+
file, err := os.Open(filePath)
89+
if err != nil {
90+
return nil, err
91+
}
92+
file.Sync()
93+
defer file.Close()
94+
fe, _, err := getFileExtentsFp(file)
95+
if err != nil {
96+
return nil, fmt.Errorf("failed to get fileextents for %s: %v", filePath, err)
97+
}
98+
return fe, err
99+
}
100+
101+
func ExtentsMatch(extList1, extList2 FileExtentList) bool {
102+
if len(extList1) != len(extList2) {
103+
return false
104+
}
105+
for i := range extList1 {
106+
a := extList1[i]
107+
b := extList2[i]
108+
if !a.matches(&b) {
109+
return false
110+
}
111+
}
112+
113+
return true
114+
}
115+
116+
func ShowExtents(extList FileExtentList) string {
117+
res := ""
118+
for _, b := range extList {
119+
res += fmt.Sprintf("offs=%x len=%x phys=%x\n", b.Logical, b.Length, b.Physical)
120+
}
121+
return res
122+
}
123+
124+
// DeduplicateFiles is the placeholder for this platform: it does not touch
// either file and always returns 0 together with an error saying that
// deduplication is not supported.
func DeduplicateFiles(file1, file2 string) (uint64, error) {
	var reclaimed uint64
	unsupported := errors.New("deduplicate is not supported on this OS")
	return reclaimed, unsupported
}

0 commit comments

Comments
 (0)