Skip to content

Commit 4b0715a

Browse files
committed
- handle manager for source and targets
- refactoring
1 parent 5e525cd commit 4b0715a

File tree

5 files changed

+128
-77
lines changed

5 files changed

+128
-77
lines changed

blobporter.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
"github.com/Azure/blobporter/util"
1616
)
1717

18-
const programVersion = "0.6.05"
18+
const programVersion = "0.6.06"
1919

2020
var argsUtil paramParserValidator
2121

@@ -111,7 +111,7 @@ func displayFilesToTransfer(sourcesInfo []pipeline.SourceInfo, numOfBatches int,
111111
totalSize = totalSize + source.Size
112112
}
113113

114-
if len(sourcesInfo) < 20 {
114+
if len(sourcesInfo) < 0 {
115115
fmt.Printf(summary)
116116
return
117117
}

targets/handleman.go renamed to internal/handleman.go

Lines changed: 50 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package targets
1+
package internal
22

33
import (
44
"fmt"
@@ -9,20 +9,32 @@ import (
99

1010
//There're two components here: poolHandle and the handle factory.
1111
//A pool is an asynchronous request/response worker that runs on a single go-routine and keeps file handles for each file.
12-
//The number of file handles is constraint by the max number of handlers in cache (numberOfHandlersInCache) and the max number of handles per file (numOfHandlesPerFile).
12+
//The number of file handles is constrained by the max number of handles in cache (maxFileHandlesInCache) and the max number of handles per file (numOfHandlesPerFile).
1313
//When the max number of handles is reached, file handles will be closed until space is available. The handle factory opens the file handles and initializes the
1414
//target file in case the folder structure and file need to be created. Since the factory tracks if a file has been initialized
1515
//, i.e. created or truncated at the beginning of the transfer, only one instance of the factory is created.
1616

17-
const maxFileHandlesInCache int32 = 600
17+
const maxFileHandlesInCache int = 600
1818

19-
type fileHandlePool struct {
19+
//HandleMode TODO
20+
type HandleMode int
21+
22+
const (
23+
//Read read only file handles
24+
Read HandleMode = iota
25+
//Write write and append file handles
26+
Write
27+
)
28+
29+
//FileHandlePool TODO
30+
type FileHandlePool struct {
2031
maxCacheSize int
2132
maxHandlesPerFile int
2233
factory *handleFactory
2334
fileHandles map[string][]*os.File
2435
overwrite bool
2536
channels poolChannels
37+
mode HandleMode
2638
}
2739
type poolChannels struct {
2840
handleReq chan poolRequest
@@ -47,13 +59,15 @@ type poolResponse struct {
4759
err error
4860
}
4961

50-
func newfileHandlePool(maxCacheSize int, maxHandlesPerFile int, overwrite bool) *fileHandlePool {
51-
pool := fileHandlePool{
52-
maxCacheSize: maxCacheSize,
62+
//NewFileHandlePool TODO
63+
func NewFileHandlePool(maxHandlesPerFile int, mode HandleMode, overwrite bool) *FileHandlePool {
64+
pool := FileHandlePool{
65+
maxCacheSize: maxFileHandlesInCache,
5366
maxHandlesPerFile: maxHandlesPerFile,
54-
factory: newhandleFactory(overwrite),
67+
factory: newhandleFactory(mode, overwrite),
5568
fileHandles: make(map[string][]*os.File),
5669
overwrite: overwrite,
70+
mode: mode,
5771
channels: poolChannels{
5872
handleReq: make(chan poolRequest, 100),
5973
closeReq: make(chan poolCloseRequest, 100),
@@ -66,24 +80,28 @@ func newfileHandlePool(maxCacheSize int, maxHandlesPerFile int, overwrite bool)
6680
return &pool
6781
}
6882

69-
func (f *fileHandlePool) getHandle(path string) (*os.File, error) {
83+
//GetHandle TODO
84+
func (f *FileHandlePool) GetHandle(path string) (*os.File, error) {
7085
respChan := make(chan poolResponse, 1)
7186
req := poolRequest{path: path, response: respChan}
7287
f.channels.handleReq <- req
7388
resp := <-respChan
7489
return resp.handle, resp.err
7590
}
7691

77-
func (f *fileHandlePool) returnHandle(path string, handle *os.File) error {
92+
//ReturnHandle TODO
93+
func (f *FileHandlePool) ReturnHandle(path string, handle *os.File) error {
7894
select {
7995
case f.channels.returnReq <- poolReturnRequest{handle: handle, path: path}:
8096
default:
81-
//close the handle if channel is fool
97+
//close the handle if channel is full
8298
return handle.Close()
8399
}
84100
return nil
85101
}
86-
func (f *fileHandlePool) closeHandles(path string) error {
102+
103+
//CloseHandles TODO
104+
func (f *FileHandlePool) CloseHandles(path string) error {
87105
respChan := make(chan error, 1)
88106
req := poolCloseRequest{path: path, err: respChan}
89107
f.channels.closeReq <- req
@@ -93,12 +111,12 @@ func (f *fileHandlePool) closeHandles(path string) error {
93111

94112
const fileHandleCacheDebug = "BP_FHC_DBG"
95113

96-
func (f *fileHandlePool) startPool() {
97-
//start := time.Now()
98-
oc := 0
99-
cc := 0
100-
cm := 0
101-
ch := 0
114+
func (f *FileHandlePool) startPool() {
115+
116+
oc := 0 //num of open requests
117+
cc := 0 //num of close requests
118+
cm := 0 //num of cache misses
119+
ch := 0 //num of cache hits
102120
dbg := os.Getenv(fileHandleCacheDebug)
103121
go func() {
104122
for {
@@ -204,6 +222,7 @@ func (f *fileHandlePool) startPool() {
204222
type handleFactory struct {
205223
init map[string]bool
206224
factoryReq chan factoryRequest
225+
mode HandleMode
207226
}
208227

209228
type factoryRequest struct {
@@ -216,11 +235,12 @@ type factoryResponse struct {
216235
err error
217236
}
218237

219-
func newhandleFactory(overwrite bool) *handleFactory {
238+
func newhandleFactory(mode HandleMode, overwrite bool) *handleFactory {
220239
reqChan := make(chan factoryRequest, 100)
221240
fact := handleFactory{
222241
init: make(map[string]bool),
223242
factoryReq: reqChan,
243+
mode: mode,
224244
}
225245

226246
fact.startFactory(overwrite)
@@ -251,12 +271,17 @@ func (h *handleFactory) startFactory(overwrite bool) {
251271
_, exists := h.init[req.path]
252272
var fh *os.File
253273
var err error
254-
if !exists {
255-
//fmt.Printf("init->%v\n", req.path)
256-
fh, err = h.initFile(req.path, overwrite)
257-
} else {
258-
//fmt.Printf("open->%v\n", req.path)
259-
fh, err = os.OpenFile(req.path, os.O_WRONLY, os.ModeAppend)
274+
switch h.mode {
275+
case Read:
276+
fh, err = os.OpenFile(req.path, os.O_RDONLY, os.ModeAppend)
277+
case Write:
278+
if !exists {
279+
fh, err = h.initFile(req.path, overwrite)
280+
} else {
281+
fh, err = os.OpenFile(req.path, os.O_WRONLY, os.ModeAppend)
282+
}
283+
default:
284+
panic(fmt.Sprintf("Invalid handle mode:%v", h.mode))
260285
}
261286

262287
select {

pipeline/pipeline.go

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,7 @@ func createPartsInPartition(partitionSize int64, partitionOffSet int64, ordinalS
151151

152152
//ConstructPartsPartition creates a slice of PartsPartition with a len of numberOfPartitions.
153153
func ConstructPartsPartition(numberOfPartitions int, size int64, blockSize int64, sourceURI string, targetAlias string, bufferQ chan []byte) []PartsPartition {
154-
//bsib := uint64(blockSize)
155154
numOfBlocks := int((size + blockSize - 1) / blockSize)
156-
157155
Partitions := make([]PartsPartition, numberOfPartitions)
158156
//the size of the partition needs to be a multiple (blockSize * int) to make sure all but the last part/block
159157
//are the same size
@@ -165,17 +163,27 @@ func ConstructPartsPartition(numberOfPartitions int, size int64, blockSize int64
165163
var partOrdinal int
166164
for p := 0; p < numberOfPartitions; p++ {
167165
poffSet := int64(int64(p) * partitionSize)
166+
168167
if p == numberOfPartitions-1 {
169168
partitionSize = int64(bytesLeft)
170169
}
171-
partition := PartsPartition{TotalNumOfParts: int64(numOfBlocks), TotalSize: size, Offset: poffSet, PartitionSize: partitionSize}
172-
parts, partOrdinal, numOfPartsInPartition = createPartsInPartition(partitionSize, poffSet, partOrdinal, numOfBlocks, blockSize, sourceURI, targetAlias, bufferQ)
170+
171+
partition := PartsPartition{TotalNumOfParts: int64(numOfBlocks),
172+
TotalSize: size,
173+
Offset: poffSet,
174+
PartitionSize: partitionSize}
175+
176+
parts, partOrdinal, numOfPartsInPartition = createPartsInPartition(partitionSize,
177+
poffSet,
178+
partOrdinal,
179+
numOfBlocks,
180+
blockSize,
181+
sourceURI, targetAlias, bufferQ)
173182

174183
partition.Parts = parts
175184
partition.NumOfParts = numOfPartsInPartition
176185
Partitions[p] = partition
177-
178-
bytesLeft = bytesLeft - int64(partitionSize)
186+
bytesLeft = bytesLeft - int64(partitionSize)
179187
}
180188

181189
return Partitions
@@ -292,7 +300,7 @@ func (p *Part) MD5() string {
292300
}
293301

294302
//MD5Bytes TODO
295-
func(p *Part) MD5Bytes()[]byte{
303+
func (p *Part) MD5Bytes() []byte {
296304
p.MD5()
297305
return p.md5Value
298306
}

sources/multifile.go

Lines changed: 48 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"io"
1313

14+
"github.com/Azure/blobporter/internal"
1415
"github.com/Azure/blobporter/pipeline"
1516
"github.com/Azure/blobporter/util"
1617
)
@@ -21,12 +22,13 @@ import (
2122

2223
// MultiFilePipeline Constructs blocks queue and implements data readers
2324
type MultiFilePipeline struct {
24-
FilesInfo map[string]FileInfo
25-
TotalNumberOfBlocks int
26-
TotalSize uint64
27-
BlockSize uint64
28-
NumOfPartitions int
25+
filesInfo map[string]FileInfo
26+
totalNumberOfBlocks int
27+
totalSize uint64
28+
blockSize uint64
29+
numOfPartitions int
2930
includeMD5 bool
31+
handlePool *internal.FileHandlePool
3032
}
3133

3234
//FileInfo Contains the metadata associated with a file to be transferred
@@ -100,6 +102,8 @@ func NewMultiFile(params *MultiFileParams) []pipeline.SourcePipeline {
100102
return pipelines
101103
}
102104

105+
const maxNumOfHandlesPerFile int = 4
106+
103107
func newMultiFilePipeline(files []string, targetAliases []string, blockSize uint64, numOfPartitions int, md5 bool, keepDirStructure bool) pipeline.SourcePipeline {
104108
totalNumberOfBlocks := 0
105109
var totalSize uint64
@@ -142,19 +146,22 @@ func newMultiFilePipeline(files []string, targetAliases []string, blockSize uint
142146
fileInfos[files[f]] = fileInfo
143147
}
144148

145-
return &MultiFilePipeline{FilesInfo: fileInfos,
146-
TotalNumberOfBlocks: totalNumberOfBlocks,
147-
BlockSize: blockSize,
148-
TotalSize: totalSize,
149-
NumOfPartitions: numOfPartitions,
150-
includeMD5: md5}
149+
handlePool := internal.NewFileHandlePool(maxNumOfHandlesPerFile, internal.Read, false)
150+
151+
return &MultiFilePipeline{filesInfo: fileInfos,
152+
totalNumberOfBlocks: totalNumberOfBlocks,
153+
blockSize: blockSize,
154+
totalSize: totalSize,
155+
numOfPartitions: numOfPartitions,
156+
includeMD5: md5,
157+
handlePool: handlePool,
158+
}
151159
}
152160

153161
//ExecuteReader implements ExecuteReader from the pipeline.SourcePipeline Interface.
154162
//For each file the reader will maintain an open handle from which data will be read.
155163
// This implementation uses partitions (group of parts that can be read sequentially).
156164
func (f *MultiFilePipeline) ExecuteReader(partitionsQ chan pipeline.PartsPartition, partsQ chan pipeline.Part, readPartsQ chan pipeline.Part, id int, wg *sync.WaitGroup) {
157-
fileHandles := make(map[string]*os.File, len(f.FilesInfo))
158165
var err error
159166
var partition pipeline.PartsPartition
160167

@@ -167,25 +174,30 @@ func (f *MultiFilePipeline) ExecuteReader(partitionsQ chan pipeline.PartsPartiti
167174
partition, ok = <-partitionsQ
168175

169176
if !ok {
170-
for _, fh := range fileHandles {
171-
fh.Close()
177+
for _, finfo := range f.filesInfo {
178+
err = f.handlePool.CloseHandles(finfo.SourceURI)
179+
if err != nil {
180+
log.Fatal(fmt.Errorf("error closing handle for file:%v. Error:%v", finfo.SourceURI, err))
181+
}
172182
}
173183
return // no more blocks of file data to be read
174184
}
175185

186+
//check if the partition is empty, as this may happen with small files
187+
if len(partition.Parts) == 0 {
188+
continue
189+
}
190+
176191
var part pipeline.Part
177192
for pip := 0; pip < len(partition.Parts); pip++ {
178193
part = partition.Parts[pip]
179194

180-
fileURI = f.FilesInfo[part.SourceURI].SourceURI
181-
fileHandle = fileHandles[fileURI]
195+
fileURI = f.filesInfo[part.SourceURI].SourceURI
182196

183197
if fileHandle == nil {
184-
if fileHandle, err = os.Open(fileURI); err != nil {
185-
fmt.Printf("Error while opening the file %v \n", err)
186-
log.Fatal(err)
198+
if fileHandle, err = f.handlePool.GetHandle(fileURI); err != nil {
199+
log.Fatal(fmt.Errorf(" error while opening the file.\nError:%v ", err))
187200
}
188-
fileHandles[fileURI] = fileHandle
189201
}
190202

191203
if pip == 0 {
@@ -195,8 +207,7 @@ func (f *MultiFilePipeline) ExecuteReader(partitionsQ chan pipeline.PartsPartiti
195207
part.GetBuffer()
196208

197209
if _, err = fileHandle.Read(part.Data); err != nil && err != io.EOF {
198-
fmt.Printf("Error while reading the file %v \n", err)
199-
log.Fatal(err)
210+
log.Fatal(fmt.Errorf(" error while reading the file.\nError:%v ", err))
200211
}
201212

202213
util.PrintfIfDebug("ExecuteReader -> blockid:%v toread:%v name:%v read:%v ", part.BlockID, part.BytesToRead, part.TargetAlias, bytesRead)
@@ -207,16 +218,23 @@ func (f *MultiFilePipeline) ExecuteReader(partitionsQ chan pipeline.PartsPartiti
207218

208219
readPartsQ <- part
209220
}
221+
222+
//return handle
223+
if err = f.handlePool.ReturnHandle(fileURI, fileHandle); err != nil {
224+
log.Fatal(fmt.Errorf(" error returning the handle to the pool.\nPath: %v error:%v ", fileURI, err))
225+
}
226+
227+
fileHandle = nil
210228
}
211229
}
212230

213231
//GetSourcesInfo implements GetSourcesInfo from the pipeline.SourcePipeline Interface.
214232
//Returns an array of SourceInfo with the name, alias and size of the files to be transferred.
215233
func (f *MultiFilePipeline) GetSourcesInfo() []pipeline.SourceInfo {
216234

217-
sources := make([]pipeline.SourceInfo, len(f.FilesInfo))
235+
sources := make([]pipeline.SourceInfo, len(f.filesInfo))
218236
var i = 0
219-
for _, file := range f.FilesInfo {
237+
for _, file := range f.filesInfo {
220238
sources[i] = pipeline.SourceInfo{SourceName: file.SourceURI, TargetAlias: file.TargetAlias, Size: uint64((*file.FileStats).Size())}
221239
i++
222240
}
@@ -255,18 +273,18 @@ func createPartsFromSource(size uint64, sourceNumOfBlocks int, blockSize uint64,
255273
// this implementation uses partitions to group parts into a set that can be read sequentially.
256274
// This is to avoid Windows memory pressure when calling SetFilePointer numerous times on the same handle
257275
func (f *MultiFilePipeline) ConstructBlockInfoQueue(blockSize uint64) (partitionsQ chan pipeline.PartsPartition, partsQ chan pipeline.Part, numOfBlocks int, size uint64) {
258-
numOfBlocks = f.TotalNumberOfBlocks
259-
size = f.TotalSize
260-
allPartitions := make([][]pipeline.PartsPartition, len(f.FilesInfo))
276+
numOfBlocks = f.totalNumberOfBlocks
277+
size = f.totalSize
278+
allPartitions := make([][]pipeline.PartsPartition, len(f.filesInfo))
261279
//size of the queue is equal to the number of partitions times the number of files to transfer.
262280
//a lower value will block as this method is called before readers start
263-
partitionsQ = make(chan pipeline.PartsPartition, f.NumOfPartitions*len(f.FilesInfo))
281+
partitionsQ = make(chan pipeline.PartsPartition, f.numOfPartitions*len(f.filesInfo))
264282
partsQ = nil
265283
bufferQ := pipeline.NewBytesBufferChan(uint64(blockSize))
266284
pindex := 0
267285
maxpartitionNumber := 0
268-
for _, source := range f.FilesInfo {
269-
partitions := pipeline.ConstructPartsPartition(f.NumOfPartitions, (*source.FileStats).Size(), int64(blockSize), source.SourceURI, source.TargetAlias, bufferQ)
286+
for _, source := range f.filesInfo {
287+
partitions := pipeline.ConstructPartsPartition(f.numOfPartitions, (*source.FileStats).Size(), int64(blockSize), source.SourceURI, source.TargetAlias, bufferQ)
270288
allPartitions[pindex] = partitions
271289
if len(partitions) > maxpartitionNumber {
272290
maxpartitionNumber = len(partitions)

0 commit comments

Comments
 (0)