Skip to content

Commit 140d14d

Browse files
iamrz1malept
andauthored
fix: avoid race condition across multiple running processes when deleting stale cache (#423)
Co-authored-by: Mark Lee <mark.lee@outreach.io>
1 parent 40a6de3 commit 140d14d

File tree

3 files changed

+152
-31
lines changed

3 files changed

+152
-31
lines changed

internal/modules/module.go

Lines changed: 84 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/go-git/go-git/v5/plumbing"
2727
githttp "github.com/go-git/go-git/v5/plumbing/transport/http"
2828
"github.com/go-git/go-git/v5/storage/memory"
29+
"github.com/gofrs/flock"
2930
"github.com/pkg/errors"
3031
"github.com/sirupsen/logrus"
3132
"gopkg.in/yaml.v3"
@@ -136,6 +137,16 @@ func (m *Module) RegisterExtensions(ctx context.Context, log logrus.FieldLogger,
136137
// Manifest downloads the module if not already downloaded and returns a parsed
137138
// configuration.TemplateRepositoryManifest of this module.
138139
func (m *Module) Manifest(ctx context.Context) (configuration.TemplateRepositoryManifest, error) {
140+
lockDir := FSLockDir(m.PathSlug())
141+
lock, err := exclusiveLockDirectory(lockDir)
142+
if err != nil {
143+
return configuration.TemplateRepositoryManifest{},
144+
errors.Wrapf(err, "failed to lock module cache directory %q", lockDir)
145+
}
146+
147+
//nolint:errcheck // Why: Unlock error can be safely ignored here
148+
defer lock.Unlock()
149+
139150
fs, err := m.GetFS(ctx)
140151
if err != nil {
141152
return configuration.TemplateRepositoryManifest{}, errors.Wrap(err, "failed to download fs")
@@ -182,21 +193,20 @@ func (m *Module) GetFS(ctx context.Context) (billy.Filesystem, error) {
182193
return m.fs, nil
183194
}
184195

185-
cacheDir := filepath.Join(StencilCacheDir(), "module_fs", ModuleCacheDirectory(m.URI, m.Version))
186-
logrus.Debug("cacheDir", cacheDir)
196+
cacheDir := m.FSCacheDir()
187197

188-
if useModuleCache(cacheDir) {
198+
if useCache(cacheDir) {
189199
m.fs = osfs.New(cacheDir)
190200
return m.fs, nil
191201
}
192202

193203
err = os.RemoveAll(cacheDir)
194204
if err != nil {
195-
return nil, errors.Wrapf(err, "failed to remove stale cache %q", cacheDir)
205+
return nil, errors.Wrapf(err, "failed to remove stale module cache directory %q", cacheDir)
196206
}
197207

198208
if err = os.MkdirAll(cacheDir, 0o755); err != nil {
199-
return nil, errors.Wrap(err, "failed to create cache directory")
209+
return nil, errors.Wrapf(err, "failed to create module cache directory %q", cacheDir)
200210
}
201211

202212
m.fs = osfs.New(cacheDir)
@@ -237,26 +247,88 @@ func (m *Module) GetFS(ctx context.Context) (billy.Filesystem, error) {
237247
return m.fs, nil
238248
}
239249

240-
// useModuleCache determines if the specified path should be used as a module cache.
241-
func useModuleCache(path string) bool {
250+
func (m *Module) PathSlug() string {
251+
return PathSlug(m.URI, m.Version)
252+
}
253+
254+
func (m *Module) FSCacheDir() string {
255+
return FSCacheDir(m.PathSlug())
256+
}
257+
258+
// exclusiveLockDirectory creates a new flock lock for the specified directory.
259+
func exclusiveLockDirectory(dir string) (*flock.Flock, error) {
260+
lock := flock.New(filepath.Join(dir, "ex_dir.lock"))
261+
for {
262+
locked, err := lock.TryLock()
263+
if err != nil {
264+
if !errors.Is(err, os.ErrNotExist) {
265+
return nil, err
266+
}
267+
268+
if err = os.MkdirAll(dir, 0o755); err != nil {
269+
return nil, errors.Wrapf(err, "failed to create directory %q", dir)
270+
}
271+
continue
272+
}
273+
274+
if locked {
275+
break
276+
}
277+
}
278+
279+
return lock, nil
280+
}
281+
282+
// useCache determines if the specified path should be used as a module cache.
283+
func useCache(path string) bool {
242284
info, err := os.Stat(path)
243285
if err != nil || time.Since(info.ModTime()) > ModuleCacheTTL {
244286
return false
245287
}
246288

289+
if files, err := os.ReadDir(path); err != nil || len(files) == 0 {
290+
return false
291+
}
292+
247293
return true
248294
}
249295

250-
// ModuleCacheDirectory generates a directory name for the module from the given URI and optional branch.
251-
func ModuleCacheDirectory(uri, branch string) string {
252-
if branch == "" {
253-
branch = "v0.0.0"
296+
// PathSlug returns a unique identifier for a module
297+
// using the given URI and versioning constraints.
298+
func PathSlug(uri, version string) string {
299+
if version == "" {
300+
version = "v0.0.0"
254301
}
255302

256-
return regexp.MustCompile(`[^a-zA-Z0-9@]+`).ReplaceAllString(uri+"@"+branch, "_")
303+
return regexp.MustCompile(`[^a-zA-Z0-9<>=.\[\]\-@]+`).ReplaceAllString(uri+"@"+version, "_")
257304
}
258305

259306
// StencilCacheDir returns the directory where stencil caches its data.
260307
func StencilCacheDir() string {
261308
return filepath.Join(os.TempDir(), "stencil_cache")
262309
}
310+
311+
// CacheDir returns the cache directory for a module based on its type and ID.
312+
func CacheDir(cacheType, moduleID string) string {
313+
return filepath.Join(StencilCacheDir(), cacheType, moduleID)
314+
}
315+
316+
// FSCacheDir returns the cache directory for a module based on its ID.
317+
func FSCacheDir(moduleID string) string {
318+
return CacheDir("module_fs", moduleID)
319+
}
320+
321+
// VersionCacheDir returns the version cache directory for a module based on its ID.
322+
func VersionCacheDir(moduleID string) string {
323+
return CacheDir("module_version", moduleID)
324+
}
325+
326+
// VersionLockDir returns the lock directory for a module version based on its ID.
327+
func VersionLockDir(moduleID string) string {
328+
return CacheDir("module_version_lock", moduleID)
329+
}
330+
331+
// FSLockDir returns the lock directory for a module filesystem based on its ID.
332+
func FSLockDir(moduleID string) string {
333+
return CacheDir("module_fs_lock", moduleID)
334+
}

internal/modules/module_test.go

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ package modules_test
77
import (
88
"context"
99
"os"
10-
"path/filepath"
1110
"strings"
1211
"testing"
1312
"time"
@@ -388,7 +387,7 @@ func TestGetFS_CacheUsage(t *testing.T) {
388387
Version: version,
389388
}
390389

391-
cacheDir := filepath.Join(modules.StencilCacheDir(), "module_fs", modules.ModuleCacheDirectory(repoURL, version))
390+
cacheDir := modules.FSCacheDir(modules.PathSlug(repoURL, version))
392391
err := os.RemoveAll(cacheDir)
393392
assert.NilError(t, err, "failed to remove cache directory")
394393

@@ -412,6 +411,46 @@ func TestGetFS_CacheUsage(t *testing.T) {
412411
)
413412
}
414413

414+
func TestCanRecreateCacheAfterTimeout(t *testing.T) {
415+
ctx := context.Background()
416+
opts := &modules.ModuleResolveOptions{
417+
ConcurrentResolvers: 5,
418+
ServiceManifest: &configuration.ServiceManifest{
419+
Name: "test-cache-timeout",
420+
Modules: []*configuration.TemplateRepository{
421+
{
422+
Name: "github.com/getoutreach/stencil-base",
423+
Channel: "stable",
424+
},
425+
{
426+
Name: "github.com/getoutreach/stencil-base",
427+
Version: ">=0.14.0",
428+
},
429+
},
430+
},
431+
Log: newLogger(),
432+
}
433+
434+
mods, err := modules.GetModulesForService(ctx, opts)
435+
assert.NilError(t, err, "failed to call GetModulesForService()")
436+
437+
cacheExpireDuration := modules.ModuleCacheTTL + time.Minute
438+
for _, m := range mods {
439+
err = os.Chtimes(m.FSCacheDir(), time.Now().Add(-cacheExpireDuration), time.Now().Add(-cacheExpireDuration))
440+
assert.NilError(t, err, "failed to change cache directory times")
441+
}
442+
443+
mods, err = modules.GetModulesForService(ctx, opts)
444+
assert.NilError(t, err, "failed to call GetModulesForService()")
445+
446+
for _, m := range mods {
447+
info, err := os.Stat(m.FSCacheDir())
448+
assert.NilError(t, err, "failed to stat cache directory")
449+
cachedTime := time.Since(info.ModTime())
450+
assert.Assert(t, cachedTime < time.Minute, "expected new cache: cached %v ago", cachedTime)
451+
}
452+
}
453+
415454
func assertFSExists(t *testing.T, fs billy.Filesystem) {
416455
t.Helper()
417456

internal/modules/worklist.go

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"github.com/getoutreach/gobox/pkg/cfg"
1919
"github.com/getoutreach/gobox/pkg/cli/updater/resolver"
2020
"github.com/getoutreach/stencil/pkg/configuration"
21-
"github.com/gofrs/flock"
2221
"github.com/pkg/errors"
2322
"github.com/sirupsen/logrus"
2423
)
@@ -212,11 +211,21 @@ func (list *workList) getLatestModuleForConstraints(ctx context.Context, item *w
212211
return module.version, nil
213212
}
214213

215-
cacheFile := filepath.Join(StencilCacheDir(), "module_version",
216-
ModuleCacheDirectory(item.uri, item.spec.conf.Channel), "version.json")
214+
versionID := fmt.Sprintf("ch_%s_cons_%v", channel, constraints)
215+
moduleID := PathSlug(item.uri, versionID)
216+
lockDir := VersionLockDir(moduleID)
217+
lock, err := exclusiveLockDirectory(lockDir)
218+
if err != nil {
219+
return nil, errors.Wrapf(err, "failed to lock module version cache dir %q", lockDir)
220+
}
217221

218-
if useModuleCache(cacheFile) {
219-
return getCachedModuleVersion(cacheFile)
222+
//nolint:errcheck // Why: Unlock error can be safely ignored here
223+
defer lock.Unlock()
224+
225+
cacheDir := VersionCacheDir(moduleID)
226+
cacheFile := filepath.Join(cacheDir, "version.json")
227+
if useCache(cacheDir) {
228+
return getCachedVersion(cacheFile)
220229
}
221230

222231
v, err := resolver.Resolve(ctx, token, &resolver.Criteria{
@@ -244,16 +253,16 @@ func (list *workList) getLatestModuleForConstraints(ctx context.Context, item *w
244253
return nil, errors.Wrapf(err, "failed to resolve module '%s' with constraints\n%s", m.conf.Name, errorString)
245254
}
246255

247-
err = setModuleVersionCache(cacheFile, v)
256+
err = setCachedVersion(cacheFile, v)
248257
if err != nil {
249258
return nil, err
250259
}
251260

252261
return v, nil
253262
}
254263

255-
// getCachedModuleVersion returns the module version from the cache file.
256-
func getCachedModuleVersion(cacheFile string) (*resolver.Version, error) {
264+
// getCachedVersion returns the module version from the cache file.
265+
func getCachedVersion(cacheFile string) (*resolver.Version, error) {
257266
data, err := os.ReadFile(cacheFile)
258267
if err != nil {
259268
return nil, errors.Wrapf(err, "failed to read cached version from file %s", cacheFile)
@@ -268,22 +277,23 @@ func getCachedModuleVersion(cacheFile string) (*resolver.Version, error) {
268277
return &cached, nil
269278
}
270279

271-
// setModuleVersionCache writes the version for a module to a local cache file.
272-
func setModuleVersionCache(cacheFile string, v *resolver.Version) error {
280+
// setCachedVersion writes the version for a module to a local cache file.
281+
func setCachedVersion(cacheFile string, v *resolver.Version) error {
282+
cacheDir := filepath.Dir(cacheFile)
273283
data, err := json.Marshal(v)
274284
if err != nil {
275285
return errors.Wrapf(err, "failed to serialize module version to cache file %s", cacheFile)
276286
}
277287

278-
lockFile := cacheFile + ".lock"
279-
fl := flock.New(lockFile)
280-
locked, err := fl.TryLock()
281-
if err != nil || !locked {
282-
return nil
288+
err = os.RemoveAll(cacheDir)
289+
if err != nil {
290+
return errors.Wrapf(err, "failed to remove module version cache %s", cacheDir)
283291
}
284292

285-
//nolint:errcheck // Why: Unlock error can be safely ignored here
286-
defer fl.Unlock()
293+
err = os.MkdirAll(cacheDir, 0o755)
294+
if err != nil {
295+
return errors.Wrapf(err, "failed to create directory for module version cache %s", cacheDir)
296+
}
287297

288298
err = os.WriteFile(cacheFile, data, 0o600)
289299
if err != nil {

0 commit comments

Comments
 (0)