Skip to content

Commit e406eb1

Browse files
committed
Support for files over 2GB on Windows
## Problem

`File::Stat` fails with a `RuntimeError` when handling files >= 2GB on Windows. The stat operation crashes when attempting to get file statistics for large files, making it impossible to work with files of 2GB or more.

## Cause

On Windows, the `stat()` function uses 32-bit integers for file sizes by default, so sizes >= 2GB (2^31 bytes) cannot be represented. The regular `stat()` function and `struct stat` are limited to 32-bit file sizes on Windows, while Unix systems typically use 64-bit sizes by default.

ref: https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/stat-functions?view=msvc-170#time-type-and-file-length-type-variations-of-_stat

## Solution

Use the Windows 64-bit stat functions and structures:

- Replace `stat()` with `_stat64()` for file statistics
- Replace `fstat()` with `_fstat64()` for file descriptor statistics
- Replace `struct stat` with `struct _stat64` for data structures
- Define macros (`STAT`, `LSTAT`, `FSTAT`, `STAT_STRUCT`) that use the 64-bit variants on Windows while keeping the original functions on Unix

This enables proper handling of files >= 2GB on Windows platforms while maintaining compatibility with other operating systems.
1 parent bb773da commit e406eb1

File tree

3 files changed

+53
-29
lines changed

3 files changed

+53
-29
lines changed

mrbgem.rake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ MRuby::Gem::Specification.new('mruby-file-stat') do |spec|
22
spec.license = 'MIT'
33
spec.author = 'ksss <co000ri@gmail.com>'
44
spec.add_dependency('mruby-time')
5+
spec.add_test_dependency('mruby-io')
56

67
env = {
78
'CC' => "#{build.cc.command} #{build.cc.flags.join(' ')}",

src/file-stat.c

Lines changed: 38 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,20 @@
101101
# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
102102
#endif
103103

104-
#define STAT(p,s) stat(p,s)
105-
#ifdef HAVE_LSTAT
106-
# define LSTAT(p,s) lstat(p,s)
104+
#if defined(_WIN32) || defined(_WIN64)
105+
# define STAT(p,s) _stat64(p,s)
106+
# define FSTAT(fd,s) _fstat64(fd,s)
107+
# define LSTAT(p,s) _stat64(p,s)
108+
# define STAT_STRUCT struct _stat64
107109
#else
108-
# define LSTAT(p,s) stat(p,s)
110+
# define STAT(p,s) stat(p,s)
111+
# define FSTAT(fd,s) fstat(fd,s)
112+
# ifdef HAVE_LSTAT
113+
# define LSTAT(p,s) lstat(p,s)
114+
# else
115+
# define LSTAT(p,s) stat(p,s)
116+
# endif
117+
# define STAT_STRUCT struct stat
109118
#endif
110119
#define MRB_MAX_GROUPS (65536)
111120

@@ -145,18 +154,18 @@ getegid(void)
145154

146155
struct mrb_data_type mrb_stat_type = { "File::Stat", mrb_free };
147156

148-
static struct stat *
157+
static STAT_STRUCT *
149158
mrb_stat_alloc(mrb_state *mrb)
150159
{
151-
return (struct stat *)mrb_malloc(mrb, sizeof(struct stat));
160+
return (STAT_STRUCT *)mrb_malloc(mrb, sizeof(STAT_STRUCT));
152161
}
153162

154163
static mrb_value
155164
file_s_lstat(mrb_state *mrb, mrb_value klass)
156165
{
157166
struct RClass *file_class;
158167
struct RClass *stat_class;
159-
struct stat st, *ptr;
168+
STAT_STRUCT st, *ptr;
160169
mrb_value fname, tmp;
161170
char *path;
162171

@@ -190,7 +199,7 @@ file_s_lstat(mrb_state *mrb, mrb_value klass)
190199
static mrb_value
191200
stat_initialize(mrb_state *mrb, mrb_value self)
192201
{
193-
struct stat st, *ptr;
202+
STAT_STRUCT st, *ptr;
194203
mrb_value fname, tmp;
195204
char *path;
196205

@@ -213,7 +222,7 @@ stat_initialize(mrb_state *mrb, mrb_value self)
213222
}
214223
}
215224

216-
ptr = (struct stat *)DATA_PTR(self);
225+
ptr = (STAT_STRUCT *)DATA_PTR(self);
217226
if (ptr) {
218227
mrb_free(mrb, ptr);
219228
}
@@ -246,19 +255,19 @@ stat_initialize_copy(mrb_state *mrb, mrb_value copy)
246255
}
247256

248257
if (DATA_PTR(orig)) {
249-
DATA_PTR(copy) = mrb_malloc(mrb, sizeof(struct stat));
258+
DATA_PTR(copy) = mrb_malloc(mrb, sizeof(STAT_STRUCT));
250259
DATA_TYPE(copy) = &mrb_stat_type;
251-
*(struct stat *)DATA_PTR(copy) = *(struct stat *)DATA_PTR(orig);
260+
*(STAT_STRUCT *)DATA_PTR(copy) = *(STAT_STRUCT *)DATA_PTR(orig);
252261
}
253262
return copy;
254263
}
255264

256-
static struct stat *
265+
static STAT_STRUCT *
257266
get_stat(mrb_state *mrb, mrb_value self)
258267
{
259-
struct stat *st;
268+
STAT_STRUCT *st;
260269

261-
st = (struct stat *)mrb_data_get_ptr(mrb, self, &mrb_stat_type);
270+
st = (STAT_STRUCT *)mrb_data_get_ptr(mrb, self, &mrb_stat_type);
262271
if (!st) mrb_raise(mrb, E_TYPE_ERROR, "uninitialized File::Stat");
263272
return st;
264273
}
@@ -283,7 +292,7 @@ io_stat(mrb_state *mrb, mrb_value self)
283292
{
284293
struct RClass *file_class;
285294
struct RClass *stat_class;
286-
struct stat st, *ptr;
295+
STAT_STRUCT st, *ptr;
287296
mrb_value fileno;
288297

289298
if (mrb_respond_to(mrb, self, mrb_intern_lit(mrb, "fileno"))) {
@@ -293,7 +302,7 @@ io_stat(mrb_state *mrb, mrb_value self)
293302
mrb_raise(mrb, E_NOTIMP_ERROR, "`fileno' is not implemented");
294303
}
295304

296-
if (fstat(mrb_fixnum(fileno), &st) == -1) {
305+
if (FSTAT(mrb_fixnum(fileno), &st) == -1) {
297306
mrb_sys_fail(mrb, "fstat");
298307
}
299308

@@ -394,7 +403,7 @@ time_at_with_sec_nsec(mrb_state *mrb, time_t sec, long nsec)
394403
}
395404

396405
static struct timespec
397-
stat_atimespec(const struct stat *st)
406+
stat_atimespec(const STAT_STRUCT *st)
398407
{
399408
struct timespec ts;
400409
ts.tv_sec = st->st_atime;
@@ -418,7 +427,7 @@ stat_atime(mrb_state *mrb, mrb_value self)
418427
}
419428

420429
static struct timespec
421-
stat_mtimespec(const struct stat *st)
430+
stat_mtimespec(const STAT_STRUCT *st)
422431
{
423432
struct timespec ts;
424433
ts.tv_sec = st->st_mtime;
@@ -442,7 +451,7 @@ stat_mtime(mrb_state *mrb, mrb_value self)
442451
}
443452

444453
static struct timespec
445-
stat_ctimespec(const struct stat *st)
454+
stat_ctimespec(const STAT_STRUCT *st)
446455
{
447456
struct timespec ts;
448457
ts.tv_sec = st->st_ctime;
@@ -469,7 +478,7 @@ stat_ctime(mrb_state *mrb, mrb_value self)
469478
static mrb_value
470479
stat_birthtime(mrb_state *mrb, mrb_value self)
471480
{
472-
struct stat *st = get_stat(mrb, self);
481+
STAT_STRUCT *st = get_stat(mrb, self);
473482
const struct timespec *ts = &st->st_birthtimespec;
474483
return time_at_with_sec_nsec(mrb, ts->tv_sec, ts->tv_nsec);
475484
}
@@ -564,7 +573,7 @@ stat_grpowned_p(mrb_state *mrb, mrb_value self)
564573
static mrb_value
565574
stat_readable_p(mrb_state *mrb, mrb_value self)
566575
{
567-
struct stat *st;
576+
STAT_STRUCT *st;
568577
#ifdef USE_GETEUID
569578
if (geteuid() == 0)
570579
return mrb_true_value();
@@ -588,7 +597,7 @@ stat_readable_p(mrb_state *mrb, mrb_value self)
588597
static mrb_value
589598
stat_readable_real_p(mrb_state *mrb, mrb_value self)
590599
{
591-
struct stat *st;
600+
STAT_STRUCT *st;
592601

593602
#ifdef USE_GETEUID
594603
if (getuid() == 0)
@@ -613,7 +622,7 @@ static mrb_value
613622
stat_world_readable_p(mrb_state *mrb, mrb_value self)
614623
{
615624
#ifdef S_IROTH
616-
struct stat *st = get_stat(mrb, self);
625+
STAT_STRUCT *st = get_stat(mrb, self);
617626
if ((st->st_mode & (S_IROTH)) == S_IROTH) {
618627
return mrb_fixnum_value(st->st_mode & (S_IRUGO|S_IWUGO|S_IXUGO));
619628
}
@@ -629,7 +638,7 @@ stat_world_readable_p(mrb_state *mrb, mrb_value self)
629638
static mrb_value
630639
stat_writable_p(mrb_state *mrb, mrb_value self)
631640
{
632-
struct stat *st;
641+
STAT_STRUCT *st;
633642

634643
#ifdef USE_GETEUID
635644
if (geteuid() == 0)
@@ -654,7 +663,7 @@ stat_writable_p(mrb_state *mrb, mrb_value self)
654663
static mrb_value
655664
stat_writable_real_p(mrb_state *mrb, mrb_value self)
656665
{
657-
struct stat *st;
666+
STAT_STRUCT *st;
658667

659668
#ifdef USE_GETEUID
660669
if (getuid() == 0)
@@ -679,7 +688,7 @@ static mrb_value
679688
stat_world_writable_p(mrb_state *mrb, mrb_value self)
680689
{
681690
#ifdef S_IWOTH
682-
struct stat *st = get_stat(mrb, self);
691+
STAT_STRUCT *st = get_stat(mrb, self);
683692
if ((st->st_mode & (S_IWOTH)) == S_IWOTH) {
684693
return mrb_fixnum_value(st->st_mode & (S_IRUGO|S_IWUGO|S_IXUGO));
685694
}
@@ -694,7 +703,7 @@ stat_world_writable_p(mrb_state *mrb, mrb_value self)
694703
static mrb_value
695704
stat_executable_p(mrb_state *mrb, mrb_value self)
696705
{
697-
struct stat *st = get_stat(mrb, self);
706+
STAT_STRUCT *st = get_stat(mrb, self);
698707

699708
#ifdef USE_GETEUID
700709
if (geteuid() == 0) {
@@ -719,7 +728,7 @@ stat_executable_p(mrb_state *mrb, mrb_value self)
719728
static mrb_value
720729
stat_executable_real_p(mrb_state *mrb, mrb_value self)
721730
{
722-
struct stat *st = get_stat(mrb, self);
731+
STAT_STRUCT *st = get_stat(mrb, self);
723732

724733
#ifdef USE_GETEUID
725734
if (getuid() == 0)
@@ -836,7 +845,7 @@ stat_sticky_p(mrb_state *mrb, mrb_value self)
836845
static mrb_value
837846
stat_ftype(mrb_state *mrb, mrb_value self)
838847
{
839-
struct stat *st = get_stat(mrb, self);
848+
STAT_STRUCT *st = get_stat(mrb, self);
840849
const char *t;
841850

842851
if (S_ISREG(st->st_mode)) {

test/file-stat.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,20 @@ def chmod(mode, path)
301301
assert_true 0 < stat.size?
302302
end
303303

304+
assert 'File::Stat#size with large file (2GB)' do
305+
large_file = "test_large_file.tmp"
306+
target_size = 2**31 # 2GB
307+
begin
308+
File.open(large_file, 'wb') do |f|
309+
(2**19).times { f << "\0" * 4096 }
310+
end
311+
stat = File::Stat.new(large_file)
312+
assert_equal target_size, stat.size
313+
ensure
314+
File.delete(large_file) if File.exist?(large_file)
315+
end
316+
end
317+
304318
assert 'File::Stat#owned?' do
305319
stat = File::Stat.new('README.md')
306320
assert_true stat.owned?

0 commit comments

Comments
 (0)