11
11
12
12
namespace ssimu2 {
13
13
14
- double ssimu2process (const uint8_t *srcp1[3 ], const uint8_t *srcp2[3 ], float3* mem_d, float3* pinned, int stride, int width, int height, float * gaussiankernel, int maxshared, hipStream_t stream){
14
+ double ssimu2process (const uint8_t *srcp1[3 ], const uint8_t *srcp2[3 ], float3* pinned, int stride, int width, int height, float * gaussiankernel, int maxshared, hipStream_t stream){
15
15
16
16
int wh = width*height;
17
17
int whs[6 ] = {wh, ((height-1 )/2 + 1 )*((width-1 )/2 + 1 ), ((height-1 )/4 + 1 )*((width-1 )/4 + 1 ), ((height-1 )/8 + 1 )*((width-1 )/8 + 1 ), ((height-1 )/16 + 1 )*((width-1 )/16 + 1 ), ((height-1 )/32 + 1 )*((width-1 )/32 + 1 )};
@@ -22,6 +22,15 @@ double ssimu2process(const uint8_t *srcp1[3], const uint8_t *srcp2[3], float3* m
22
22
}
23
23
int totalscalesize = whs_integral[6 ];
24
24
25
+ // Large per-call VRAM allocation; it can fail when too many threads allocate at once, in which case an OutOfVRAM error is thrown (no retry is attempted here).
26
+ hipError_t erralloc;
27
+
28
+ float3* mem_d;
29
+ erralloc = hipMallocAsync (&mem_d, sizeof (float3)*totalscalesize*(2 + 6 ), stream); // 2 base image and 6 working buffers
30
+ if (erralloc != hipSuccess){
31
+ throw VshipError (OutOfVRAM, __FILE__, __LINE__);
32
+ }
33
+
25
34
float3* src1_d = mem_d; // length totalscalesize
26
35
float3* src2_d = mem_d + totalscalesize;
27
36
@@ -86,9 +95,13 @@ double ssimu2process(const uint8_t *srcp1[3], const uint8_t *srcp2[3], float3* m
86
95
try {
87
96
allscore_res = allscore_map (src1_d, src2_d, tempb1_d, tempb2_d, temps11_d, temps22_d, temps12_d, temp_d, pinned, width, height, maxshared, stream);
88
97
} catch (const VshipError& e){
98
+ hipFree (mem_d);
89
99
throw e;
90
100
}
91
101
102
+ // We are done with the GPU at this point; the synchronization has already been performed in allscore_map.
103
+ hipFreeAsync (mem_d, stream);
104
+
92
105
// step 6 : format the vector
93
106
std::vector<float > measure_vec (108 );
94
107
@@ -117,7 +130,6 @@ typedef struct Ssimulacra2Data{
117
130
VSNode *reference;
118
131
VSNode *distorted;
119
132
float3** PinnedMemPool;
120
- float3** VRAMMemPool;
121
133
int maxshared;
122
134
float * gaussiankernel_d;
123
135
hipStream_t* streams;
@@ -156,7 +168,7 @@ static const VSFrame *VS_CC ssimulacra2GetFrame(int n, int activationReason, voi
156
168
double val;
157
169
const int stream = d->streamSet ->pop ();
158
170
try {
159
- val = ssimu2process (srcp1, srcp2, d->VRAMMemPool [stream], d-> PinnedMemPool [stream], stride, width, height, d->gaussiankernel_d , d->maxshared , d->streams [stream]);
171
+ val = ssimu2process (srcp1, srcp2, d->PinnedMemPool [stream], stride, width, height, d->gaussiankernel_d , d->maxshared , d->streams [stream]);
160
172
} catch (const VshipError& e){
161
173
vsapi->setFilterError (e.getErrorMessage ().c_str (), frameCtx);
162
174
d->streamSet ->insert (stream);
@@ -187,11 +199,9 @@ static void VS_CC ssimulacra2Free(void *instanceData, VSCore *core, const VSAPI
187
199
vsapi->freeNode (d->distorted );
188
200
189
201
for (int i = 0 ; i < d->streamnum ; i++){
190
- hipFree (d->VRAMMemPool [i]);
191
202
hipHostFree (d->PinnedMemPool [i]);
192
203
hipStreamDestroy (d->streams [i]);
193
204
}
194
- free (d->VRAMMemPool );
195
205
free (d->PinnedMemPool );
196
206
hipFree (d->gaussiankernel_d );
197
207
free (d->streams );
@@ -279,15 +289,7 @@ static void VS_CC ssimulacra2Create(const VSMap *in, VSMap *out, void *userData,
279
289
d.streamSet = new threadSet (newstreamset);
280
290
281
291
const int pinnedsize = allocsizeScore (viref->width , viref->height , d.maxshared );
282
- int vramsize = 0 ; int w = viref->width ; int h = viref->height ;
283
- for (int scale = 0 ; scale <= 5 ; scale++){
284
- vramsize += w*h;
285
- w = (w-1 )/2 +1 ;
286
- h = (h-1 )/2 +1 ;
287
- }
288
-
289
292
d.PinnedMemPool = (float3**)malloc (sizeof (float3*)*d.streamnum );
290
- d.VRAMMemPool = (float3**)malloc (sizeof (float3*)*d.streamnum );
291
293
hipError_t erralloc;
292
294
for (int i = 0 ; i < d.streamnum ; i++){
293
295
erralloc = hipHostMalloc (d.PinnedMemPool +i, sizeof (float3)*pinnedsize);
@@ -297,13 +299,6 @@ static void VS_CC ssimulacra2Create(const VSMap *in, VSMap *out, void *userData,
297
299
vsapi->freeNode (d.distorted );
298
300
return ;
299
301
}
300
- erralloc = hipMalloc (d.VRAMMemPool +i, sizeof (float3)*8 *vramsize); // 8 planes of size totalscale
301
- if (erralloc != hipSuccess){
302
- vsapi->mapSetError (out, VshipError (OutOfVRAM, __FILE__, __LINE__).getErrorMessage ().c_str ());
303
- vsapi->freeNode (d.reference );
304
- vsapi->freeNode (d.distorted );
305
- return ;
306
- }
307
302
}
308
303
309
304
data = (Ssimulacra2Data *)malloc (sizeof (d));
0 commit comments