Skip to content

Commit 2af6002

Browse files
committed
Revert "ssimu2 memPool now"
This reverts commit a04698b.
1 parent 8138e40 commit 2af6002

File tree

1 file changed

+15
-20
lines changed

1 file changed

+15
-20
lines changed

src/ssimu2/main.hpp

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
namespace ssimu2{
1313

14-
double ssimu2process(const uint8_t *srcp1[3], const uint8_t *srcp2[3], float3* mem_d, float3* pinned, int stride, int width, int height, float* gaussiankernel, int maxshared, hipStream_t stream){
14+
double ssimu2process(const uint8_t *srcp1[3], const uint8_t *srcp2[3], float3* pinned, int stride, int width, int height, float* gaussiankernel, int maxshared, hipStream_t stream){
1515

1616
int wh = width*height;
1717
int whs[6] = {wh, ((height-1)/2 + 1)*((width-1)/2 + 1), ((height-1)/4 + 1)*((width-1)/4 + 1), ((height-1)/8 + 1)*((width-1)/8 + 1), ((height-1)/16 + 1)*((width-1)/16 + 1), ((height-1)/32 + 1)*((width-1)/32 + 1)};
@@ -22,6 +22,15 @@ double ssimu2process(const uint8_t *srcp1[3], const uint8_t *srcp2[3], float3* m
2222
}
2323
int totalscalesize = whs_integral[6];
2424

25+
//big memory allocation; we will try it multiple times if it fails, to recover when too many threads are used
26+
hipError_t erralloc;
27+
28+
float3* mem_d;
29+
erralloc = hipMallocAsync(&mem_d, sizeof(float3)*totalscalesize*(2 + 6), stream); //2 base image and 6 working buffers
30+
if (erralloc != hipSuccess){
31+
throw VshipError(OutOfVRAM, __FILE__, __LINE__);
32+
}
33+
2534
float3* src1_d = mem_d; //length totalscalesize
2635
float3* src2_d = mem_d + totalscalesize;
2736

@@ -86,9 +95,13 @@ double ssimu2process(const uint8_t *srcp1[3], const uint8_t *srcp2[3], float3* m
8695
try{
8796
allscore_res = allscore_map(src1_d, src2_d, tempb1_d, tempb2_d, temps11_d, temps22_d, temps12_d, temp_d, pinned, width, height, maxshared, stream);
8897
} catch (const VshipError& e){
98+
hipFree(mem_d);
8999
throw e;
90100
}
91101

102+
//we are done with the GPU at this point, and the synchronization has already been done in allscore_map
103+
hipFreeAsync(mem_d, stream);
104+
92105
//step 6 : format the vector
93106
std::vector<float> measure_vec(108);
94107

@@ -117,7 +130,6 @@ typedef struct Ssimulacra2Data{
117130
VSNode *reference;
118131
VSNode *distorted;
119132
float3** PinnedMemPool;
120-
float3** VRAMMemPool;
121133
int maxshared;
122134
float* gaussiankernel_d;
123135
hipStream_t* streams;
@@ -156,7 +168,7 @@ static const VSFrame *VS_CC ssimulacra2GetFrame(int n, int activationReason, voi
156168
double val;
157169
const int stream = d->streamSet->pop();
158170
try{
159-
val = ssimu2process(srcp1, srcp2, d->VRAMMemPool[stream], d->PinnedMemPool[stream], stride, width, height, d->gaussiankernel_d, d->maxshared, d->streams[stream]);
171+
val = ssimu2process(srcp1, srcp2, d->PinnedMemPool[stream], stride, width, height, d->gaussiankernel_d, d->maxshared, d->streams[stream]);
160172
} catch (const VshipError& e){
161173
vsapi->setFilterError(e.getErrorMessage().c_str(), frameCtx);
162174
d->streamSet->insert(stream);
@@ -187,11 +199,9 @@ static void VS_CC ssimulacra2Free(void *instanceData, VSCore *core, const VSAPI
187199
vsapi->freeNode(d->distorted);
188200

189201
for (int i = 0; i < d->streamnum; i++){
190-
hipFree(d->VRAMMemPool[i]);
191202
hipHostFree(d->PinnedMemPool[i]);
192203
hipStreamDestroy(d->streams[i]);
193204
}
194-
free(d->VRAMMemPool);
195205
free(d->PinnedMemPool);
196206
hipFree(d->gaussiankernel_d);
197207
free(d->streams);
@@ -279,15 +289,7 @@ static void VS_CC ssimulacra2Create(const VSMap *in, VSMap *out, void *userData,
279289
d.streamSet = new threadSet(newstreamset);
280290

281291
const int pinnedsize = allocsizeScore(viref->width, viref->height, d.maxshared);
282-
int vramsize = 0; int w = viref->width; int h = viref->height;
283-
for (int scale = 0; scale <= 5; scale++){
284-
vramsize += w*h;
285-
w = (w-1)/2+1;
286-
h = (h-1)/2+1;
287-
}
288-
289292
d.PinnedMemPool = (float3**)malloc(sizeof(float3*)*d.streamnum);
290-
d.VRAMMemPool = (float3**)malloc(sizeof(float3*)*d.streamnum);
291293
hipError_t erralloc;
292294
for (int i = 0; i < d.streamnum; i++){
293295
erralloc = hipHostMalloc(d.PinnedMemPool+i, sizeof(float3)*pinnedsize);
@@ -297,13 +299,6 @@ static void VS_CC ssimulacra2Create(const VSMap *in, VSMap *out, void *userData,
297299
vsapi->freeNode(d.distorted);
298300
return;
299301
}
300-
erralloc = hipMalloc(d.VRAMMemPool+i, sizeof(float3)*8*vramsize); //8 planes of size totalscale
301-
if (erralloc != hipSuccess){
302-
vsapi->mapSetError(out, VshipError(OutOfVRAM, __FILE__, __LINE__).getErrorMessage().c_str());
303-
vsapi->freeNode(d.reference);
304-
vsapi->freeNode(d.distorted);
305-
return;
306-
}
307302
}
308303

309304
data = (Ssimulacra2Data *)malloc(sizeof(d));

0 commit comments

Comments
 (0)