Skip to content

Commit 32659d6

Browse files
committed
Revert the butter and ssimu2 memory pools, both to avoid the massive VRAM leakage produced by Python VapourSynth and to reduce the impact on VRAM consumption inside a GPU filterchain
1 parent 2af6002 commit 32659d6

File tree

1 file changed

+14
-15
lines changed

1 file changed

+14
-15
lines changed

src/butter/main.hpp

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,19 @@ Plane_d getdiffmap(Plane_d* src1_d, Plane_d* src2_d, float* mem_d, int width, in
117117
return diffmap;
118118
}
119119

120-
std::tuple<float, float, float> butterprocess(const uint8_t *dstp, int dststride, const uint8_t *srcp1[3], const uint8_t *srcp2[3], float* mem_d, float* pinned, GaussianHandle& gaussianHandle, int stride, int width, int height, float intensity_multiplier, int maxshared, hipStream_t stream){
120+
std::tuple<float, float, float> butterprocess(const uint8_t *dstp, int dststride, const uint8_t *srcp1[3], const uint8_t *srcp2[3], float* pinned, GaussianHandle& gaussianHandle, int stride, int width, int height, float intensity_multiplier, int maxshared, hipStream_t stream){
121+
int wh = width*height;
122+
const int totalscalesize = wh;
121123

122124
//big memory allocation, we will try it multiple time if failed to save when too much threads are used
125+
hipError_t erralloc;
123126

127+
const int totalplane = 31;
128+
float* mem_d;
129+
erralloc = hipMallocAsync(&mem_d, sizeof(float)*totalscalesize*(totalplane), stream); //2 base image and 6 working buffers
130+
if (erralloc != hipSuccess){
131+
throw VshipError(OutOfVRAM, __FILE__, __LINE__);
132+
}
124133
//initial color planes
125134
Plane_d src1_d[3] = {Plane_d(mem_d, width, height, stream), Plane_d(mem_d+width*height, width, height, stream), Plane_d(mem_d+2*width*height, width, height, stream)};
126135
Plane_d src2_d[3] = {Plane_d(mem_d+3*width*height, width, height, stream), Plane_d(mem_d+4*width*height, width, height, stream), Plane_d(mem_d+5*width*height, width, height, stream)};
@@ -179,6 +188,8 @@ std::tuple<float, float, float> butterprocess(const uint8_t *dstp, int dststride
179188
throw e;
180189
}
181190

191+
hipFreeAsync(mem_d, stream);
192+
182193
return finalres;
183194
}
184195

@@ -187,7 +198,6 @@ typedef struct ButterData{
187198
VSNode *distorted;
188199
float intensity_multiplier;
189200
float** PinnedMemPool;
190-
float** VRAMMemPool;
191201
GaussianHandle gaussianHandle;
192202
int maxshared;
193203
int diffmap;
@@ -236,9 +246,9 @@ static const VSFrame *VS_CC butterGetFrame(int n, int activationReason, void *in
236246
const int stream = d->streamSet->pop();
237247
try{
238248
if (d->diffmap){
239-
val = butterprocess(vsapi->getWritePtr(dst, 0), vsapi->getStride(dst, 0), srcp1, srcp2, d->VRAMMemPool[stream], d->PinnedMemPool[stream], d->gaussianHandle, stride, width, height, d->intensity_multiplier, d->maxshared, d->streams[stream]);
249+
val = butterprocess(vsapi->getWritePtr(dst, 0), vsapi->getStride(dst, 0), srcp1, srcp2, d->PinnedMemPool[stream], d->gaussianHandle, stride, width, height, d->intensity_multiplier, d->maxshared, d->streams[stream]);
240250
} else {
241-
val = butterprocess(NULL, 0, srcp1, srcp2, d->VRAMMemPool[stream], d->PinnedMemPool[stream], d->gaussianHandle, stride, width, height, d->intensity_multiplier, d->maxshared, d->streams[stream]);
251+
val = butterprocess(NULL, 0, srcp1, srcp2, d->PinnedMemPool[stream], d->gaussianHandle, stride, width, height, d->intensity_multiplier, d->maxshared, d->streams[stream]);
242252
}
243253
} catch (const VshipError& e){
244254
vsapi->setFilterError(e.getErrorMessage().c_str(), frameCtx);
@@ -272,11 +282,9 @@ static void VS_CC butterFree(void *instanceData, VSCore *core, const VSAPI *vsap
272282
vsapi->freeNode(d->distorted);
273283

274284
for (int i = 0; i < d->streamnum; i++){
275-
hipFree(d->VRAMMemPool[i]);
276285
hipHostFree(d->PinnedMemPool[i]);
277286
hipStreamDestroy(d->streams[i]);
278287
}
279-
free(d->VRAMMemPool);
280288
free(d->PinnedMemPool);
281289
free(d->streams);
282290
d->gaussianHandle.destroy();
@@ -385,9 +393,7 @@ static void VS_CC butterCreate(const VSMap *in, VSMap *out, void *userData, VSCo
385393
d.streamSet = new threadSet(newstreamset);
386394

387395
const int pinnedsize = allocsizeScore(viref->width, viref->height);
388-
const int vramsize = viref->width*viref->height;
389396
d.PinnedMemPool = (float**)malloc(sizeof(float*)*d.streamnum);
390-
d.VRAMMemPool = (float**)malloc(sizeof(float*)*d.streamnum);
391397
hipError_t erralloc;
392398
for (int i = 0; i < d.streamnum; i++){
393399
erralloc = hipHostMalloc(d.PinnedMemPool+i, sizeof(float)*pinnedsize);
@@ -397,13 +403,6 @@ static void VS_CC butterCreate(const VSMap *in, VSMap *out, void *userData, VSCo
397403
vsapi->freeNode(d.distorted);
398404
return;
399405
}
400-
erralloc = hipMalloc(d.VRAMMemPool+i, sizeof(float)*31*vramsize);
401-
if (erralloc != hipSuccess){
402-
vsapi->mapSetError(out, VshipError(OutOfVRAM, __FILE__, __LINE__).getErrorMessage().c_str());
403-
vsapi->freeNode(d.reference);
404-
vsapi->freeNode(d.distorted);
405-
return;
406-
}
407406
}
408407

409408
data = (ButterData *)malloc(sizeof(d));

0 commit comments

Comments
 (0)