Skip to content

Commit ccad941

Browse files
authored
Merge pull request #4039 from klho/develop
Bug fix and improvements for [z]imatcopy interface.
2 parents c74ee11 + df1b1f6 commit ccad941

File tree

2 files changed

+64
-73
lines changed

2 files changed

+64
-73
lines changed

interface/imatcopy.c

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -120,71 +120,71 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
120120
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
121121
return;
122122
}
123+
123124
#ifdef NEW_IMATCOPY
124-
if ( *lda == *ldb && *rows == *cols) {
125+
if ( *lda == *ldb ) {
125126
if ( order == BlasColMajor )
126127
{
127128
if ( trans == BlasNoTrans )
128129
{
129130
IMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda );
131+
return;
130132
}
131-
else
133+
else if ( *rows == *cols )
132134
{
133135
IMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda );
136+
return;
134137
}
135138
}
136139
else
137140
{
138141
if ( trans == BlasNoTrans )
139142
{
140143
IMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda );
144+
return;
141145
}
142-
else
146+
else if ( *rows == *cols )
143147
{
144148
IMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda );
149+
return;
145150
}
146151
}
147-
return;
148152
}
149-
150153
#endif
151154

152-
if ( *lda > *ldb )
153-
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT);
154-
else
155-
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT);
155+
msize = (size_t)(*rows) * (*cols) * sizeof(FLOAT);
156156

157157
b = malloc(msize);
158158
if ( b == NULL )
159159
{
160-
printf("Memory alloc failed\n");
160+
printf("Memory alloc failed in imatcopy\n");
161161
exit(1);
162162
}
163163

164164
if ( order == BlasColMajor )
165165
{
166166
if ( trans == BlasNoTrans )
167167
{
168-
OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *ldb );
169-
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0 , b, *ldb, a, *ldb );
168+
OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *rows );
169+
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0 , b, *rows, a, *ldb );
170170
}
171171
else
172172
{
173-
OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *ldb );
174-
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, b, *ldb, a, *ldb );
173+
OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *cols );
174+
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, b, *cols, a, *ldb );
175175
}
176176
}
177177
else
178178
{
179179
if ( trans == BlasNoTrans )
180180
{
181-
OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *ldb );
182-
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, b, *ldb, a, *ldb );
181+
OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *cols );
182+
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, b, *cols, a, *ldb );
183183
}
184184
else
185185
{
186-
OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *ldb );
187-
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, b, *ldb, a, *ldb );
186+
OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *rows );
187+
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, b, *rows, a, *ldb );
188188
}
189189
}
190190

interface/zimatcopy.c

Lines changed: 46 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -125,95 +125,93 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
125125
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
126126
return;
127127
}
128+
128129
#ifdef NEW_IMATCOPY
129-
if (*lda == *ldb && *cols == *rows) {
130+
if (*lda == *ldb ) {
130131
if ( order == BlasColMajor )
131132
{
132133

133134
if ( trans == BlasNoTrans )
134135
{
135136
IMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda );
137+
return;
136138
}
137139
if ( trans == BlasConj )
138140
{
139141
IMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda );
142+
return;
140143
}
141-
if ( trans == BlasTrans )
144+
if ( trans == BlasTrans && *rows == *cols )
142145
{
143146
IMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda );
147+
return;
144148
}
145-
if ( trans == BlasTransConj )
149+
if ( trans == BlasTransConj && *rows == *cols )
146150
{
147151
IMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda );
152+
return;
148153
}
154+
149155
}
150156
else
151157
{
152158

153159
if ( trans == BlasNoTrans )
154160
{
155161
IMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda );
162+
return;
156163
}
157164
if ( trans == BlasConj )
158165
{
159166
IMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda );
167+
return;
160168
}
161-
if ( trans == BlasTrans )
169+
if ( trans == BlasTrans && *rows == *cols )
162170
{
163171
IMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda );
172+
return;
164173
}
165-
if ( trans == BlasTransConj )
174+
if ( trans == BlasTransConj && *rows == *cols )
166175
{
167176
IMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda );
177+
return;
168178
}
179+
169180
}
170-
return;
171181
}
172182
#endif
173183

174-
if ( *lda > *ldb )
175-
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2;
176-
else
177-
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2;
178-
179-
b = malloc(msize);
180-
if ( b == NULL )
181-
{
182-
printf("Memory alloc failed in zimatcopy\n");
183-
exit(1);
184-
}
184+
msize = (size_t)(*rows) * (*cols) * sizeof(FLOAT) * 2;
185185

186+
b = malloc(msize);
187+
if ( b == NULL )
188+
{
189+
printf("Memory alloc failed in zimatcopy\n");
190+
exit(1);
191+
}
186192

187193
if ( order == BlasColMajor )
188194
{
189195

190196
if ( trans == BlasNoTrans )
191197
{
192-
OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
193-
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
194-
free(b);
195-
return;
198+
OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
199+
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
196200
}
197-
if ( trans == BlasConj )
201+
else if ( trans == BlasConj )
198202
{
199-
OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
200-
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
201-
free(b);
202-
return;
203+
OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
204+
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
203205
}
204-
if ( trans == BlasTrans )
206+
else if ( trans == BlasTrans )
205207
{
206-
OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
207-
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
208-
free(b);
209-
return;
208+
OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
209+
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
210210
}
211-
if ( trans == BlasTransConj )
211+
else if ( trans == BlasTransConj )
212212
{
213-
OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
214-
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
215-
free(b);
216-
return;
213+
OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
214+
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
217215
}
218216

219217
}
@@ -222,34 +220,27 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
222220

223221
if ( trans == BlasNoTrans )
224222
{
225-
OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
226-
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
227-
free(b);
228-
return;
223+
OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
224+
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
229225
}
230-
if ( trans == BlasConj )
226+
else if ( trans == BlasConj )
231227
{
232-
OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
233-
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
234-
free(b);
235-
return;
228+
OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
229+
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
236230
}
237-
if ( trans == BlasTrans )
231+
else if ( trans == BlasTrans )
238232
{
239-
OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
240-
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
241-
free(b);
242-
return;
233+
OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
234+
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
243235
}
244-
if ( trans == BlasTransConj )
236+
else if ( trans == BlasTransConj )
245237
{
246-
OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
247-
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
248-
free(b);
249-
return;
238+
OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
239+
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
250240
}
251241

252242
}
243+
253244
free(b);
254245
return;
255246

0 commit comments

Comments
 (0)