@@ -125,95 +125,93 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
125
125
BLASFUNC (xerbla )(ERROR_NAME , & info , sizeof (ERROR_NAME ));
126
126
return ;
127
127
}
128
+
128
129
#ifdef NEW_IMATCOPY
129
- if (* lda == * ldb && * cols == * rows ) {
130
+ if (* lda == * ldb ) {
130
131
if ( order == BlasColMajor )
131
132
{
132
133
133
134
if ( trans == BlasNoTrans )
134
135
{
135
136
IMATCOPY_K_CN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
137
+ return ;
136
138
}
137
139
if ( trans == BlasConj )
138
140
{
139
141
IMATCOPY_K_CNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
142
+ return ;
140
143
}
141
- if ( trans == BlasTrans )
144
+ if ( trans == BlasTrans && * rows == * cols )
142
145
{
143
146
IMATCOPY_K_CT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
147
+ return ;
144
148
}
145
- if ( trans == BlasTransConj )
149
+ if ( trans == BlasTransConj && * rows == * cols )
146
150
{
147
151
IMATCOPY_K_CTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
152
+ return ;
148
153
}
154
+
149
155
}
150
156
else
151
157
{
152
158
153
159
if ( trans == BlasNoTrans )
154
160
{
155
161
IMATCOPY_K_RN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
162
+ return ;
156
163
}
157
164
if ( trans == BlasConj )
158
165
{
159
166
IMATCOPY_K_RNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
167
+ return ;
160
168
}
161
- if ( trans == BlasTrans )
169
+ if ( trans == BlasTrans && * rows == * cols )
162
170
{
163
171
IMATCOPY_K_RT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
172
+ return ;
164
173
}
165
- if ( trans == BlasTransConj )
174
+ if ( trans == BlasTransConj && * rows == * cols )
166
175
{
167
176
IMATCOPY_K_RTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
177
+ return ;
168
178
}
179
+
169
180
}
170
- return ;
171
181
}
172
182
#endif
173
183
174
- if ( * lda > * ldb )
175
- msize = (size_t )(* lda ) * (* ldb ) * sizeof (FLOAT ) * 2 ;
176
- else
177
- msize = (size_t )(* ldb ) * (* ldb ) * sizeof (FLOAT ) * 2 ;
178
-
179
- b = malloc (msize );
180
- if ( b == NULL )
181
- {
182
- printf ("Memory alloc failed in zimatcopy\n" );
183
- exit (1 );
184
- }
184
+ msize = (size_t )(* rows ) * (* cols ) * sizeof (FLOAT ) * 2 ;
185
185
186
+ b = malloc (msize );
187
+ if ( b == NULL )
188
+ {
189
+ printf ("Memory alloc failed in zimatcopy\n" );
190
+ exit (1 );
191
+ }
186
192
187
193
if ( order == BlasColMajor )
188
194
{
189
195
190
196
if ( trans == BlasNoTrans )
191
197
{
192
- OMATCOPY_K_CN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
193
- OMATCOPY_K_CN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
194
- free (b );
195
- return ;
198
+ OMATCOPY_K_CN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * rows );
199
+ OMATCOPY_K_CN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * rows , a , * ldb );
196
200
}
197
- if ( trans == BlasConj )
201
+ else if ( trans == BlasConj )
198
202
{
199
- OMATCOPY_K_CNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
200
- OMATCOPY_K_CN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
201
- free (b );
202
- return ;
203
+ OMATCOPY_K_CNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * rows );
204
+ OMATCOPY_K_CN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * rows , a , * ldb );
203
205
}
204
- if ( trans == BlasTrans )
206
+ else if ( trans == BlasTrans )
205
207
{
206
- OMATCOPY_K_CT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
207
- OMATCOPY_K_CN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
208
- free (b );
209
- return ;
208
+ OMATCOPY_K_CT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * cols );
209
+ OMATCOPY_K_CN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * cols , a , * ldb );
210
210
}
211
- if ( trans == BlasTransConj )
211
+ else if ( trans == BlasTransConj )
212
212
{
213
- OMATCOPY_K_CTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
214
- OMATCOPY_K_CN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
215
- free (b );
216
- return ;
213
+ OMATCOPY_K_CTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * cols );
214
+ OMATCOPY_K_CN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * cols , a , * ldb );
217
215
}
218
216
219
217
}
@@ -222,34 +220,27 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
222
220
223
221
if ( trans == BlasNoTrans )
224
222
{
225
- OMATCOPY_K_RN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
226
- OMATCOPY_K_RN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
227
- free (b );
228
- return ;
223
+ OMATCOPY_K_RN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * cols );
224
+ OMATCOPY_K_RN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * cols , a , * ldb );
229
225
}
230
- if ( trans == BlasConj )
226
+ else if ( trans == BlasConj )
231
227
{
232
- OMATCOPY_K_RNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
233
- OMATCOPY_K_RN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
234
- free (b );
235
- return ;
228
+ OMATCOPY_K_RNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * cols );
229
+ OMATCOPY_K_RN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * cols , a , * ldb );
236
230
}
237
- if ( trans == BlasTrans )
231
+ else if ( trans == BlasTrans )
238
232
{
239
- OMATCOPY_K_RT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
240
- OMATCOPY_K_RN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
241
- free (b );
242
- return ;
233
+ OMATCOPY_K_RT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * rows );
234
+ OMATCOPY_K_RN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * rows , a , * ldb );
243
235
}
244
- if ( trans == BlasTransConj )
236
+ else if ( trans == BlasTransConj )
245
237
{
246
- OMATCOPY_K_RTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
247
- OMATCOPY_K_RN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
248
- free (b );
249
- return ;
238
+ OMATCOPY_K_RTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * rows );
239
+ OMATCOPY_K_RN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * rows , a , * ldb );
250
240
}
251
241
252
242
}
243
+
253
244
free (b );
254
245
return ;
255
246
0 commit comments