Skip to content

Commit 4cf9d32

Browse files
authored
Merge pull request #1945 from xianyi/develop
Merge changes from develop for 0.3.5 release
2 parents c0827a7 + 1c75b65 commit 4cf9d32

35 files changed

+812
-93
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ matrix:
117117
- <<: *test-alpine
118118
env:
119119
- TARGET_BOX=LINUX64_MUSL
120-
- BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=core2"
120+
- BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=CORE2"
121121

122122
- &test-cmake
123123
os: linux

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 4)
9+
set(OpenBLAS_PATCH_VERSION 5.dev)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
# Adhere to GNU filesystem layout conventions

Changelog.txt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,36 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.5
4+
31-Dec-2018
5+
6+
common:
7+
* loop unrolling in TRMV has been enabled again.
8+
* A domain error in the thread workload distribution for SYRK
9+
has been fixed.
10+
* gmake builds will now automatically add -fPIC to the build
11+
options if the platform requires it.
12+
* a pthreads key leakage (and associated crash on dlclose) in
13+
the USE_TLS codepath was fixed.
14+
* building of the utest cases on systems that do not provide
15+
an implementation of complex.h was fixed.
16+
17+
x86_64:
18+
* the SkylakeX code was changed to compile on OSX.
19+
* unwanted application of the -march=skylake-avx512 option
20+
to the common code parts of a DYNAMIC_ARCH build was fixed.
21+
* improved performance of SGEMM for small workloads on Skylake X.
22+
* performance of SGEMM and DGEMM was improved on Haswell.
23+
24+
ARMV8:
25+
* a configuration error that broke the CNRM2 kernel was corrected.
26+
* compilation of the GEMM kernels with CMAKE was fixed.
27+
* DYNAMIC_ARCH builds are now available with CMAKE as well.
28+
* using CMAKE for cross-compilation to the new cpu TARGETs
29+
introduced in 0.3.4 now works.
30+
31+
POWER:
32+
* a problem in cpu autodetection for AIX has been corrected.
33+
234
====================================================================
335
Version 0.3.4
436
02-Dec-2018

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ endif
131131
endif
132132

133133
libs :
134-
ifeq ($(CORE), UNKOWN)
134+
ifeq ($(CORE), UNKNOWN)
135135
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
136136
endif
137137
ifeq ($(NOFORTRAN), 1)

Makefile.arm64

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ FCOMMON_OPT += -march=armv8-a -mtune=thunderx
3030
endif
3131

3232
ifeq ($(CORE), FALKOR)
33-
CCOMMON_OPT += -march=armv8.1-a -mtune=falkor
34-
FCOMMON_OPT += -march=armv8.1-a -mtune=falkor
33+
CCOMMON_OPT += -march=armv8-a -mtune=falkor
34+
FCOMMON_OPT += -march=armv8-a -mtune=falkor
3535
endif
3636

3737
ifeq ($(CORE), THUNDERX2T99)

Makefile.rule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.4
6+
VERSION = 0.3.5.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

Makefile.system

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ endif
1212
# Catch conflicting usage of ARCH in some BSD environments
1313
ifeq ($(ARCH), amd64)
1414
override ARCH=x86_64
15+
else ifeq ($(ARCH), powerpc64)
16+
override ARCH=power
17+
else ifeq ($(ARCH), i386)
18+
override ARCH=x86
19+
else ifeq ($(ARCH), aarch64)
20+
override ARCH=arm64
1521
endif
1622

1723
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
@@ -1148,15 +1154,19 @@ ifndef FCOMMON_OPT
11481154
FCOMMON_OPT = -O2 -frecursive
11491155
endif
11501156

1151-
1152-
11531157
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
11541158
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
11551159

11561160
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
11571161
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
11581162
#MAKEOVERRIDES =
11591163

1164+
ifdef NEED_PIC
1165+
ifeq (,$(findstring PIC,$(FFLAGS)))
1166+
override FFLAGS += -fPIC
1167+
endif
1168+
endif
1169+
11601170
#For LAPACK Fortran codes.
11611171
#Disable -fopenmp for LAPACK Fortran codes on Windows.
11621172
ifdef OS_WINDOWS

Makefile.x86_64

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ endif
99
endif
1010

1111
ifeq ($(CORE), SKYLAKEX)
12+
ifndef DYNAMIC_ARCH
1213
ifndef NO_AVX512
1314
CCOMMON_OPT += -march=skylake-avx512
1415
FCOMMON_OPT += -march=skylake-avx512
@@ -22,6 +23,18 @@ endif
2223
endif
2324
endif
2425
endif
26+
endif
27+
28+
ifeq ($(CORE), HASWELL)
29+
ifndef DYNAMIC_ARCH
30+
ifndef NO_AVX2
31+
CCOMMON_OPT += -mavx2
32+
FCOMMON_OPT += -mavx2
33+
endif
34+
endif
35+
endif
36+
37+
2538

2639
ifeq ($(OSNAME), Interix)
2740
ARFLAGS = -m x64

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ Please see Changelog.txt to view the differences between OpenBLAS and GotoBLAS2
201201
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
202202
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
203203
Clang 3.0 will generate the wrong AVX binary code.
204-
* Please use GCC version 6 or LLVM version 6 and above to compile Skyalke AVX512 kernels.
204+
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake AVX512 kernels.
205205
* The number of CPUs/cores should be less than or equal to 256. On Linux `x86_64` (`amd64`),
206206
there is experimental support for up to 1024 CPUs/cores and 128 numa nodes if you build
207207
the library with `BIGNUMA=1`.

cmake/arch.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ endif ()
4444

4545

4646
if (DYNAMIC_ARCH)
47+
if (ARM64)
48+
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99)
49+
endif ()
50+
4751
if (X86)
4852
set(DYNAMIC_CORE KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO)
4953
endif ()

0 commit comments

Comments
 (0)