Skip to content

Commit f3f4f59

Browse files
authored
Merge pull request #205 from vin-huang/fix_asan_build
Auto-submit by Jenkins
2 parents af56d3e + 1afc06c commit f3f4f59

File tree

9 files changed

+164
-28
lines changed

9 files changed

+164
-28
lines changed

CMakeLists.txt

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ option(BUILD_CODE_COVERAGE "Build with code coverage enabled" OFF)
3333
option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF)
3434
# Find CUDA if the user wants a CUDA version.
3535
option(BUILD_CUDA "Look for CUDA and use that as a backend if found" OFF)
36+
option(Tensile_SEPARATE_ARCHITECTURES "Tensile to use GPU architecture specific files?" ON)
37+
option(Tensile_LAZY_LIBRARY_LOADING "Tensile to load kernels on demand?" ON)
3638

3739
# IF cuda backend disable clients tests.
3840
if(BUILD_CUDA)
@@ -241,6 +243,16 @@ set(hipsparselt_SOVERSION 0.2)
241243
# setup rocsparselt defines used for both the library and clients
242244
if( BUILD_WITH_TENSILE )
243245
list(APPEND TENSILE_DEFINES BUILD_WITH_TENSILE=1)
246+
if(Tensile_SEPARATE_ARCHITECTURES)
247+
list(APPEND TENSILE_DEFINES ROCSPARSELT_TENSILE_SEPARATE_ARCH=1)
248+
else()
249+
list(APPEND TENSILE_DEFINES ROCSPARSELT_TENSILE_SEPARATE_ARCH=0)
250+
endif()
251+
if(Tensile_LAZY_LIBRARY_LOADING)
252+
list(APPEND TENSILE_DEFINES ROCSPARSELT_TENSILE_LAZY_LOAD=1)
253+
else()
254+
list(APPEND TENSILE_DEFINES ROCSPARSELT_TENSILE_LAZY_LOAD=0)
255+
endif()
244256
else()
245257
list(APPEND TENSILE_DEFINES BUILD_WITH_TENSILE=0)
246258
endif()
@@ -349,7 +361,7 @@ set( HIPSPARSELTS_CONFIG_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL
349361

350362
rocm_create_package(
351363
NAME ${package_name}
352-
DESCRIPTION "Radeon Open Compute Structured Sparsity Matrix Multiplication marshalling library"
364+
DESCRIPTION "ROCm Structured Sparsity Matrix Multiplication marshalling library"
353365
MAINTAINER "hipSPARSELt Maintainer <hipsparselt-maintainer@amd.com>"
354366
LDCONFIG
355367
LDCONFIG_DIR ${HIPSPARSELTS_CONFIG_DIR}

clients/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE )
2929
set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." )
3030
endif()
3131

32-
project( hipsparselt-clients LANGUAGES CXX C )
32+
project( hipsparselt-clients LANGUAGES CXX C Fortran )
3333

3434

3535
if( SKIP_LIBRARY )

clients/benchmarks/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,10 @@ if (NOT WIN32)
7878
target_link_libraries( hipsparselt-bench PRIVATE lapack cblas )
7979
endif()
8080
list( APPEND COMMON_LINK_LIBS "-lm -lstdc++fs")
81-
if (NOT BUILD_FORTRAN_CLIENTS)
82-
list( APPEND COMMON_LINK_LIBS "-lgfortran -lflang -lflangrti") # for lapack
81+
if (CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
82+
list( APPEND COMMON_LINK_LIBS "-lgfortran") # for lapack
83+
else()
84+
list( APPEND COMMON_LINK_LIBS "-lflang -lflangrti") # for lapack
8385
endif()
8486
else()
8587
list( APPEND COMMON_LINK_LIBS "libomp")

clients/gtest/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,10 @@ endif()
8787

8888
list( APPEND COMMON_LINK_LIBS "-lm -lstdc++fs")
8989

90-
if (NOT BUILD_FORTRAN_CLIENTS)
91-
list( APPEND COMMON_LINK_LIBS "-lgfortran -lflang -lflangrti") # for lapack
90+
if (CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
91+
list( APPEND COMMON_LINK_LIBS "-lgfortran") # for lapack
92+
else()
93+
list( APPEND COMMON_LINK_LIBS "-lflang -lflangrti") # for lapack
9294
endif()
9395

9496
#if (NOT WIN32)

cmake/Dependencies.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,15 @@ find_package(ROCM 0.6 QUIET CONFIG PATHS ${CMAKE_PREFIX_PATH})
3535
if(NOT ROCM_FOUND)
3636
set(PROJECT_EXTERN_DIR ${CMAKE_CURRENT_BINARY_DIR}/extern)
3737
set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download")
38-
file(DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip
38+
file(DOWNLOAD https://github.com/ROCm/rocm-cmake/archive/${rocm_cmake_tag}.zip
3939
${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip STATUS status LOG log)
4040

4141
list(GET status 0 status_code)
4242
list(GET status 1 status_string)
4343

4444
if(NOT status_code EQUAL 0)
4545
message(FATAL_ERROR "error: downloading
46-
'https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip' failed
46+
'https://github.com/ROCm/rocm-cmake/archive/${rocm_cmake_tag}.zip' failed
4747
status_code: ${status_code}
4848
status_string: ${status_string}
4949
log: ${log}

docs/tutorials/install/linux.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ instructions.
4646
* `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
4747
* `git <https://git-scm.com/>`_
4848
* `CMake <https://cmake.org/>`_ 3.5 or later
49-
* `AMD ROCm <https://github.com/RadeonOpenCompute/ROCm>`_
49+
* `AMD ROCm <https://github.com/ROCm/ROCm>`_
5050
* (Optional, for clients) `GoogleTest <https://github.com/google/googletest>`_
5151

5252
Download hipSPARSELt

library/src/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,12 @@ if(NOT BUILD_CUDA)
5353
if(PACKAGE_TENSILE_LIBRARY)
5454
set(Tensile_Options ${Tensile_Options} GENERATE_PACKAGE)
5555
endif()
56-
56+
if(Tensile_SEPARATE_ARCHITECTURES)
57+
set(Tensile_Options ${Tensile_Options} SEPARATE_ARCHITECTURES)
58+
endif()
59+
if(Tensile_LAZY_LIBRARY_LOADING)
60+
set(Tensile_Options ${Tensile_Options} LAZY_LIBRARY_LOADING)
61+
endif()
5762
if(Tensile_BUILD_ID)
5863
set(Options ${Options} "--build-id=${Tensile_BUILD_ID}")
5964
endif()

library/src/hcc_detail/rocsparselt/src/tensile_host.cpp

Lines changed: 132 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include <Tensile/Contractions.hpp>
4242
#include <Tensile/EmbeddedLibrary.hpp>
4343
#include <Tensile/MasterSolutionLibrary.hpp>
44+
#include <Tensile/PlaceholderLibrary.hpp>
4445
#include <Tensile/Tensile.hpp>
4546
#include <Tensile/TensorDescriptor.hpp>
4647
#include <Tensile/Utils.hpp>
@@ -450,14 +451,94 @@ namespace
450451
return inputs;
451452
}
452453

454+
// Maps a HIP device to the TensileLite::LazyLoadingInit enumerator for its
// GPU architecture.
//
// deviceID: HIP device index, passed straight to hipGetDeviceProperties().
//
// Returns the enumerator whose name appears in the device's gcnArchName, or
// TensileLite::LazyLoadingInit::None when none of the names listed below is
// found in it.
//
// NOTE(review): the property query is wrapped in HIP_CHECK_EXC; presumably
// that macro throws on a HIP error -- confirm against its definition, which
// is not visible here.
TensileLite::LazyLoadingInit getLazyLoadingArch(int deviceID)
455+
{
456+
hipDeviceProp_t deviceProperties;
457+
HIP_CHECK_EXC(hipGetDeviceProperties(&deviceProperties, deviceID));
458+
// strip out xnack/ecc from name
// Keeps only the text before the first ':' of gcnArchName. When there is no
// ':' at all, find() yields npos and substr(0, npos) keeps the whole string.
459+
std::string deviceFullString(deviceProperties.gcnArchName);
460+
std::string deviceString = deviceFullString.substr(0, deviceFullString.find(":"));
461+
462+
// The chain below tests for each architecture name as a substring (find(),
// not equality), in the order written; the first name found decides the result.
if(deviceString.find("gfx803") != std::string::npos)
463+
{
464+
return TensileLite::LazyLoadingInit::gfx803;
465+
}
466+
else if(deviceString.find("gfx900") != std::string::npos)
467+
{
468+
return TensileLite::LazyLoadingInit::gfx900;
469+
}
470+
else if(deviceString.find("gfx906") != std::string::npos)
471+
{
472+
return TensileLite::LazyLoadingInit::gfx906;
473+
}
474+
else if(deviceString.find("gfx908") != std::string::npos)
475+
{
476+
return TensileLite::LazyLoadingInit::gfx908;
477+
}
478+
else if(deviceString.find("gfx90a") != std::string::npos)
479+
{
480+
return TensileLite::LazyLoadingInit::gfx90a;
481+
}
482+
else if(deviceString.find("gfx940") != std::string::npos)
483+
{
484+
return TensileLite::LazyLoadingInit::gfx940;
485+
}
486+
else if(deviceString.find("gfx941") != std::string::npos)
487+
{
488+
return TensileLite::LazyLoadingInit::gfx941;
489+
}
490+
else if(deviceString.find("gfx942") != std::string::npos)
491+
{
492+
return TensileLite::LazyLoadingInit::gfx942;
493+
}
494+
else if(deviceString.find("gfx1010") != std::string::npos)
495+
{
496+
return TensileLite::LazyLoadingInit::gfx1010;
497+
}
498+
else if(deviceString.find("gfx1011") != std::string::npos)
499+
{
500+
return TensileLite::LazyLoadingInit::gfx1011;
501+
}
502+
else if(deviceString.find("gfx1012") != std::string::npos)
503+
{
504+
return TensileLite::LazyLoadingInit::gfx1012;
505+
}
506+
else if(deviceString.find("gfx1030") != std::string::npos)
507+
{
508+
return TensileLite::LazyLoadingInit::gfx1030;
509+
}
510+
else if(deviceString.find("gfx1100") != std::string::npos)
511+
{
512+
return TensileLite::LazyLoadingInit::gfx1100;
513+
}
514+
else if(deviceString.find("gfx1101") != std::string::npos)
515+
{
516+
return TensileLite::LazyLoadingInit::gfx1101;
517+
}
518+
else if(deviceString.find("gfx1102") != std::string::npos)
519+
{
520+
return TensileLite::LazyLoadingInit::gfx1102;
521+
}
522+
else if(deviceString.find("gfx1200") != std::string::npos)
523+
{
524+
return TensileLite::LazyLoadingInit::gfx1200;
525+
}
526+
else if(deviceString.find("gfx1201") != std::string::npos)
527+
{
528+
return TensileLite::LazyLoadingInit::gfx1201;
529+
}
530+
// No listed architecture name was found in the device string.
return TensileLite::LazyLoadingInit::None;
531+
}
532+
453533
/**************************************************
454534
* The TensileHost struct interfaces with Tensile *
455535
**************************************************/
456536
class TensileHost
457537
{
458538
// The library object
459539
std::shared_ptr<TensileLite::MasterSolutionLibrary<TensileLite::ContractionProblemGemm>> m_library;
460-
std::shared_ptr<hipDeviceProp_t> m_deviceProp;
540+
std::unordered_set<TensileLite::LazyLoadingInit> m_deviceSet;
541+
std::unordered_map<std::string, std::shared_ptr<hipDeviceProp_t>> m_devicePropMap;
461542

462543
// The adapter object. mutable is used to allow adapters to be modified
463544
// even when they are stored in a const vector which is immutable in size
@@ -508,9 +589,9 @@ namespace
508589
return m_library;
509590
}
510591

511-
auto& get_device_property() const
592+
auto& get_device_property(const std::string& deviceName) const
512593
{
513-
return m_deviceProp;
594+
return m_devicePropMap.at(deviceName);
514595
}
515596

516597
auto& get_adapters() const
@@ -576,7 +657,7 @@ namespace
576657

577658
// only load modules for the current architecture
578659
auto dir = path + "/*" + processor + "*co";
579-
660+
#if ROCSPARSELT_TENSILE_LAZY_LOAD == 0
580661
bool no_match = false;
581662
#ifdef WIN32
582663
std::replace(dir.begin(), dir.end(), '/', '\\');
@@ -630,28 +711,59 @@ namespace
630711
<< std::endl;
631712
(void)once;
632713
}
633-
714+
#endif // ROCSPARSELT_TENSILE_LAZY_LOAD == 0
634715
// We initialize a local static variable with a lambda function call to avoid
635716
// race conditions when multiple threads with different device IDs try to
636717
// initialize library. This ensures that only one thread initializes library,
637718
// and other threads trying to initialize library wait for it to complete.
638719
static int once = [&] {
720+
// Determine library path
721+
std::string tensileLibPath;
722+
#if ROCSPARSELT_TENSILE_LAZY_LOAD
723+
#ifdef TENSILE_YAML
724+
tensileLibPath = path + "/TensileLibrary_lazy_" + processor + ".yaml";
725+
#else
726+
tensileLibPath = path + "/TensileLibrary_lazy_" + processor + ".dat";
727+
#endif
728+
#else
639729
#ifdef TENSILE_YAML
640-
path += "/TensileLibrary.yaml";
730+
tensileLibPath = path + "/TensileLibrary_" + processor + ".yaml";
641731
#else
642-
path += "/TensileLibrary.dat";
732+
tensileLibPath = path + "/TensileLibrary_" + processor + ".dat";
733+
#endif
643734
#endif
644-
if(!TestPath(path))
735+
if(!TestPath(tensileLibPath))
645736
{
646-
hipsparselt_cerr << "\nhipsparselt_error: Cannot read " << path << ": "
737+
hipsparselt_cerr << "\nhipsparselt_error: Cannot read " << tensileLibPath << ": "
647738
<< strerror(errno) << std::endl;
648739
//rocsparselt_abort();
649740
}
650741

651-
auto lib = TensileLite::LoadLibraryFile<TensileLite::ContractionProblemGemm>(path);
742+
// Get devices
743+
hipDeviceProp_t prop;
744+
int count;
745+
HIP_CHECK_EXC(hipGetDeviceCount(&count));
746+
for(int devId = 0; devId < count; devId++)
747+
{
748+
auto deviceArch = getLazyLoadingArch(devId);
749+
if(m_deviceSet.find(deviceArch) == m_deviceSet.end())
750+
{
751+
// populate the arch list for lazy loading
752+
m_deviceSet.insert(deviceArch);
753+
// populate device property map, used in finding solutions based on arch
754+
HIP_CHECK_EXC(hipGetDeviceProperties(&prop, devId));
755+
// strip out xnack/ecc from name
756+
std::string deviceFullString(prop.gcnArchName);
757+
std::string deviceString
758+
= deviceFullString.substr(0, deviceFullString.find(":"));
759+
m_devicePropMap[deviceString] = std::make_shared<hipDeviceProp_t>(prop);
760+
}
761+
}
762+
763+
auto lib = TensileLite::LoadLibraryFile<TensileLite::ContractionProblemGemm>(tensileLibPath);
652764
if(!lib)
653765
{
654-
hipsparselt_cerr << "\nhipsparselt_error: Could not load " << path << std::endl;
766+
hipsparselt_cerr << "\nhipsparselt_error: Could not load " << tensileLibPath << std::endl;
655767
return -1;
656768
}
657769
else
@@ -662,17 +774,15 @@ namespace
662774
return 0;
663775
}();
664776

777+
static_cast<void>(adapter.initializeLazyLoading(processor, path));
778+
779+
665780
if(!m_library && once != 0)
666781
{
667782
hipsparselt_cerr << "\nhipsparselt_error: Could not initialize Tensile library"
668783
<< std::endl;
669784
//rocsparselt_abort();
670785
}
671-
672-
hipDeviceProp_t prop;
673-
THROW_IF_HIP_ERROR(hipGetDeviceProperties(&prop, deviceId));
674-
675-
m_deviceProp = std::make_shared<hipDeviceProp_t>(prop);
676786
}
677787
};
678788

@@ -719,7 +829,7 @@ namespace
719829
if(library)
720830
*library = host.get_library();
721831
if(deviceProp)
722-
*deviceProp = host.get_device_property();
832+
*deviceProp = host.get_device_property(rocsparselt_internal_get_arch_name());
723833

724834
return *adapter;
725835
}
@@ -919,6 +1029,11 @@ rocsparselt_status getBestSolutions(const RocsparseltContractionProblem<Ti, To,
9191029
// auto &adapter =
9201030
get_library_and_adapter(&library, &deviceProp, prob.handle->device);
9211031

1032+
if(!library)
1033+
{
1034+
return rocsparselt_status_invalid_pointer;
1035+
}
1036+
9221037
hardware = TensileLite::hip::GetDevice(*deviceProp);
9231038
auto tensile_prob = ConstructTensileProblem(prob);
9241039
// auto handle = prob.handle;

tensilelite_tag.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
d6449b6f17868f0d5801651cc85e237c345f017b
1+
510dcac724743ff35ec8c60270bc08505eddcfd7

0 commit comments

Comments
 (0)