Skip to content

Commit a8b116a

Browse files
hjelmnawlauria
authored andcommitted
smsc/knem: Add shared-memory single-copy support for KNEM
This commit adds a knem component to the shared-memory single-copy framework. This module supports copy_to and copy_from with registration required on the remote peer. Signed-off-by: Nathan Hjelm <hjelmn@google.com> (cherry picked from commit 80707bd)
1 parent f6c2b34 commit a8b116a

File tree

8 files changed

+736
-0
lines changed

8 files changed

+736
-0
lines changed

opal/mca/smsc/knem/Makefile.am

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#
2+
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3+
# University Research and Technology
4+
# Corporation. All rights reserved.
5+
# Copyright (c) 2004-2009 The University of Tennessee and The University
6+
# of Tennessee Research Foundation. All rights
7+
# reserved.
8+
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
9+
# University of Stuttgart. All rights reserved.
10+
# Copyright (c) 2004-2005 The Regents of the University of California.
11+
# All rights reserved.
12+
# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved.
13+
# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
14+
# reserved.
15+
# Copyright (c) 2017 IBM Corporation. All rights reserved.
16+
# Copyright (c) 2020-2021 Google, LLC. All rights reserved.
17+
# $COPYRIGHT$
18+
#
19+
# Additional copyrights may follow
20+
#
21+
# $HEADER$
22+
#
23+
24+
EXTRA_DIST = post_configure.sh
25+
26+
AM_CPPFLAGS = $(smsc_knem_CPPFLAGS)
27+
28+
dist_opaldata_DATA = help-smsc-knem.txt
29+
30+
libmca_smsc_knem_la_sources = \
31+
smsc_knem_component.c \
32+
smsc_knem_module.c \
33+
smsc_knem_internal.h \
34+
smsc_knem.h
35+
36+
# Make the output library in this directory, and name it either
37+
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
38+
# (for static builds).
39+
40+
if MCA_BUILD_opal_smsc_knem_DSO
41+
component_noinst =
42+
component_install = mca_smsc_knem.la
43+
else
44+
component_noinst = libmca_smsc_knem.la
45+
component_install =
46+
endif
47+
48+
mcacomponentdir = $(opallibdir)
49+
mcacomponent_LTLIBRARIES = $(component_install)
50+
mca_smsc_knem_la_SOURCES = $(libmca_smsc_knem_la_sources)
51+
mca_smsc_knem_la_LDFLAGS = -module -avoid-version $(smsc_knem_LDFLAGS)
52+
mca_smsc_knem_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la \
53+
$(smsc_knem_LIBS)
54+
55+
noinst_LTLIBRARIES = $(component_noinst)
56+
libmca_smsc_knem_la_SOURCES = $(libmca_smsc_knem_la_sources)
57+
libmca_smsc_knem_la_LIBADD = $(smsc_knem_LIBS)
58+
libmca_smsc_knem_la_LDFLAGS = -module -avoid-version $(smsc_knem_LDFLAGS)

opal/mca/smsc/knem/configure.m4

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# -*- shell-script -*-
2+
#
3+
# Copyright (c) 2009 The University of Tennessee and The University
4+
# of Tennessee Research Foundation. All rights
5+
# reserved.
6+
# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
7+
# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
8+
# reserved.
9+
# Copyright (c) 2015 Research Organization for Information Science
10+
# and Technology (RIST). All rights reserved.
11+
# Copyright (c) 2021 Google, LLC. All rights reserved.
12+
# $COPYRIGHT$
13+
#
14+
# Additional copyrights may follow
15+
#
16+
# $HEADER$
17+
#
18+
19+
# MCA_opal_smsc_knem_CONFIG([action-if-can-compile],
20+
# [action-if-cant-compile])
#
# Registers opal/mca/smsc/knem/Makefile as a configure output file, invokes
# OPAL_CHECK_KNEM with smsc_knem as its first argument (passing both actions
# through unchanged), and exports smsc_knem_CFLAGS, smsc_knem_CPPFLAGS,
# smsc_knem_LDFLAGS and smsc_knem_LIBS as output variables via AC_SUBST.
21+
# ------------------------------------------------
22+
AC_DEFUN([MCA_opal_smsc_knem_CONFIG],[
23+
AC_CONFIG_FILES([opal/mca/smsc/knem/Makefile])
24+
25+
OPAL_CHECK_KNEM([smsc_knem], [$1], [$2])
26+
27+
AC_SUBST([smsc_knem_CFLAGS])
28+
AC_SUBST([smsc_knem_CPPFLAGS])
29+
AC_SUBST([smsc_knem_LDFLAGS])
30+
AC_SUBST([smsc_knem_LIBS])
31+
])dnl

opal/mca/smsc/knem/help-smsc-knem.txt

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
# -*- text -*-
2+
#
3+
# Copyright (c) 2004-2009 The University of Tennessee and The University
4+
# of Tennessee Research Foundation. All rights
5+
# reserved.
6+
# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved.
7+
# Copyright (c) 2012-2014 Los Alamos National Security, LLC.
8+
# All rights reserved.
9+
# Copyright (c) 2014 Research Organization for Information Science
10+
# and Technology (RIST). All rights reserved.
11+
# $COPYRIGHT$
12+
#
13+
# Additional copyrights may follow
14+
#
15+
# $HEADER$
16+
#
17+
# This is the US/English help file for Open MPI's shared memory support.
18+
#
19+
[sys call fail]
20+
A system call failed during sm shared memory BTL initialization
21+
that should not have. It is likely that your MPI job will now either
22+
abort or experience performance degradation.
23+
24+
System call: %s
25+
Error: %s (errno %d)
26+
#
27+
[no locality]
28+
WARNING: Missing locality information required for sm shared memory
29+
BTL initialization. Continuing without shared memory support.
30+
#
31+
[knem permission denied]
32+
WARNING: Open MPI failed to open the /dev/knem device due to a
33+
permissions problem. Please check with your system administrator to
34+
get the permissions fixed, or set the btl_sm_single_copy_mechanism
35+
MCA variable to none to silence this warning and run without knem
36+
support.
37+
38+
Local host: %s
39+
/dev/knem permissions: 0%o
40+
#
41+
[knem fail open]
42+
WARNING: Open MPI failed to open the /dev/knem device due to a local
43+
error. Please check with your system administrator to get the problem
44+
fixed, or set the btl_sm_single_copy_mechanism MCA variable to none
45+
to silence this warning and run without knem support.
46+
47+
The sm shared memory BTL will fall back on another single-copy
48+
mechanism if one is available. This may result in lower performance.
49+
50+
Local host: %s
51+
Errno: %d (%s)
52+
#
53+
[knem get ABI fail]
54+
WARNING: Open MPI failed to retrieve the ABI version from the
55+
/dev/knem device due to a local error. This usually indicates an
56+
error in your knem installation; please check with your system
57+
administrator, or set the btl_sm_single_copy_mechanism MCA variable
58+
to none to silence this warning and run without knem support.
59+
60+
The sm shared memory BTL will fall back on another single-copy
61+
mechanism if one is available. This may result in lower performance.
62+
63+
Local host: %s
64+
Errno: %d (%s)
65+
#
66+
[knem ABI mismatch]
67+
WARNING: Open MPI was compiled with support for one version of the
68+
knem kernel module, but it discovered a different version running in
69+
/dev/knem. Open MPI needs to be installed with support for the same
70+
version of knem as is in the running Linux kernel. Please check with
71+
your system administrator, or set the btl_sm_single_copy_mechanism
72+
MCA variable to none to silence this warning and run without knem
73+
support.
74+
75+
The sm shared memory BTL will fall back on another single-copy
76+
mechanism if one is available. This may result in lower performance.
77+
78+
Local host: %s
79+
Open MPI's knem version: 0x%x
80+
/dev/knem's version: 0x%x
81+
#
82+
[knem mmap fail]
83+
Open MPI failed to map support from the knem Linux kernel module; this
84+
shouldn't happen. Please check with your system administrator, or set
85+
the btl_sm_single_copy_mechanism MCA variable to none to silence
86+
this warning and run without knem support.
87+
88+
The sm shared memory BTL will fall back on another single-copy
89+
mechanism if one is available. This may result in lower performance.
90+
91+
Local host: %s
92+
System call: mmap()
93+
Errno: %d (%s)
94+
#
95+
[knem init error]
96+
Open MPI encountered an error during the knem initialization. Please
97+
check with your system administrator, or set the
98+
btl_sm_single_copy_mechanism MCA variable to none to silence this
99+
warning and run without knem support.
100+
101+
The sm shared memory BTL will fall back on another single-copy
102+
mechanism if one is available. This may result in lower performance.
103+
104+
Local host: %s
105+
System call: %s
106+
Errno: %d (%s)
107+
#
108+
[knem requested but not available]
109+
WARNING: Linux kernel knem support was requested via the
110+
btl_sm_single_copy_mechanism MCA parameter, but Knem support was either not
111+
compiled into this Open MPI installation, or Knem support was unable
112+
to be activated in this process.
113+
114+
The sm BTL will fall back on another single-copy mechanism if one
115+
is available. This may result in lower performance.
116+
117+
Local host: %s
118+
#
119+
[cma-permission-denied]
120+
WARNING: Linux kernel CMA support was requested via the
121+
btl_sm_single_copy_mechanism MCA variable, but CMA support is
122+
not available due to restrictive ptrace settings.
123+
124+
The sm shared memory BTL will fall back on another single-copy
125+
mechanism if one is available. This may result in lower performance.
126+
127+
Local host: %s
128+
#
129+
[cma-different-user-namespace-error]
130+
ERROR: Linux kernel CMA support was requested via the
131+
btl_sm_single_copy_mechanism MCA variable, but CMA support is
132+
not available due to different user namespaces.
133+
134+
Your MPI job will abort now. Please select another value for
135+
btl_sm_single_copy_mechanism.
136+
137+
Local host: %s
138+
#
139+
[cma-different-user-namespace-warning]
140+
WARNING: The default btl_sm_single_copy_mechanism CMA is
141+
not available due to different user namespaces.
142+
143+
The sm shared memory BTL will fall back on another single-copy
144+
mechanism if one is available. This may result in lower performance.
145+
146+
Local host: %s
147+
#
148+
[xpmem-make-failed]
149+
WARNING: Could not generate an xpmem segment id for this process'
150+
address space.
151+
152+
The sm shared memory BTL will fall back on another single-copy
153+
mechanism if one is available. This may result in lower performance.
154+
155+
Local host: %s
156+
Error code: %d (%s)

opal/mca/smsc/knem/post_configure.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
DIRECT_CALL_HEADER="opal/mca/smsc/knem/smsc_knem.h"

opal/mca/smsc/knem/smsc_knem.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2021 Google, Inc. All rights reserved.
4+
* $COPYRIGHT$
5+
*
6+
* Additional copyrights may follow
7+
*
8+
* $HEADER$
9+
*/
10+
11+
#ifndef OPAL_MCA_SMSC_KNEM_SMSC_KNEM_H
12+
#define OPAL_MCA_SMSC_KNEM_SMSC_KNEM_H
13+
14+
#include "opal_config.h"
15+
16+
#include "opal/mca/smsc/smsc.h"
17+
18+
/* Endpoint management for the knem single-copy component.
 * get_endpoint takes a peer process and yields an mca_smsc_endpoint_t
 * pointer; return_endpoint accepts such a pointer back.
 * NOTE(review): presumably every endpoint obtained from get_endpoint must be
 * handed to return_endpoint exactly once, and get_endpoint may return NULL on
 * failure -- confirm against smsc_knem_module.c. */
mca_smsc_endpoint_t *mca_smsc_knem_get_endpoint(opal_proc_t *peer_proc);
19+
void mca_smsc_knem_return_endpoint(mca_smsc_endpoint_t *endpoint);
20+
21+
/* Single-copy transfers between this process and the peer behind endpoint.
 * Both calls take a local address, a remote address, a byte count and an
 * opaque reg_data pointer, and return an int status.
 * NOTE(review): this component is described as requiring registration on the
 * remote peer, so reg_data is presumably the peer's registration for the
 * remote buffer (cf. mca_smsc_knem_register_region) -- confirm.
 * NOTE(review): copy_to presumably writes local -> remote and copy_from reads
 * remote -> local, returning OPAL_SUCCESS on success -- confirm against
 * smsc_knem_module.c. */
int mca_smsc_knem_copy_to(mca_smsc_endpoint_t *endpoint, void *local_address, void *remote_address,
22+
size_t size, void *reg_data);
23+
int mca_smsc_knem_copy_from(mca_smsc_endpoint_t *endpoint, void *local_address,
24+
void *remote_address, size_t size, void *reg_data);
25+
26+
/* Region registration. register_region takes a local address and a size and
 * returns an opaque pointer; deregister_region takes an opaque reg_data
 * pointer.
 * NOTE(review): presumably the pointer returned by register_region is the
 * reg_data later passed to deregister_region (and, via the peer, to
 * copy_to/copy_from), with NULL signalling failure -- confirm against
 * smsc_knem_module.c. */
void *mca_smsc_knem_register_region(void *local_address, size_t size);
27+
void mca_smsc_knem_deregister_region(void *reg_data);
28+
29+
/* unsupported interfaces defined to support MCA direct
 *
 * These peer-region mapping entry points are not supported by this
 * component; they are declared only for MCA direct-call builds.
 * NOTE(review): presumably the definitions are stubs that report failure
 * (e.g. return NULL) -- confirm against smsc_knem_module.c. */
void *mca_smsc_knem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags,
31+
void *remote_address, size_t size, void **local_mapping);
32+
void mca_smsc_knem_unmap_peer_region(void *ctx);
33+
34+
#endif /* OPAL_MCA_SMSC_KNEM_SMSC_KNEM_H */

0 commit comments

Comments
 (0)