Skip to content

Add AMDGCN option similar to cuda-compute-capabilities #4860

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions easybuild/framework/easyconfig/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
'toolchainopts': [None, 'Extra options for compilers', TOOLCHAIN],

# BUILD easyconfig parameters
'amdgcn_compute_capabilities': [[], "List of AMDGCN compute capabilities to build with (if supported)", BUILD],
'banned_linked_shared_libs': [[], "List of shared libraries (names, file names, or paths) which are not allowed "
"to be linked in any installed binary/library", BUILD],
'bitbucket_account': ['%(namelower)s', "Bitbucket account name to be used to resolve template values in source"
Expand Down
19 changes: 19 additions & 0 deletions easybuild/framework/easyconfig/easyconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -1987,6 +1987,25 @@ def get_cuda_cc_template_value(self, key):
error_msg = "%s is not a template value based on --cuda-compute-capabilities/cuda_compute_capabilities"
raise EasyBuildError(error_msg, key)

def get_amdgcn_cc_template_value(self, key):
    """
    Get template value based on --amdgcn-compute-capabilities EasyBuild configuration option
    and amdgcn_compute_capabilities easyconfig parameter.

    :param key: name of the template to look up; must start with 'amdgcn_' and be listed
                in TEMPLATE_NAMES_DYNAMIC
    :return: the value stored for that key in self.template_values
    :raises EasyBuildError: with a user-friendly message if the key is not an AMDGCN template name,
                            or if the key is valid but no value is defined for it
    """
    # guard clause: reject anything that is not a known amdgcn_* dynamic template
    if not (key.startswith('amdgcn_') and key in TEMPLATE_NAMES_DYNAMIC):
        error_msg = "%s is not a template value based on --amdgcn-compute-capabilities/amdgcn_compute_capabilities"
        raise EasyBuildError(error_msg, key)

    try:
        return self.template_values[key]
    except KeyError:
        # known template name, but neither source of AMDGCN compute capabilities provided a value
        error_msg = "Template value '%s' is not defined!\n"
        error_msg += "Make sure that either the --amdgcn-compute-capabilities EasyBuild configuration "
        error_msg += "option is set, or that the amdgcn_compute_capabilities easyconfig parameter is defined."
        raise EasyBuildError(error_msg, key)


def det_installversion(version, toolchain_name, toolchain_version, prefix, suffix):
"""Deprecated 'det_installversion' function, to determine exact install version, based on supplied parameters."""
Expand Down
13 changes: 13 additions & 0 deletions easybuild/framework/easyconfig/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@
# template values which are only generated dynamically
TEMPLATE_NAMES_DYNAMIC = {
'arch': 'System architecture (e.g. x86_64, aarch64, ppc64le, ...)',
'amdgcn_compute_capabilities': "Comma-separated list of AMDGCN compute capabilities, as specified via "
"--amdgcn-compute-capabilities configuration option or "
"via amdgcn_compute_capabilities easyconfig parameter",
'amdgcn_cc_space_sep': "Space-separated list of AMDGCN compute capabilities",
'amdgcn_cc_semicolon_sep': "Semicolon-separated list of AMDGCN compute capabilities",
'cuda_compute_capabilities': "Comma-separated list of CUDA compute capabilities, as specified via "
"--cuda-compute-capabilities configuration option or "
"via cuda_compute_capabilities easyconfig parameter",
Expand Down Expand Up @@ -478,6 +483,14 @@ def template_constant_dict(config, ignore=None, toolchain=None):
template_values['cuda_sm_comma_sep'] = ','.join(sm_values)
template_values['cuda_sm_space_sep'] = ' '.join(sm_values)

# step 7. AMDGCN compute capabilities
# Use the commandline / easybuild config option if given, else use the value from the EC (as a default)
amdgcn_cc = build_option('amdgcn_compute_capabilities') or config.get('amdgcn_compute_capabilities')
if amdgcn_cc:
template_values['amdgcn_compute_capabilities'] = ','.join(amdgcn_cc)
template_values['amdgcn_cc_space_sep'] = ' '.join(amdgcn_cc)
template_values['amdgcn_cc_semicolon_sep'] = ';'.join(amdgcn_cc)

unknown_names = []
for key in template_values:
if not (key in common_template_names or key in TEMPLATE_NAMES_DYNAMIC):
Expand Down
1 change: 1 addition & 0 deletions easybuild/tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
BUILD_OPTIONS_CMDLINE = {
None: [
'aggregate_regtest',
'amdgcn_compute_capabilities',
'backup_modules',
'banned_linked_shared_libs',
'checksum_priority',
Expand Down
20 changes: 19 additions & 1 deletion easybuild/tools/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,9 @@ def override_options(self):
None, 'store_true', False),
'allow-use-as-root-and-accept-consequences': ("Allow using of EasyBuild as root (NOT RECOMMENDED!)",
None, 'store_true', False),
'amdgcn-compute-capabilities': ("List of AMDGCN compute capabilities to use when building GPU software; "
"values should be specified as gfx[xyz], as defined by the LLVM targets, "
"for example: gfx1101,gfx90a,gfx1030", 'strlist', 'extend', None),
'backup-modules': ("Back up an existing module file, if any. "
"Auto-enabled when using --module-only or --skip",
None, 'store_true', None), # default None to allow auto-enabling if not disabled
Expand Down Expand Up @@ -950,6 +953,21 @@ def validate(self):
error_msg = "Incorrect values in --cuda-compute-capabilities (expected pattern: '%s'): %s"
error_msgs.append(error_msg % (cuda_cc_regex.pattern, ', '.join(faulty_cuda_ccs)))

# Support accelerators using the gfx[...] naming scheme.
# This applies to all AMD GPUs since Southern Islands (2013)
# For more information: https://llvm.org/docs/AMDGPUUsage.html#processors
if self.options.amdgcn_compute_capabilities:
    # General accelerator naming convention, e.g. gfx90a, gfx1101
    amdgcn_cc_regex = re.compile(r'gfx[0-9]+[a-z]?$')
    # Generic convention, e.g. gfx11-generic, gfx10-3-generic.
    # The optional minor digit is grouped together with its leading dash,
    # so malformed values like gfx10--generic are rejected.
    amdgcn_generic_regex = re.compile(r'gfx[0-9]+(-[0-9])?-generic$')
    faulty_amdgcn_ccs = [x for x in self.options.amdgcn_compute_capabilities
                         if not (amdgcn_cc_regex.match(x) or amdgcn_generic_regex.match(x))]
    if faulty_amdgcn_ccs:
        # a value is valid if it matches either pattern, so mention both in the error message
        expected_patterns = "%s' or '%s" % (amdgcn_cc_regex.pattern, amdgcn_generic_regex.pattern)
        error_msg = "Incorrect values in --amdgcn-compute-capabilities (expected pattern: '%s'): %s"
        error_msgs.append(error_msg % (expected_patterns, ', '.join(faulty_amdgcn_ccs)))

if error_msgs:
raise EasyBuildError(
"Found problems validating the options: %s", '\n'.join(error_msgs),
Expand Down Expand Up @@ -2064,7 +2082,7 @@ def set_tmpdir(tmpdir=None, raise_error=False):

# avoid having special characters like '[' and ']' in the tmpdir pathname,
# it is known to cause problems (e.g., with Python install tools, CUDA's nvcc, etc.);
# only common characteris like alphanumeric, '_', '-', '.' and '/' are retained; others are converted to 'X'
# only common characters like alphanumeric, '_', '-', '.' and '/' are retained; others are converted to 'X'
special_chars_regex = r'[^\w/.-]'
if re.search(special_chars_regex, current_tmpdir):
current_tmpdir = re.sub(special_chars_regex, 'X', current_tmpdir)
Expand Down
78 changes: 78 additions & 0 deletions test/framework/easyconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -4807,6 +4807,35 @@ def test_cuda_compute_capabilities(self):
self.assertEqual(ec['preinstallopts'], 'period="4.2 6.3" noperiod="42 63"')
self.assertEqual(ec['installopts'], '4.2,6.3')

def test_amdgcn_compute_capabilities(self):
    """Test templates derived from amdgcn_compute_capabilities easyconfig parameter and build option."""
    self.contents = textwrap.dedent("""
        easyblock = 'ConfigureMake'
        name = 'test'
        version = '0.2'
        homepage = 'https://example.com'
        description = 'test'
        toolchain = SYSTEM
        amdgcn_compute_capabilities = ['gfx90a', 'gfx1101', 'gfx11-generic', 'gfx10-3-generic']
        buildopts = ('comma="%(amdgcn_compute_capabilities)s" space="%(amdgcn_cc_space_sep)s" '
        'semi="%(amdgcn_cc_semicolon_sep)s"')
        installopts = '%(amdgcn_compute_capabilities)s'
    """)
    self.prep()

    # each scenario: (build options to re-initialise the configuration with, or None to leave it untouched,
    #                 expected resolved buildopts, expected resolved installopts)
    scenarios = [
        # only the easyconfig parameter is set
        (
            None,
            'comma="gfx90a,gfx1101,gfx11-generic,gfx10-3-generic" '
            'space="gfx90a gfx1101 gfx11-generic gfx10-3-generic" '
            'semi="gfx90a;gfx1101;gfx11-generic;gfx10-3-generic"',
            'gfx90a,gfx1101,gfx11-generic,gfx10-3-generic',
        ),
        # build options overwrite it
        (
            {'amdgcn_compute_capabilities': ['gfx90a', 'gfx1101']},
            'comma="gfx90a,gfx1101" '
            'space="gfx90a gfx1101" '
            'semi="gfx90a;gfx1101"',
            'gfx90a,gfx1101',
        ),
    ]

    for build_options, expected_buildopts, expected_installopts in scenarios:
        if build_options is not None:
            init_config(build_options=build_options)

        parsed_ec = EasyConfig(self.eb_file)
        self.assertEqual(parsed_ec['buildopts'], expected_buildopts)
        self.assertEqual(parsed_ec['installopts'], expected_installopts)

def test_det_copy_ec_specs(self):
"""Test det_copy_ec_specs function."""

Expand Down Expand Up @@ -5110,6 +5139,55 @@ def test_get_cuda_cc_template_value(self):
for key, expected in cuda_template_values.items():
self.assertEqual(ec.get_cuda_cc_template_value(key), expected)

def test_get_amgcn_cc_template_value(self):
    """
    Check get_amdgcn_cc_template_value for an unknown key, for undefined values, and for values
    provided via --amdgcn-compute-capabilities / amdgcn_compute_capabilities.
    """
    # NOTE(review): the method name reads 'amgcn' (missing 'd'); kept as-is so selecting the test by name
    # keeps working - consider renaming to test_get_amdgcn_cc_template_value.
    getter_name = 'get_amdgcn_cc_template_value'

    self.contents = '\n'.join([
        'easyblock = "ConfigureMake"',
        'name = "pi"',
        'version = "3.14"',
        'homepage = "http://example.com"',
        'description = "test easyconfig"',
        'toolchain = SYSTEM',
    ])
    self.prep()
    ec = EasyConfig(self.eb_file)

    # a key that is not an AMDGCN template at all
    unknown_key_pattern = ("foobar is not a template value based on "
                           "--amdgcn-compute-capabilities/amdgcn_compute_capabilities")
    self.assertErrorRegex(EasyBuildError, unknown_key_pattern, getattr(ec, getter_name), 'foobar')

    undefined_pattern = (r"Template value '%s' is not defined!\n"
                         r"Make sure that either the --amdgcn-compute-capabilities EasyBuild configuration "
                         "option is set, or that the amdgcn_compute_capabilities easyconfig parameter is defined.")
    expected_values = {
        'amdgcn_compute_capabilities': 'gfx90a,gfx1100,gfx10-3-generic',
        'amdgcn_cc_space_sep': 'gfx90a gfx1100 gfx10-3-generic',
        'amdgcn_cc_semicolon_sep': 'gfx90a;gfx1100;gfx10-3-generic',
    }

    def assert_all_undefined(parsed_ec):
        """Every AMDGCN template key must trigger the 'is not defined' error."""
        for name in expected_values:
            self.assertErrorRegex(EasyBuildError, undefined_pattern % name, getattr(parsed_ec, getter_name), name)

    def assert_all_defined(parsed_ec):
        """Every AMDGCN template key must resolve to its expected value."""
        for name, value in expected_values.items():
            self.assertEqual(getattr(parsed_ec, getter_name)(name), value)

    # nothing defined yet: neither build option nor easyconfig parameter
    assert_all_undefined(ec)

    # defined via build option
    update_build_option('amdgcn_compute_capabilities', ['gfx90a', 'gfx1100', 'gfx10-3-generic'])
    assert_all_defined(EasyConfig(self.eb_file))

    # build option reset again
    update_build_option('amdgcn_compute_capabilities', None)
    assert_all_undefined(EasyConfig(self.eb_file))

    # defined via easyconfig parameter
    self.contents += "\namdgcn_compute_capabilities = ['gfx90a', 'gfx1100', 'gfx10-3-generic']"
    self.prep()
    assert_all_defined(EasyConfig(self.eb_file))

def test_count_files(self):
"""Tests for EasyConfig.count_files method."""
test_ecs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'easyconfigs', 'test_ecs')
Expand Down