From 0e7aaf33805624254dd344b4e11e351ff4e54975 Mon Sep 17 00:00:00 2001
From: Jan Andre Reuter
Date: Fri, 25 Apr 2025 19:59:07 +0200
Subject: [PATCH] Add AMDGCN compute capability option

Add the --amdgcn-compute-capabilities configuration option and the
amdgcn_compute_capabilities easyconfig parameter, together with the
templates %(amdgcn_compute_capabilities)s, %(amdgcn_cc_space_sep)s and
%(amdgcn_cc_semicolon_sep)s that are derived from them. The configuration
option takes precedence over the easyconfig parameter. Values passed via
the configuration option are validated against the gfx[...] and
gfx[...]-generic naming schemes.

Signed-off-by: Jan Andre Reuter
---
 easybuild/framework/easyconfig/default.py    |  1 +
 easybuild/framework/easyconfig/easyconfig.py | 19 +++++
 easybuild/framework/easyconfig/templates.py  | 13 ++++
 easybuild/tools/config.py                    |  1 +
 easybuild/tools/options.py                   | 20 ++++-
 test/framework/easyconfig.py                 | 78 ++++++++++++++++++++
 6 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/easybuild/framework/easyconfig/default.py b/easybuild/framework/easyconfig/default.py
index 66ed417b61..b5945d623b 100644
--- a/easybuild/framework/easyconfig/default.py
+++ b/easybuild/framework/easyconfig/default.py
@@ -84,6 +84,7 @@
     'toolchainopts': [None, 'Extra options for compilers', TOOLCHAIN],
 
     # BUILD easyconfig parameters
+    'amdgcn_compute_capabilities': [[], "List of AMDGCN compute capabilities to build with (if supported)", BUILD],
     'banned_linked_shared_libs': [[], "List of shared libraries (names, file names, or paths) which are not allowed "
                                       "to be linked in any installed binary/library", BUILD],
     'bitbucket_account': ['%(namelower)s', "Bitbucket account name to be used to resolve template values in source"
diff --git a/easybuild/framework/easyconfig/easyconfig.py b/easybuild/framework/easyconfig/easyconfig.py
index f7a1ea0f01..47f8fee6f9 100644
--- a/easybuild/framework/easyconfig/easyconfig.py
+++ b/easybuild/framework/easyconfig/easyconfig.py
@@ -1987,6 +1987,25 @@ def get_cuda_cc_template_value(self, key):
             error_msg = "%s is not a template value based on --cuda-compute-capabilities/cuda_compute_capabilities"
             raise EasyBuildError(error_msg, key)
 
+    def get_amdgcn_cc_template_value(self, key):
+        """
+        Get template value based on --amdgcn-compute-capabilities EasyBuild configuration option
+        and amdgcn_compute_capabilities easyconfig parameter.
+        Returns user-friendly error message in case neither are defined,
+        or if an unknown key is used.
+        """
+        if key.startswith('amdgcn_') and key in TEMPLATE_NAMES_DYNAMIC:
+            try:
+                return self.template_values[key]
+            except KeyError:
+                error_msg = "Template value '%s' is not defined!\n"
+                error_msg += "Make sure that either the --amdgcn-compute-capabilities EasyBuild configuration "
+                error_msg += "option is set, or that the amdgcn_compute_capabilities easyconfig parameter is defined."
+                raise EasyBuildError(error_msg, key)
+        else:
+            error_msg = "%s is not a template value based on --amdgcn-compute-capabilities/amdgcn_compute_capabilities"
+            raise EasyBuildError(error_msg, key)
+
 
 def det_installversion(version, toolchain_name, toolchain_version, prefix, suffix):
     """Deprecated 'det_installversion' function, to determine exact install version, based on supplied parameters."""
diff --git a/easybuild/framework/easyconfig/templates.py b/easybuild/framework/easyconfig/templates.py
index 46fe4c5970..58fc24d2f5 100644
--- a/easybuild/framework/easyconfig/templates.py
+++ b/easybuild/framework/easyconfig/templates.py
@@ -88,6 +88,11 @@
 # template values which are only generated dynamically
 TEMPLATE_NAMES_DYNAMIC = {
     'arch': 'System architecture (e.g. x86_64, aarch64, ppc64le, ...)',
+    'amdgcn_compute_capabilities': "Comma-separated list of AMDGCN compute capabilities, as specified via "
+                                   "--amdgcn-compute-capabilities configuration option or "
+                                   "via amdgcn_compute_capabilities easyconfig parameter",
+    'amdgcn_cc_space_sep': "Space-separated list of AMDGCN compute capabilities",
+    'amdgcn_cc_semicolon_sep': "Semicolon-separated list of AMDGCN compute capabilities",
     'cuda_compute_capabilities': "Comma-separated list of CUDA compute capabilities, as specified via "
                                  "--cuda-compute-capabilities configuration option or "
                                  "via cuda_compute_capabilities easyconfig parameter",
@@ -478,6 +483,14 @@ def template_constant_dict(config, ignore=None, toolchain=None):
         template_values['cuda_sm_comma_sep'] = ','.join(sm_values)
         template_values['cuda_sm_space_sep'] = ' '.join(sm_values)
 
+    # step 7. AMDGCN compute capabilities
+    # Use the commandline / easybuild config option if given, else use the value from the EC (as a default)
+    amdgcn_cc = build_option('amdgcn_compute_capabilities') or config.get('amdgcn_compute_capabilities')
+    if amdgcn_cc:
+        template_values['amdgcn_compute_capabilities'] = ','.join(amdgcn_cc)
+        template_values['amdgcn_cc_space_sep'] = ' '.join(amdgcn_cc)
+        template_values['amdgcn_cc_semicolon_sep'] = ';'.join(amdgcn_cc)
+
     unknown_names = []
     for key in template_values:
         if not (key in common_template_names or key in TEMPLATE_NAMES_DYNAMIC):
diff --git a/easybuild/tools/config.py b/easybuild/tools/config.py
index eb4af47650..613abc421f 100644
--- a/easybuild/tools/config.py
+++ b/easybuild/tools/config.py
@@ -220,6 +220,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
 BUILD_OPTIONS_CMDLINE = {
     None: [
         'aggregate_regtest',
+        'amdgcn_compute_capabilities',
         'backup_modules',
         'banned_linked_shared_libs',
         'checksum_priority',
diff --git a/easybuild/tools/options.py b/easybuild/tools/options.py
index b4c5cfb229..1246b98391 100644
--- a/easybuild/tools/options.py
+++ b/easybuild/tools/options.py
@@ -370,6 +370,9 @@ def override_options(self):
                                             None, 'store_true', False),
             'allow-use-as-root-and-accept-consequences': ("Allow using of EasyBuild as root (NOT RECOMMENDED!)",
                                                           None, 'store_true', False),
+            'amdgcn-compute-capabilities': ("List of AMDGCN compute capabilities to use when building GPU software; "
+                                            "values should be specified as gfx[xyz], as defined by the LLVM targets, "
+                                            "for example: gfx1101,gfx90a,gfx1030", 'strlist', 'extend', None),
             'backup-modules': ("Back up an existing module file, if any. "
                                "Auto-enabled when using --module-only or --skip",
                                None, 'store_true', None),  # default None to allow auto-enabling if not disabled
@@ -950,6 +953,21 @@ def validate(self):
                 error_msg = "Incorrect values in --cuda-compute-capabilities (expected pattern: '%s'): %s"
                 error_msgs.append(error_msg % (cuda_cc_regex.pattern, ', '.join(faulty_cuda_ccs)))
 
+        # Support accelerators using the gfx[...] naming scheme.
+        # This applies to all AMD GPUs since Southern Islands (2013)
+        # For more information: https://llvm.org/docs/AMDGPUUsage.html#processors
+        if self.options.amdgcn_compute_capabilities:
+            # General accelerator naming convention
+            amdgcn_cc_regex = re.compile(r'gfx[0-9]+[a-z]?$')
+            # Generic convention, with an optional '-<digit>' minor part (e.g. gfx11-generic, gfx10-3-generic)
+            amdgcn_generic_regex = re.compile(r'gfx[0-9]+(-[0-9])?-generic$')
+            faulty_amdgcn_ccs = [x for x in self.options.amdgcn_compute_capabilities
+                                 if not amdgcn_cc_regex.match(x) and not amdgcn_generic_regex.match(x)]
+            if faulty_amdgcn_ccs:
+                patterns = "'%s' or '%s'" % (amdgcn_cc_regex.pattern, amdgcn_generic_regex.pattern)
+                error_msg = "Incorrect values in --amdgcn-compute-capabilities (expected pattern: %s): %s"
+                error_msgs.append(error_msg % (patterns, ', '.join(faulty_amdgcn_ccs)))
+
         if error_msgs:
             raise EasyBuildError(
                 "Found problems validating the options: %s", '\n'.join(error_msgs),
@@ -2064,7 +2082,7 @@ def set_tmpdir(tmpdir=None, raise_error=False):
 
     # avoid having special characters like '[' and ']' in the tmpdir pathname,
     # it is known to cause problems (e.g., with Python install tools, CUDA's nvcc, etc.);
-    # only common characteris like alphanumeric, '_', '-', '.' and '/' are retained; others are converted to 'X'
+    # only common characters like alphanumeric, '_', '-', '.' and '/' are retained; others are converted to 'X'
     special_chars_regex = r'[^\w/.-]'
     if re.search(special_chars_regex, current_tmpdir):
         current_tmpdir = re.sub(special_chars_regex, 'X', current_tmpdir)
diff --git a/test/framework/easyconfig.py b/test/framework/easyconfig.py
index 7e3eebbfdd..ba2ccdcd66 100644
--- a/test/framework/easyconfig.py
+++ b/test/framework/easyconfig.py
@@ -4807,6 +4807,35 @@ def test_cuda_compute_capabilities(self):
         self.assertEqual(ec['preinstallopts'], 'period="4.2 6.3" noperiod="42 63"')
         self.assertEqual(ec['installopts'], '4.2,6.3')
 
+    def test_amdgcn_compute_capabilities(self):
+        self.contents = textwrap.dedent("""
+            easyblock = 'ConfigureMake'
+            name = 'test'
+            version = '0.2'
+            homepage = 'https://example.com'
+            description = 'test'
+            toolchain = SYSTEM
+            amdgcn_compute_capabilities = ['gfx90a', 'gfx1101', 'gfx11-generic', 'gfx10-3-generic']
+            buildopts = ('comma="%(amdgcn_compute_capabilities)s" space="%(amdgcn_cc_space_sep)s" '
+                         'semi="%(amdgcn_cc_semicolon_sep)s"')
+            installopts = '%(amdgcn_compute_capabilities)s'
+        """)
+        self.prep()
+
+        ec = EasyConfig(self.eb_file)
+        self.assertEqual(ec['buildopts'], 'comma="gfx90a,gfx1101,gfx11-generic,gfx10-3-generic" '
+                                          'space="gfx90a gfx1101 gfx11-generic gfx10-3-generic" '
+                                          'semi="gfx90a;gfx1101;gfx11-generic;gfx10-3-generic"')
+        self.assertEqual(ec['installopts'], 'gfx90a,gfx1101,gfx11-generic,gfx10-3-generic')
+
+        # build options overwrite it
+        init_config(build_options={'amdgcn_compute_capabilities': ['gfx90a', 'gfx1101']})
+        ec = EasyConfig(self.eb_file)
+        self.assertEqual(ec['buildopts'], 'comma="gfx90a,gfx1101" '
+                                          'space="gfx90a gfx1101" '
+                                          'semi="gfx90a;gfx1101"')
+        self.assertEqual(ec['installopts'], 'gfx90a,gfx1101')
+
     def test_det_copy_ec_specs(self):
         """Test det_copy_ec_specs function."""
 
@@ -5110,6 +5139,55 @@ def test_get_cuda_cc_template_value(self):
         for key, expected in cuda_template_values.items():
             self.assertEqual(ec.get_cuda_cc_template_value(key), expected)
 
+    def test_get_amdgcn_cc_template_value(self):
+        """
+        Test getting template value based on --amdgcn-compute-capabilities / amdgcn_compute_capabilities.
+        """
+        self.contents = '\n'.join([
+            'easyblock = "ConfigureMake"',
+            'name = "pi"',
+            'version = "3.14"',
+            'homepage = "http://example.com"',
+            'description = "test easyconfig"',
+            'toolchain = SYSTEM',
+        ])
+        self.prep()
+        ec = EasyConfig(self.eb_file)
+
+        error_pattern = ("foobar is not a template value based on "
+                         "--amdgcn-compute-capabilities/amdgcn_compute_capabilities")
+        self.assertErrorRegex(EasyBuildError, error_pattern, ec.get_amdgcn_cc_template_value, 'foobar')
+
+        error_pattern = r"Template value '%s' is not defined!\n"
+        error_pattern += r"Make sure that either the --amdgcn-compute-capabilities EasyBuild configuration "
+        error_pattern += "option is set, or that the amdgcn_compute_capabilities easyconfig parameter is defined."
+        amdgcn_template_values = {
+            'amdgcn_compute_capabilities': 'gfx90a,gfx1100,gfx10-3-generic',
+            'amdgcn_cc_space_sep': 'gfx90a gfx1100 gfx10-3-generic',
+            'amdgcn_cc_semicolon_sep': 'gfx90a;gfx1100;gfx10-3-generic',
+        }
+        for key in amdgcn_template_values:
+            self.assertErrorRegex(EasyBuildError, error_pattern % key, ec.get_amdgcn_cc_template_value, key)
+
+        update_build_option('amdgcn_compute_capabilities', ['gfx90a', 'gfx1100', 'gfx10-3-generic'])
+        ec = EasyConfig(self.eb_file)
+
+        for key, expected in amdgcn_template_values.items():
+            self.assertEqual(ec.get_amdgcn_cc_template_value(key), expected)
+
+        update_build_option('amdgcn_compute_capabilities', None)
+        ec = EasyConfig(self.eb_file)
+
+        for key in amdgcn_template_values:
+            self.assertErrorRegex(EasyBuildError, error_pattern % key, ec.get_amdgcn_cc_template_value, key)
+
+        self.contents += "\namdgcn_compute_capabilities = ['gfx90a', 'gfx1100', 'gfx10-3-generic']"
+        self.prep()
+        ec = EasyConfig(self.eb_file)
+
+        for key, expected in amdgcn_template_values.items():
+            self.assertEqual(ec.get_amdgcn_cc_template_value(key), expected)
+
     def test_count_files(self):
         """Tests for EasyConfig.count_files method."""
         test_ecs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'easyconfigs', 'test_ecs')