Combined with matching changes in the rocr-runtime ebuild, this patch allows compatible kernels to be loaded whenever possible. For example, if AMDGPU_TARGETS is set to gfx1030 and an application is started on gfx1036, it loads the gfx1030 kernel. Author: Cordell Bloor https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0026-extend-hip-isa-compatibility-check.patch --- a/hipamd/src/hip_code_object.cpp +++ b/hipamd/src/hip_code_object.cpp @@ -390,47 +390,123 @@ static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_ return true; } -static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, - std::string agent_triple_target_id) { +struct GfxPattern { + std::string root; + std::string suffixes; +}; + +static bool matches(const GfxPattern& p, const std::string& s) { + if (p.root.size() + 1 != s.size()) { + return false; + } + if (0 != std::memcmp(p.root.data(), s.data(), p.root.size())) { + return false; + } + return p.suffixes.find(s[p.root.size()]) != std::string::npos; +} + +static bool isGfx900EquivalentProcessor(const std::string& processor) { + return matches(GfxPattern{"gfx90", "029c"}, processor); +} + +static bool isGfx900SupersetProcessor(const std::string& processor) { + return matches(GfxPattern{"gfx90", "0269c"}, processor); +} + +static bool isGfx1030EquivalentProcessor(const std::string& processor) { + return matches(GfxPattern{"gfx103", "0123456"}, processor); +} + +static bool isGfx1010EquivalentProcessor(const std::string& processor) { + return matches(GfxPattern{"gfx101", "0"}, processor); +} + +static bool isGfx1010SupersetProcessor(const std::string& processor) { + return matches(GfxPattern{"gfx101", "0123"}, processor); +} + +enum CompatibilityScore { + CS_EXACT_MATCH = 1 << 4, + CS_PROCESSOR_MATCH = 1 << 3, + CS_PROCESSOR_COMPATIBLE = 1 << 2, + 
CS_XNACK_SPECIALIZED = 1 << 1, + CS_SRAM_ECC_SPECIALIZED = 1 << 0, + CS_INCOMPATIBLE = 0, +}; + +static int getProcessorCompatibilityScore(const std::string& co_processor, + const std::string& agent_processor) { + if (co_processor == agent_processor) + return CS_PROCESSOR_MATCH; + + if (isGfx900SupersetProcessor(agent_processor)) + return isGfx900EquivalentProcessor(co_processor) ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE; + + if (isGfx1010SupersetProcessor(agent_processor)) + return isGfx1010EquivalentProcessor(co_processor) ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE; + + if (isGfx1030EquivalentProcessor(agent_processor)) + return isGfx1030EquivalentProcessor(co_processor) ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE; + + return CS_INCOMPATIBLE; +} + +static int getCompatiblityScore(std::string co_triple_target_id, + std::string agent_triple_target_id) { // Primitive Check - if (co_triple_target_id == agent_triple_target_id) return true; + if (co_triple_target_id == agent_triple_target_id) return CS_EXACT_MATCH; // Parse code object triple target id if (!consume(co_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) { - return false; + return CS_INCOMPATIBLE; } std::string co_processor; char co_sram_ecc, co_xnack; if (!getTargetIDValue(co_triple_target_id, co_processor, co_sram_ecc, co_xnack)) { - return false; + return CS_INCOMPATIBLE; } - if (!co_triple_target_id.empty()) return false; + if (!co_triple_target_id.empty()) return CS_INCOMPATIBLE; // Parse agent isa triple target id if (!consume(agent_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) { - return false; + return CS_INCOMPATIBLE; } std::string agent_isa_processor; char isa_sram_ecc, isa_xnack; if (!getTargetIDValue(agent_triple_target_id, agent_isa_processor, isa_sram_ecc, isa_xnack)) { - return false; + return CS_INCOMPATIBLE; } - if (!agent_triple_target_id.empty()) return false; + if (!agent_triple_target_id.empty()) return CS_INCOMPATIBLE; // Check for compatibility - if 
(agent_isa_processor != co_processor) return false; - if (co_sram_ecc != ' ') { - if (co_sram_ecc != isa_sram_ecc) return false; + int processor_score = getProcessorCompatibilityScore(co_processor, agent_isa_processor); + if (processor_score == CS_INCOMPATIBLE) { + return CS_INCOMPATIBLE; } - if (co_xnack != ' ') { - if (co_xnack != isa_xnack) return false; + + int xnack_bonus; + if (co_xnack == ' ') { + xnack_bonus = 0; + } else if (co_xnack == isa_xnack) { + xnack_bonus = CS_XNACK_SPECIALIZED; + } else { + return CS_INCOMPATIBLE; } - return true; + int sram_ecc_bonus; + if (co_sram_ecc == ' ') { + sram_ecc_bonus = 0; + } else if (co_sram_ecc == isa_sram_ecc) { + sram_ecc_bonus = CS_SRAM_ECC_SPECIALIZED; + } else { + return CS_INCOMPATIBLE; + } + + return processor_score + xnack_bonus + sram_ecc_bonus; } // This will be moved to COMGR eventually @@ -483,6 +559,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( for (size_t i = 0; i < agent_triple_target_ids.size(); i++) { code_objs.push_back(std::make_pair(nullptr, 0)); } + std::vector compatibility_score(agent_triple_target_ids.size()); const auto obheader = reinterpret_cast(data); const auto* desc = &obheader->desc[0]; @@ -495,17 +572,19 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( reinterpret_cast(reinterpret_cast(obheader) + desc->offset); const size_t image_size = desc->size; - if (num_code_objs == 0) break; std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; std::string co_triple_target_id; if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id)) continue; for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) { - if (code_objs[dev].first) continue; - if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev])) { + if (compatibility_score[dev] >= CS_PROCESSOR_MATCH) continue; + int score = getCompatiblityScore(co_triple_target_id, agent_triple_target_ids[dev]); + if (score > compatibility_score[dev]) { + 
compatibility_score[dev] = score; + if (!code_objs[dev].first) + --num_code_objs; code_objs[dev] = std::make_pair(image, image_size); - --num_code_objs; } } } --- a/rocclr/device/device.cpp +++ b/rocclr/device/device.cpp @@ -234,10 +234,49 @@ std::string Isa::isaName() const { return std::string(hsaIsaNamePrefix) + targetId(); } +template +static bool Contains(const std::array& arr, const T& value) { + return std::find(std::begin(arr), std::end(arr), value) != std::end(arr); +} + +static bool IsVersionCompatible(const Isa &codeObjectIsa, + const Isa &agentIsa) { + if (codeObjectIsa.versionMajor() == agentIsa.versionMajor() && + codeObjectIsa.versionMinor() == agentIsa.versionMinor()) { + + if (codeObjectIsa.versionStepping() == agentIsa.versionStepping()) { + return true; // exact match + } + + // The code object and the agent may sometimes be compatible if + // they differ only by stepping version. + if (codeObjectIsa.versionMajor() == 9 && + codeObjectIsa.versionMinor() == 0) { + const std::array gfx900_equivalent = { 0, 2, 9, 12 }; + const std::array gfx900_superset = { 0, 2, 6, 9, 12 }; + if (Contains(gfx900_equivalent, codeObjectIsa.versionStepping()) && + Contains(gfx900_superset, agentIsa.versionStepping())) { + return true; // gfx900 compatible object and agent + } + } else if (codeObjectIsa.versionMajor() == 10) { + if (codeObjectIsa.versionMinor() == 1) { + const std::array gfx1010_equivalent = { 0 }; + const std::array gfx1010_superset = { 0, 1, 2, 3 }; + if (Contains(gfx1010_equivalent, codeObjectIsa.versionStepping()) && + Contains(gfx1010_superset, agentIsa.versionStepping())) { + return true; // gfx1010 compatible object and agent + } + } else if (codeObjectIsa.versionMinor() == 3) { + return true; // gfx1030 compatible object and agent + } + } + } + + return false; +} + bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) { - if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() || - codeObjectIsa.versionMinor() != 
agentIsa.versionMinor() || - codeObjectIsa.versionStepping() != agentIsa.versionStepping()) + if (!IsVersionCompatible(codeObjectIsa, agentIsa)) return false; assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() &&