|
1 | 1 | #include "ggml-vulkan.h" |
2 | | - |
| 2 | +#include <vulkan/vulkan_core.h> |
3 | 3 | #ifdef GGML_VULKAN_RUN_TESTS |
4 | 4 | #include <chrono> |
5 | 5 | #endif |
|
9 | 9 | #include <algorithm> |
10 | 10 | #include <cmath> |
11 | 11 | #include <iostream> |
12 | | -#include <limits> |
13 | 12 | #include <tuple> |
14 | 13 | #include <vector> |
15 | 14 | #include <sstream> |
16 | 15 | #include <utility> |
17 | 16 | #include <memory> |
| 17 | +#include <limits> |
| 18 | +#include <map> |
18 | 19 |
|
19 | 20 | #include "ggml.h" |
20 | 21 | #include "ggml-backend-impl.h" |
@@ -1566,8 +1567,10 @@ static void ggml_vk_print_gpu_info(size_t idx) { |
1566 | 1567 | vk::PhysicalDeviceProperties2 props2; |
1567 | 1568 | vk::PhysicalDeviceMaintenance3Properties props3; |
1568 | 1569 | vk::PhysicalDeviceSubgroupProperties subgroup_props; |
| 1570 | + vk::PhysicalDeviceDriverProperties driver_props; |
1569 | 1571 | props2.pNext = &props3; |
1570 | 1572 | props3.pNext = &subgroup_props; |
| 1573 | + subgroup_props.pNext = &driver_props; |
1571 | 1574 | physical_device.getProperties2(&props2); |
1572 | 1575 |
|
1573 | 1576 | const size_t subgroup_size = subgroup_props.subgroupSize; |
@@ -1611,7 +1614,7 @@ static void ggml_vk_print_gpu_info(size_t idx) { |
1611 | 1614 | fp16 = fp16 && vk12_features.shaderFloat16; |
1612 | 1615 |
|
1613 | 1616 | std::string device_name = props2.properties.deviceName.data(); |
1614 | | - std::cerr << GGML_VK_NAME << idx << ": " << device_name << " | uma: " << uma << " | fp16: " << fp16 << " | warp size: " << subgroup_size << std::endl; |
| 1617 | + std::cerr << GGML_VK_NAME << idx << ": " << device_name << " (" << driver_props.driverName << ") | uma: " << uma << " | fp16: " << fp16 << " | warp size: " << subgroup_size << std::endl; |
1615 | 1618 |
|
1616 | 1619 | if (props2.properties.deviceType == vk::PhysicalDeviceType::eCpu) { |
1617 | 1620 | std::cerr << "ggml_vulkan: Warning: Device type is CPU. This is probably not the device you want." << std::endl; |
@@ -1707,7 +1710,80 @@ void ggml_vk_instance_init() { |
1707 | 1710 | vk::PhysicalDeviceProperties props = devices[i].getProperties(); |
1708 | 1711 |
|
1709 | 1712 | if (props.deviceType == vk::PhysicalDeviceType::eDiscreteGpu) { |
1710 | | - vk_instance.device_indices.push_back(i); |
| 1713 | + // Check if there are two physical devices corresponding to the same GPU |
| 1714 | + auto old_device = std::find_if( |
| 1715 | + vk_instance.device_indices.begin(), |
| 1716 | + vk_instance.device_indices.end(), |
| 1717 | + [&devices, &props](const size_t k){ return devices[k].getProperties().deviceID == props.deviceID; } |
| 1718 | + ); |
| 1719 | + if (old_device == vk_instance.device_indices.end()) { |
| 1720 | + vk_instance.device_indices.push_back(i); |
| 1721 | + } else { |
| 1722 | + // There can be two physical devices corresponding to the same GPU if there are 2 different drivers |
| 1723 | + // This can cause errors when splitting layers across the devices, so we need to keep only 1 |
| 1724 | +#ifdef GGML_VULKAN_DEBUG |
| 1725 | + std::cerr << "Device " << i << " and device " << *old_device << " have the same device id" << std::endl; |
| 1726 | +#endif |
| 1727 | + |
| 1728 | + vk::PhysicalDeviceProperties2 old_prop; |
| 1729 | + vk::PhysicalDeviceDriverProperties old_driver; |
| 1730 | + old_prop.pNext = &old_driver; |
| 1731 | + devices[*old_device].getProperties2(&old_prop); |
| 1732 | + |
| 1733 | + vk::PhysicalDeviceProperties2 new_prop; |
| 1734 | + vk::PhysicalDeviceDriverProperties new_driver; |
| 1735 | + new_prop.pNext = &new_driver; |
| 1736 | + devices[i].getProperties2(&new_prop); |
| 1737 | + |
| 1738 | + std::map<vk::DriverId, int> driver_priorities {}; |
| 1739 | + int old_priority = std::numeric_limits<int>::max(); |
| 1740 | + int new_priority = std::numeric_limits<int>::max(); |
| 1741 | + |
| 1742 | + // Check https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkDriverId.html for the list of driver IDs |
| 1743 | + // Smaller number -> higher priority |
| 1744 | + switch (old_prop.properties.vendorID) { |
| 1745 | + case VK_VENDOR_ID_AMD: |
| 1746 | + driver_priorities[vk::DriverId::eMesaRadv] = 1; |
| 1747 | + driver_priorities[vk::DriverId::eAmdOpenSource] = 2; |
| 1748 | + driver_priorities[vk::DriverId::eAmdProprietary] = 3; |
| 1749 | + break; |
| 1750 | + case VK_VENDOR_ID_INTEL: |
| 1751 | + driver_priorities[vk::DriverId::eIntelOpenSourceMESA] = 1; |
| 1752 | + driver_priorities[vk::DriverId::eIntelProprietaryWindows] = 2; |
| 1753 | + break; |
| 1754 | + case VK_VENDOR_ID_NVIDIA: |
| 1755 | + driver_priorities[vk::DriverId::eNvidiaProprietary] = 1; |
| 1756 | + |
| 1757 | + VK_API_VERSION_MAJOR(VK_API_VERSION_1_3); |
| 1758 | +#if defined(VK_API_VERSION_1_3) && VK_HEADER_VERSION >= 235 |
| 1759 | + driver_priorities[vk::DriverId::eMesaNvk] = 2; |
| 1760 | +#endif |
| 1761 | + break; |
| 1762 | + } |
| 1763 | + |
| 1764 | + if (driver_priorities.count(old_driver.driverID)) { |
| 1765 | + old_priority = driver_priorities[old_driver.driverID]; |
| 1766 | + } |
| 1767 | + if (driver_priorities.count(new_driver.driverID)) { |
| 1768 | + new_priority = driver_priorities[new_driver.driverID]; |
| 1769 | + } |
| 1770 | + |
| 1771 | + if (new_priority < old_priority) { |
| 1772 | + auto r = std::remove(vk_instance.device_indices.begin(), vk_instance.device_indices.end(), *old_device); |
| 1773 | + vk_instance.device_indices.erase(r, vk_instance.device_indices.end()); |
| 1774 | + vk_instance.device_indices.push_back(i); |
| 1775 | + |
| 1776 | +#ifdef GGML_VULKAN_DEBUG |
| 1777 | + std::cerr << "Prioritize device " << i << " driver " << new_driver.driverName << " over device " << *old_device << " driver " << old_driver.driverName << std::endl; |
| 1778 | +#endif |
| 1779 | + } |
| 1780 | +#ifdef GGML_VULKAN_DEBUG |
| 1781 | + else { |
| 1782 | + std::cerr << "Prioritize device " << *old_device << " driver " << old_driver.driverName << " over device " << i << " driver " << new_driver.driverName << std::endl; |
| 1783 | + |
| 1784 | + } |
| 1785 | +#endif |
| 1786 | + } |
1711 | 1787 | } |
1712 | 1788 | } |
1713 | 1789 |
|
|
0 commit comments