Spaces:
Running
Running
vulkan: Find optimal memory type but with fallback (llama/5381)
Browse files- ggml-vulkan.cpp +42 -23
ggml-vulkan.cpp
CHANGED
|
@@ -707,9 +707,21 @@ static void ggml_vk_queue_cleanup(ggml_backend_vk_context * ctx, vk_queue& q) {
|
|
| 707 |
q.cmd_buffer_idx = 0;
|
| 708 |
}
|
| 709 |
|
| 710 |
-
static
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 711 |
#ifdef GGML_VULKAN_DEBUG
|
| 712 |
-
std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ")" << std::endl;
|
| 713 |
#endif
|
| 714 |
vk_buffer buf = std::make_shared<vk_buffer_struct>();
|
| 715 |
|
|
@@ -736,15 +748,15 @@ static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t siz
|
|
| 736 |
|
| 737 |
uint32_t memory_type_index = UINT32_MAX;
|
| 738 |
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
}
|
| 746 |
|
| 747 |
-
if (memory_type_index
|
| 748 |
ctx->device.lock()->device.destroyBuffer(buf->buffer);
|
| 749 |
buf->size = 0;
|
| 750 |
throw vk::OutOfDeviceMemoryError("No suitable memory type found");
|
|
@@ -758,10 +770,9 @@ static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t siz
|
|
| 758 |
buf->size = 0;
|
| 759 |
throw e;
|
| 760 |
}
|
| 761 |
-
buf->memory_property_flags = req_flags;
|
| 762 |
buf->ptr = nullptr;
|
| 763 |
|
| 764 |
-
if (
|
| 765 |
buf->ptr = ctx->device.lock()->device.mapMemory(buf->device_memory, 0, VK_WHOLE_SIZE);
|
| 766 |
}
|
| 767 |
|
|
@@ -778,9 +789,9 @@ static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t siz
|
|
| 778 |
return buf;
|
| 779 |
}
|
| 780 |
|
| 781 |
-
static vk_buffer ggml_vk_create_buffer_check(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags) {
|
| 782 |
try {
|
| 783 |
-
return ggml_vk_create_buffer(ctx, size, req_flags);
|
| 784 |
} catch (const vk::SystemError& e) {
|
| 785 |
std::cerr << "ggml_vulkan: Memory allocation of size " << size << " failed." << std::endl;
|
| 786 |
std::cerr << "ggml_vulkan: " << e.what() << std::endl;
|
|
@@ -791,16 +802,16 @@ static vk_buffer ggml_vk_create_buffer_check(ggml_backend_vk_context * ctx, size
|
|
| 791 |
static vk_buffer ggml_vk_create_buffer_device(ggml_backend_vk_context * ctx, size_t size) {
|
| 792 |
vk_buffer buf;
|
| 793 |
try {
|
| 794 |
-
buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eDeviceLocal);
|
| 795 |
-
} catch (const vk::SystemError& e) {
|
| 796 |
if (ctx->device.lock()->uma) {
|
| 797 |
// Fall back to host memory type
|
| 798 |
-
buf =
|
| 799 |
} else {
|
| 800 |
-
|
| 801 |
-
std::cerr << "ggml_vulkan: " << e.what() << std::endl;
|
| 802 |
-
throw e;
|
| 803 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 804 |
}
|
| 805 |
|
| 806 |
return buf;
|
|
@@ -1422,7 +1433,9 @@ static void * ggml_vk_host_malloc(ggml_backend_vk_context * ctx, size_t size) {
|
|
| 1422 |
#ifdef GGML_VULKAN_DEBUG
|
| 1423 |
std::cerr << "ggml_vk_host_malloc(" << size << ")" << std::endl;
|
| 1424 |
#endif
|
| 1425 |
-
vk_buffer buf = ggml_vk_create_buffer(ctx, size,
|
|
|
|
|
|
|
| 1426 |
|
| 1427 |
if(!(buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible)) {
|
| 1428 |
fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory\n",
|
|
@@ -1568,7 +1581,9 @@ static void deferred_memcpy(void * dst, const void * src, size_t size, std::vect
|
|
| 1568 |
static void ggml_vk_ensure_sync_staging_buffer(ggml_backend_vk_context * ctx, size_t size) {
|
| 1569 |
if (ctx->sync_staging == nullptr || ctx->sync_staging->size < size) {
|
| 1570 |
ggml_vk_destroy_buffer(ctx->sync_staging);
|
| 1571 |
-
ctx->sync_staging = ggml_vk_create_buffer_check(ctx, size,
|
|
|
|
|
|
|
| 1572 |
}
|
| 1573 |
}
|
| 1574 |
|
|
@@ -4082,7 +4097,9 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
|
|
| 4082 |
std::cerr << "ggml_vk_preallocate_buffers(qx_size: " << ctx->prealloc_size_qx << " qy_size: " << ctx->prealloc_size_qy << " x_size: " << ctx->prealloc_size_x << " y_size: " << ctx->prealloc_size_y << " split_k_size: " << ctx->prealloc_size_split_k << ")" << std::endl;
|
| 4083 |
#endif
|
| 4084 |
#if defined(GGML_VULKAN_RUN_TESTS)
|
| 4085 |
-
ctx->staging = ggml_vk_create_buffer_check(ctx, 100ul * 1024ul * 1024ul,
|
|
|
|
|
|
|
| 4086 |
ggml_vk_test_transfer(ctx, 8192 * 1000, false);
|
| 4087 |
ggml_vk_test_transfer(ctx, 8192 * 1000, true);
|
| 4088 |
|
|
@@ -4174,7 +4191,9 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
|
|
| 4174 |
if (ctx->staging != nullptr) {
|
| 4175 |
ggml_vk_destroy_buffer(ctx->staging);
|
| 4176 |
}
|
| 4177 |
-
ctx->staging = ggml_vk_create_buffer_check(ctx, ctx->staging_size,
|
|
|
|
|
|
|
| 4178 |
}
|
| 4179 |
}
|
| 4180 |
|
|
|
|
| 707 |
q.cmd_buffer_idx = 0;
|
| 708 |
}
|
| 709 |
|
| 710 |
+
static uint32_t find_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags) {
|
| 711 |
+
for (uint32_t i = 0; i < mem_props->memoryTypeCount; ++i) {
|
| 712 |
+
vk::MemoryType memory_type = mem_props->memoryTypes[i];
|
| 713 |
+
if ((mem_req->memoryTypeBits & ((uint64_t)1 << i)) &&
|
| 714 |
+
(flags & memory_type.propertyFlags) == flags &&
|
| 715 |
+
mem_props->memoryHeaps[memory_type.heapIndex].size >= mem_req->size) {
|
| 716 |
+
return static_cast<int32_t>(i);
|
| 717 |
+
}
|
| 718 |
+
}
|
| 719 |
+
return UINT32_MAX;
|
| 720 |
+
}
|
| 721 |
+
|
| 722 |
+
static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags, vk::MemoryPropertyFlags fallback_flags = vk::MemoryPropertyFlags(0)) {
|
| 723 |
#ifdef GGML_VULKAN_DEBUG
|
| 724 |
+
std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ", " << to_string(fallback_flags) << ")" << std::endl;
|
| 725 |
#endif
|
| 726 |
vk_buffer buf = std::make_shared<vk_buffer_struct>();
|
| 727 |
|
|
|
|
| 748 |
|
| 749 |
uint32_t memory_type_index = UINT32_MAX;
|
| 750 |
|
| 751 |
+
memory_type_index = find_properties(&mem_props, &mem_req, req_flags);
|
| 752 |
+
buf->memory_property_flags = req_flags;
|
| 753 |
+
|
| 754 |
+
if (memory_type_index == UINT32_MAX && fallback_flags) {
|
| 755 |
+
memory_type_index = find_properties(&mem_props, &mem_req, fallback_flags);
|
| 756 |
+
buf->memory_property_flags = fallback_flags;
|
| 757 |
}
|
| 758 |
|
| 759 |
+
if (memory_type_index == UINT32_MAX) {
|
| 760 |
ctx->device.lock()->device.destroyBuffer(buf->buffer);
|
| 761 |
buf->size = 0;
|
| 762 |
throw vk::OutOfDeviceMemoryError("No suitable memory type found");
|
|
|
|
| 770 |
buf->size = 0;
|
| 771 |
throw e;
|
| 772 |
}
|
|
|
|
| 773 |
buf->ptr = nullptr;
|
| 774 |
|
| 775 |
+
if (buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
|
| 776 |
buf->ptr = ctx->device.lock()->device.mapMemory(buf->device_memory, 0, VK_WHOLE_SIZE);
|
| 777 |
}
|
| 778 |
|
|
|
|
| 789 |
return buf;
|
| 790 |
}
|
| 791 |
|
| 792 |
+
static vk_buffer ggml_vk_create_buffer_check(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags, vk::MemoryPropertyFlags fallback_flags = vk::MemoryPropertyFlags(0)) {
|
| 793 |
try {
|
| 794 |
+
return ggml_vk_create_buffer(ctx, size, req_flags, fallback_flags);
|
| 795 |
} catch (const vk::SystemError& e) {
|
| 796 |
std::cerr << "ggml_vulkan: Memory allocation of size " << size << " failed." << std::endl;
|
| 797 |
std::cerr << "ggml_vulkan: " << e.what() << std::endl;
|
|
|
|
| 802 |
static vk_buffer ggml_vk_create_buffer_device(ggml_backend_vk_context * ctx, size_t size) {
|
| 803 |
vk_buffer buf;
|
| 804 |
try {
|
|
|
|
|
|
|
| 805 |
if (ctx->device.lock()->uma) {
|
| 806 |
// Fall back to host memory type
|
| 807 |
+
buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
|
| 808 |
} else {
|
| 809 |
+
buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eDeviceLocal);
|
|
|
|
|
|
|
| 810 |
}
|
| 811 |
+
} catch (const vk::SystemError& e) {
|
| 812 |
+
std::cerr << "ggml_vulkan: Device memory allocation of size " << size << " failed." << std::endl;
|
| 813 |
+
std::cerr << "ggml_vulkan: " << e.what() << std::endl;
|
| 814 |
+
throw e;
|
| 815 |
}
|
| 816 |
|
| 817 |
return buf;
|
|
|
|
| 1433 |
#ifdef GGML_VULKAN_DEBUG
|
| 1434 |
std::cerr << "ggml_vk_host_malloc(" << size << ")" << std::endl;
|
| 1435 |
#endif
|
| 1436 |
+
vk_buffer buf = ggml_vk_create_buffer(ctx, size,
|
| 1437 |
+
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
|
| 1438 |
+
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
|
| 1439 |
|
| 1440 |
if(!(buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible)) {
|
| 1441 |
fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory\n",
|
|
|
|
| 1581 |
static void ggml_vk_ensure_sync_staging_buffer(ggml_backend_vk_context * ctx, size_t size) {
|
| 1582 |
if (ctx->sync_staging == nullptr || ctx->sync_staging->size < size) {
|
| 1583 |
ggml_vk_destroy_buffer(ctx->sync_staging);
|
| 1584 |
+
ctx->sync_staging = ggml_vk_create_buffer_check(ctx, size,
|
| 1585 |
+
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
|
| 1586 |
+
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
|
| 1587 |
}
|
| 1588 |
}
|
| 1589 |
|
|
|
|
| 4097 |
std::cerr << "ggml_vk_preallocate_buffers(qx_size: " << ctx->prealloc_size_qx << " qy_size: " << ctx->prealloc_size_qy << " x_size: " << ctx->prealloc_size_x << " y_size: " << ctx->prealloc_size_y << " split_k_size: " << ctx->prealloc_size_split_k << ")" << std::endl;
|
| 4098 |
#endif
|
| 4099 |
#if defined(GGML_VULKAN_RUN_TESTS)
|
| 4100 |
+
ctx->staging = ggml_vk_create_buffer_check(ctx, 100ul * 1024ul * 1024ul,
|
| 4101 |
+
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
|
| 4102 |
+
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
|
| 4103 |
ggml_vk_test_transfer(ctx, 8192 * 1000, false);
|
| 4104 |
ggml_vk_test_transfer(ctx, 8192 * 1000, true);
|
| 4105 |
|
|
|
|
| 4191 |
if (ctx->staging != nullptr) {
|
| 4192 |
ggml_vk_destroy_buffer(ctx->staging);
|
| 4193 |
}
|
| 4194 |
+
ctx->staging = ggml_vk_create_buffer_check(ctx, ctx->staging_size,
|
| 4195 |
+
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
|
| 4196 |
+
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
|
| 4197 |
}
|
| 4198 |
}
|
| 4199 |
|