From 5919014be88699818d9ccf03ffb5626243e39beb Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Wed, 11 Jun 2025 17:51:17 -0300 Subject: [PATCH 1/4] add backend support for changing the VAE tiling threshold --- expose.h | 2 +- koboldcpp.py | 4 ++-- otherarch/sdcpp/sdtype_adapter.cpp | 17 ++++++++++++++--- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/expose.h b/expose.h index c9d59d25f88e..25527ed1c449 100644 --- a/expose.h +++ b/expose.h @@ -162,7 +162,7 @@ struct sd_load_model_inputs const int threads = 0; const int quant = 0; const bool taesd = false; - const bool notile = false; + const int tiled_vae_threshold = 0; const char * t5xxl_filename = nullptr; const char * clipl_filename = nullptr; const char * clipg_filename = nullptr; diff --git a/koboldcpp.py b/koboldcpp.py index d8fc1c6d6a0a..4328d40df982 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -272,7 +272,7 @@ class sd_load_model_inputs(ctypes.Structure): ("threads", ctypes.c_int), ("quant", ctypes.c_int), ("taesd", ctypes.c_bool), - ("notile", ctypes.c_bool), + ("tiled_vae_threshold", ctypes.c_int), ("t5xxl_filename", ctypes.c_char_p), ("clipl_filename", ctypes.c_char_p), ("clipg_filename", ctypes.c_char_p), @@ -1549,7 +1549,7 @@ def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl inputs.threads = thds inputs.quant = quant inputs.taesd = True if args.sdvaeauto else False - inputs.notile = True if args.sdnotile else False + inputs.tiled_vae_threshold = -1 if args.sdnotile else 0 inputs.vae_filename = vae_filename.encode("UTF-8") inputs.lora_filename = lora_filename.encode("UTF-8") inputs.lora_multiplier = args.sdloramult diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp index cfec7efc85ce..bce195e6c930 100644 --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ -119,7 +119,7 @@ static uint8_t * input_mask_buffer = NULL; static uint8_t * input_photomaker_buffer = NULL; static std::string sdplatformenv, sddeviceenv, sdvulkandeviceenv; -static bool notiling = false; +static int cfg_tiled_vae_threshold = 0; static int cfg_square_limit = 0; static int cfg_side_limit = 0; static bool sd_is_quiet = false; @@ -136,7 +136,7 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) { std::string clipl_filename = inputs.clipl_filename; std::string clipg_filename = inputs.clipg_filename; std::string photomaker_filename = inputs.photomaker_filename; - notiling = inputs.notile; + cfg_tiled_vae_threshold = inputs.tiled_vae_threshold; cfg_side_limit = inputs.img_hard_limit; cfg_square_limit = inputs.img_soft_limit; printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename); @@ -482,7 +482,18 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) printf("\nKCPP SD: Requested dimensions %dx%d changed to %dx%d\n", inputs.width, inputs.height, sd_params->width, sd_params->height); } - bool dotile = (sd_params->width>768 || sd_params->height>768) && !notiling; + int tiled_vae_threshold; + if (cfg_tiled_vae_threshold == 0) { + tiled_vae_threshold = 768; + } else { + if (cfg_tiled_vae_threshold > 0) { + tiled_vae_threshold = cfg_tiled_vae_threshold; + } else { + tiled_vae_threshold = 8192; // effectively avoids tiling + } + } + + bool dotile = (sd_params->width>tiled_vae_threshold || sd_params->height>tiled_vae_threshold); set_sd_vae_tiling(sd_ctx,dotile); //changes vae tiling, prevents memory related crash/oom if (sd_params->clip_skip <= 0) { From 496e5f2038a53ba4f552eaabbe7155bb0d5aad5e Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Wed, 11 Jun 2025 18:52:46 -0300 Subject: [PATCH 2/4] trigger VAE tiling by image area instead of dimensions I've tested with GGML_VULKAN_MEMORY_DEBUG all resolutions with the same 768x768 area (even extremes like 64x9216), and many below that: all consistently allocate 6656 bytes per image pixel. As tiling is primarily useful to avoid excessive memory usage, it seems reasonable to enable VAE tiling based on area rather than maximum image side. However, as there is currently no user interface option to change it back to a lower value, it's best to maintain the default behavior for now. --- otherarch/sdcpp/sdtype_adapter.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp index bce195e6c930..50b3621126fd 100644 --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ -483,17 +483,24 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) } int tiled_vae_threshold; + bool dotile; if (cfg_tiled_vae_threshold == 0) { + // Legacy behavior: limit by image dimensions tiled_vae_threshold = 768; + dotile = (sd_params->width>tiled_vae_threshold || sd_params->height>tiled_vae_threshold); } else { if (cfg_tiled_vae_threshold > 0) { tiled_vae_threshold = cfg_tiled_vae_threshold; } else { tiled_vae_threshold = 8192; // effectively avoids tiling } + // when explicitely set, limit by image area + // on the Vulkan backend, typically limited at 4G for a single allocation, + // the memory used for the VAE buffer is 6656 bytes per image pixel; a + // 768x768 square image (and all resolutions with the same area) stays a + // bit below that limit, at 3.66G. + dotile = (sd_params->width*sd_params->height > tiled_vae_threshold*tiled_vae_threshold); } - - bool dotile = (sd_params->width>tiled_vae_threshold || sd_params->height>tiled_vae_threshold); set_sd_vae_tiling(sd_ctx,dotile); //changes vae tiling, prevents memory related crash/oom if (sd_params->clip_skip <= 0) { From 18b24aef23f0dc8b8e168d2a8337013214a4adca Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Fri, 20 Jun 2025 10:06:47 -0300 Subject: [PATCH 3/4] replace the notile option with a configurable threshold This allows selecting a lower threshold value, reducing the peak memory usage. The legacy sdnotile parameter gets automatically converted to the new parameter, if it's the only one supplied. --- koboldcpp.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 4328d40df982..477017f8f28f 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -1549,7 +1549,7 @@ def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl inputs.threads = thds inputs.quant = quant inputs.taesd = True if args.sdvaeauto else False - inputs.tiled_vae_threshold = -1 if args.sdnotile else 0 + inputs.tiled_vae_threshold = args.sdtiledvae inputs.vae_filename = vae_filename.encode("UTF-8") inputs.lora_filename = lora_filename.encode("UTF-8") inputs.lora_multiplier = args.sdloramult @@ -4303,7 +4303,7 @@ def hide_tooltip(event): sd_clipg_var = ctk.StringVar() sd_photomaker_var = ctk.StringVar() sd_vaeauto_var = ctk.IntVar(value=0) - sd_notile_var = ctk.IntVar(value=0) + sd_tiled_vae_var = ctk.StringVar(value="") sd_clamped_var = ctk.StringVar(value="0") sd_clamped_soft_var = ctk.StringVar(value="0") sd_threads_var = ctk.StringVar(value=str(default_threads)) @@ -5033,7 +5033,7 @@ def toggletaesd(a,b,c): sdvaeitem2.grid() sdvaeitem3.grid() makecheckbox(images_tab, "Use TAE SD (AutoFix Broken VAE)", sd_vaeauto_var, 42,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.") - makecheckbox(images_tab, "No VAE Tiling", sd_notile_var, 44,tooltiptxt="Disables VAE tiling, may not work for large images.") + makelabelentry(images_tab, "VAE Tiling Above:", sd_tiled_vae_var, 44, 50, padx=190,singleline=True,tooltip="Enable VAE Tiling for images above this size, to save memory.\n-1 disables tiling for all images. Leave at 0 for the default limit (768).") # audio tab audio_tab = tabcontent["Audio"] @@ -5266,7 +5266,7 @@ def export_vars(): args.sdthreads = (0 if sd_threads_var.get()=="" else int(sd_threads_var.get())) args.sdclamped = (0 if int(sd_clamped_var.get())<=0 else int(sd_clamped_var.get())) args.sdclampedsoft = (0 if int(sd_clamped_soft_var.get())<=0 else int(sd_clamped_soft_var.get())) - args.sdnotile = (True if sd_notile_var.get()==1 else False) + args.sdtiledvae = (0 if sd_tiled_vae_var.get()=="" else int(sd_tiled_vae_var.get())) if sd_vaeauto_var.get()==1: args.sdvaeauto = True args.sdvae = "" @@ -5488,7 +5488,8 @@ def import_vars(dict): sd_clipg_var.set(dict["sdclipg"] if ("sdclipg" in dict and dict["sdclipg"]) else "") sd_photomaker_var.set(dict["sdphotomaker"] if ("sdphotomaker" in dict and dict["sdphotomaker"]) else "") sd_vaeauto_var.set(1 if ("sdvaeauto" in dict and dict["sdvaeauto"]) else 0) - sd_notile_var.set(1 if ("sdnotile" in dict and dict["sdnotile"]) else 0) + sd_tiled_vae_var.set(int(dict["sdtiledvae"]) if ("sdtiledvae" in dict and dict["sdtiledvae"]) else 0) + sd_lora_var.set(dict["sdlora"] if ("sdlora" in dict and dict["sdlora"]) else "") sd_loramult_var.set(str(dict["sdloramult"]) if ("sdloramult" in dict and dict["sdloramult"]) else "1.0") @@ -5856,6 +5857,8 @@ def convert_invalid_args(args): dict["model_param"] = model_value elif isinstance(model_value, list) and model_value: # Non-empty list dict["model_param"] = model_value[0] # Take the first file in the list + if "sdnotile" in dict and ("sdtiledvae" not in dict or dict["sdtiledvae"] == 0): + dict["sdtiledvae"] = (-1 if dict["sdnotile"] else 0) # convert legacy option return args def setuptunnel(global_memory, has_sd): @@ -7269,8 +7272,8 @@ def range_checker(arg: str): sdparsergrouplora.add_argument("--sdquant", help="If specified, loads the model quantized to save memory.", action='store_true') sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify an image generation LORA safetensors model to be applied.", default="") sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the image LORA model to be applied.", type=float, default=1.0) - sdparsergroup.add_argument("--sdnotile", help="Disables VAE tiling, may not work for large images.", action='store_true') - + sdparsergroup.add_argument("--sdtiledvae", metavar=('[maxres]'), help="Adjust the automatic VAE tiling trigger for images above this size. 1 will always use tiling, -1 disables it for all images; 0 or unspecified uses the default 768", type=int, default=0) + sdparsergroup.add_argument("--sdnotile", help=argparse.SUPPRESS, action='store_true') # legacy option, see sdtiledvae whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands') whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="") From 830ae981097d370c8bdf5c16f382ff7448abc86e Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sat, 21 Jun 2025 09:48:54 +0800 Subject: [PATCH 4/4] simplify tiling checks, 768 default visible in launcher --- koboldcpp.py | 17 +++++++++-------- otherarch/sdcpp/sdtype_adapter.cpp | 23 ++++------------------- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 5eab08f042bd..8ac5de215138 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -52,6 +52,7 @@ default_visionmaxres = 1024 net_save_slots = 10 savestate_limit = 3 #3 savestate slots +default_vae_tile_threshold = 768 # abuse prevention stop_token_max = 256 @@ -4303,7 +4304,7 @@ def hide_tooltip(event): sd_clipg_var = ctk.StringVar() sd_photomaker_var = ctk.StringVar() sd_vaeauto_var = ctk.IntVar(value=0) - sd_tiled_vae_var = ctk.StringVar(value="") + sd_tiled_vae_var = ctk.StringVar(value=str(default_vae_tile_threshold)) sd_clamped_var = ctk.StringVar(value="0") sd_clamped_soft_var = ctk.StringVar(value="0") sd_threads_var = ctk.StringVar(value=str(default_threads)) @@ -5033,7 +5034,7 @@ def toggletaesd(a,b,c): sdvaeitem2.grid() sdvaeitem3.grid() makecheckbox(images_tab, "Use TAE SD (AutoFix Broken VAE)", sd_vaeauto_var, 42,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.") - makelabelentry(images_tab, "VAE Tiling Above:", sd_tiled_vae_var, 44, 50, padx=190,singleline=True,tooltip="Enable VAE Tiling for images above this size, to save memory.\n-1 disables tiling for all images. Leave at 0 for the default limit (768).") + makelabelentry(images_tab, "VAE Tiling Threshold:", sd_tiled_vae_var, 44, 50, padx=144,singleline=True,tooltip="Enable VAE Tiling for images above this size, to save memory.\nSet to 0 to disable VAE tiling.") # audio tab audio_tab = tabcontent["Audio"] @@ -5266,7 +5267,7 @@ def export_vars(): args.sdthreads = (0 if sd_threads_var.get()=="" else int(sd_threads_var.get())) args.sdclamped = (0 if int(sd_clamped_var.get())<=0 else int(sd_clamped_var.get())) args.sdclampedsoft = (0 if int(sd_clamped_soft_var.get())<=0 else int(sd_clamped_soft_var.get())) - args.sdtiledvae = (0 if sd_tiled_vae_var.get()=="" else int(sd_tiled_vae_var.get())) + args.sdtiledvae = (default_vae_tile_threshold if sd_tiled_vae_var.get()=="" else int(sd_tiled_vae_var.get())) if sd_vaeauto_var.get()==1: args.sdvaeauto = True args.sdvae = "" @@ -5488,7 +5489,7 @@ def import_vars(dict): sd_clipg_var.set(dict["sdclipg"] if ("sdclipg" in dict and dict["sdclipg"]) else "") sd_photomaker_var.set(dict["sdphotomaker"] if ("sdphotomaker" in dict and dict["sdphotomaker"]) else "") sd_vaeauto_var.set(1 if ("sdvaeauto" in dict and dict["sdvaeauto"]) else 0) - sd_tiled_vae_var.set(int(dict["sdtiledvae"]) if ("sdtiledvae" in dict and dict["sdtiledvae"]) else 0) + sd_tiled_vae_var.set(str(dict["sdtiledvae"]) if ("sdtiledvae" in dict and dict["sdtiledvae"]) else str(default_vae_tile_threshold)) sd_lora_var.set(dict["sdlora"] if ("sdlora" in dict and dict["sdlora"]) else "") sd_loramult_var.set(str(dict["sdloramult"]) if ("sdloramult" in dict and dict["sdloramult"]) else "1.0") @@ -5857,8 +5858,8 @@ def convert_invalid_args(args): dict["model_param"] = model_value elif isinstance(model_value, list) and model_value: # Non-empty list dict["model_param"] = model_value[0] # Take the first file in the list - if "sdnotile" in dict and ("sdtiledvae" not in dict or dict["sdtiledvae"] == 0): - dict["sdtiledvae"] = (-1 if dict["sdnotile"] else 0) # convert legacy option + if "sdnotile" in dict and "sdtiledvae" not in dict: + dict["sdtiledvae"] = (0 if (dict["sdnotile"]) else default_vae_tile_threshold) # convert legacy option return args def setuptunnel(global_memory, has_sd): @@ -7272,8 +7273,7 @@ def range_checker(arg: str): sdparsergrouplora.add_argument("--sdquant", help="If specified, loads the model quantized to save memory.", action='store_true') sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify an image generation LORA safetensors model to be applied.", default="") sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the image LORA model to be applied.", type=float, default=1.0) - sdparsergroup.add_argument("--sdtiledvae", metavar=('[maxres]'), help="Adjust the automatic VAE tiling trigger for images above this size. 1 will always use tiling, -1 disables it for all images; 0 or unspecified uses the default 768", type=int, default=0) - sdparsergroup.add_argument("--sdnotile", help=argparse.SUPPRESS, action='store_true') # legacy option, see sdtiledvae + sdparsergroup.add_argument("--sdtiledvae", metavar=('[maxres]'), help="Adjust the automatic VAE tiling trigger for images above this size. 0 disables vae tiling.", type=int, default=default_vae_tile_threshold) whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands') whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="") @@ -7299,5 +7299,6 @@ def range_checker(arg: str): deprecatedgroup.add_argument("--sdconfig", help=argparse.SUPPRESS, nargs='+') compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true') compatgroup3.add_argument("--nommap", help=argparse.SUPPRESS, action='store_true') + deprecatedgroup.add_argument("--sdnotile", help=argparse.SUPPRESS, action='store_true') # legacy option, see sdtiledvae main(launch_args=parser.parse_args(),default_args=parser.parse_args([])) diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp index 50b3621126fd..49ed84ca8b2e 100644 --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ -137,6 +137,8 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) { std::string clipg_filename = inputs.clipg_filename; std::string photomaker_filename = inputs.photomaker_filename; cfg_tiled_vae_threshold = inputs.tiled_vae_threshold; + cfg_tiled_vae_threshold = (cfg_tiled_vae_threshold > 8192 ? 8192 : cfg_tiled_vae_threshold); + cfg_tiled_vae_threshold = (cfg_tiled_vae_threshold <= 0 ? 8192 : cfg_tiled_vae_threshold); //if negative dont tile cfg_side_limit = inputs.img_hard_limit; cfg_square_limit = inputs.img_soft_limit; printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename); @@ -482,25 +484,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) printf("\nKCPP SD: Requested dimensions %dx%d changed to %dx%d\n", inputs.width, inputs.height, sd_params->width, sd_params->height); } - int tiled_vae_threshold; - bool dotile; - if (cfg_tiled_vae_threshold == 0) { - // Legacy behavior: limit by image dimensions - tiled_vae_threshold = 768; - dotile = (sd_params->width>tiled_vae_threshold || sd_params->height>tiled_vae_threshold); - } else { - if (cfg_tiled_vae_threshold > 0) { - tiled_vae_threshold = cfg_tiled_vae_threshold; - } else { - tiled_vae_threshold = 8192; // effectively avoids tiling - } - // when explicitely set, limit by image area - // on the Vulkan backend, typically limited at 4G for a single allocation, - // the memory used for the VAE buffer is 6656 bytes per image pixel; a - // 768x768 square image (and all resolutions with the same area) stays a - // bit below that limit, at 3.66G. - dotile = (sd_params->width*sd_params->height > tiled_vae_threshold*tiled_vae_threshold); - } + // trigger tiling by image area, the memory used for the VAE buffer is 6656 bytes per image pixel, default 768x768 + bool dotile = (sd_params->width*sd_params->height > cfg_tiled_vae_threshold*cfg_tiled_vae_threshold); set_sd_vae_tiling(sd_ctx,dotile); //changes vae tiling, prevents memory related crash/oom if (sd_params->clip_skip <= 0) {