multimodalart HF Staff committed on
Commit
8396596
·
verified ·
1 Parent(s): 3f08dd1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -90,25 +90,24 @@ pipeline = DistilledPipeline(
90
  quantization=QuantizationPolicy.fp8_cast(),
91
  )
92
 
93
- # Preload all models so first request is fast.
94
- # On ZeroGPU, .to('cuda') is intercepted and actual GPU allocation
95
- # happens inside the @spaces.GPU decorated function.
96
- print("Preloading models...")
 
97
  ledger = pipeline.model_ledger
98
- _transformer = ledger.transformer()
99
  _video_encoder = ledger.video_encoder()
100
  _video_decoder = ledger.video_decoder()
101
  _audio_decoder = ledger.audio_decoder()
102
  _vocoder = ledger.vocoder()
103
  _spatial_upsampler = ledger.spatial_upsampler()
104
 
105
- ledger.transformer = lambda: _transformer
106
  ledger.video_encoder = lambda: _video_encoder
107
  ledger.video_decoder = lambda: _video_decoder
108
  ledger.audio_decoder = lambda: _audio_decoder
109
  ledger.vocoder = lambda: _vocoder
110
  ledger.spatial_upsampler = lambda: _spatial_upsampler
111
- print("All models preloaded!")
112
 
113
  # Connect to text encoder space
114
  print(f"Connecting to text encoder space: {TEXT_ENCODER_SPACE}")
 
90
  quantization=QuantizationPolicy.fp8_cast(),
91
  )
92
 
93
+ # Preload small models for ZeroGPU tensor packing.
94
+ # DO NOT preload the transformer (~20GB) — the pipeline needs to load/unload
95
+ # it between stages (FP8 upcast doubles it to ~44GB during forward pass).
96
+ # Keeping it cached prevents cleanup_memory() from freeing it.
97
+ print("Preloading small models...")
98
  ledger = pipeline.model_ledger
 
99
  _video_encoder = ledger.video_encoder()
100
  _video_decoder = ledger.video_decoder()
101
  _audio_decoder = ledger.audio_decoder()
102
  _vocoder = ledger.vocoder()
103
  _spatial_upsampler = ledger.spatial_upsampler()
104
 
 
105
  ledger.video_encoder = lambda: _video_encoder
106
  ledger.video_decoder = lambda: _video_decoder
107
  ledger.audio_decoder = lambda: _audio_decoder
108
  ledger.vocoder = lambda: _vocoder
109
  ledger.spatial_upsampler = lambda: _spatial_upsampler
110
+ print("Small models preloaded! (transformer loads on demand per stage)")
111
 
112
  # Connect to text encoder space
113
  print(f"Connecting to text encoder space: {TEXT_ENCODER_SPACE}")