abhishekpawar7218 committed on
Commit
c979f49
·
1 Parent(s): 2f9fb17

Update space

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -27,8 +27,18 @@ def load_model():
27
  low_cpu_mem_usage=True
28
  )
29
 
 
 
 
 
 
30
  # Load tokenizer
31
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
 
 
 
 
 
32
 
33
  # Load LoRA adapter
34
  print(f"Loading LoRA adapter: {ADAPTER_MODEL}")
@@ -79,4 +89,4 @@ demo = gr.Interface(
79
  description=f"Running on {'CUDA' if torch.cuda.is_available() else 'CPU'} - May be slow on CPU."
80
  )
81
 
82
- demo.launch()
 
27
  low_cpu_mem_usage=True
28
  )
29
 
30
+ # Resize token embeddings to match the adapter
31
+ if base_model.get_input_embeddings().weight.shape[0] != 128257:
32
+ base_model.resize_token_embeddings(128257)
33
+ print(f"Resized embeddings from {base_model.get_input_embeddings().weight.shape[0]-1} to 128257")
34
+
35
  # Load tokenizer
36
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
37
+
38
+ # Ensure tokenizer has the same vocab size as the model
39
+ if len(tokenizer) != base_model.get_input_embeddings().weight.shape[0]:
40
+ tokenizer.add_special_tokens({'pad_token': '[PAD]'})
41
+ print(f"Added padding token to tokenizer. New size: {len(tokenizer)}")
42
 
43
  # Load LoRA adapter
44
  print(f"Loading LoRA adapter: {ADAPTER_MODEL}")
 
89
  description=f"Running on {'CUDA' if torch.cuda.is_available() else 'CPU'} - May be slow on CPU."
90
  )
91
 
92
+ demo.launch()