dnakov committed on
Commit
c5e3db2
·
verified ·
1 Parent(s): 288ea71

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. nanochat_tokenizer.py +12 -2
nanochat_tokenizer.py CHANGED
@@ -32,16 +32,26 @@ class NanochatTokenizer(PreTrainedTokenizer):
32
  if os.path.isfile(local_file):
33
  vocab_file = local_file
34
  else:
 
 
 
 
 
 
 
 
 
 
35
  # Download from HF Hub
36
  try:
37
  vocab_file = hf_hub_download(
38
- repo_id=self.name_or_path,
39
  filename="tokenizer.pkl",
40
  repo_type="model"
41
  )
42
  except Exception as e:
43
  raise ValueError(
44
- f"Could not find or download tokenizer.pkl for {self.name_or_path}: {e}"
45
  )
46
 
47
  if vocab_file is None or not os.path.isfile(vocab_file):
 
32
  if os.path.isfile(local_file):
33
  vocab_file = local_file
34
  else:
35
+ # Extract repo ID from cache path if needed
36
+ repo_id = self.name_or_path
37
+ if "models--" in str(repo_id):
38
+ # Cache path format: .../models--namespace--repo_name/snapshots/...
39
+ parts = str(repo_id).split("models--")
40
+ if len(parts) > 1:
41
+ # Get the models--namespace--repo_name part
42
+ repo_part = parts[1].split("/")[0]
43
+ repo_id = repo_part.replace("--", "/")
44
+
45
  # Download from HF Hub
46
  try:
47
  vocab_file = hf_hub_download(
48
+ repo_id=repo_id,
49
  filename="tokenizer.pkl",
50
  repo_type="model"
51
  )
52
  except Exception as e:
53
  raise ValueError(
54
+ f"Could not find or download tokenizer.pkl for {repo_id}: {e}"
55
  )
56
 
57
  if vocab_file is None or not os.path.isfile(vocab_file):