NataliaH committed on
Commit 3674769 · verified · 1 Parent(s): 670726c

Initial upload of tiny GPT-2 model

Files changed (6)
  1. README.md +47 -32
  2. config.json +1 -39
  3. pytorch_model.bin +2 -2
  4. special_tokens_map.json +21 -4
  5. tokenizer_config.json +3 -1
  6. vocab.json +0 -0
README.md CHANGED
@@ -1,36 +1,51 @@
-
  ---
  tags:
- - language-model
- - transformer-decoder
- - tiny-shakespeare
  license: mit
  datasets:
- - tiny_shakespeare
- model_description: |
- This is a small autoregressive language model based on the Transformer architecture trained on the Tiny Shakespeare dataset.
-
- ## Model Description
- The model is a custom implementation of a TransformerDecoderModel, which uses a decoder-only architecture similar to GPT-2.
- It was trained on the Tiny Shakespeare dataset to generate text in the style of William Shakespeare.
-
- ## Training Details
- The model was trained and tracked using [Weights & Biases](https://wandb.ai/honcharova-de-hannover/LanguageModel_Project?nw=nwuserhoncharovade).
-
- ## How to Use
- To generate text with this model, you can load it and the tokenizer as follows:
-
- ```python
- from transformers import AutoTokenizer
- from transformers import GPT2LMHeadModel
-
- # Load the model and tokenizer
- model = GPT2LMHeadModel.from_pretrained('NataliaH/gpt2-tiny-shakespeare')
- tokenizer = AutoTokenizer.from_pretrained('NataliaH/gpt2-tiny-shakespeare')
-
- # Provide input text and generate output
- input_text = 'To be or not to be'
- inputs = tokenizer(input_text, return_tensors='pt')
- outputs = model.generate(**inputs)
- print(tokenizer.decode(outputs[0], skip_special_tokens=True))
- ```

  ---
  tags:
+ - language-model
+ - gpt-2
+ - fine-tuned
+ - tiny-shakespeare
  license: mit
  datasets:
+ - tiny_shakespeare
+ ---
+
+ # GPT-2 Tiny Shakespeare Model
+
+ This is a small autoregressive language model based on the Transformer architecture trained on the Tiny Shakespeare dataset.
+
+ ## Model Description
+ The model is a custom implementation of a TransformerDecoderModel, which uses a decoder-only architecture similar to GPT-2. It was trained on the Tiny Shakespeare dataset to generate text in the style of William Shakespeare.
+
+ ## How to Use
+ To generate text with this model, you can load it and the tokenizer as follows:
+
+ ```python
+ from transformers import GPT2LMHeadModel, GPT2Tokenizer
+
+ model = GPT2LMHeadModel.from_pretrained('NataliaH/gpt2-tiny-shakespeare')
+ tokenizer = GPT2Tokenizer.from_pretrained('NataliaH/gpt2-tiny-shakespeare')
+
+ input_text = 'To be or not to be'
+ inputs = tokenizer(input_text, return_tensors='pt')
+ outputs = model.generate(**inputs)
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```
+
+ ## Tags
+ - Transformer
+ - GPT-2
+ - Tiny Shakespeare
+ - Language Model
+ - Text Generation
+ - Autoregressive
+
+ ## Training Details
+
+ - **Epochs**: 3
+ - **Batch size**: 4
+ - **Learning Rate**: 5e-5
+ - **Loss Function**: Cross-Entropy Loss
+ - **Optimizer**: AdamW
+
+ ## License
+ This model is licensed under the MIT license.
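The hyperparameters listed under Training Details correspond to a standard causal-LM fine-tuning loop. The training script itself is not part of this commit, so the following is only a minimal sketch of such a loop; the base `gpt2` checkpoint, the example text, and the `train_loader` batching are assumptions for illustration:

```python
import torch
from torch.utils.data import DataLoader
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Hyperparameters taken from the README's "Training Details" section.
EPOCHS, BATCH_SIZE, LR = 3, 4, 5e-5

# Assumption: fine-tuning starts from the base gpt2 checkpoint.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Placeholder standing in for the Tiny Shakespeare corpus split into chunks.
texts = ["To be or not to be, that is the question."]
enc = tokenizer(texts, truncation=True, max_length=256,
                padding="max_length", return_tensors="pt")
train_loader = DataLoader(list(zip(enc["input_ids"], enc["attention_mask"])),
                          batch_size=BATCH_SIZE, shuffle=True)

optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
model.train()
for epoch in range(EPOCHS):
    for input_ids, attention_mask in train_loader:
        # labels=input_ids makes the model compute the cross-entropy loss internally
        # (a real run would also mask padded positions with -100).
        loss = model(input_ids=input_ids, attention_mask=attention_mask,
                     labels=input_ids).loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
```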
config.json CHANGED
@@ -1,39 +1 @@
1
- {
2
- "activation_function": "gelu_new",
3
- "architectures": [
4
- "GPT2LMHeadModel"
5
- ],
6
- "attn_pdrop": 0.1,
7
- "bos_token_id": 50256,
8
- "embd_pdrop": 0.1,
9
- "eos_token_id": 50256,
10
- "initializer_range": 0.02,
11
- "layer_norm_epsilon": 1e-05,
12
- "model_type": "gpt2",
13
- "n_ctx": 1024,
14
- "n_embd": 768,
15
- "n_head": 12,
16
- "n_inner": null,
17
- "n_layer": 12,
18
- "n_positions": 1024,
19
- "pad_token_id": 50256,
20
- "reorder_and_upcast_attn": false,
21
- "resid_pdrop": 0.1,
22
- "scale_attn_by_inverse_layer_idx": false,
23
- "scale_attn_weights": true,
24
- "summary_activation": null,
25
- "summary_first_dropout": 0.1,
26
- "summary_proj_to_labels": true,
27
- "summary_type": "cls_index",
28
- "summary_use_proj": true,
29
- "task_specific_params": {
30
- "text-generation": {
31
- "do_sample": true,
32
- "max_length": 50
33
- }
34
- },
35
- "torch_dtype": "float32",
36
- "transformers_version": "4.51.3",
37
- "use_cache": true,
38
- "vocab_size": 50257
39
- }
 
1
+ {"architectures": ["TransformerDecoderModel"], "vocab_size": 50257}
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:902374a1f9aef83539bb5392218d589d97ff1ce05f76edb74fbb8914679cb28f
- size 120143250

  version https://git-lfs.github.com/spec/v1
+ oid sha256:4b2922ff630927e80f0e2261ea775429909dd615ce8947dacb59054d75c00b36
+ size 497813466
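The LFS pointer change swaps a ~120 MB weight file for a ~498 MB one. As a rough sanity check, assuming float32 weights at 4 bytes each, the two sizes translate into parameter counts as follows:

```python
# Rough float32 parameter-count estimate from the two LFS pointer sizes.
BYTES_PER_FP32 = 4
old_size, new_size = 120_143_250, 497_813_466
print(f"old ≈ {old_size / BYTES_PER_FP32 / 1e6:.0f}M parameters")  # ≈ 30M
print(f"new ≈ {new_size / BYTES_PER_FP32 / 1e6:.0f}M parameters")  # ≈ 124M, roughly GPT-2 small
```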
special_tokens_map.json CHANGED
@@ -1,6 +1,23 @@
  {
- "bos_token": "<|endoftext|>",
- "eos_token": "<|endoftext|>",
- "pad_token": "<|endoftext|>",
- "unk_token": "<|endoftext|>"
  }

  {
+ "bos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
  }
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
  {
  "add_prefix_space": false,
  "added_tokens_decoder": {
  "50256": {
@@ -13,9 +14,10 @@
  "bos_token": "<|endoftext|>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|endoftext|>",
  "extra_special_tokens": {},
  "model_max_length": 1024,
- "pad_token": "<|endoftext|>",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
  }

  {
+ "add_bos_token": false,
  "add_prefix_space": false,
  "added_tokens_decoder": {
  "50256": {
  "bos_token": "<|endoftext|>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|endoftext|>",
+ "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 1024,
+ "pad_token": null,
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
  }
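The updated tokenizer_config.json sets `pad_token` to null, so batched or padded inputs need a pad token assigned at load time. A common workaround, shown here as a sketch rather than something this commit configures, is to reuse the end-of-text token:

```python
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("NataliaH/gpt2-tiny-shakespeare")

# pad_token is null in this repo's tokenizer_config.json; reuse eos for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

batch = tokenizer(["To be or not to be", "Friends, Romans, countrymen"],
                  padding=True, return_tensors="pt")
print(batch["input_ids"].shape)
```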
vocab.json CHANGED
The diff for this file is too large to render. See raw diff