raygx committed on
Commit
dff9dfa
1 Parent(s): e18907e

Upload TFGPT2ForSequenceClassification

Browse files
Files changed (3) hide show
  1. README.md +11 -15
  2. config.json +6 -6
  3. tf_model.h5 +2 -2
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: raygx/Nepali-DistilGPT2
3
  tags:
4
  - generated_from_keras_callback
5
  model-index:
@@ -12,11 +12,11 @@ probably proofread and complete it, then remove this comment. -->
12
 
13
  # distilGPT-NepSA
14
 
15
- This model is a fine-tuned version of [raygx/Nepali-DistilGPT2](https://huggingface.co/raygx/Nepali-DistilGPT2) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Train Loss: 0.7238
18
- - Validation Loss: 0.7132
19
- - Epoch: 5
20
 
21
  ## Model description
22
 
@@ -35,24 +35,20 @@ More information needed
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
- - optimizer: {'name': 'AdamWeightDecay', 'learning_rate': 2e-06, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False, 'weight_decay_rate': 0.005}
39
  - training_precision: float32
40
 
41
  ### Training results
42
 
43
  | Train Loss | Validation Loss | Epoch |
44
  |:----------:|:---------------:|:-----:|
45
- | 1.0605 | 0.8926 | 0 |
46
- | 0.8693 | 0.8015 | 1 |
47
- | 0.8041 | 0.7605 | 2 |
48
- | 0.7711 | 0.7366 | 3 |
49
- | 0.7469 | 0.7236 | 4 |
50
- | 0.7238 | 0.7132 | 5 |
51
 
52
 
53
  ### Framework versions
54
 
55
- - Transformers 4.31.0
56
- - TensorFlow 2.12.0
57
- - Datasets 2.14.4
58
  - Tokenizers 0.13.3
 
1
  ---
2
+ license: apache-2.0
3
  tags:
4
  - generated_from_keras_callback
5
  model-index:
 
12
 
13
  # distilGPT-NepSA
14
 
15
+ This model is a fine-tuned version of [raygx/distilGPT-Nepali](https://huggingface.co/raygx/distilGPT-Nepali) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Train Loss: 0.6596
18
+ - Validation Loss: 0.6809
19
+ - Epoch: 1
20
 
21
  ## Model description
22
 
 
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
+ - optimizer: {'name': 'AdamWeightDecay', 'learning_rate': 1e-05, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False, 'weight_decay_rate': 0.03}
39
  - training_precision: float32
40
 
41
  ### Training results
42
 
43
  | Train Loss | Validation Loss | Epoch |
44
  |:----------:|:---------------:|:-----:|
45
+ | 0.8788 | 0.7572 | 0 |
46
+ | 0.6596 | 0.6809 | 1 |
 
 
 
 
47
 
48
 
49
  ### Framework versions
50
 
51
+ - Transformers 4.28.1
52
+ - TensorFlow 2.11.0
53
+ - Datasets 2.1.0
54
  - Tokenizers 0.13.3
config.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "_name_or_path": "raygx/Nepali-DistilGPT2",
3
  "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
6
  "GPT2ForSequenceClassification"
7
  ],
8
  "attn_pdrop": 0.1,
9
- "bos_token_id": 1,
10
  "embd_pdrop": 0.1,
11
- "eos_token_id": 2,
12
  "id2label": {
13
  "0": "NEUTRAL",
14
  "1": "POSITIVE",
@@ -24,7 +24,7 @@
24
  "model_type": "gpt2",
25
  "n_ctx": 1024,
26
  "n_embd": 768,
27
- "n_head": 6,
28
  "n_inner": null,
29
  "n_layer": 6,
30
  "n_positions": 1024,
@@ -44,7 +44,7 @@
44
  "max_length": 50
45
  }
46
  },
47
- "transformers_version": "4.31.0",
48
  "use_cache": true,
49
- "vocab_size": 50000
50
  }
 
1
  {
2
+ "_name_or_path": "raygx/distilGPT-Nepali",
3
  "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
6
  "GPT2ForSequenceClassification"
7
  ],
8
  "attn_pdrop": 0.1,
9
+ "bos_token_id": null,
10
  "embd_pdrop": 0.1,
11
+ "eos_token_id": null,
12
  "id2label": {
13
  "0": "NEUTRAL",
14
  "1": "POSITIVE",
 
24
  "model_type": "gpt2",
25
  "n_ctx": 1024,
26
  "n_embd": 768,
27
+ "n_head": 12,
28
  "n_inner": null,
29
  "n_layer": 6,
30
  "n_positions": 1024,
 
44
  "max_length": 50
45
  }
46
  },
47
+ "transformers_version": "4.28.1",
48
  "use_cache": true,
49
+ "vocab_size": 50003
50
  }
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d61db576793435960e0346a64e4eb48a56d49096170860eddab818346fa1f035
3
- size 326968664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf295902ff41cd0fe53ad81486af836bd3c509e68586f9c7f3adbc99977219b
3
+ size 480590728