aria-dev committed on
Commit
c347f2e
1 Parent(s): 0fe67ee

update tokenizer

Browse files
README.md CHANGED
@@ -59,7 +59,7 @@ tags:
59
  pip install transformers==4.45.0 accelerate==0.34.1 sentencepiece==0.2.0 torchvision requests torch Pillow
60
  pip install flash-attn --no-build-isolation
61
 
62
- # For better performance, you can install grouped-gemm, which may take 3-5 minutes to install
63
  pip install grouped_gemm==0.1.6
64
  ```
65
 
 
59
  pip install transformers==4.45.0 accelerate==0.34.1 sentencepiece==0.2.0 torchvision requests torch Pillow
60
  pip install flash-attn --no-build-isolation
61
 
62
+ # For better inference performance, you can install grouped-gemm, which may take 3-5 minutes to install
63
  pip install grouped_gemm==0.1.6
64
  ```
65
 
added_tokens.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "</s>": 100353,
3
- "<s>": 100352
4
- }
 
 
 
 
 
special_tokens_map.json CHANGED
@@ -1,18 +1,4 @@
1
  {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "</s>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
  "unk_token": {
17
  "content": "<unk>",
18
  "lstrip": false,
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "unk_token": {
3
  "content": "<unk>",
4
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -10,28 +10,12 @@
10
  "rstrip": false,
11
  "single_word": false,
12
  "special": true
13
- },
14
- "100352": {
15
- "content": "<s>",
16
- "lstrip": false,
17
- "normalized": false,
18
- "rstrip": false,
19
- "single_word": false,
20
- "special": true
21
- },
22
- "100353": {
23
- "content": "</s>",
24
- "lstrip": false,
25
- "normalized": false,
26
- "rstrip": false,
27
- "single_word": false,
28
- "special": true
29
  }
30
  },
31
- "bos_token": "<s>",
32
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}{% elif message['content'] is iterable %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<fim_prefix><|img|><fim_suffix>{% endif %}{% endfor %}{% endif %}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
33
  "clean_up_tokenization_spaces": false,
34
- "eos_token": "</s>",
35
  "legacy": true,
36
  "model_max_length": 1000000000000000019884624838656,
37
  "pad_token": null,
 
10
  "rstrip": false,
11
  "single_word": false,
12
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  }
14
  },
15
+ "bos_token": null,
16
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}{% elif message['content'] is iterable %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<fim_prefix><|img|><fim_suffix>{% endif %}{% endfor %}{% endif %}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
17
  "clean_up_tokenization_spaces": false,
18
+ "eos_token": null,
19
  "legacy": true,
20
  "model_max_length": 1000000000000000019884624838656,
21
  "pad_token": null,