|
--- |
|
library_name: transformers |
|
license: mit |
|
language: |
|
- fa |
|
pipeline_tag: token-classification |
|
--- |
|
Named entity recognition (NER) on a Persian dataset |
|
|
|
traindataset=20484 Persian sentences |
|
|
|
valdataset=2561 |
|
|
|
AutoTokenizer=HooshvareLab/bert-fa-base-uncased |
|
|
|
ner_tags= |
|
['O', 'B-pro', |
|
'I-pro', |
|
'B-pers', |
|
'I-pers', |
|
'B-org', |
|
'I-org', |
|
'B-loc', |
|
'I-loc', |
|
'B-fac', |
|
'I-fac', |
|
'B-event', |
|
'I-event'] |
|
|
|
training_args= |
|
learning_rate=2e-5, |
|
|
|
per_device_train_batch_size=16, |
|
|
|
per_device_eval_batch_size=16, |
|
|
|
num_train_epochs=4, |
|
|
|
weight_decay=0.01 |
|
|
|
|
|
Training Loss=0.001000 |
|
|
|
sample1: |
|
'entity': 'B-loc', |
|
'score': 0.9998902, |
|
'index': 2, |
|
'word': 'تهران', |
|
|
|
sample2: |
|
'entity': 'B-pers', |
|
'score': 0.99988234, |
|
'index': 2, |
|
'word': 'عباس', |
|
|
|
|
|
To use this model: |
|
|
|
from transformers import pipeline |
|
|
|
pipe = pipeline("token-classification", model="NLPclass/Named_entity_recognition_persian") |
|
|
|
sentence = "" |
|
|
|
predicted_ner = pipe(sentence) |
|
|
|
for entity in predicted_ner: |
|
|
|
print(f"Entity: {entity['word']}, Label: {entity['entity']}") |