chavezord committed on
Commit
10af882
1 Parent(s): bc04125

Upload 11 files

Dockerfile.txt ADDED
@@ -0,0 +1,17 @@
+ # Dockerfile
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ # Install dependencies
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the app files
+ COPY . .
+
+ # Expose the port FastAPI will run on
+ EXPOSE 7860
+
+ # Run the application
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,34 @@
+ import os
+ from fastapi import FastAPI, Body, File, UploadFile, Request
+ from fastapi.responses import HTMLResponse, RedirectResponse
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.templating import Jinja2Templates
+
+ from models.Flashcardd import Flashcard
+ from translations.translate import load_model_and_tokenizer, translate
+
+
+ app = FastAPI()
+
+
+ @app.get("/")
+ async def home():
+     return {"message": "hola-mondo"}
+
+
+ @app.post("/flashcards/create_flashcard")
+ async def create_flashcard(new_flashcard=Body()) -> Flashcard:
+     print(f"new flashcard: {new_flashcard}")
+     from_lang = new_flashcard["from"]
+     to_lang = new_flashcard["to"]
+     model, tokenizer = load_model_and_tokenizer(from_lang=from_lang, to_lang=to_lang)
+     translation = translate(new_flashcard["word"], model, tokenizer)
+
+     return Flashcard(
+         name=new_flashcard["word"],
+         translation=translation,
+         # sample_sentence=f"{new_flashcard['word']}",
+         sample_sentence="notes..",
+         # "translation": f"translation: {new_flashcard['word']}",
+         # "translation": translation,
+     )
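Note: based on the handler above, the endpoint expects a JSON body with "from", "to", and "word" keys. A minimal client sketch, assuming the container is running locally on port 7860 (per the Dockerfile) and that the requests package is available on the client side (it is not listed in requirements.txt):

import requests  # assumed client-side dependency, not part of this repo

# Hypothetical request against a locally running container
payload = {"from": "en", "to": "es", "word": "library"}
resp = requests.post(
    "http://localhost:7860/flashcards/create_flashcard",
    json=payload,
)
# Expected shape: {"name": ..., "translation": ..., "sample_sentence": ...}
print(resp.json())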
models/Flashcardd.py ADDED
@@ -0,0 +1,10 @@
+ from pydantic import BaseModel, constr
+ from typing import Union, Optional
+
+
+ class Flashcard(BaseModel):
+     name: constr(max_length=100, min_length=5)  # type: ignore
+     translation: Optional[str] = ""
+     sample_sentence: Optional[str] = ""
+
+
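A small usage sketch for the model above; the field names and the min_length=5 constraint come from the class, while the example words are illustrative only:

from models.Flashcardd import Flashcard
from pydantic import ValidationError

card = Flashcard(name="biblioteca", translation="library")
print(card.sample_sentence)  # defaults to ""

try:
    Flashcard(name="sol")  # fails validation: shorter than min_length=5
except ValidationError as err:
    print(err)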
models/__init__.py ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ gradio
+ fastapi==0.112.0
+ sentencepiece==0.2.0
+ numpy==1.23.1
+ sacremoses==0.1.1
+ tokenizers==0.19.1
+ transformers==4.43.3
+ huggingface-hub==0.24.5
+ torch
translations/__init__.py ADDED
File without changes
translations/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (163 Bytes)
translations/__pycache__/model_name_mapping.cpython-39.pyc ADDED
Binary file (1.84 kB)
translations/__pycache__/translate.cpython-39.pyc ADDED
Binary file (1.59 kB)
translations/model_name_mapping.py ADDED
@@ -0,0 +1,47 @@
+ from dataclasses import dataclass, field
+ from typing import Dict, Literal, Tuple
+ from pydantic import BaseModel, ValidationError
+ from enum import Enum
+
+
+ class ModelNameMapping(BaseModel):
+     data: Dict[Tuple, str] = field(default_factory=dict)
+
+     def __getitem__(self, key: Tuple) -> str:
+         return self.data[key]
+
+     def get(self, key: Tuple) -> str:
+         return self.data.get(key, "")
+
+
+ class LanguagesEnum(Enum):
+     es = "es"
+     en = "en"
+     ca = "ca"
+     pt = "pt"
+
+
+ es_to_en: Tuple = (LanguagesEnum.es.value, LanguagesEnum.en.value)
+ es_to_ca: Tuple = (LanguagesEnum.es.value, LanguagesEnum.ca.value)
+ es_to_po: Tuple = (LanguagesEnum.es.value, LanguagesEnum.pt.value)
+
+ en_to_es: Tuple = (LanguagesEnum.en.value, LanguagesEnum.es.value)
+ en_to_po: Tuple = (LanguagesEnum.en.value, LanguagesEnum.pt.value)
+
+ ca_to_es: Tuple = (LanguagesEnum.ca.value, LanguagesEnum.es.value)
+
+ po_to_en: Tuple = (LanguagesEnum.pt.value, LanguagesEnum.en.value)
+ po_to_es: Tuple = (LanguagesEnum.pt.value, LanguagesEnum.es.value)
+
+ models = {
+     es_to_en: "Helsinki-NLP/opus-mt-es-en",
+     es_to_ca: "Helsinki-NLP/opus-mt-es-ca",
+     en_to_es: "Helsinki-NLP/opus-mt-en-es",
+     en_to_po: "Helsinki-NLP/opus-mt-tc-big-en-pt",
+     ca_to_es: "Helsinki-NLP/opus-mt-ca-es",
+     po_to_en: "Helsinki-NLP/opus-mt-pt-en",
+     po_to_es: "Helsinki-NLP/opus-mt-pt-es",
+ }
+
+
+ MODEL_NAME_MAPPING = ModelNameMapping(data=models)
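A quick lookup sketch against the mapping above; the pairs are taken from the models dict, and note that .get() falls back to an empty string, so a declared but unmapped pair such as es_to_po resolves to "":

from translations.model_name_mapping import MODEL_NAME_MAPPING

print(MODEL_NAME_MAPPING.get(("en", "es")))  # "Helsinki-NLP/opus-mt-en-es"
print(MODEL_NAME_MAPPING[("ca", "es")])      # "Helsinki-NLP/opus-mt-ca-es"
print(MODEL_NAME_MAPPING.get(("es", "pt")))  # "" (es_to_po is declared but has no entry in models)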
translations/translate.py ADDED
@@ -0,0 +1,45 @@
+ from transformers import MarianMTModel, MarianTokenizer
+ from translations.model_name_mapping import MODEL_NAME_MAPPING
+
+
+ def load_model_and_tokenizer(from_lang: str, to_lang: str):
+     print(f"load_model_and_tokenizer from: {from_lang}, to: {to_lang}")
+
+     model_name = MODEL_NAME_MAPPING.get((from_lang, to_lang))
+     print(f"model_name: {model_name}")
+     tokenizer = MarianTokenizer.from_pretrained(model_name)
+     model = MarianMTModel.from_pretrained(model_name)
+     return model, tokenizer
+
+
+ def translate(text, model, tokenizer):
+     translated = model.generate(**tokenizer(text, return_tensors="pt", padding=True))
+     translated_text = [
+         tokenizer.decode(t, skip_special_tokens=True) for t in translated
+     ]
+     return translated_text[0]
+
+
+
+ def test_translations(from_lang, to_lang, text_to_translate):
+     model, tokenizer = load_model_and_tokenizer(from_lang=from_lang, to_lang=to_lang)
+     translation = translate(text_to_translate, model, tokenizer)
+     print(f"Translated text from: {from_lang}, to: {to_lang}, translation: {translation}")
+
+
+ if __name__ == "__main__":
+     # text_to_translate = "hola amigos, tengo hambre"
+
+     # test_translations(from_lang="es", to_lang="en", text_to_translate=text_to_translate)
+     # test_translations(from_lang="es", to_lang="ca", text_to_translate=text_to_translate)
+     # test_translations(from_lang="es", to_lang="po", text_to_translate=text_to_translate)
+
+     text_to_translate = "hello friends, who's hungry?"
+     test_translations(from_lang="en", to_lang="es", text_to_translate=text_to_translate)
+     # test_translations(from_lang="en", to_lang="ca", text_to_translate=text_to_translate)
+     test_translations(from_lang="en", to_lang="pt", text_to_translate=text_to_translate)
+     # test_translations(from_lang="es", to_lang="po", text_to_translate=text_to_translate)
+
+
+     text_to_translate = "un cafè sense sucre i amb llet, si us plau"
+     test_translations(from_lang="ca", to_lang="es", text_to_translate=text_to_translate)