44 lines
1.7 KiB
Python
44 lines
1.7 KiB
Python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, M2M100Model, M2M100Tokenizer, NllbTokenizer, \
|
|
NllbMoeModel, NllbTokenizerFast, T5Tokenizer, T5Model, T5ForConditionalGeneration
|
|
import torch
|
|
import os
|
|
|
|
|
|
# CUDA_LAUNCH_BLOCKING=1 asks CUDA to run kernel launches synchronously,
# which is typically used so device-side errors surface at the failing call.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Hugging Face Hub identifier of the NLLB checkpoint.
model_name = "facebook/nllb-200-3.3B"

# Local directory that receives a copy of the model and tokenizer files.
save_dir = f"E:\\4K Anime\\models\\{model_name}"

# Load the tokenizer (source language: Japanese) and the half-precision model.
tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang="jpn_Jpan")
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
)

# Write both artifacts to disk before the model is moved to its device.
model.save_pretrained(save_dir, safe_serialization=False)
tokenizer.save_pretrained(save_dir)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = model.to(device)
|
|
|
|
# Übersetzungsfunktion
|
|
def translate(text, tgt_lang="eng_Latn", max_new_tokens=150):
    """Translate ``text`` with the module-level NLLB model and tokenizer.

    Args:
        text: A single string, or a sequence of strings to translate as one
            padded batch. Each input is truncated to 512 tokens.
        tgt_lang: Language-code token forced as the first generated token
            (defaults to ``"eng_Latn"``).
        max_new_tokens: Upper bound on the number of generated tokens per
            input (defaults to 150).

    Returns:
        The translated string when ``text`` is a string, or a list with one
        translation per input when ``text`` is a sequence. If every input is
        empty or whitespace-only, empty strings are returned without running
        the model. Returns ``None`` if generation raises an exception (the
        error is printed).
    """
    single = isinstance(text, str)
    texts = [text] if single else list(text)

    # Nothing to translate: skip tokenization and generation entirely so the
    # model is not asked to "translate" an empty prompt.
    if not any(item.strip() for item in texts):
        return "" if single else [""] * len(texts)

    # Tokenize and move the tensors to the same device as the model.
    batch = tokenizer(
        texts,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    ).to(device)

    with torch.no_grad():
        try:
            generated = model.generate(
                **batch,
                do_sample=False,
                forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
                max_new_tokens=max_new_tokens,
            )
        except Exception as e:
            # Deliberate best-effort: report the failure and signal it with None.
            print(f"Fehler bei der Modellvorhersage: {e}")
            return None

    # Decode every generated sequence, not just the first one, so batched
    # inputs get one translation each.
    decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded[0] if single else decoded
|
|
|
|
# Translate a sample sentence and print the outcome.
text = "新暦12年。人類は地球の重力という枷から解き放たれる前に、肉体という枷から逃げ出すほうに注力していた。"
result = translate(text)
print(result)
|