init
This commit is contained in:
@@ -0,0 +1,43 @@
|
||||
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, M2M100Model, M2M100Tokenizer, NllbTokenizer, \
|
||||
NllbMoeModel, NllbTokenizerFast, T5Tokenizer, T5Model, T5ForConditionalGeneration
|
||||
import torch
|
||||
import os
|
||||
|
||||
|
||||
# Request synchronous CUDA kernel launches (debugging aid).
os.environ.update(CUDA_LAUNCH_BLOCKING="1")

# Hugging Face identifier of the NLLB checkpoint to load.
model_name = "facebook/nllb-200-3.3B"

# Local directory that receives a copy of the model and the tokenizer.
_save_dir = f"E:\\4K Anime\\models\\{model_name}"

# Load the tokenizer (Japanese as source language) and the model in float16.
tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang="jpn_Jpan")
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
)

# Write both artifacts to disk; the model is saved without safetensors.
model.save_pretrained(_save_dir, safe_serialization=False)
tokenizer.save_pretrained(_save_dir)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = model.to(device)
|
||||
|
||||
# Übersetzungsfunktion
|
||||
def translate(text, *, tgt_lang="eng_Latn", max_new_tokens=150):
    """Translate ``text`` using the module-level NLLB model and tokenizer.

    Args:
        text: A single string, or a list of strings translated as one batch.
        tgt_lang: Language code token forced as the first generated token.
            Defaults to ``"eng_Latn"``, the value that was hard-coded before.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to ``150``, the value that was hard-coded before.

    Returns:
        The translated string when ``text`` is a string, a list with one
        translation per input when ``text`` is a list, or ``None`` when
        generation raised an exception (the error is printed).
    """
    # Tokenize the input and move the tensors to the same device as the model.
    batch = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    ).to(device)

    with torch.no_grad():
        try:
            # Greedy decoding; the forced first token selects the target language.
            generated = model.generate(
                **batch,
                do_sample=False,
                forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
                max_new_tokens=max_new_tokens,
            )
        except Exception as e:
            # Best effort: report the failure and signal it with None.
            print(f"Fehler bei der Modellvorhersage: {e}")
            return None

    # A single string keeps the original contract (one decoded string).
    if isinstance(text, str):
        return tokenizer.decode(generated[0], skip_special_tokens=True)

    # For batched input decode every sequence instead of dropping all but
    # the first one.
    return tokenizer.batch_decode(generated, skip_special_tokens=True)
|
||||
|
||||
# Translate a sample sentence and print the result.
# A plain string literal is used: the text has no placeholders, so an
# f-string prefix is unnecessary.
text = "新暦12年。人類は地球の重力という枷から解き放たれる前に、肉体という枷から逃げ出すほうに注力していた。"
result = translate(text)
print(result)
|
||||
Reference in New Issue
Block a user