import io
import re
from mosestokenizer import *
# Accumulates the final detokenized output text written at the end of the script.
out_pre = ""
# One line per hypothesis; starts as the raw SentencePiece-tokenized lines.
pre_list = []

# Context manager so the input file is closed even if a later step raises;
# the original left the handle open for the life of the process.
with io.open("/content/tgt-test.txt", 'r', encoding="utf-8") as in_pre:
    pre_list.extend(in_pre.readlines())

# Undo SentencePiece segmentation: drop the literal spaces between subword
# pieces, then turn the "▁" word-boundary marker back into a real space.
# str.replace on fixed strings is clearer and faster than re.sub.
pre_list = [line.replace(" ", "").replace("▁", " ") for line in pre_list]
# Moses-detokenize each line for Russian and lowercase it.
# MosesDetokenizer wraps a perl subprocess, so use it as a context manager
# to make sure the subprocess is shut down when we are done.
detok_lines = []
with MosesDetokenizer('ru') as detokenize:
    for item in pre_list:
        tokens = item.strip().split(" ")
        detok_lines.append(detokenize(tokens).lower())

# Join once instead of quadratic `out_pre = out_pre + ...` concatenation;
# the resulting string is byte-identical to the original loop's output.
out_pre = out_pre + "".join(line + "\n" for line in detok_lines)
# Write the detokenized, lowercased output. Specify UTF-8 explicitly so the
# result does not depend on the platform default encoding, and use a context
# manager so the file is flushed and closed even on error. `write` replaces
# `writelines`, which on a plain str would iterate character by character.
with open("/content/tgt-test-dec.txt", "w", encoding="utf-8") as f_pre:
    f_pre.write(out_pre)