I have issues with a lot of plugins regarding the encoding/decoding of text.
Even when you know the encoding name in ST2, there’s no facility to get the right codec in Python because the names are not the same.
I use the module below for the conversion, but it’s cumbersome to include it in all plugins.
And making a separate package out of it means that you introduce a dependency for other packages that want to use it.
So Jon, what do you think about including something like that in the standard ST2 distribution?
[code]import codecs
# Encoding names as Sublime Text 2 spells them.  This list is parallel to
# ``py_encodings_list`` below: the entry at index i here corresponds to the
# Python codec name at index i there, so the two must stay the same length
# and in the same order.
st_encodings_list = [
    "UTF-8",
    "UTF-8 with BOM",
    "UTF-16 LE",
    "UTF-16 LE with BOM",
    "UTF-16 BE",
    "UTF-16 BE with BOM",
    "Western (Windows 1252)",
    "Western (ISO 8859-1)",
    "Western (ISO 8859-3)",
    "Western (ISO 8859-15)",
    "Western (Mac Roman)",
    "DOS (CP 437)",
    "Arabic (Windows 1256)",
    "Arabic (ISO 8859-6)",
    "Baltic (Windows 1257)",
    "Baltic (ISO 8859-4)",
    "Celtic (ISO 8859-14)",
    "Central European (Windows 1250)",
    "Central European (ISO 8859-2)",
    "Cyrillic (Windows 1251)",
    "Cyrillic (Windows 866)",
    "Cyrillic (ISO 8859-5)",
    "Cyrillic (KOI8-R)",
    "Cyrillic (KOI8-U)",
    "Estonian (ISO 8859-13)",
    "Greek (Windows 1253)",
    "Greek (ISO 8859-7)",
    "Hebrew (Windows 1255)",
    "Hebrew (ISO 8859-8)",
    "Nordic (ISO 8859-10)",
    "Romanian (ISO 8859-16)",
    "Turkish (Windows 1254)",
    "Turkish (ISO 8859-9)",
    "Vietnamese (Windows 1258)",
    "Hexadecimal",
]

# Python codec names, index-aligned with ``st_encodings_list`` above.
# "utf-16" appears twice on purpose (both "with BOM" variants map to it), and
# the final ``None`` marks "Hexadecimal", which has no codec in this table.
py_encodings_list = [
    "utf-8",
    "utf-8-sig",
    "utf-16-le",
    "utf-16",
    "utf-16-be",
    "utf-16",
    "cp1252",
    "iso8859-1",
    "iso8859-3",
    "iso8859-15",
    "mac-roman",
    "cp437",
    "cp1256",
    "iso8859-6",
    "cp1257",
    "iso8859-4",
    "iso8859-14",
    "cp1250",
    "iso8859-2",
    "cp1251",
    "cp866",
    "iso8859-5",
    "koi8-r",
    "koi8-u",
    "iso8859-13",
    "cp1253",
    "iso8859-7",
    "cp1255",
    "iso8859-8",
    "iso8859-10",
    "iso8859-16",
    "cp1254",
    "iso8859-9",
    "cp1258",
    None,
]
def st2python(encoding):
    """Translate a Sublime Text encoding name into a Python codec name.

    ``encoding`` is looked up in ``st_encodings_list`` and the entry at the
    same position in ``py_encodings_list`` is returned.

    Returns ``None`` when ``encoding`` is not in ``st_encodings_list``, and
    also when the matching ``py_encodings_list`` entry is itself ``None``.
    """
    try:
        index = st_encodings_list.index(encoding)
    except ValueError:
        # Not a name present in the Sublime Text table.
        return None
    return py_encodings_list[index]
def python2st(encoding):
    """Translate a Python codec name (or alias) into a Sublime Text name.

    ``encoding`` is first normalised through ``codecs.lookup(...).name`` so
    that aliases resolve to the spelling stored in ``py_encodings_list``; the
    entry at the same position in ``st_encodings_list`` is then returned.
    When the normalised name occurs more than once in ``py_encodings_list``,
    the first occurrence wins (``list.index`` semantics).

    Returns ``None`` when ``encoding`` is not a codec Python knows, is not a
    string at all (e.g. ``None``), or has no entry in ``py_encodings_list``.
    """
    try:
        py_encoding = codecs.lookup(encoding).name
    except (LookupError, TypeError):
        # LookupError: unknown codec name.
        # TypeError: non-string argument such as None, which ``st2python``
        # can hand back; treat it as "no match" instead of crashing.
        return None
    try:
        index = py_encodings_list.index(py_encoding)
    except ValueError:
        # A valid Python codec, but not one present in the table.
        return None
    return st_encodings_list[index]
class ST2DecodeError(ValueError):
    """Raised when a Sublime Text encoding name cannot be mapped to a codec.

    Subclasses ``ValueError`` so callers that already catch ``ValueError``
    keep working.
    """
def decode_st(text, encoding):
    """Decode ``text`` using a Sublime Text encoding name.

    ``encoding`` is translated with ``st2python`` and the resulting codec
    name is passed to ``text.decode``, so ``text`` must be an object that
    provides a ``decode`` method.

    Raises ``ST2DecodeError`` when ``st2python`` yields no codec name for
    ``encoding``.  Any error raised by ``text.decode`` itself propagates
    unchanged.
    """
    enc = st2python(encoding)
    if not enc:
        # Wrap in a 1-tuple so a tuple-valued ``encoding`` is formatted as a
        # single value instead of being unpacked by the ``%`` operator.
        raise ST2DecodeError("unknown Sublime Text encoding: %s" % (encoding,))
    return text.decode(enc)
# [/code]