utf8 decode python code example
Example 1: Python Unicode code point to UTF-8 string
#! /usr/bin/python3
import re
def makeNice(s):
    """Replace each ``#Uxxxx`` escape in *s* with the character it encodes.

    An escape is the literal ``#U`` followed by exactly four lowercase
    hexadecimal digits; those digits are read as a Unicode code point.
    Text that does not match this form is left unchanged.

    Args:
        s: The string that may contain ``#Uxxxx`` escapes.

    Returns:
        A new string with every escape replaced by its character.
    """
    # re.sub returns the substituted string directly, so there is no need
    # to call re.subn and index away its replacement count.
    return re.sub(
        r'#U([0-9a-f]{4})',
        lambda cp: chr(int(cp.group(1), 16)),
        s,
    )
# Demo input: two "#Uxxxx" escapes separated by dashes.
a = "-#U2605-#U79c1-"
# Show the raw text followed by its decoded form.
decoded = makeNice(a)
print(a, decoded)
Example 2: Decode UTF-8 with Python
import codecs
BLOCKSIZE = 1048576  # or some other, desired size in bytes

# Re-encode sourceFileName as UTF-8 into targetFileName, copying one chunk at
# a time so the whole file is never read in a single call.
# "your-source-encoding" is a placeholder for the input file's codec name.
with codecs.open(sourceFileName, "r", "your-source-encoding") as sourceFile, \
        codecs.open(targetFileName, "w", "utf-8") as targetFile:
    while True:
        contents = sourceFile.read(BLOCKSIZE)
        if not contents:
            # An empty read means the source is exhausted.
            break
        targetFile.write(contents)