1234567891011121314151617181920212223242526272829303132333435363738394041 |
- import binascii
- import chardet
def check_is_encode_error(string: str, encoding: str = 'gbk') -> bool:
    """Report whether *string* fails to encode with *encoding*.

    Args:
        string: Text to probe.
        encoding: Name of the target codec. Defaults to ``'gbk'``, the
            codec this check was originally hard-wired to.

    Returns:
        ``True`` if encoding raises ``UnicodeEncodeError`` (at least one
        character has no representation in the codec), ``False`` if the
        whole string encodes cleanly.

    Raises:
        LookupError: If *encoding* is not a codec name known to Python.
    """
    try:
        string.encode(encoding)
    except UnicodeEncodeError:
        return True
    return False
# Sample text under investigation. NOTE(review): this looks like mojibake
# (text decoded with the wrong codec) -- TODO confirm its real origin.
sss = '銆愭潗鏂欒澶囥?戠鍥涘笀鍙厠杈炬媺甯?220kV鍙樼數绔欏強71鍥?220kV鍙樼數绔欎富鍙樺瀹规墿寤哄伐绋?锛堢患鍚堣嚜鍔ㄥ寲锛夋嫑鏍囧叕鍛?'
# Alternative sample that was used during testing:
# sss ='cdsc你家妇女的可能'

# Report whether the sample can be represented in GBK at all.
is_code = check_is_encode_error(sss)
print(is_code)

# Encode once; the same UTF-8 bytes are reused for every decode attempt below.
utf8_bytes = sss.encode('utf-8')
print(utf8_bytes)

# NOTE: earlier experiments that were left disabled here: decoding the UTF-8
# bytes as Big5, round-tripping through ASCII / binascii.unhexlify, and
# calling chardet.UniversalDetector.ESC_DETECTOR().

# Encoding families listed as reference for the candidates below:
#   EUC-JP, SHIFT_JIS, and ISO-2022-JP (Japanese)
#   EUC-KR and ISO-2022-KR (Korean)
#   KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, and windows-1251 (Russian)
#   ISO-8859-2 and windows-1250 (Hungarian)
#   ISO-8859-5 and windows-1251 (Bulgarian)
#   ISO-8859-1 and windows-1252 (Western European languages)
#   ISO-8859-7 and windows-1253 (Greek)
#   ISO-8859-8 and windows-1255 (Visual and Logical Hebrew)
#   ASCII
chart = [
    "EUC-JP", "ASCII", "UTF-8", "UTF-16", "UTF-32", "TIS-620",
    "ISO-8859-8", "ISO-8859-7", "ISO-8859-1", "ISO-8859-5", "ISO-8859-2",
    "windows-1250", "windows-1251", "windows-1252", "windows-1253",
    "windows-1255", "KOI8-R", "MacCyrillic", "IBM855", "IBM866", "EUC-KR",
    "ISO-2022-KR", "SHIFT_JIS", "ISO-2022-JP",
]

# Try to reinterpret the UTF-8 bytes under each candidate codec and print
# every interpretation that succeeds.
for char in chart:
    try:
        print(utf8_bytes.decode(char))
    except (UnicodeError, LookupError):
        # Best effort: skip candidates that cannot decode these bytes, whose
        # output the console cannot encode, or whose codec name is unknown.
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        continue
|