# data_spider / crawlab_feader

							import binascii

import chardet


def check_is_encode_error(string):
    """Tell whether ``string`` fails to encode with the GBK codec.

    Args:
        string: Text to probe; its ``encode`` method is called with ``'gbk'``.

    Returns:
        True if the encode attempt raised ``UnicodeEncodeError``,
        False if it completed normally.
    """
    gbk_rejected = False
    try:
        # Only the success/failure of the call matters; the bytes are unused.
        string.encode('gbk')
    except UnicodeEncodeError:
        gbk_rejected = True
    return gbk_rejected

# Sample input for the experiments below.
# NOTE(review): this looks like mojibake (text decoded with the wrong codec);
# confirm where it came from. Substituting a plain GBK-encodable string
# exercises the False result of the check.
sss = '銆愭潗鏂欒澶囥？戠鍥涘笀鍙厠杈炬媺甯？220kV鍙樼數绔欏強71鍥？220kV鍙樼數绔欎富鍙樺瀹规墿寤哄伐绋？锛堢患鍚堣嚜鍔ㄥ寲锛夋嫑鏍囧叕鍛？'
is_code = check_is_encode_error(sss)
print(is_code)

# Encode once: the same UTF-8 bytes are printed here and then reused for every
# decode attempt in the loop below.
utf8_bytes = sss.encode('utf-8')
print(utf8_bytes)

# Reference notes on encoding families and the languages they cover:
#   EUC-JP, SHIFT_JIS, and ISO-2022-JP (Japanese)
#   EUC-KR and ISO-2022-KR (Korean)
#   KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, and windows-1251 (Russian)
#   ISO-8859-2 and windows-1250 (Hungarian)
#   ISO-8859-5 and windows-1251 (Bulgarian)
#   ISO-8859-1 and windows-1252 (Western European languages)
#   ISO-8859-7 and windows-1253 (Greek)
#   ISO-8859-8 and windows-1255 (Visual and Logical Hebrew)
#   ASCII
chart = [
    "EUC-JP", "ASCII", "UTF-8", "UTF-16", "UTF-32", "TIS-620", "ISO-8859-8",
    "ISO-8859-7", "ISO-8859-1", "ISO-8859-5", "ISO-8859-2",
    "windows-1250", "windows-1251", "windows-1252", "windows-1253",
    "windows-1255", "KOI8-R", "MacCyrillic", "IBM855", "IBM866", "EUC-KR",
    "ISO-2022-KR", "SHIFT_JIS", "ISO-2022-JP",
]

# Try to reinterpret the UTF-8 bytes under each candidate encoding and print
# every interpretation that decodes cleanly.
for codec_name in chart:
    try:
        print(utf8_bytes.decode(codec_name))
    except (UnicodeError, LookupError):
        # Best-effort probe: skip codecs that cannot decode these bytes
        # (UnicodeDecodeError), that are unknown to this interpreter
        # (LookupError), or whose output stdout cannot encode
        # (UnicodeEncodeError). Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        continue