data_process.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. # -*- coding: utf-8 -*-
  2. import ast
  3. import json
  4. import re
  5. def get_json(data, limit=10):
  6. """
  7. 返回json对象
  8. @param str data: json_str
  9. @param int limit: json_str 结构修正次数上限
  10. """
  11. retries = 0
  12. try:
  13. data = ast.literal_eval(data)
  14. return json.loads(json.dumps(data, ensure_ascii=False))
  15. except SyntaxError:
  16. while retries < limit:
  17. try:
  18. parsed_data = json.loads(data)
  19. # print("字符串符合JSON格式")
  20. # print(parsed_data)
  21. return parsed_data
  22. except json.JSONDecodeError as e:
  23. retries += 1
  24. error_pos = e.pos
  25. err_msg = e.msg
  26. # print("字符串不符合JSON格式")
  27. # print("错误位置:", error_pos)
  28. # print("错误原因:", err_msg)
  29. data = data[:-1] if data.endswith(",") else data
  30. # 补全'[{...}]' 缺失符号
  31. missing_bracket_count = data.count('[{') - data.count('}]')
  32. if missing_bracket_count > 0:
  33. for i in range(missing_bracket_count):
  34. delimiter = ']' if i == 0 and data.endswith("}") else '}]'
  35. data += delimiter
  36. continue
  37. # 补全缺失闭合符号
  38. missing_closing_bracket_count = data.count('{') - data.count('}')
  39. if missing_closing_bracket_count > 0:
  40. for i in range(missing_closing_bracket_count):
  41. data += '}'
  42. continue
  43. # 双引号替换单引号
  44. if 'Expecting property name enclosed in double quotes' == err_msg:
  45. data = data.replace("'", '"')
  46. try:
  47. data = ast.literal_eval(data)
  48. data = json.dumps(data, ensure_ascii=False)
  49. except SyntaxError:
  50. pass
  51. continue
  52. # 缺失符号补全
  53. if 'delimiter' in err_msg:
  54. ret = re.search('Expecting(.*?)delimiter', err_msg).group(1)
  55. delimiter = ret.strip().replace("'", '')
  56. data = "".join([data[:error_pos], delimiter, data[error_pos:]])
  57. continue