# -*- coding: utf-8 -*-
import sys, os, re
class JsonBaseType(object):
    """Kind of the top-level JSON value handled by ``JsonHelper.JsonParse``."""

    # A bare scalar: null, true, false, a string or a number.
    single_type = 0
    # A document that starts with '{'.
    object_type = 1
    # A document that starts with '['.
    array_type = 2
class ParseException(Exception):
    """Raised when the input text cannot be parsed as JSON.

    The message is kept in ``error`` and is also what ``str()`` returns.
    """

    def __init__(self, error):
        # Deriving from Exception (and initialising it) is required for
        # ``raise`` / ``except`` to work on Python 3.
        super(ParseException, self).__init__(error)
        self.error = error

    def __str__(self, *args, **kwargs):
        return self.error
# NUMBER_RE = re.compile(
# r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
# (re.VERBOSE | re.MULTILINE | re.DOTALL))
class JsonHelper(object):
    """Hand-written JSON parser that keeps its result on the instance.

    Members of an object are stored in ``dictResult`` keyed by member name.
    Nested objects are stored as nested ``JsonHelper`` instances and arrays
    as lists of ``JsonHelper`` instances.  Numeric members additionally get a
    ``<name>String`` entry holding the numeric text as it appeared in the
    input.

    The parsing methods do not pass absolute positions around.  Most of them
    return ``(length of the text that is still unparsed, end flag)`` and the
    caller slices its own string with that length.
    """

    # JSON number: integer part, optional fraction, optional exponent.
    _NUMBER_RE = re.compile(
        r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', re.M | re.I)

    # CheckBlankByte rules: nCheckPos -> (characters that stop the scan,
    # subset of those that also set the returned end flag).
    _BLANK_RULES = {
        0: ('{', ''),       # between '[' and '{'
        1: ('"', ''),       # between '{' and '"'
        2: ('[{"', ''),     # between ':' and a value opened by '[', '{' or '"'
        3: ('"', ''),       # between ',' and '"'
        4: (',}', '}'),     # after a value / child object: ',' or '}'
        5: (',]', ''),      # after an array element: ',' or ']'
        6: (',{', ''),      # between two array elements: ',' then '{'
        7: (']', ']'),      # end of array: ']'
    }

    def __init__(self):
        self.jsObjectType = JsonBaseType.single_type
        self.dictResult = dict()
        self.jsEleName = ''      # name of the member currently being parsed
        self.jsArrayValue = []   # elements collected for the current array

    def __getitem__(self, item):
        """Return the parsed value stored under ``item``."""
        return self.dictResult[item]

    def JsonParse(self, jsString):
        """Parse a complete JSON document and return ``self.dictResult``.

        A top-level object fills ``dictResult`` with its members, a top-level
        array is stored under the key ``''`` and a scalar is stored under
        ``''`` as well (numbers also store their text under ``'String'``).

        Raises:
            ParseException: if the text is empty or malformed.
        """
        jsString = jsString.strip()
        jsString = jsString.replace('\r', '').replace('\n', '')
        if len(jsString) == 0:
            raise ParseException('JsonParse json error 2|||' + jsString)
        if jsString[0] == '{':
            self.jsObjectType = JsonBaseType.object_type
            self.ObjectParse(jsString[1:])
        elif jsString[0] == '[':
            self.jsObjectType = JsonBaseType.array_type
            self.ArrayParse(jsString[1:])
        else:
            jsString, ret_object, ret_string = self.SingleObjectParse(jsString)
            self.dictResult[''] = ret_object
            if 0 != len(ret_string):
                # Same key scheme as ElementParse (<name> + 'String') with an
                # empty member name.
                self.dictResult['String'] = ret_string
        return self.dictResult

    def ArrayParse(self, jsString):
        """Parse the text that follows an opening '[' as an array of objects.

        Every element is parsed into its own ``JsonHelper`` and the resulting
        list is stored in ``dictResult[self.jsEleName]``.  Only blanks and a
        '{' are accepted after the '[', so arrays of scalars and empty arrays
        are rejected.

        Returns:
            ``(length of the unparsed remainder, end flag)``.

        Raises:
            ParseException: on unexpected characters around the elements.
        """
        nIdxRet = 0
        bEndObject = False
        bCorrectFormat = True
        nLenString = len(jsString)
        jsStringBak = ''
        bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 0)
        if not bCorrectFormat:
            raise ParseException('ArrayParse json error 1|||' + jsString[0:20])
        nPos = jsString.find('{')
        if -1 != nPos:
            jsString = jsString[nPos+1:]
        while True:
            nIdxTmp = len(jsString)
            jsString = jsString.strip()
            # Drop the first character (the opening quote of the first member
            # name); ObjectParse expects the text right after it.
            jsString = jsString[1:]
            nIdxRet += nIdxTmp - len(jsString) + 1  # strip leading blanks and update the position
            jsObj = JsonHelper()
            nIdxTmp, bEndObject = jsObj.ObjectParse(jsString, True)  # parse one element as a child object
            self.jsArrayValue.append(jsObj)
            nIdxRet += nIdxTmp
            jsString = jsString[len(jsString)-nIdxTmp:]  # drop what was parsed, keep the rest
            if 0 == len(jsString):
                break
            if bEndObject:
                bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 5)
                if not bCorrectFormat:
                    raise ParseException('ArrayParse json error 1|||' + jsString[0:20])
                else:
                    if bEndObject:
                        break
                    # NOTE(review): check 5 never sets the end flag, so a ','
                    # that follows the closing ']' appears to be treated as an
                    # element separator -- verify with an array that is not
                    # the last member of its object.
                    nPos = jsString.find(',')  # look directly for the next object
                    if -1 != nPos:
                        jsString = jsString[nPos:]
                        bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 6)  # only blanks between ',' and '{'
                        if bCorrectFormat:
                            nPos = jsString.find('"')
                            if -1 != nPos:
                                jsString = jsString[nPos:]
                                jsString = jsString.strip()
                    else:
                        # NOTE(review): the source had lost its indentation;
                        # pairing this branch with "no ',' found" is inferred
                        # from tracing the demo input -- confirm.
                        bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 7)
                        if not bCorrectFormat:
                            raise ParseException('ArrayParse json error 2|||' + jsString[0:20])
                        else:
                            if bEndObject:
                                nPos = jsString.find(']')
                                if -1 != nPos:
                                    jsString = jsString[nPos+1:]
                                    jsString = jsString.strip()
                                break
        jsStringBak = jsString
        self.dictResult[self.jsEleName] = self.jsArrayValue
        self.jsArrayValue = []
        return len(jsStringBak), bEndObject

    def ArrayParseEx(self, jsString):
        """Experimental array parser that also accepts scalar elements.

        NOTE(review): this method is not called anywhere in the visible file
        and looks unfinished: the '[' and '{' branches consume nothing, and
        ``bEndObject`` is never set to True before it is tested, so input
        starting with '[' or '{' does not appear to terminate.  Confirm
        before using it.

        Returns:
            ``(length of the unparsed remainder, end flag)``.
        """
        jsString = jsString.strip()
        nIdxRet = 0
        bEndObject = False
        bCorrectFormat = True
        nLenString = len(jsString)
        jsStringBak = ''
        while True:
            if jsString[0] == '[':
                pass
            elif jsString[0] == '{':
                pass
            else:
                jsString, ret_object, ret_string = self.SingleObjectParse(jsString)
                self.jsArrayValue.append(ret_object)
                if 0 != len(ret_string):
                    self.dictResult['String'] = ret_string
            jsString = jsString.strip()
            if 0 == len(jsString):
                break
            # Old code, same element-separator handling as ArrayParse.
            if bEndObject:
                bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 5)
                if not bCorrectFormat:
                    raise ParseException('ArrayParse json error 1|||' + jsString[0:20])
                else:
                    if bEndObject:
                        break
                    nPos = jsString.find(',')  # look directly for the next object
                    if -1 != nPos:
                        jsString = jsString[nPos:]
                        bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 6)  # only blanks between ',' and '{'
                        if bCorrectFormat:
                            nPos = jsString.find('"')
                            if -1 != nPos:
                                jsString = jsString[nPos:]
                                jsString = jsString.strip()
                    else:
                        bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 7)
                        if not bCorrectFormat:
                            raise ParseException('ArrayParse json error 2|||' + jsString[0:20])
                        else:
                            if bEndObject:
                                nPos = jsString.find(']')
                                if -1 != nPos:
                                    jsString = jsString[nPos + 1:]
                                    jsString = jsString.strip()
                                break
        jsStringBak = jsString
        self.dictResult[self.jsEleName] = self.jsArrayValue
        self.jsArrayValue = []
        return len(jsStringBak), bEndObject

    def ObjectParse(self, jsString, bChildObj = False, bArrayObj = False):
        """Parse the members of an object into ``dictResult``.

        Args:
            jsString: text after the opening '{'; a leading '"' of the first
                member name is optional.
            bChildObj: stop as soon as the object's closing '}' is reached
                (used for nested objects and array elements).
            bArrayObj: same stopping behaviour as ``bChildObj``.

        Returns:
            ``(length of the unparsed remainder, whether '}' was reached)``.
        """
        nIdxRet = 0
        bEndObject = False
        nLenString = len(jsString)
        jsStringBak = ''
        while True:
            nIdxTmp = len(jsString)
            jsString = jsString.strip()
            if jsString[0] == '"':
                jsString = jsString[1:]
            jsStringBak = jsString
            nIdxTmp, bEndObject = self.ElementParse(jsString)
            if bEndObject:
                jsString = jsString[len(jsString) - nIdxTmp:]
                if bChildObj or bArrayObj:
                    break
            else:
                # The extra +1 skips one more character (a blank or the
                # opening quote of the next member name).
                jsString = jsString[len(jsString)-nIdxTmp+1:]  # drop what was parsed, keep the rest
            if 0 == len(jsString):
                break
        jsStringBak = jsString
        return len(jsStringBak), bEndObject

    def SingleObjectParse(self, jsString):
        """Parse a document that consists of exactly one scalar value.

        Returns:
            ``(remaining text, value, numeric text)``.  The remaining text is
            always ``''`` on success; the numeric text is ``''`` unless the
            value is a number.

        Raises:
            ParseException: if no scalar is recognised or anything other than
                whitespace follows it.
        """
        ret_object = None
        ret_string = ''
        if jsString.startswith('null'):
            jsString = jsString[4:]
        elif jsString.startswith('true'):
            ret_object = True
            jsString = jsString[4:]
        elif jsString.startswith('false'):
            ret_object = False
            jsString = jsString[5:]
        elif jsString.startswith('"'):
            ret_object, tmp_pos, _ = self.ElementStringParse(jsString[1:], False, True)
            jsString = jsString[len(jsString) - tmp_pos + 1:]
        else:
            match_res = self._NUMBER_RE.match(jsString)
            if not match_res:
                raise ParseException('SingleObjectParse json error 2|||' + jsString)
            str_num = match_res.group()
            try:
                ret_object = int(str_num)
            except ValueError:
                ret_object = float(str_num)  # exponents may lose precision as float
            # NOTE(review): the source had lost its indentation; keeping the
            # text for every number (not only floats) is an assumption.
            ret_string = str_num
            jsString = jsString[match_res.end():]
        # Anything left after the value means the document is malformed.
        jsString = jsString.strip()
        if 0 != len(jsString):
            raise ParseException('SingleObjectParse json error 3|||' + jsString)
        return jsString, ret_object, ret_string

    # On entry to ElementParse the opening quote of the member name has
    # already been removed (e.g. userId": "U46554751"...).
    def ElementParse(self, jsString):
        """Parse one ``name": value`` member and store it in ``dictResult``.

        The value may be null/true/false, a number, a string, a nested object
        or an array.  Numbers also store their text under ``<name>String``.

        Returns:
            ``(length of the unparsed remainder, whether the enclosing object
            ended)``.

        Raises:
            ParseException: on unexpected characters around the value.
        """
        chNext = ''
        bEndObject = False  # whether the current object has ended
        nLenString = len(jsString)
        nIdxTmp = 0
        nIdxRet = len(jsString)
        jsString = jsString.strip()
        nIdxRet = nIdxRet - len(jsString)
        self.jsEleName, nIdxTmp, bEndObject = self.ElementStringParse(jsString)  # member name
        jsString = jsString[nLenString - nIdxTmp + 1:]  # drop what has been parsed
        jsStringBak = jsString
        nIdxTmp = len(jsString)
        jsString = jsString.strip()  # remove leading blanks
        nIdxRet += nIdxTmp - len(jsString)
        nIdxTmp = 0
        bCorrectFormat = True  # assume the whitespace rule holds
        if jsString[0:4] == 'true' or jsString[0:4] == 'null' or jsString[0:5] == 'false':
            bCorrectFormat = True
        else:
            matchRes = self._NUMBER_RE.match(jsString)
            if not matchRes:
                bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 2)
        if not bCorrectFormat:  # illegal characters between ':' and the value
            raise ParseException('ElementParse json error 1|||' + jsString[0:20])
        # Set once a null/true/false/number value was read; the next loop
        # iteration then validates what follows the value.
        bNtf = False
        for idx in range(len(jsString)):
            try:
                chNext = jsString[idx]
            except IndexError:
                raise ParseException('ElementParse visit overstep the boundary 1|||' + jsString[idx:idx+20])
            if bNtf:  # after null/true/false/number only blanks, ',' or '}' may follow
                bCorrectFormat, bEndObject = self.CheckBlankByte(jsString[nIdxTmp:], 4)
                if not bCorrectFormat:
                    raise ParseException('ElementParse json error 2|||' + jsString[nIdxTmp:nIdxTmp+20])
                if bEndObject:  # the member is complete; next is '}' or ','
                    nIdxRet = jsString.find('}') + 1
                else:
                    nIdxRet = jsString.find(',') + 1
                jsStringBak = jsString[nIdxRet:]
                nIdxTmp = 0
                break
            if chNext == 'n' and jsString[idx:idx + 4] == 'null':
                bNtf = True
                nIdxTmp = idx + 4
                nIdxRet = nIdxRet + 4
                jsStringBak = jsStringBak[nIdxRet:]
                self.dictResult[self.jsEleName] = None
                continue
            elif chNext == 't' and jsString[idx:idx + 4] == 'true':
                bNtf = True
                nIdxTmp = idx + 4
                nIdxRet = nIdxRet + 4
                jsStringBak = jsStringBak[nIdxRet:]
                self.dictResult[self.jsEleName] = True
                continue
            elif chNext == 'f' and jsString[idx:idx + 5] == 'false':
                bNtf = True
                nIdxTmp = idx + 5
                nIdxRet = nIdxRet + 5
                jsStringBak = jsStringBak[nIdxRet:]
                self.dictResult[self.jsEleName] = False
                continue
            elif chNext == '"':  # string value
                jsStringValue, nIdxTmp, bEndObject = self.ElementStringParse(jsString[1:], True)
                jsString = jsString[len(jsString)-nIdxTmp+1:]
                jsStringBak = jsString
                nIdxRet = nIdxRet + nIdxTmp + 1
                self.dictResult[self.jsEleName] = jsStringValue
                nIdxTmp = 0
                break
            elif chNext == '[':  # array value
                if jsString[0] == '[':
                    jsString = jsString[1:]
                if 0 != len(self.jsArrayValue):
                    self.jsArrayValue = []
                nIdxTmp, bEndObject = self.ArrayParse(jsString)
                jsString = jsString[len(jsString) - nIdxTmp + 1:]
                jsStringBak = jsString
                nIdxRet += nIdxTmp
                nIdxTmp = 0
                break
            elif chNext == '{':  # nested object value
                jsObjectValue = JsonHelper()
                strRes = jsString[idx+1:]  # parse the object starting right after '{'
                strRes = strRes.strip()
                nIdxTmp, bEndObject = jsObjectValue.ObjectParse(strRes, True)
                self.dictResult[self.jsEleName] = jsObjectValue
                if bEndObject:
                    jsString = strRes[len(strRes) - nIdxTmp:]
                else:
                    jsString = strRes[len(strRes) - nIdxTmp + 1:]
                jsStringBak = jsString
                if bEndObject:
                    bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 4)  # is there another member? validate format
                    if not bCorrectFormat:
                        raise ParseException('ElementParse json error 3|||' + jsString[0:20])
                    else:
                        if not bEndObject:
                            nIdxTmp = jsString.find(',')
                            if -1 != nIdxTmp:
                                jsString = jsString[nIdxTmp+1:]
                                nIdxRet = nIdxRet + nIdxTmp + 1
                                jsStringBak = jsString
                                bCorrectFormat, bEndObject = self.CheckBlankByte(jsString, 3)  # another member follows: validate ',' to '"'
                                if not bCorrectFormat:
                                    raise ParseException('ElementParse json error 4|||' + jsString[0:20])
                                else:
                                    nIdxTmp = jsString.find('"')
                                    if -1 != nIdxTmp:
                                        jsString = jsString[nIdxTmp:]
                                        nIdxRet = nLenString - len(jsString)
                                        jsStringBak = jsString
                        else:
                            nIdxTmp = jsString.find('}')
                            if -1 != nIdxTmp:
                                jsString = jsString[nIdxTmp + 1:]
                                jsStringBak = jsString
                nIdxTmp = 0
                break
            else:
                # Anything else must be a number; otherwise the JSON is malformed.
                strNum = jsString.strip()
                matchRes = self._NUMBER_RE.match(strNum)
                if matchRes:
                    bNtf = True
                    try:
                        strNum = matchRes.group()
                        try:
                            self.dictResult[self.jsEleName] = int(strNum)
                        except ValueError:
                            self.dictResult[self.jsEleName] = float(strNum)  # exponents may lose precision as float
                        # NOTE(review): the source had lost its indentation;
                        # keeping the text for every number (not only floats)
                        # is an assumption.
                        self.dictResult[self.jsEleName+'String'] = strNum
                        nIdxRet = nIdxRet + jsString.find(strNum) + len(strNum)
                        nIdxTmp = len(strNum)
                        jsStringBak = jsString[nIdxRet:]
                    except ValueError:
                        raise ParseException('ElementParse json error 5|||' + jsString[0:20])
                else:
                    raise ParseException('ElementParse json error 6|||' + jsString[0:20])
        return len(jsStringBak), bEndObject

    # Member-name failure case
    # 'us%"e\\"r\\\\%c Pho\'n:e": \"13030\"rc Pho\'n:303131\"'
    # Member-name success cases
    # 'us%e\\"r\\\\%c Pho\'n:e": \"13030\"rc Pho\'n:303131\"'
    # 'userName": "张三李四王麻子",'
    # Value failure cases
    # '13030\"rc Pho\'n:303131"sdf,'
    # '张三李四王麻子" qw}'
    # Value success cases
    # '13030\"rc Pho\'n:303131",'
    # '张三李四王麻子"}'
    def ElementStringParse(self, jsString, bVal = False, bNormalString = False):
        """Read a quoted string whose opening quote was already consumed.

        Backslashes are validated but kept in the returned text (the string
        is not unescaped).  After the closing quote the scan continues up to
        the terminator: ':' for a member name, ',' or '}' for a value.

        Args:
            jsString: text starting right after the opening quote.
            bVal: True when reading a value, False when reading a member name.
            bNormalString: stop directly at the closing quote.

        Returns:
            ``(text, length of the unparsed remainder + 1, whether '}'
            terminated the value)``.

        Raises:
            ParseException: on an invalid escape, a backslash at the very end
                of the input, or unexpected characters after the closing quote.
        """
        nRet = 0             # position at which the caller continues
        chars = []           # characters of the string value
        bEnd = False         # closing quote seen
        bEndObject = False   # current object ended
        nLenString = len(jsString)
        for n in range(nLenString):
            ch = jsString[n]
            nRet = n + 1
            if not bEnd:
                if ch == '\\':
                    if n + 1 >= nLenString:
                        # A backslash must be followed by another character.
                        raise ParseException('ElementStringParse visit overstep the boundary 2|||' + jsString[n:n+20])
                    chNext = jsString[n+1]
                    chPre = jsString[n-1] if n > 0 else ''
                    if chNext != '"' and chNext != '\\' and chPre != '\\':
                        raise ParseException('ElementStringParse json error 1|||' + jsString[n:n+20])
                    chars.append(ch)
                else:
                    if ch == '"':
                        if not (n > 0 and jsString[n-1] == '\\'):
                            bEnd = True  # found the closing quote
                            if bNormalString:
                                break
                            continue
                    chars.append(ch)
            else:
                if not bVal:
                    if ch != ' ' and ch != '\t' and ch != ':':  # other characters between '"' and ':'
                        raise ParseException('ElementStringParse json error 2|||' + jsString[n:n+20])
                    else:
                        if ch == ':':  # a member name ends with ':'
                            break
                else:
                    if ch != ' ' and ch != '\t' and ch != ',' and ch != '}':  # other characters between '"' and ',' or '}'
                        raise ParseException('ElementStringParse json error 3|||' + jsString[n:n+20])
                    else:
                        if ch == ',' or ch == '}':  # a value ends with ',' or '}'
                            if ch == '}':
                                bEndObject = True
                            break
        return ''.join(chars), nLenString-nRet+1, bEndObject

    # Validate the blanks between two structural characters.
    def CheckBlankByte(self, jsString, nCheckPos):
        """Check that only blanks precede the next structural character.

        Scans ``jsString`` from the start.  Spaces and tabs are skipped; the
        scan stops at the first character allowed for ``nCheckPos`` (see
        ``_BLANK_RULES``) and fails on any other character.  Reaching the end
        of the text, or passing an unknown ``nCheckPos``, counts as correct.

        Returns:
            ``(bCorrectFormat, bEndObject)``; the second flag is True only
            when the scan stopped at '}' (position 4) or ']' (position 7).
        """
        rule = self._BLANK_RULES.get(nCheckPos)
        if rule is None:
            return True, False
        terminators, enders = rule
        for chTmp in jsString:
            if chTmp in terminators:
                return True, chTmp in enders
            if chTmp != ' ' and chTmp != '\t':
                return False, False
        return True, False
def printSingleObject(obj):
    """Print the ``repr`` of a scalar value on its own line."""
    # Single-argument call form: valid on both Python 2 and Python 3.
    print(repr(obj))
def printObject(obj, nTab):
    """Print the members of a parsed object, indented with tabs.

    Args:
        obj: object exposing a ``dictResult`` mapping (a ``JsonHelper``).
        nTab: nesting level; the braces use ``nTab - 1`` tabs and the members
            ``nTab`` tabs.
    """
    # Each line is built as one string so the single-argument print call
    # works on both Python 2 and Python 3.
    print(' '.join(('\t' * (nTab - 1), '{')))
    for key in obj.dictResult:
        value = obj.dictResult[key]
        if isinstance(value, list):
            printList(value, nTab + 1, key)
        elif isinstance(value, JsonHelper):
            printObject(value, nTab + 1)
        else:
            print(' '.join(('\t' * nTab, '"{}":\t{}'.format(key, repr(value)))))
    print(' '.join(('\t' * (nTab - 1), '}')))
def printList(lst, nTab, strEleName = ''):
    """Print a parsed array, indented with tabs.

    Args:
        lst: list of elements (nested lists, ``JsonHelper`` objects or
            scalars).
        nTab: nesting level; the brackets use ``nTab - 1`` tabs.
        strEleName: member name the array is stored under; printed in front
            of the opening bracket when not empty.
    """
    label = '"{}":'.format(strEleName) if 0 != len(strEleName) else ''
    # Each line is built as one string so the single-argument print call
    # works on both Python 2 and Python 3.
    print(' '.join(('\t' * (nTab - 1), label, '[')))
    for item in lst:
        if isinstance(item, list):
            printList(item, nTab + 1)
        elif isinstance(item, JsonHelper):
            printObject(item, nTab + 1)
        else:
            # Scalar elements are shown instead of being silently skipped.
            print(' '.join(('\t' * nTab, repr(item))))
    print(' '.join(('\t' * (nTab - 1), ']')))
if __name__ == "__main__":
    # Demo: parse a sample document and pretty-print the result.
    try:
        # The continuation lines belong to the string literal, so they must
        # stay at column 0 to keep the sample text unchanged.
        json_text = ' [{\
"userId": "111111111111",\
"userName": "张三李四王麻子",\
"us%e\\"r\\\\%c Pho\'n:e": "13030\\"rc Pho\'n:303131",\
"userFollow": 86,\
"userFuns": -2e-5,\
"userFavorite": true,\
"userEBuy": null,\
"userComment": "999",\
"userNews": "3",\
"userPoints": "3",\
"userOrder": "8",\
"userLocation": "5",\
"isSeller": "1",\
"aaa": {\
"userId": "2222222222222",\
"userPhone": "13030303131",\
"userFollow": "86",\
"userFuns": "5125",\
"userFavorite": "615",\
"userEBuy": "103",\
"userComment": "999",\
"userNews": "3",\
"userPoints": "3",\
"userOrder": "8",\
"userLocation": "5",\
"isSeller": "1",\
"bbb": {\
"userId": "333333333333",\
"userName": "张三李四王麻子",\
"userPhone": "1303\\"0303131",\
"userFollow": 86,\
"userFuns": 66.66,\
"userFavorite": true,\
"userEBuy": null,\
"userC\\"omment": "999",\
"userNews": "3",\
"userPoints": "3",\
"userOrder": "8",\
"userLocation": "5",\
"isSeller": "1"\
},\
"ccc": {\
"userId": "444444444444",\
"userName": "张三李四王麻子",\
"userPhone": "13030303131",\
"userFollow": "86",\
"userFuns": "5125",\
"userFavorite": "615",\
"userEBuy": "103",\
"userComment": "999",\
"userNews": "3",\
"userPoints": "3",\
"userOrder": "8",\
"userLocation": "5",\
"isSeller": "1",\
"sdfsdf": {\
"Name": "55555555555555",\
"X": "85",\
"Y": "78",\
"Z": "10"\
}\
}\
}\
},\
{\
"userId": "66666666666666",\
"userName": "张三李四王麻子",\
"userPhone": "13030303131",\
"userFollow": "86",\
"userFuns": "5125",\
"userFavorite": "615",\
"userEBuy": "103",\
"userComment": "999",\
"userNews": "3",\
"userPoints": "3",\
"userOrder": "8",\
"userLocation": "5",\
"isSeller": "1",\
"Values": [{\
"Name": "7777777777777777",\
"X": "85",\
"Y": "78",\
"Z": "10"\
},\
{\
"Name": "8888888888888888",\
"X": "85",\
"Y": "78",\
"Z": "10"\
}\
]\
}\
]'
        # Alternative sample inputs:
        # json_text = ' [ {\
        # "Name": "8888888888888888",\
        # "X": "85",\
        # "Y": "78",\
        # "Z": null\
        # }]'
        # json_text = ' {"bbb": {\
        # "userId": "333333333333",\
        # "userName": "张三李四王麻子",\
        # "userPhone": "1303\\"0303131",\
        # "userFollow": 86,\
        # "userFuns": 66.66,\
        # "userFavorite": true,\
        # "userEBuy": null,\
        # "userC\\"omment": "999",\
        # "userNews": "3",\
        # "userPoints": "3",\
        # "userOrder": "8",\
        # "userLocation": "5",\
        # "isSeller": "1"\
        # },\
        # "ccc": {\
        # "userId": "444444444444",\
        # "userName": "张三李四王麻子",\
        # "userPhone": "13030303131",\
        # "userFollow": "86",\
        # "userFuns": "5125",\
        # "userFavorite": "615",\
        # "userEBuy": "103",\
        # "userComment": "999",\
        # "userNews": "3",\
        # "userPoints": "3",\
        # "userOrder": "8",\
        # "userLocation": "5",\
        # "isSeller": "1",\
        # "sdfsdf": {\
        # "Name": "55555555555555",\
        # "X": "85",\
        # "Y": "78",\
        # "Z": "10"\
        # }\
        # }}'
        # json_text = '[{\
        # "Name": "7777777777777777",\
        # "w": false,\
        # "X": 85,\
        # "Y": 78.66,\
        # "Z": "10",\
        # "we": null,\
        # "us%e\\"r\\\\%c Pho\'n:e": "13030\\"rc Pho\'n:303131"\
        # },\
        # {\
        # "Name": "8888888888888888",\
        # "q": "85",\
        # "w": "78",\
        # "e": "10",\
        # "sdfsdf": {\
        # "Name": "9999999999999",\
        # "a": "85",\
        # "b": "78",\
        # "c": "10"\
        # }\
        # }\
        # ]'
        # json_text = '{\
        # "Name": "7777777777777777",\
        # "w": false,\
        # "X": 85,\
        # "Y": 78.66,\
        # "Z": "10",\
        # "we": null,\
        # "us%e\\"r\\\\%c Pho\'n:e": "13030\\"rc Pho\'n:303131"\
        # }'
        # json_text = '666666'
        jsObj = JsonHelper()
        jsObj.JsonParse(json_text)
        if jsObj.jsObjectType == JsonBaseType.object_type:
            # A top-level object stores its members directly in
            # jsObj.dictResult (there is no '' key), and printObject reads
            # obj.dictResult, so the parser itself is passed.
            printObject(jsObj, 1)
        elif jsObj.jsObjectType == JsonBaseType.array_type:
            printList(jsObj.dictResult[''], 1)
        else:
            printSingleObject(jsObj.dictResult[''])
        print('json parsed success.')
    except ParseException as e:
        print(e)
        print('json parsed failed.')
# debug(21361583)
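# Site notice: this content was contributed by users and the copyright belongs to the original author; the site assumes no legal responsibility. If you find content suspected of plagiarism or infringement, contact hwhale#tublm.com (replace # with @ before use).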