方法1:如果df里只有一列json格式,可以保存为txt,然后再删掉列名,在进行处理。
import pandas as pd
result = []
with open(r"C:\Users\Administrator\Desktop\json处理.txt") as f:
for line in f:
print(line) # txt样子
line = line.replace('""','"')
line = line.replace('"{','{')
line = line.replace('}"','}')
# print(line) # 处理后的line
dict_o = eval(line.strip()) #eval:将字符串字典转换为字典,strip:将某行字的两端空格去掉
result.append(dict_o)
df = pd.DataFrame(result)
df
txt 内容
{""imei"":""5gy5"",""deviceid"":""c5ty7890h"",""accid"":""null""}
{"imei":"5gy5","deviceid":"c5ty7890h","accid":"null"}
{""imei"":""5rrr5"",""createtime"":""c5ty7890h"",""accid"":""null""}
{"imei":"5rrr5","createtime":"c5ty7890h","accid":"null"}
{""appid"":""5eeey5"",""deviceid"":""c5ty7890h"",""createtime"":""c5ty7890h"",""accid"":""null""}
{"appid":"5eeey5","deviceid":"c5ty7890h","createtime":"c5ty7890h","accid":"null"}
{""imei"":""null"",""deviceid"":""c544ty7890h"",""accid"":""null""}
{"imei":"null","deviceid":"c544ty7890h","accid":"null"}
结果输出:
|
imei |
deviceid |
accid |
createtime |
appid |
0 |
5gy5 |
c5ty7890h |
null |
NaN |
NaN |
1 |
5gy5 |
c5ty7890h |
null |
NaN |
NaN |
2 |
5rrr5 |
NaN |
null |
c5ty7890h |
NaN |
3 |
5rrr5 |
NaN |
null |
c5ty7890h |
NaN |
4 |
NaN |
c5ty7890h |
null |
c5ty7890h |
5eeey5 |
5 |
NaN |
c5ty7890h |
null |
c5ty7890h |
5eeey5 |
6 |
null |
c544ty7890h |
null |
NaN |
NaN |
7 |
null |
c544ty7890h |
null |
NaN |
NaN |
方法2
import json
import pandas as pd
df = pd.read_excel(r"C:\Users\Administrator\Desktop\json处理.xlsx")
print(df)
data = df.to_dict(orient='records')
for _ in data:
_.update(json.loads(_['json']))
del _['json']
df1 = pd.DataFrame(data)
df1
原表
城市 json
0 重庆 {"imei":"5gy5","deviceid":"c5ty7890h","accid":...
1 北京 {"imei":"5gy5","deviceid":"c5ty7890h","accid":...
2 上海 {"imei":"5rrr5","createtime":"c5ty7890h","acci...
3 广州 {"imei":"5rrr5","createtime":"c5ty7890h","acci...
4 深圳 {"appid":"5eeey5","deviceid":"c5ty7890h","crea...
5 地球 {"appid":"5eeey5","deviceid":"c5ty7890h","crea...
6 火星 {"imei":"null","deviceid":"c544ty7890h","accid...
7 太阳星 {"imei":"null","deviceid":"c544ty7890h","accid...
结果输出:
|
城市 |
imei |
deviceid |
accid |
createtime |
appid |
0 |
重庆 |
5gy5 |
c5ty7890h |
null |
NaN |
NaN |
1 |
北京 |
5gy5 |
c5ty7890h |
null |
NaN |
NaN |
2 |
上海 |
5rrr5 |
NaN |
null |
c5ty7890h |
NaN |
3 |
广州 |
5rrr5 |
NaN |
null |
c5ty7890h |
NaN |
4 |
深圳 |
NaN |
c5ty7890h |
null |
c5ty7890h |
5eeey5 |
5 |
地球 |
NaN |
c5ty7890h |
null |
c5ty7890h |
5eeey5 |
6 |
火星 |
null |
c544ty7890h |
null |
NaN |
NaN |
7 |
太阳星 |
null |
c544ty7890h |
null |
NaN |
NaN |
方法3,取json的固定字段
import json
import pandas as pd
df = pd.read_excel(r"C:\Users\Administrator\Desktop\json处理.xlsx")
print(df)
tmp=[]
for line in df.json.values:
# d=eval(line)[0]
d=eval(line)
tmp.append(d.get('imei') or d.get('deviceid'))
# tmp.append(d.get('xx')['yy']) #如果是嵌套字典,就用此方法
df['结果列']=tmp
df
原表
城市 json
0 重庆 {"imei":"5gy5","deviceid":"c5ty7890h","accid":...
1 北京 {"imei":"5gy5","deviceid":"c5ty7890h","accid":...
2 上海 {"imei":"5rrr5","createtime":"c5ty7890h","acci...
3 广州 {"imei":"5rrr5","createtime":"c5ty7890h","acci...
4 深圳 {"appid":"5eeey5","deviceid":"c5ty7890h","crea...
5 地球 {"appid":"5eeey5","deviceid":"c5ty7890h","crea...
6 火星 {"imei":"null","deviceid":"c544ty7890h","accid...
7 太阳星 {"imei":"null","deviceid":"c544ty7890h","accid...
结果输出:
|
城市 |
json |
结果列 |
0 |
重庆 |
{"imei":"5gy5","deviceid":"c5ty7890h","accid":... |
5gy5 |
1 |
北京 |
{"imei":"5gy5","deviceid":"c5ty7890h","accid":... |
5gy5 |
2 |
上海 |
{"imei":"5rrr5","createtime":"c5ty7890h","acci... |
5rrr5 |
3 |
广州 |
{"imei":"5rrr5","createtime":"c5ty7890h","acci... |
5rrr5 |
4 |
深圳 |
{"appid":"5eeey5","deviceid":"c5ty7890h","crea... |
c5ty7890h |
5 |
地球 |
{"appid":"5eeey5","deviceid":"c5ty7890h","crea... |
c5ty7890h |
6 |
火星 |
{"imei":"null","deviceid":"c544ty7890h","accid... |
null |
7 |
太阳星 |
{"imei":"null","deviceid":"c544ty7890h","accid... |
null |