说明
DBNet 代码选自 https://github.com/WenmuZhou/DBNet.pytorch 。训练时数据集信息以 JSON 文件导入,但官方 ICDAR2015 数据集下载下来的标注是 txt 格式,因此会出现错误:FileNotFoundError: [Errno 2] No such file or directory: 'datasets\train.json'。因此需要先把 txt 标注转换成 JSON 文件。
json格式
JSON 数据格式参考该博主的文章:http://t.csdn.cn/ZJRog 。下面写一个脚本,按该格式生成 JSON 数据集。
代码
代码改编自http://t.csdn.cn/cxApa
#coding:utf-8
#coding:utf-8
import json
import os
def writeToJson(filePath, data):
    """Write *data* to *filePath* as pretty-printed UTF-8 JSON.

    Non-ASCII characters are written as-is (``ensure_ascii=False``) and the
    output is indented by two spaces.

    Args:
        filePath: Path of the JSON file to create or overwrite.
        data: Any JSON-serializable object.

    Raises:
        TypeError: If *data* contains a value ``json`` cannot serialize.
        OSError: If the file cannot be opened or written.
    """
    # Serialize before opening the file: opening in 'w' mode truncates it,
    # so a serialization error must not wipe out an existing file.
    text = json.dumps(data, indent=2, ensure_ascii=False)
    # The context manager closes the handle even if the write fails.
    with open(filePath, 'w', encoding='utf-8') as fb:
        fb.write(text)
def _parse_gt_line(line):
    """Turn one ``x1,y1,x2,y2,x3,y3,x4,y4,text`` line into an annotation dict."""
    # maxsplit=8 keeps commas that are part of the transcription itself;
    # a plain split(',') would cut the text at its first comma.
    fields = line.split(',', 8)
    if len(fields) < 9:
        raise ValueError(
            'expected 8 coordinates and a transcription, got {!r}'.format(line))
    coords = [int(value) for value in fields[:8]]
    text = fields[8]
    return {
        'polygon': [coords[k:k + 2] for k in range(0, 8, 2)],
        'text': text,
        # '###' is the marker this script treats as an illegible region.
        'illegibility': text == '###',
        'language': "Latin",
        # Placeholder character-level entry; a fresh one per annotation.
        'chars': [
            {
                "polygon": [],
                "char": "",
                "illegibility": False,
                "language": "Latin"
            }
        ],
    }


def _match_image(txt_name, images_by_stem):
    """Return the image file name that belongs to the label file *txt_name*."""
    stem = os.path.splitext(txt_name)[0]
    candidates = [stem]
    # Also accept label files named ``gt_<image stem>.txt`` (the naming used
    # by the ICDAR 2015 ground-truth archives).
    if stem.startswith('gt_'):
        candidates.append(stem[len('gt_'):])
    for candidate in candidates:
        if candidate in images_by_stem:
            return images_by_stem[candidate]
    raise FileNotFoundError(
        'no image matching label file {!r}'.format(txt_name))


def txt2json(imagePath, txtPath, savePath, dataRoot=None):
    """Convert a folder of per-image label txt files into one JSON file.

    Every non-blank line of a label file must look like
    ``x1,y1,x2,y2,x3,y3,x4,y4,transcription``. The result, written with
    ``writeToJson``, has the form::

        {"data_root": ..., "data_list": [
            {"img_name": ..., "annotations": [
                {"polygon": [[x, y] * 4], "text": ..., "illegibility": ...,
                 "language": "Latin", "chars": [...]}]}]}

    Label files are paired with images by file name (same stem, optionally
    with a ``gt_`` prefix on the label file), not by directory-listing
    position, because ``os.listdir`` returns entries in arbitrary order.

    Args:
        imagePath: Directory that holds the images.
        txtPath: Directory that holds the label txt files.
        savePath: Path of the JSON file to write.
        dataRoot: Value stored under ``"data_root"``. Defaults to
            *imagePath*.

    Raises:
        FileNotFoundError: If a label file has no image with a matching name.
        ValueError: If a line has fewer than nine fields or a coordinate is
            not an integer; the message names the file and line number.
    """
    # Index images by stem once; sorted() makes the choice deterministic when
    # two images share a stem (e.g. ``a.jpg`` and ``a.png``).
    images_by_stem = {}
    for image_name in sorted(os.listdir(imagePath)):
        images_by_stem.setdefault(os.path.splitext(image_name)[0], image_name)

    datalist = []
    for txt_name in sorted(os.listdir(txtPath)):
        img_name = _match_image(txt_name, images_by_stem)
        annotations = []
        # 'UTF-8-sig' transparently drops a leading byte-order mark.
        with open(os.path.join(txtPath, txt_name), 'r',
                  encoding='UTF-8-sig') as f:
            for line_no, raw in enumerate(f, 1):
                line = raw.rstrip('\r\n')
                if not line.strip():
                    # Blank lines (often a trailing one) carry no annotation.
                    continue
                try:
                    annotations.append(_parse_gt_line(line))
                except ValueError as err:
                    raise ValueError(
                        '{}:{}: {}'.format(txt_name, line_no, err)) from err
        datalist.append({
            'img_name': img_name,
            'annotations': annotations,
        })

    content = {
        'data_root': imagePath if dataRoot is None else dataRoot,
        'data_list': datalist,
    }
    writeToJson(savePath, content)
if __name__ == '__main__':
    # Script entry point: convert the training labels next to the images
    # into a single train.json. Adjust the three paths to your own checkout.
    imagePath = r'C:\Users\Administrator\Desktop\DBNet.pytorch-master\datasets\train\img'
    txtPath = r'C:\Users\Administrator\Desktop\DBNet.pytorch-master\datasets\train\gt'
    savePath = r'C:\Users\Administrator\Desktop\DBNet.pytorch-master\datasets\train.json'
    txt2json(imagePath=imagePath, txtPath=txtPath, savePath=savePath)