Java 解析http返回的xml数据,写成txt文件
需求:
每小时抓取给定api接口返回的xml数据,把xml数据保存为XML文件;再把xml数据转换为txt格式的数据,保存为txt文件。
文件名以yyyyMMddHH0000.txt和yyyyMMddHH0000.xml方式命名,如20180703090000.xml,表示2018年7月3日上午9时下载的数据
api说明:
GetLastHoursData 获取任意小时的小时数据 请求案例:
http://59.172.208.250:8001/AppServer/PublishData.asmx/GetLastHoursData?UsrName={账号}&passWord={密码}&date=2017-11-13%2012:00:00
返回数据格式:
<ArrayOfLt_HourAqiModel xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://tempuri.org/">
<Lt_HourAqiModel>
<StationName>刘家沟</StationName>
<UniqueCode>420300052</UniqueCode>
<QueryTime>2018-07-03 09:00:00</QueryTime>
<PM25OneHour>NA</PM25OneHour>
<PM10OneHour>NA</PM10OneHour>
<SO2OneHour>NA</SO2OneHour>
<NO2OneHour>NA</NO2OneHour>
<COOneHour>NA</COOneHour>
<O3OneHour>NA</O3OneHour>
<AQI>NA</AQI>
<PrimaryEP />
<AQDegree />
<AQType />
</Lt_HourAqiModel>
<Lt_HourAqiModel>
<StationName>大冶市</StationName>
<UniqueCode>420200402</UniqueCode>
<QueryTime>2018-07-03 09:00:00</QueryTime>
<PM25OneHour>NA</PM25OneHour>
<PM10OneHour>NA</PM10OneHour>
<SO2OneHour>NA</SO2OneHour>
<NO2OneHour>NA</NO2OneHour>
<COOneHour>NA</COOneHour>
<O3OneHour>NA</O3OneHour>
<AQI>NA</AQI>
<PrimaryEP />
<AQDegree />
<AQType />
</Lt_HourAqiModel>
</ArrayOfLt_HourAqiModel>
第一步
编写配置文件url2.xml(后文代码按文件名"url2.xml"读取,文件需放在程序运行目录下)
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example of the request URL assembled from this configuration:
http://59.172.208.250:8001/AppServer/PublishData.asmx/ GetLastHoursData?
UsrName={account}&passWord={password}&date=2017-11-13%2012:00:00 -->
<!-- Every value is trimmed by the reader, so the line break before a closing tag is harmless. -->
<pm25>
<!-- Service base address. The method name is appended directly after it, so keep the trailing "/". -->
<baseUrl>http://59.172.208.250:8001/AppServer/PublishData.asmx/
</baseUrl>
<!-- Web-service method to call. -->
<method>GetLastHoursData
</method>
<!-- Account name sent as the UsrName query parameter. -->
<UsrName>a******n
</UsrName>
<!-- Password sent as the passWord query parameter. -->
<passWord>I*****XWO
</passWord>
<!-- Output directories. The file name is appended directly, so keep the trailing separator. -->
<xmlSavePath>K:\baidudownload\</xmlSavePath>
<txtSavePath>K:\baidudownload\</txtSavePath>
</pm25>
导入日志记录jar包,记录每小时的流程
1.log4j-api-2.3.jar
2.log4j-core-2.3.jar
第二步:解析下载所需要的配置xml文件
1.获取url链接
2.获取xml文件的存放路径
3.获取txt文件的存放路径
package cn.whu.edu.sendimage.pm25.spider;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
/**
 * Reads the spider configuration file and exposes the request URL prefix and the
 * output directories for the downloaded XML and the generated TXT files.
 */
public class XMLReaderUtil {
    private static final Logger logger = LogManager.getLogger(XMLReaderUtil.class);

    /** Parsed configuration; held per instance so one reader never overwrites another. */
    private final Document document;

    /**
     * Parses the configuration file.
     *
     * @param path location of the configuration XML, passed to {@code DocumentBuilder.parse(String)}
     * @throws SAXException if the file is not well-formed XML
     * @throws IOException if the file cannot be read
     * @throws ParserConfigurationException if no XML parser can be created
     */
    public XMLReaderUtil(String path) throws SAXException, IOException, ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = factory.newDocumentBuilder();
        this.document = db.parse(path);
    }

    /**
     * Builds the request URL up to and including the credentials. The caller appends the
     * {@code &date=...} parameter, which depends on the time of the run.
     *
     * @return {@code baseUrl + method + "?UsrName=" + UsrName + "&passWord=" + passWord}
     * @throws IllegalStateException if one of the four settings is missing or empty
     */
    public String getUrl() throws Exception {
        String baseUrl = requiredText("baseUrl");
        String method = requiredText("method");
        String usrName = requiredText("UsrName");
        String passWord = requiredText("passWord");
        // Only the endpoint is logged: the complete URL carries the account password.
        logger.info("{}{}", baseUrl, method);
        return baseUrl + method + "?" + "UsrName=" + usrName + "&" + "passWord=" + passWord;
    }

    /**
     * Returns the directory prefix under which downloaded XML files are stored.
     *
     * @throws IllegalStateException if the setting is missing or empty
     */
    public String getXmlSavePath() throws Exception {
        String xmlSavePath = requiredText("xmlSavePath");
        logger.info(xmlSavePath);
        return xmlSavePath;
    }

    /**
     * Returns the directory prefix under which generated TXT files are stored.
     *
     * @throws IllegalStateException if the setting is missing or empty
     */
    public String getTxtSavePath() {
        String txtSavePath = requiredText("txtSavePath");
        logger.info(txtSavePath);
        return txtSavePath;
    }

    /**
     * Returns the trimmed text of the first element with the given tag name, failing with
     * a message that names the setting instead of a bare NullPointerException.
     */
    private String requiredText(String tagName) {
        org.w3c.dom.Node node = document.getElementsByTagName(tagName).item(0);
        if (node == null) {
            throw new IllegalStateException("Missing <" + tagName + "> element in configuration file");
        }
        String text = node.getTextContent().trim();
        if (text.isEmpty()) {
            throw new IllegalStateException("Empty <" + tagName + "> element in configuration file");
        }
        return text;
    }

    /** Manual smoke test: prints the three settings read from {@code url2.xml}. */
    public static void main(String[] args) throws Exception {
        XMLReaderUtil reader = new XMLReaderUtil("url2.xml");
        String url = reader.getUrl();
        String xmlpath = reader.getXmlSavePath();
        String txtpath = reader.getTxtSavePath();
        System.out.println(url);
        System.out.println(xmlpath);
        System.out.println(txtpath);
    }
}
第三步:爬取一个url的数据
导入httpClient包
httpClient下载链接:http://hc.apache.org/downloads.cgi
获取http数据写入xml文件
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import org.apache.http.HttpEntity;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * One download cycle: fetches the hourly data for the current hour, stores the raw
 * response as an XML file and converts that file to a TXT file.
 */
public class Spider {
    private static final Logger logger = LogManager.getLogger(Spider.class);

    /** Configuration file read at the start of every cycle. */
    private static final String CONFIG_FILE = "url2.xml";

    /**
     * Runs one download-and-convert cycle.
     *
     * <p>The clock is read exactly once, so the date in the request and the names of the
     * XML and TXT files always refer to the same hour, even when the cycle straddles an
     * hour boundary.
     *
     * @throws Exception if the configuration cannot be read, the download fails or the
     *     conversion fails; nothing is swallowed, so the caller sees the real cause
     */
    public static void start() throws Exception {
        XMLReaderUtil util = new XMLReaderUtil(CONFIG_FILE);
        String url = util.getUrl();
        String xmlpath = util.getXmlSavePath();
        String txtpath = util.getTxtSavePath();

        Calendar now = Calendar.getInstance();
        String time = getBeijingTimeStr(now);
        String xmlFile = xmlpath + time + ".xml";

        // Download the response into the XML file.
        getHttpJson(url, getUrlDate(now), xmlFile);
        // Convert the XML file to TXT.
        Xml2Txt xml2txt = new Xml2Txt(xmlFile, txtpath + time + ".txt");
        xml2txt.start();
    }

    /**
     * Formats the value of the {@code date} query parameter, e.g.
     * {@code 2017-11-13%2012:00:00} (the space is already URL-encoded as {@code %20}).
     */
    private static String getUrlDate(Calendar now) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd%20HH:00:00");
        return sdf.format(now.getTime());
    }

    /**
     * Requests {@code url + "&date=" + time} and streams the response body to
     * {@code xmlFile}, replacing any existing file.
     *
     * @throws IOException if the request fails, the server does not answer 200, the
     *     response has no body, or the file cannot be written
     */
    private static void getHttpJson(String url, String time, String xmlFile) throws IOException {
        logger.info("getHttpJson========================================");
        // The full URL is not logged because it contains the account password.
        logger.info("Requesting hourly data for date={}", time);
        HttpGet httpGet = new HttpGet(url + "&date=" + time);
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
                CloseableHttpResponse response = httpClient.execute(httpGet)) {
            int status = response.getStatusLine().getStatusCode();
            if (status != 200) {
                // Do not store an error page as if it were data.
                throw new IOException("Unexpected HTTP status " + status + " for date=" + time);
            }
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("Empty HTTP response for date=" + time);
            }
            try (InputStream in = entity.getContent();
                    FileOutputStream fout = new FileOutputStream(new File(xmlFile))) {
                byte[] buffer = new byte[1024];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    // Write only the bytes actually read; the tail of the buffer is stale.
                    fout.write(buffer, 0, read);
                }
                fout.flush();
            }
        }
    }

    /**
     * Formats the file-name prefix {@code yyyyMMddHH0000} for the given moment, using the
     * calendar's own time zone.
     * NOTE(review): despite the method name, the calendar is created with the JVM default
     * time zone, not explicitly Beijing time; confirm the host's zone setting.
     */
    private static String getBeijingTimeStr(Calendar now) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHH'0000'");
        sdf.setTimeZone(now.getTimeZone());
        return sdf.format(now.getTime());
    }

    public static void main(String[] args) throws Exception {
        start();
    }
}
第四步:解析下载的xml文件,写成txt
使用xstream包
下载xstream包链接:http://x-stream.github.io/
导入以下三个包:
1.xstream-1.4.10.jar
2.xpp3_min-1.1.4c.jar
3.xmlpull-1.1.3.1.jar
根据xml标签创建类:ArrayOfLt_HourAqiModel;Lt_HourAqiModel
1.ArrayOfLt_HourAqiModel类:
import java.util.List;
/**
 * Root element of the service response: a list of {@code Lt_HourAqiModel} records.
 *
 * <p>The field keeps the name of the XML child element because the XStream mapping
 * registers it as an implicit collection under that name.
 */
public class ArrayOfLt_HourAqiModel {
    /** Station records; may be left unset by the XML mapping when there are no children. */
    private List<Lt_HourAqiModel> Lt_HourAqiModel;

    /** Appends one record, creating the backing list on first use. */
    public void add(Lt_HourAqiModel lt) {
        records().add(lt);
    }

    /**
     * Returns the live list of records.
     *
     * @return the records, never {@code null}; empty when none were added or parsed
     */
    public List<Lt_HourAqiModel> getContent() {
        return records();
    }

    /** Lazily creates the list so callers never hit a null field. */
    private List<Lt_HourAqiModel> records() {
        if (this.Lt_HourAqiModel == null) {
            this.Lt_HourAqiModel = new java.util.ArrayList<Lt_HourAqiModel>();
        }
        return this.Lt_HourAqiModel;
    }
}
2.Lt_HourAqiModel类
/**
 * One station record of the hourly response. Every value is kept as the raw text of the
 * XML element with the same name as the field, for example:
 *
 * <pre>
 * &lt;Lt_HourAqiModel&gt;
 *   &lt;StationName&gt;...&lt;/StationName&gt;
 *   &lt;UniqueCode&gt;420300052&lt;/UniqueCode&gt;
 *   &lt;QueryTime&gt;2018-07-02 16:00:00&lt;/QueryTime&gt;
 *   &lt;PM25OneHour&gt;NA&lt;/PM25OneHour&gt;
 *   ...
 *   &lt;AQType/&gt;
 * &lt;/Lt_HourAqiModel&gt;
 * </pre>
 */
public class Lt_HourAqiModel {
    // Field names intentionally mirror the XML element names.
    private String StationName;
    private String UniqueCode;
    private String QueryTime;
    private String PM25OneHour;
    private String PM10OneHour;
    private String SO2OneHour;
    private String NO2OneHour;
    private String COOneHour;
    private String O3OneHour;
    private String AQI;
    private String PrimaryEP;
    private String AQDegree;
    private String AQType;

    /** Lists every field as {@code name=value}, in declaration order. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Lt_HourAqiModel [");
        text.append("StationName=").append(this.StationName);
        text.append(", UniqueCode=").append(this.UniqueCode);
        text.append(", QueryTime=").append(this.QueryTime);
        text.append(", PM25OneHour=").append(this.PM25OneHour);
        text.append(", PM10OneHour=").append(this.PM10OneHour);
        text.append(", SO2OneHour=").append(this.SO2OneHour);
        text.append(", NO2OneHour=").append(this.NO2OneHour);
        text.append(", COOneHour=").append(this.COOneHour);
        text.append(", O3OneHour=").append(this.O3OneHour);
        text.append(", AQI=").append(this.AQI);
        text.append(", PrimaryEP=").append(this.PrimaryEP);
        text.append(", AQDegree=").append(this.AQDegree);
        text.append(", AQType=").append(this.AQType);
        return text.append("]").toString();
    }

    // ---- station identity and timestamp ----

    public String getStationName() {
        return this.StationName;
    }

    public void setStationName(String stationName) {
        this.StationName = stationName;
    }

    public String getUniqueCode() {
        return this.UniqueCode;
    }

    public void setUniqueCode(String uniqueCode) {
        this.UniqueCode = uniqueCode;
    }

    public String getQueryTime() {
        return this.QueryTime;
    }

    public void setQueryTime(String queryTime) {
        this.QueryTime = queryTime;
    }

    // ---- one-hour pollutant readings ----

    public String getPM25OneHour() {
        return this.PM25OneHour;
    }

    public void setPM25OneHour(String pM25OneHour) {
        this.PM25OneHour = pM25OneHour;
    }

    public String getPM10OneHour() {
        return this.PM10OneHour;
    }

    public void setPM10OneHour(String pM10OneHour) {
        this.PM10OneHour = pM10OneHour;
    }

    public String getSO2OneHour() {
        return this.SO2OneHour;
    }

    public void setSO2OneHour(String sO2OneHour) {
        this.SO2OneHour = sO2OneHour;
    }

    public String getNO2OneHour() {
        return this.NO2OneHour;
    }

    public void setNO2OneHour(String nO2OneHour) {
        this.NO2OneHour = nO2OneHour;
    }

    public String getCOOneHour() {
        return this.COOneHour;
    }

    public void setCOOneHour(String cOOneHour) {
        this.COOneHour = cOOneHour;
    }

    public String getO3OneHour() {
        return this.O3OneHour;
    }

    public void setO3OneHour(String o3OneHour) {
        this.O3OneHour = o3OneHour;
    }

    // ---- index and classification ----

    public String getAQI() {
        return this.AQI;
    }

    public void setAQI(String aQI) {
        this.AQI = aQI;
    }

    public String getPrimaryEP() {
        return this.PrimaryEP;
    }

    public void setPrimaryEP(String primaryEP) {
        this.PrimaryEP = primaryEP;
    }

    public String getAQDegree() {
        return this.AQDegree;
    }

    public void setAQDegree(String aQDegree) {
        this.AQDegree = aQDegree;
    }

    public String getAQType() {
        return this.AQType;
    }

    public void setAQType(String aQType) {
        this.AQType = aQType;
    }
}
从xml文件中转换为对象,并写入txt
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.thoughtworks.xstream.XStream;
/**
 * Converts one downloaded XML response into a tab-separated TXT file with one row per
 * station record.
 */
public class Xml2Txt {
    private static final Logger logger = LogManager.getLogger(Xml2Txt.class);

    /** Column titles, in exactly the order {@link #formatRow} writes the values. */
    private static final String HEADER = "StationName" + "\t" + "UniqueCode" + "\t" + "QueryTime" + "\t"
            + "PM25OneHour" + "\t" + "PM10OneHour" + "\t" + "SO2OneHour" + "\t" + "NO2OneHour" + "\t"
            + "COOneHour" + "\t" + "O3OneHour" + "\t" + "AQI" + "\t" + "PrimaryEP" + "\t" + "AQDegree"
            + "\t" + "AQType" + "\t\r\n";

    private String xmlPath;
    private String txtPath;

    /**
     * @param xmlPath path of the XML file to read
     * @param txtPath path of the TXT file to write
     */
    public Xml2Txt(String xmlPath, String txtPath) {
        super();
        this.xmlPath = xmlPath;
        this.txtPath = txtPath;
    }

    public String getXmlPath() {
        return xmlPath;
    }

    public void setXmlPath(String xmlPath) {
        this.xmlPath = xmlPath;
    }

    public String getTxtPath() {
        return txtPath;
    }

    public void setTxtPath(String txtPath) {
        this.txtPath = txtPath;
    }

    /**
     * Parses the XML file into a list of station records.
     *
     * @param xmlPath path of the XML file to read
     * @return the records as returned by {@code ArrayOfLt_HourAqiModel.getContent()}
     */
    public static List<Lt_HourAqiModel> getXmlDataList(String xmlPath) {
        logger.info("======getXmlDataList========");
        logger.info("xmlPath:" + xmlPath);
        XStream xstream = new XStream();
        // The XML comes from a remote server: restrict deserialization to the two model
        // classes (plus XStream's default safe types) instead of accepting any class.
        XStream.setupDefaultSecurity(xstream);
        xstream.allowTypes(new Class[] { ArrayOfLt_HourAqiModel.class, Lt_HourAqiModel.class });
        xstream.alias("ArrayOfLt_HourAqiModel", ArrayOfLt_HourAqiModel.class);
        xstream.alias("Lt_HourAqiModel", Lt_HourAqiModel.class);
        xstream.addImplicitCollection(ArrayOfLt_HourAqiModel.class, "Lt_HourAqiModel");
        ArrayOfLt_HourAqiModel parsed = (ArrayOfLt_HourAqiModel) xstream.fromXML(new File(xmlPath));
        List<Lt_HourAqiModel> records = parsed.getContent();
        logger.info(records);
        return records;
    }

    /**
     * Appends the records to the TXT file, writing the header row first when the file is
     * still empty. The file is created even when there is nothing to write.
     *
     * @param list records to write; {@code null} or empty writes nothing
     * @param txtPath path of the TXT file
     * @throws IOException if the file cannot be created or written
     */
    public static void WriteStringToTxtFile(List<Lt_HourAqiModel> list, String txtPath) throws IOException {
        logger.info("================WriteListToTxtFile========================");
        logger.info("写入地址:" + txtPath);
        File file = new File(txtPath);
        if (!file.exists()) {
            file.createNewFile();
        }
        if (list == null || list.isEmpty()) {
            logger.info("list size is null or size is 0");
            return;
        }
        // A non-empty file already carries the header from an earlier write.
        boolean needsHeader = file.length() == 0;
        try (FileWriter out = new FileWriter(file, true)) {
            if (needsHeader) {
                out.write(HEADER);
            }
            for (Lt_HourAqiModel lt : list) {
                out.write(formatRow(lt));
            }
        }
    }

    /** Formats one record as a tab-terminated row, one value per header column. */
    private static String formatRow(Lt_HourAqiModel lt) {
        return lt.getStationName() + "\t"
                + lt.getUniqueCode() + "\t"
                + lt.getQueryTime() + "\t"
                + lt.getPM25OneHour() + "\t"
                + lt.getPM10OneHour() + "\t"
                + lt.getSO2OneHour() + "\t"
                + lt.getNO2OneHour() + "\t"
                + lt.getCOOneHour() + "\t"
                + lt.getO3OneHour() + "\t"
                + lt.getAQI() + "\t"
                + lt.getPrimaryEP() + "\t"
                + lt.getAQDegree() + "\t"
                + lt.getAQType() + "\t"
                + "\r\n";
    }

    /**
     * Reads the configured XML file and appends its records to the configured TXT file.
     *
     * @throws IOException if the TXT file cannot be written
     */
    public void start() throws IOException {
        List<Lt_HourAqiModel> list = getXmlDataList(this.xmlPath);
        logger.info(list);
        WriteStringToTxtFile(list, this.txtPath);
    }
}
第五步:定时任务调度
每小时下载一次
创建任务
import java.util.Date;
import java.util.TimerTask;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/** Timer task that runs one {@code Spider.start()} cycle per invocation. */
public class Tasker extends TimerTask {
    private static final Logger logger = LogManager.getLogger(Tasker.class);

    /**
     * Runs one download cycle. Every exception is caught and written to the log with its
     * stack trace, so a failed cycle never propagates out of the task.
     */
    @Override
    public void run() {
        logger.info("============开始执行"+new Date()+"===================");
        try {
            Spider.start();
        } catch (Exception e) {
            logger.error("Hourly download cycle failed", e);
        }
    }
}
调度程序
import java.util.Timer;
/** Entry point: starts the hourly download schedule. */
public class PMTimer {
    /** Interval between two download cycles. */
    private static final long ONE_HOUR_MILLIS = 60 * 60 * 1000L;

    /**
     * Runs the first cycle immediately and then one cycle per hour.
     *
     * <p>Fixed-rate scheduling keeps every run anchored to the start time. With
     * fixed-delay scheduling each late start pushes all later runs back, and the
     * accumulated drift can eventually carry a run past an hour boundary, leaving one
     * hourly file missing.
     */
    public static void main(String[] args) {
        Timer timer = new Timer();
        Tasker tasker = new Tasker();
        timer.scheduleAtFixedRate(tasker, 0L, ONE_HOUR_MILLIS);
    }
}