前言:
关系型数据库要符合第一范式即原子性,因此字段多值情况只能分行处理,如下表,假设keys是terms、appl_dt,则no, predict_pay_dt, actual_pay_dt 是多值
如果要转换成NoSQL或collection对象,需要以keys分组,然后列转成行(或列转成collection)。
ETL开源工具Kettle也有行扁平化,不过不能分组进行,功能有限。
现以Java实现,示例:
public static List<Fpd> columnToLine(Connection conn) {
List<Fpd> fpdList = new ArrayList<Fpd>();
Fpd preFpd = new Fpd();
Fpd fpd = new Fpd();
List<Integer> preAgrList = new ArrayList<Integer>();
List<Integer> agrList = new ArrayList<Integer>();
List<Integer> preDefList = new ArrayList<Integer>();
List<Integer> defList = new ArrayList<Integer>();
List<Integer> preDefDaysList = new ArrayList<Integer>();
List<Integer> defDaysList = new ArrayList<Integer>();
ResultSet rs = selectValue(conn);
try {
while(rs.next()){
// 1. copy current line to previous line
try {
preFpd = DeepCopy.copy(fpd);
preAgrList = DeepCopy.copy(agrList);
preDefList = DeepCopy.copy(defList);
preDefDaysList = DeepCopy.copy(defDaysList);
} catch (Exception e) {
e.printStackTrace();
}
// 2. assign current line
fpd = new Fpd();
fpd.setMobileNo(rs.getString("mobile_no"));
fpd.setName(rs.getString("name"));
fpd.setLoanAmount(rs.getInt("loan_amount"));
fpd.setTerms(rs.getInt("terms"));
fpd.setApplDt(rs.getString("appl_dt"));
fpd.setZhangdanNo(rs.getInt("zhangdan_no"));
agrList.add(rs.getInt("ARG"));
defList.add(rs.getInt("DEF"));
defDaysList.add(rs.getInt("DEF_days"));
// 3. if group by keys are different, update record
if (preFpd.getMobileNo() != null && !fpd.keysEquals(preFpd)) {
preFpd.setAgr(preAgrList);
preFpd.setDef(preDefList);
preFpd.setDefDays(preDefDaysList);
fpdList.add(preFpd);
agrList.clear();
defList.clear();
defDaysList.clear();
agrList.add(rs.getInt("ARG"));
defList.add(rs.getInt("DEF"));
defDaysList.add(rs.getInt("DEF_days"));
}
}
// 4. final process (copy current line & update record)
{
try {
preFpd = DeepCopy.copy(fpd);
preAgrList = DeepCopy.copy(agrList);
preDefList = DeepCopy.copy(defList);
preDefDaysList = DeepCopy.copy(defDaysList);
} catch (Exception e) {
e.printStackTrace();
}
preFpd.setAgr(preAgrList);
preFpd.setDef(preDefList);
preFpd.setDefDays(preDefDaysList);
fpdList.add(preFpd);
}
} catch (SQLException e) {
e.printStackTrace();
}
return fpdList;
}
附上转成行后的表批量插入(关系型):
public static void insertValue(Connection conn, List<Fpd> fpdList) {
String sql = "insert into def_analysis ("
+ "mobile_no, name, amount, terms, appl_dt, "
+ "AGR_1, AGR_2, AGR_3, AGR_4, AGR_5, AGR_6, "
+ "AGR_7, AGR_8, AGR_9, AGR_10, AGR_11, AGR_12, "
+ "DEF_1, DEF_2, DEF_3, DEF_4, DEF_5, DEF_6, "
+ "DEF_7, DEF_8, DEF_9, DEF_10, DEF_11, DEF_12, "
+ "DEF_DAYS_1, DEF_DAYS_2, DEF_DAYS_3, DEF_DAYS_4, DEF_DAYS_5, DEF_DAYS_6, "
+ "DEF_DAYS_7, DEF_DAYS_8, DEF_DAYS_9, DEF_DAYS_10, DEF_DAYS_11, DEF_DAYS_12"
+ ") values(?, ?, ?, ?, ?, "
+ "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
+ "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
+ "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? "
+ ")";
try {
PreparedStatement ps = conn.prepareStatement(sql);
conn.setAutoCommit(false);
for (Fpd fpd:fpdList) {
ps.setString(1,fpd.getMobileNo());
ps.setString(2,fpd.getName());
ps.setInt(3,fpd.getAmount());
ps.setInt(4,fpd.getTerms());
ps.setString(5,fpd.getApplDt());
Iterator<Integer> agrItr = fpd.getAgr().iterator();
for(int i = 6; i<18; i++) {
if (agrItr.hasNext())
ps.setInt(i,agrItr.next());
else
ps.setInt(i,0);
}
Iterator<Integer> defItr = fpd.getDef().iterator();
for(int i = 18; i<30; i++) {
if (defItr.hasNext())
ps.setInt(i,defItr.next());
else
ps.setInt(i,0);
}
Iterator<Integer> defDaysItr = fpd.getDefDays().iterator();
for(int i = 30; i<42; i++) {
if (defDaysItr.hasNext())
ps.setInt(i,defDaysItr.next());
else
ps.setInt(i,0);
}
ps.addBatch();
}
ps.executeBatch();
conn.commit();
conn.setAutoCommit(true);
} catch (SQLException e) {
e.printStackTrace();
}
}
注意:
previous 对象要用值传递而不能地址传递,DeepCopy可参考:http://blog.csdn.net/textboy/article/details/49075263