日期格式在Java中创建.CSV文件时受到干扰 [英] Date Format getting disturb when creating .CSV file in Java
问题描述
我正在创建一个网页抓取工具,然后将数据存储在.CSV文件中。
我的程序运行正常,但有一个问题,我从其中检索数据的网站有一个日期(月日,年)
格式。所以当我将数据保存在.CSV文件中时,它将把Year作为另一个列,因为所有的数据都被处理。我实际上想将该数据存储到(MM-MON-YYYY)
中,并将有效期日期存储在一列中。我在下面发布我的代码。请帮助我。谢谢!
PS:对不能在原始帖子中填写我想要的格式很抱歉。
package com.mufapscraping;
// import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
// import java.util.Collections;
import java.util.Iterator;
// import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ComMufapScraping {
boolean writeCSVToConsole = true;
boolean writeCSVToFile = true;
// String destinationCSVFile =C:\\convertedCSV.csv;
boolean sortTheList = true;
boolean writeToConsole;
boolean writeToFile;
public static Document doc = null;
public static Elements tbodyElements = null;
public static Elements elements = null;
public static Elements tdElements = null;
public static Elements trElement2 = null;
public static String Dcomma =,2;
public static ArrayList< Elements> sampleList = new ArrayList< Elements>();
public static void createConnection()throws IOException {
System.setProperty(http.proxyHost,191.1.1.123);
System.setProperty(http.proxyPort,8080);
String tempUrl =http://www.mufap.com.pk/nav_returns_performance.php?tab=01;
doc = Jsoup.connect(tempUrl).get();
}
public static void parsingHTML()throws Exception {
for(int i = 1; i <= 1; i ++){
tbodyElements = doc.getElementsByTag(tbody);
// Element table = doc.getElementById(dataTable);
if(tbodyElements.isEmpty()){
throw new Exception(Table is not found);
}
elements = tbodyElements.get(0).getElementsByTag(tr);
for(Element trElement:elements){
trElement2 = trElement.getElementsByTag(tr);
tdElements = trElement.getElementsByTag(td);
FileWriter sb = new FileWriter(C:\\convertedCSV2.csv,true);
for(Iterator< Element> it = tdElements.iterator(); it.hasNext();){
if(it.hasNext()){
sb.append n);
}
(Iterator< Element> it2 = trElement2.iterator(); it.hasNext();){
元素tdElement = it.next
sb.append(tdElement.text());
if(it2.hasNext()){
sb.append(,);
}
}
System.out.println(sb.toString());
sb.flush();
sb.close();
}
System.out.println(sampleList.add(tdElements));
/ * for(Elements elements2:zakazky){
System.out.println(elements2);
} * /
}
}
}
public static void main(String [] args)throws IOException,Exception {
createConnection();
parsingHTML();
}
}
而不是直接在 FileWriter
因此,请替换以下行:
sb.append(tdElement.text());
到
sb.append(formatData(tdElement.text()));
static final SimpleDateFormat FORMATTER_MMM_d_yyyy = new SimpleDateFormat(MMM d,yyyy,Locale.US);
private static final SimpleDateFormat FORMATTER_dd_MMM_yyyy = new SimpleDateFormat(dd-MMM-YYYY,Locale.US);
public static String formatData(String text){
String tmp = null;
try {
Date d = FORMATTER_MMM_d_yyyy.parse(text);
tmp = FORMATTER_dd_MMM_yyyy.format(d);
} catch(ParseException pe){
tmp = text;
}
return tmp;
}
SAMPLE
public static void main(String [] args){
String [] fields = new String [] {//
ABL Cash Fund,//
AA(f),//
2016年4月18日,//
10.4729//
}
for(String field:fields){
System.out.format(%s\\\
%s\\\
\\\
,field,formatData(field));
}
}
OUTPUT
ABL现金基金
ABL现金基金
AA(f)
AA(f)
Apr 18,2016
2016年4月18日
10.4729
10.4729
I am creating a web scraper and then store the data in the .CSV file. My program is running fine but, there is a problem that the website from where I am retrieving data have a date which is in
(Month Day, Year)
format. So when I save the data in .CSV file it will consider the Year as another column due to which all the data gets manipulated. I actually want to store that data into(MM-MON-YYYY)
and store Validity date in one column. I am posting my code below. Kindly, help me out. Thanks!P.S: I am sorry for not writing the format I want in the original post.
package com.mufapscraping; //import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; //import java.util.Collections; import java.util.Iterator; //import java.util.List; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class ComMufapScraping { boolean writeCSVToConsole = true; boolean writeCSVToFile = true; //String destinationCSVFile = "C:\\convertedCSV.csv"; boolean sortTheList = true; boolean writeToConsole; boolean writeToFile; public static Document doc = null; public static Elements tbodyElements = null; public static Elements elements = null; public static Elements tdElements = null; public static Elements trElement2 = null; public static String Dcomma = ", 2"; public static ArrayList<Elements> sampleList = new ArrayList<Elements>(); public static void createConnection() throws IOException { System.setProperty("http.proxyHost", "191.1.1.123"); System.setProperty("http.proxyPort", "8080"); String tempUrl = "http://www.mufap.com.pk/nav_returns_performance.php?tab=01"; doc = Jsoup.connect(tempUrl).get(); } public static void parsingHTML() throws Exception { for (int i = 1; i <= 1; i++) { tbodyElements = doc.getElementsByTag("tbody"); //Element table = doc.getElementById("dataTable"); if (tbodyElements.isEmpty()) { throw new Exception("Table is not found"); } elements = tbodyElements.get(0).getElementsByTag("tr"); for (Element trElement : elements) { trElement2 = trElement.getElementsByTag("tr"); tdElements = trElement.getElementsByTag("td"); FileWriter sb = new FileWriter("C:\\convertedCSV2.csv", true); for (Iterator<Element> it = tdElements.iterator(); it.hasNext();) { if (it.hasNext()) { sb.append(" \n "); } for (Iterator<Element> it2 = trElement2.iterator(); it.hasNext();) { Element tdElement = it.next(); sb.append(tdElement.text()); if (it2.hasNext()) { sb.append(" , "); } } System.out.println(sb.toString()); sb.flush(); sb.close(); } System.out.println(sampleList.add(tdElements)); /* for (Elements elements2 : zakazky) { System.out.println(elements2); }*/ } } } public static void main(String[] args) throws IOException, Exception { createConnection(); parsingHTML(); } }
解决方案Instead of appeding directly the element text in the
FileWriter
, format it first then append it.So, replace the following line:
sb.append(tdElement.text());
into
sb.append(formatData(tdElement.text()));
private static final SimpleDateFormat FORMATTER_MMM_d_yyyy = new SimpleDateFormat("MMM d, yyyy", Locale.US); private static final SimpleDateFormat FORMATTER_dd_MMM_yyyy = new SimpleDateFormat("dd-MMM-YYYY", Locale.US); public static String formatData(String text) { String tmp = null; try { Date d = FORMATTER_MMM_d_yyyy.parse(text); tmp = FORMATTER_dd_MMM_yyyy.format(d); } catch (ParseException pe) { tmp = text; } return tmp; }
SAMPLE
public static void main(String[] args) { String[] fields = new String[] { // "ABL Cash Fund", // "AA(f)", // "Apr 18, 2016", // "10.4729" // }; for (String field : fields) { System.out.format("%s\n%s\n\n", field, formatData(field)); } }
OUTPUT
ABL Cash Fund ABL Cash Fund AA(f) AA(f) Apr 18, 2016 18-Apr-2016 10.4729 10.4729
这篇关于日期格式在Java中创建.CSV文件时受到干扰的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!