日期格式在Java中创建.CSV文件时受到干扰 [英] Date Format getting disturb when creating .CSV file in Java

查看:295
本文介绍了日期格式在Java中创建.CSV文件时受到干扰的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在创建一个网页抓取工具,然后将数据存储在.CSV文件中。
我的程序运行正常,但有一个问题,我从其中检索数据的网站有一个日期(月日,年)格式。所以当我将数据保存在.CSV文件中时,它将把Year作为另一个列,因为所有的数据都被处理。我实际上想将该数据存储到(MM-MON-YYYY)中,并将有效期日期存储在一列中。我在下面发布我的代码。请帮助我。谢谢!



PS:对不能在原始帖子中填写我想要的格式很抱歉。

  package com.mufapscraping; 

// import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
// import java.util.Collections;
import java.util.Iterator;
// import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ComMufapScraping {

boolean writeCSVToConsole = true;
boolean writeCSVToFile = true;
// String destinationCSVFile =C:\\convertedCSV.csv;
boolean sortTheList = true;
boolean writeToConsole;
boolean writeToFile;
public static Document doc = null;
public static Elements tbodyElements = null;
public static Elements elements = null;
public static Elements tdElements = null;
public static Elements trElement2 = null;
public static String Dcomma =,2;
public static ArrayList< Elements> sampleList = new ArrayList< Elements>();

public static void createConnection()throws IOException {
System.setProperty(http.proxyHost,191.1.1.123);
System.setProperty(http.proxyPort,8080);
String tempUrl =http://www.mufap.com.pk/nav_returns_performance.php?tab=01;
doc = Jsoup.connect(tempUrl).get();
}

public static void parsingHTML()throws Exception {
for(int i = 1; i <= 1; i ++){

tbodyElements = doc.getElementsByTag(tbody);
// Element table = doc.getElementById(dataTable);

if(tbodyElements.isEmpty()){
throw new Exception(Table is not found);
}
elements = tbodyElements.get(0).getElementsByTag(tr);

for(Element trElement:elements){
trElement2 = trElement.getElementsByTag(tr);
tdElements = trElement.getElementsByTag(td);
FileWriter sb = new FileWriter(C:\\convertedCSV2.csv,true);
for(Iterator< Element> it = tdElements.iterator(); it.hasNext();){
if(it.hasNext()){
sb.append n);
}
(Iterator< Element> it2 = trElement2.iterator(); it.hasNext();){
元素tdElement = it.next
sb.append(tdElement.text());
if(it2.hasNext()){
sb.append(,);
}

}

System.out.println(sb.toString());
sb.flush();
sb.close();
}

System.out.println(sampleList.add(tdElements));
/ * for(Elements elements2:zakazky){
System.out.println(elements2);
} * /

}
}
}

public static void main(String [] args)throws IOException,Exception {
createConnection();
parsingHTML();

}

}

解决方案

而不是直接在 FileWriter



因此,请替换以下行:

  sb.append(tdElement.text()); 

  sb.append(formatData(tdElement.text())); 






  static final SimpleDateFormat FORMATTER_MMM_d_yyyy = new SimpleDateFormat(MMM d,yyyy,Locale.US); 
private static final SimpleDateFormat FORMATTER_dd_MMM_yyyy = new SimpleDateFormat(dd-MMM-YYYY,Locale.US);

public static String formatData(String text){
String tmp = null;

try {
Date d = FORMATTER_MMM_d_yyyy.parse(text);
tmp = FORMATTER_dd_MMM_yyyy.format(d);
} catch(ParseException pe){
tmp = text;
}

return tmp;
}



SAMPLE



  public static void main(String [] args){
String [] fields = new String [] {//
ABL Cash Fund,//
AA(f),//
2016年4月18日,//
10.4729//
}

for(String field:fields){
System.out.format(%s\\\
%s\\\
\\\
,field,formatData(field));
}
}



OUTPUT



  ABL现金基金
ABL现金基金

AA(f)
AA(f)

Apr 18,2016
2016年4月18日

10.4729
10.4729


I am creating a web scraper and then store the data in the .CSV file. My program is running fine but, there is a problem that the website from where I am retrieving data have a date which is in (Month Day, Year) format. So when I save the data in .CSV file it will consider the Year as another column due to which all the data gets manipulated. I actually want to store that data into (MM-MON-YYYY) and store Validity date in one column. I am posting my code below. Kindly, help me out. Thanks!

P.S: I am sorry for not writing the format I want in the original post.

package com.mufapscraping;

//import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
//import java.util.Collections;
import java.util.Iterator;
//import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ComMufapScraping {

    boolean writeCSVToConsole = true;
    boolean writeCSVToFile = true;
    //String destinationCSVFile = "C:\\convertedCSV.csv";
    boolean sortTheList = true;
    boolean writeToConsole;
    boolean writeToFile;
    public static Document doc = null;
    public static Elements tbodyElements = null;
    public static Elements elements = null;
    public static Elements tdElements = null;
    public static Elements trElement2 = null;
    public static String Dcomma = ", 2";
    public static ArrayList<Elements> sampleList = new ArrayList<Elements>();

    public static void createConnection() throws IOException {
        System.setProperty("http.proxyHost", "191.1.1.123");
        System.setProperty("http.proxyPort", "8080");
        String tempUrl = "http://www.mufap.com.pk/nav_returns_performance.php?tab=01";
        doc = Jsoup.connect(tempUrl).get();
    }

    public static void parsingHTML() throws Exception {
        for (int i = 1; i <= 1; i++) {

            tbodyElements = doc.getElementsByTag("tbody");
            //Element table = doc.getElementById("dataTable");

            if (tbodyElements.isEmpty()) {
                throw new Exception("Table is not found");
            }
            elements = tbodyElements.get(0).getElementsByTag("tr");

            for (Element trElement : elements) {
                trElement2 = trElement.getElementsByTag("tr");
                tdElements = trElement.getElementsByTag("td");
                FileWriter sb = new FileWriter("C:\\convertedCSV2.csv", true);
                for (Iterator<Element> it = tdElements.iterator(); it.hasNext();) {
                    if (it.hasNext()) {
                        sb.append("  \n  ");
                    }
                    for (Iterator<Element> it2 = trElement2.iterator(); it.hasNext();) {
                        Element tdElement = it.next();
                        sb.append(tdElement.text());
                        if (it2.hasNext()) {
                            sb.append("   ,   ");
                        }

                    }

                    System.out.println(sb.toString());
                    sb.flush();
                    sb.close();
                }

                System.out.println(sampleList.add(tdElements));
                /* for (Elements elements2 : zakazky) {
                System.out.println(elements2);
            }*/

            }
        }
    }

    public static void main(String[] args) throws IOException, Exception {
        createConnection();
        parsingHTML();

    }

}

解决方案

Instead of appeding directly the element text in the FileWriter, format it first then append it.

So, replace the following line:

sb.append(tdElement.text());

into

sb.append(formatData(tdElement.text()));


private static final SimpleDateFormat FORMATTER_MMM_d_yyyy = new SimpleDateFormat("MMM d, yyyy", Locale.US);
private static final SimpleDateFormat FORMATTER_dd_MMM_yyyy = new SimpleDateFormat("dd-MMM-YYYY", Locale.US);

public static String formatData(String text) {
    String tmp = null;

    try {
        Date d = FORMATTER_MMM_d_yyyy.parse(text);
        tmp = FORMATTER_dd_MMM_yyyy.format(d);
    } catch (ParseException pe) {
        tmp = text;
    }

    return tmp;
}

SAMPLE

public static void main(String[] args) {
    String[] fields = new String[] { //
            "ABL Cash Fund", //
            "AA(f)", //
            "Apr 18, 2016", //
            "10.4729" //
    };

    for (String field : fields) {
        System.out.format("%s\n%s\n\n", field, formatData(field));
    }
}

OUTPUT

ABL Cash Fund
ABL Cash Fund

AA(f)
AA(f)

Apr 18, 2016
18-Apr-2016

10.4729
10.4729

这篇关于日期格式在Java中创建.CSV文件时受到干扰的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆