使用jsoup获取Captcha图像 [英] Getting Captcha image using jsoup

查看:164
本文介绍了使用jsoup获取Captcha图像的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试开发一个基于Java GUI的小应用程序:从我的学术网站URL获取验证码,让用户输入用户名、密码和验证码,并在登录后显示内容。但是我卡在了登录页面:提交表单后,网站返回的响应是

  alert('请输入正确的代码。'); window.history.go(-1); 

代码

 公共地图饼干; 
public void downloadCaptcha()throws Exception {
Connection.Response response = Jsoup.connect(https://academics.ddn.upes.ac.in/upes/)
.timeout (300000)
.userAgent(Mozilla / 5.0)
.method(Connection.Method.GET).execute();
cookies = response.cookies();
Connection.Response resultImageResponse = Jsoup.connect(https://academics.ddn.upes.ac.in/upes/modules/create_image.php)
.cookies(cookies)
.ignoreContentType(true)
.method(Connection.Method.GET).timeout(30000).execute();
FileOutputStream out =(new FileOutputStream(new java.io.File(F:\\\\ abc.jpg)));
out.write(resultImageResponse.bodyAsBytes());
out.close();
System.out.println(Captcha Fetched);

}



下载后Captcha

  public static void getData(String captacha)throws Exception {
Connection.Response response = Jsoup .connect(https://academics.ddn.upes.ac.in/upes/index.php)
.userAgent(Mozilla / 5.0)
.cookies(cookies)
.data(用户名,用户名)
.data(passwd,密码)
.data(txtCaptcha,captacha)
.data(提交,登录 )
.data(选项,登录)
.data(op2,login)
.data(lang,english)
。数据( 回归, https://academics.ddn.upes.ac.in/upes/index.php?option=com_content&task=view&id=53&Itemid=6420 )
.data(message,0)
.data(j1643f05a0c7fc7910424fb3fc4fbbb6f,1)
.timeout(0)
.method(Connection.Method) .POST)
.execute();
cookies = response.cookies();
System.out.println(response.cookies());
Document doc = response.parse();
FileWriter fr = new FileWriter(F:\\ responsese.html);
PrintWriter pw = new PrintWriter(fr);
pw.println(doc.toString());
pw.close();
fr.close();
}

response.cookies() 的输出为 {PHPSESSID=ai0r017bmb55gv0m4ikeu6jfc6, 61c78a27855d239ae8682ff6befaa989=5ae2e5baf548bc293c943d3416e7d400}



网站 https://academics.ddn.upes.ac.in/upes/index.php



请指出我的错误。

解决方案

要让代码正常工作,您需要做两处修改:



1 - 您需要获取第二次调用返回的cookie(图像下载)并将其添加到以前的cookie中。



2 - 字段"j1643f05a0c7fc7910424fb3fc4fbbb6f"看起来非常可疑:实际上该字段是变化的,您需要从表单中读取隐藏的input并使用它。



3(额外) - 这里并非这种情况,但如果不发送 Accept、Accept-Encoding、Accept-Language 等请求头,有些服务器会报错……

当我使用您的代码进行这些更改时,我得到:

 <script>alert('用户名或密码不正确。请再试一次。'); window.history.go(-1);</script>

当然,我没有用户名和密码;我想您用自己的账号就能得到想要的页面。



包含必要更改的代码是:

 公共类SO_28619161 {


公共地图饼干;
private String username =u;
private String password =p;

public HashMap< String,String> downloadCaptcha()throws Exception {
Connection.Response response = Jsoup.connect(https://academics.ddn.upes.ac.in/upes/)
.timeout(300000)
.userAgent(Mozilla / 5.0)
.method(Connection.Method.GET).execute();

//很好
cookies = response.cookies();

//现在我们将加载表单的输入
Document doc = response.parse();
元素字段= doc.select(表单输入);
HashMap< String,String> formFields = new HashMap< String,String>();
for(元素字段:字段){
formFields.put(field.attr(name),field.attr(value));
}

Connection.Response resultImageResponse = Jsoup.connect(https://academics.ddn.upes.ac.in/upes/modules/create_image.php)
.cookies(cookies)
.ignoreContentType(true)
.method(Connection.Method.GET).timeout(30000).execute();

//我们也需要这些饼干!
cookies.putAll(resultImageResponse.cookies());

FileOutputStream out =(new FileOutputStream(new java.io.File(abc.jpg)));
out.write(resultImageResponse.bodyAsBytes());
out.close();

System.out.println(Captcha Fetched);

返回formFields;
}

public void getData(HashMap< String,String> formFields)抛出异常{
连接conn = Jsoup.connect(https://academics.ddn.upes。 ac.in/upes/index.php)
.userAgent(Mozilla / 5.0(Windows NT 6.1; WOW64; rv:35.0)Gecko / 20100101 Firefox / 35.0)
//不是必要的但是不需要这些额外的标题不会损害
.header(接受,text / html,application / xhtml + xml,application / xml; q = 0.9,* / *; q = 0.8)
.header(Accept-Encoding,gzip,deflate)
.header(Accept-Language,es-ES,es; q = 0.8,en-US; q = 0.5,en; q = 0.3)
.header(host,academics.ddn.upes.ac.in)
.header(Referer,https://academics.ddn.upes.ac .in / upes / index.php)
.cookies(cookies)
.timeout(0)
.method(Connection.Method.POST);

//我们发送字段
conn.data(formFields);

响应响应= conn.execute();
cookies = response.cookies();
System.out.println(response.cookies());
Document doc = response.parse();
FileWriter fr = new FileWriter(response.html);
PrintWriter pw = new PrintWriter(fr);
pw.println(doc.toString());
System.out.println(doc.toString());
pw.close();
fr.close();
}

private void run()抛出异常,IOException {
HashMap< String,String> formFields = downloadCaptcha();

BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
String captcha = br.readLine();

//我们设置user / pass和captcha
formFields.put(username,username);
formFields.put(passwd,密码);
formFields.put(txtCaptcha,captcha);

getData(formFields);
}

public static void main(String [] args)抛出异常{
SO_28619161 main = new SO_28619161();
main.run();
}

}


I am trying to develop a small java-GUI based application by fetching captcha from my academics URL, asking user for his username, password and captcha, display the content after log in. However I am stuck at log in page itself as after submitting the form the response from web is

alert('Please enter correct code.'); window.history.go(-1);

Code

/** Session cookies shared between the captcha download and the login request. */
public Map<String, String> cookies;

/**
 * Opens the login page to start a session, then downloads the captcha image
 * using that session's cookies and saves it to {@code F:\abc.jpg}.
 *
 * @throws Exception if either HTTP request or the file write fails
 */
public void downloadCaptcha() throws Exception {
    Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/")
            .timeout(300000)
            .userAgent("Mozilla/5.0")
            .method(Connection.Method.GET)
            .execute();
    cookies = response.cookies();

    Connection.Response resultImageResponse = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/modules/create_image.php")
            .cookies(cookies)
            .ignoreContentType(true) // the body is an image, not HTML
            .method(Connection.Method.GET)
            .timeout(30000)
            .execute();

    // try-with-resources closes the file even when the write throws.
    try (FileOutputStream out = new FileOutputStream(new java.io.File("F:\\abc.jpg"))) {
        out.write(resultImageResponse.bodyAsBytes());
    }
    System.out.println("Captcha Fetched");
}

After downloading Captcha

/**
 * Posts the login form with the user's credentials and the captcha text,
 * prints the cookies returned by the server and saves the returned page to
 * {@code F:\response.html}.
 *
 * @param captacha the captcha text typed by the user
 * @throws Exception if the HTTP request, the parsing or the file write fails
 */
public static void getData(String captacha) throws Exception {
    Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/index.php")
            .userAgent("Mozilla/5.0")
            .cookies(cookies)
            .data("username", username)
            .data("passwd", password)
            .data("txtCaptcha", captacha)
            .data("submit", "Login")
            .data("option", "login")
            .data("op2", "login")
            .data("lang", "english")
            // Plain '&' separators: an HTML-escaped "&amp;" would be posted literally.
            .data("return", "https://academics.ddn.upes.ac.in/upes/index.php?option=com_content&task=view&id=53&Itemid=6420")
            .data("message", "0")
            .data("j1643f05a0c7fc7910424fb3fc4fbbb6f", "1")
            .timeout(0)
            .method(Connection.Method.POST)
            .execute();
    cookies = response.cookies();
    System.out.println(response.cookies());

    Document doc = response.parse();
    // Closing the PrintWriter also closes the wrapped FileWriter.
    try (PrintWriter pw = new PrintWriter(new FileWriter("F:\\response.html"))) {
        pw.println(doc.toString());
    }
}

response.cookies() gives output {PHPSESSID=ai0r017bmb55gv0m4ikeu6jfc6, 61c78a27855d239ae8682ff6befaa989=5ae2e5baf548bc293c943d3416e7d400}

The website is https://academics.ddn.upes.ac.in/upes/index.php

Please point out my mistakes.

解决方案

You need two changes for your code to work:

1 - You need to pick up the cookie returned by the second call (the download of the image) and add it to previous cookie.

2 - The field "j1643f05a0c7fc7910424fb3fc4fbbb6f" looks very suspicious; in fact that field is variable, so you need to read the hidden inputs from the form and send them.

3 (extra) - It is not the case but some servers complain if you don't send some headers, like Accept, Accept-Encoding, Accept-Language ...

When I use your code with those changes I get :

<script>alert('Incorrect username or password. Please try again.'); window.history.go(-1);</script> 

Of course I don't have a user/pass, I think you'll get the desired page.

The code with the necessary changes is:

/**
 * Console example that logs in to a captcha-protected form with jsoup:
 * it loads the login page, downloads the captcha image, reads the captcha
 * text from standard input and posts the form.
 */
public class SO_28619161 {

    /** Session cookies collected from the login page and the captcha request. */
    public Map<String, String> cookies;
    private String username = "u";
    private String password = "p";

    /**
     * Loads the login page, collects the form's input fields, then downloads
     * the captcha image to {@code abc.jpg} in the working directory.
     *
     * @return the name/value pairs of the inputs found in the login form
     * @throws Exception if an HTTP request, the parsing or the file write fails
     */
    public HashMap<String, String> downloadCaptcha() throws Exception {
        Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/")
                .timeout(300000)
                .userAgent("Mozilla/5.0")
                .method(Connection.Method.GET)
                .execute();

        // Copy so that later merges do not modify the response's own map.
        cookies = new HashMap<String, String>(response.cookies());

        // Read the form inputs; this picks up the hidden field whose name varies.
        Document doc = response.parse();
        Elements fields = doc.select("form input");
        HashMap<String, String> formFields = new HashMap<String, String>();
        for (Element field : fields) {
            String name = field.attr("name");
            if (name.isEmpty()) {
                // Inputs without a name are not submitted, and jsoup rejects empty keys.
                continue;
            }
            formFields.put(name, field.attr("value"));
        }

        Connection.Response resultImageResponse = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/modules/create_image.php")
                .cookies(cookies)
                .ignoreContentType(true) // the body is an image, not HTML
                .method(Connection.Method.GET)
                .timeout(30000)
                .execute();

        // The captcha request may set cookies of its own; the login needs them too.
        cookies.putAll(resultImageResponse.cookies());

        // try-with-resources closes the file even when the write throws.
        try (FileOutputStream out = new FileOutputStream(new java.io.File("abc.jpg"))) {
            out.write(resultImageResponse.bodyAsBytes());
        }

        System.out.println("Captcha Fetched");

        return formFields;
    }

    /**
     * Posts the login form, prints the returned cookies and page, and saves
     * the page to {@code response.html} in the working directory.
     *
     * @param formFields the form fields to send, including credentials and captcha
     * @throws Exception if the HTTP request, the parsing or the file write fails
     */
    public void getData(HashMap<String, String> formFields) throws Exception {
        Connection conn = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/index.php")
                .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0")
                // Not necessary here, but these extra headers do no harm.
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .header("Accept-Encoding", "gzip, deflate")
                .header("Accept-Language", "es-ES,es;q=0.8,en-US;q=0.5,en;q=0.3")
                .header("Host", "academics.ddn.upes.ac.in")
                .header("Referer", "https://academics.ddn.upes.ac.in/upes/index.php")
                .cookies(cookies)
                .timeout(0)
                .method(Connection.Method.POST);

        // Send the collected form fields.
        conn.data(formFields);

        Connection.Response response = conn.execute();
        // Merge instead of replace so existing session cookies are kept.
        cookies.putAll(response.cookies());
        System.out.println(response.cookies());

        Document doc = response.parse();
        String html = doc.toString();
        // Closing the PrintWriter also closes the wrapped FileWriter.
        try (PrintWriter pw = new PrintWriter(new FileWriter("response.html"))) {
            pw.println(html);
        }
        System.out.println(html);
    }

    /**
     * Runs the whole flow: download the captcha, read its text from standard
     * input, then submit the login form.
     *
     * @throws Exception if any step fails or no captcha line can be read
     */
    private void run() throws Exception {
        HashMap<String, String> formFields = downloadCaptcha();

        // System.in is deliberately left open.
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        String captcha = br.readLine();
        if (captcha == null) {
            throw new IOException("No captcha entered: standard input was closed");
        }

        // Set user/pass and captcha.
        formFields.put("username", username);
        formFields.put("passwd", password);
        formFields.put("txtCaptcha", captcha);

        getData(formFields);
    }

    public static void main(String[] args) throws Exception {
        SO_28619161 main = new SO_28619161();
        main.run();
    }

}

这篇关于使用jsoup获取Captcha图像的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆