level 8
c4dr01d
楼主
1.获取与网站服务器DNS相同的所有域名的爬虫
源码如下:
import java.io.*;
import java.net.*;
import java.util.regex.*;
import java.util.*;
/**
 * Downloads a single page and records every {@code http://} URL found in its text.
 *
 * <p>The page address is read from standard input. Each regex match is written on its own
 * line to the file named by {@code OUTPUT_FILE}; the file is overwritten on every run.
 */
public class Worm
{
    /** File that receives the extracted URLs, one per line. */
    private static final String OUTPUT_FILE = "/storage/emulated/0/AppProjects/package.txt";

    /**
     * Matches {@code http://}, then a run of word characters, '+', '.', '?' or '/', ending in
     * a dot followed by one or more ASCII letters. Inside the character class '+' and '?' are
     * literal characters, not quantifiers.
     */
    private static final Pattern URL_PATTERN = Pattern.compile("http://[\\w+\\.?/?]+\\.[A-Za-z]+");

    /**
     * Prompts on standard output and reads one whitespace-delimited token from standard input.
     *
     * @return the token typed by the user, used by {@link #main} as the page address
     */
    static String getUrl()
    {
        // The Scanner is deliberately left open: closing it would also close System.in.
        Scanner input = new Scanner(System.in);
        System.out.print("URL:");
        return input.next();
    }

    /**
     * Fetches the page at the address returned by {@link #getUrl()} and writes every match of
     * the URL pattern to the output file. Failures are reported with a stack trace on standard
     * error; the method itself never throws.
     *
     * @param args ignored
     */
    public static void main(String args[])
    {
        BufferedReader br = null;
        PrintWriter pw = null;
        System.out.println("Start catch website at same DNS server");
        try
        {
            URL url = new URL(getUrl());
            URLConnection urlconn = url.openConnection();
            // autoflush=true pushes each println straight through to the file
            pw = new PrintWriter(new FileWriter(OUTPUT_FILE), true);
            br = new BufferedReader(new InputStreamReader(urlconn.getInputStream()));
            String buf;
            while ((buf = br.readLine()) != null)
            {
                Matcher matcher = URL_PATTERN.matcher(buf);
                while (matcher.find())
                {
                    pw.println(matcher.group());
                }
            }
            System.out.println("Get Successful");
        }
        catch (MalformedURLException e)
        {
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        finally
        {
            // Either resource is still null when the failure happened before it was opened;
            // closing unconditionally would replace the handled error with a NullPointerException.
            if (br != null)
            {
                try
                {
                    br.close();
                }
                catch (IOException e)
                {
                    e.printStackTrace();
                }
            }
            if (pw != null)
            {
                pw.close();
            }
        }
    }
}
2.获取网页源码
源码如下:
import java.io.*;
import java.net.*;
import java.util.*;
/**
 * Prints the body of a web page to standard output.
 *
 * <p>The page address is read from standard input. The body is decoded as UTF-8 and echoed
 * line by line when the server answers with HTTP status 200; any other status, or any
 * exception, is reported as a single message on standard output.
 */
public class Sources
{
    /**
     * Prompts on standard output and reads one whitespace-delimited token from standard input.
     *
     * @return the token typed by the user, used by {@link #main} as the page address
     */
    static String getUrl()
    {
        // The Scanner is deliberately left open: closing it would also close System.in.
        Scanner input = new Scanner(System.in);
        System.out.print("URL:");
        return input.next();
    }

    /**
     * Requests the address returned by {@link #getUrl()} and prints the response body.
     *
     * @param args ignored
     * @throws Exception kept in the signature for compatibility; every exception raised
     *         while fetching or printing is caught and reported inside the method
     */
    public static void main(String[] args) throws Exception
    {
        HttpURLConnection urlConnection = null;
        try
        {
            URL url = new URL(getUrl());
            urlConnection = (HttpURLConnection) url.openConnection();
            int responsecode = urlConnection.getResponseCode();
            if (responsecode == 200)
            {
                BufferedReader reader = new BufferedReader(new InputStreamReader(urlConnection.getInputStream(),"UTF-8"));
                try
                {
                    String line;
                    while ((line = reader.readLine()) != null)
                    {
                        System.out.println(line);
                    }
                }
                finally
                {
                    // Close the response stream even when reading or printing fails;
                    // a failure while closing is reported by the catch block below.
                    reader.close();
                }
            }
            else
            {
                System.out.println("Cannot get website source,The server return code " + responsecode);
            }
        }
        catch (Exception e)
        {
            System.out.println("Cannot get website source,Server throw a exception" + e);
        }
        finally
        {
            // Still null when the address was malformed or the connection could not be created.
            if (urlConnection != null)
            {
                urlConnection.disconnect();
            }
        }
    }
}
上述两个源码,经本人实验,
aide下编译通过
jdk,jre编译通过
eclipse编译通过
2017年03月10日 08点03分
1
源码如下:
import java.io.*;
import java.net.*;
import java.util.regex.*;
import java.util.*;
/**
 * Downloads a single page and records every {@code http://} URL found in its text.
 *
 * <p>The page address is read from standard input. Each regex match is written on its own
 * line to the file named by {@code OUTPUT_FILE}; the file is overwritten on every run.
 */
public class Worm
{
    /** File that receives the extracted URLs, one per line. */
    private static final String OUTPUT_FILE = "/storage/emulated/0/AppProjects/package.txt";

    /**
     * Matches {@code http://}, then a run of word characters, '+', '.', '?' or '/', ending in
     * a dot followed by one or more ASCII letters. Inside the character class '+' and '?' are
     * literal characters, not quantifiers.
     */
    private static final Pattern URL_PATTERN = Pattern.compile("http://[\\w+\\.?/?]+\\.[A-Za-z]+");

    /**
     * Prompts on standard output and reads one whitespace-delimited token from standard input.
     *
     * @return the token typed by the user, used by {@link #main} as the page address
     */
    static String getUrl()
    {
        // The Scanner is deliberately left open: closing it would also close System.in.
        Scanner input = new Scanner(System.in);
        System.out.print("URL:");
        return input.next();
    }

    /**
     * Fetches the page at the address returned by {@link #getUrl()} and writes every match of
     * the URL pattern to the output file. Failures are reported with a stack trace on standard
     * error; the method itself never throws.
     *
     * @param args ignored
     */
    public static void main(String args[])
    {
        BufferedReader br = null;
        PrintWriter pw = null;
        System.out.println("Start catch website at same DNS server");
        try
        {
            URL url = new URL(getUrl());
            URLConnection urlconn = url.openConnection();
            // autoflush=true pushes each println straight through to the file
            pw = new PrintWriter(new FileWriter(OUTPUT_FILE), true);
            br = new BufferedReader(new InputStreamReader(urlconn.getInputStream()));
            String buf;
            while ((buf = br.readLine()) != null)
            {
                Matcher matcher = URL_PATTERN.matcher(buf);
                while (matcher.find())
                {
                    pw.println(matcher.group());
                }
            }
            System.out.println("Get Successful");
        }
        catch (MalformedURLException e)
        {
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        finally
        {
            // Either resource is still null when the failure happened before it was opened;
            // closing unconditionally would replace the handled error with a NullPointerException.
            if (br != null)
            {
                try
                {
                    br.close();
                }
                catch (IOException e)
                {
                    e.printStackTrace();
                }
            }
            if (pw != null)
            {
                pw.close();
            }
        }
    }
}
2.获取网页源码
源码如下:
import java.io.*;
import java.net.*;
import java.util.*;
/**
 * Prints the body of a web page to standard output.
 *
 * <p>The page address is read from standard input. The body is decoded as UTF-8 and echoed
 * line by line when the server answers with HTTP status 200; any other status, or any
 * exception, is reported as a single message on standard output.
 */
public class Sources
{
    /**
     * Prompts on standard output and reads one whitespace-delimited token from standard input.
     *
     * @return the token typed by the user, used by {@link #main} as the page address
     */
    static String getUrl()
    {
        // The Scanner is deliberately left open: closing it would also close System.in.
        Scanner input = new Scanner(System.in);
        System.out.print("URL:");
        return input.next();
    }

    /**
     * Requests the address returned by {@link #getUrl()} and prints the response body.
     *
     * @param args ignored
     * @throws Exception kept in the signature for compatibility; every exception raised
     *         while fetching or printing is caught and reported inside the method
     */
    public static void main(String[] args) throws Exception
    {
        HttpURLConnection urlConnection = null;
        try
        {
            URL url = new URL(getUrl());
            urlConnection = (HttpURLConnection) url.openConnection();
            int responsecode = urlConnection.getResponseCode();
            if (responsecode == 200)
            {
                BufferedReader reader = new BufferedReader(new InputStreamReader(urlConnection.getInputStream(),"UTF-8"));
                try
                {
                    String line;
                    while ((line = reader.readLine()) != null)
                    {
                        System.out.println(line);
                    }
                }
                finally
                {
                    // Close the response stream even when reading or printing fails;
                    // a failure while closing is reported by the catch block below.
                    reader.close();
                }
            }
            else
            {
                System.out.println("Cannot get website source,The server return code " + responsecode);
            }
        }
        catch (Exception e)
        {
            System.out.println("Cannot get website source,Server throw a exception" + e);
        }
        finally
        {
            // Still null when the address was malformed or the connection could not be created.
            if (urlConnection != null)
            {
                urlConnection.disconnect();
            }
        }
    }
}
上述两个源码,经本人实验,
aide下编译通过
jdk,jre编译通过
eclipse编译通过