一、使用 IDEA 创建 Maven 管理的 JavaWeb 项目

二、创建一个用来封装影片数据的JavaBean对象

/**
 * Mutable bean holding the data scraped for a single film entry.
 *
 * <p>All values are kept as raw strings, except the paid flag. The class offers
 * a no-arg constructor, an all-args constructor, and a getter/setter pair for
 * every field.
 */
public class FilmInfo {
    // Field names and their declaration order are left untouched on purpose:
    // reflection-based serializers derive their output from them.
    private String name;
    private String url;
    private String poster;
    private boolean isPaid;
    private String year;
    private String score;
    private String star;

    /** Creates an empty instance; populate it through the setters. */
    public FilmInfo() {
    }

    /**
     * Creates a fully populated instance.
     *
     * @param name   film title
     * @param url    link to the film page
     * @param poster poster image address
     * @param isPaid whether the film is marked as paid
     * @param year   year label
     * @param score  score label, may be {@code null}
     * @param star   cast text
     */
    public FilmInfo(String name, String url, String poster, boolean isPaid, String year, String score, String star) {
        this.name = name;
        this.url = url;
        this.poster = poster;
        this.isPaid = isPaid;
        this.year = year;
        this.score = score;
        this.star = star;
    }

    /** @return the film title */
    public String getName() {
        return this.name;
    }

    /** @param name the film title */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the link to the film page */
    public String getUrl() {
        return this.url;
    }

    /** @param url the link to the film page */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the poster image address */
    public String getPoster() {
        return this.poster;
    }

    /** @param poster the poster image address */
    public void setPoster(String poster) {
        this.poster = poster;
    }

    /** @return {@code true} when the film is marked as paid */
    public boolean isPaid() {
        return this.isPaid;
    }

    /** @param paid whether the film is marked as paid */
    public void setPaid(boolean paid) {
        this.isPaid = paid;
    }

    /** @return the year label */
    public String getYear() {
        return this.year;
    }

    /** @param year the year label */
    public void setYear(String year) {
        this.year = year;
    }

    /** @return the score label, possibly {@code null} */
    public String getScore() {
        return this.score;
    }

    /** @param score the score label */
    public void setScore(String score) {
        this.score = score;
    }

    /** @return the cast text */
    public String getStar() {
        return this.star;
    }

    /** @param star the cast text */
    public void setStar(String star) {
        this.star = star;
    }

    /**
     * Renders every field in declaration order. {@code name}, {@code url},
     * {@code poster} and {@code star} are wrapped in single quotes; the
     * remaining fields are printed bare.
     */
    @Override
    public String toString() {
        return "FilmInfo{"
                + "name='" + name + '\''
                + ", url='" + url + '\''
                + ", poster='" + poster + '\''
                + ", isPaid=" + isPaid
                + ", year=" + year
                + ", score=" + score
                + ", star='" + star + '\''
                + '}';
    }
}

三、使用 Gson 和 HttpUtils 依赖

1. Gson的maven依赖

<!-- Gson: the JSON library used by HttpUtils.getData and by the JSP page to serialize and parse the film list. -->
<dependency>
    <groupId>com.google.code.gson</groupId>
    <artifactId>gson</artifactId>
    <version>2.8.5</version>
</dependency>

2. HttpUtils是我手写的Http工具类,里面主要包含4个方法:获取页面源码的getHtmlCode方法、Get请求、Post请求,还有采集数据的getData方法

/**
 * Small HTTP helper built on {@link HttpURLConnection}, plus a scraper that
 * turns one film-list page into a JSON array of {@code FilmInfo} objects.
 *
 * <p>Every method is static. I/O failures are not propagated: the stack trace
 * is printed and whatever was read so far (possibly the empty string) is
 * returned.
 */
public class HttpUtils {
    /** Charset name used to decode responses and encode request bodies. */
    private static final String CHARSET = "UTF-8";

    /** Browser-like User-Agent header sent by {@link #Get} and {@link #Post}. */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36";

    /**
     * Matches one {@code <li class="item">} film entry. Capture groups used by
     * {@link #getData}: 1 = href, 3 = poster src, 5 = text of the optional
     * "pay" span, 6 = text of the "hint" span, 8 = text of the optional
     * "point" span, 9 = title, 10 = star text.
     * Compiled once here instead of on every call.
     */
    private static final Pattern FILM_ITEM = Pattern.compile(
            "<li class=\"item\">\\s*"
            + "<a class=\"js-tongjic\" href=\"(.*?)\" monitor-shortpv-c=\"(.*?)\">\\s*"
            + "<div class=\"cover g-playicon\">\\s*"
            + "<img src=\"(.*?)\">\\s*"
            + "(<span class=\"pay\">(.*?)</span>)?\\s*"
            + "<div class=\"mask-wrap\">\\s*"
            + "<span class=\"hint\">(.*?)</span>\\s*"
            + "(<span class=\"point\">(.*?)</span>)?\\s*"
            + "</div>\\s*</div>\\s*"
            + "<div class=\"detail\">\\s*"
            + "<p class=\"title g-clear\">\\s*"
            + "<span class=\"s1\">(.*?)</span>\\s*</p>\\s*"
            + "<p class=\"star\">(.*?)</p>\\s*"
            + "</div>\\s*</a>\\s*</li>");

    /**
     * Fetches {@code url} with the connection's default settings (no explicit
     * method, headers or timeouts are configured) and returns the body.
     *
     * @param url address to fetch
     * @return the response body with each line terminated by {@code "\n"};
     *         empty or partial if an {@link IOException} occurred
     */
    public static String getHtmlCode(String url) {
        StringBuilder sb = new StringBuilder();
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(url).openConnection();
            try (InputStream is = connection.getInputStream()) {
                appendLines(is, sb);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return sb.toString();
    }

    /**
     * Sends a GET request with browser-like headers.
     *
     * @param url address to fetch
     * @return the response body (lines terminated by {@code "\n"}) when the
     *         status is 200; otherwise the text
     *         {@code "ResponseCode is Error:" + status}; empty or partial if
     *         an {@link IOException} occurred
     */
    public static String Get(String url) {
        StringBuilder sb = new StringBuilder();
        HttpURLConnection conn = null;
        try {
            // Obtain the URL connection and cast it to an HTTP connection.
            conn = (HttpURLConnection) new URL(url).openConnection();
            // Use the GET method.
            conn.setRequestMethod("GET");
            // Connect timeout: 3 seconds.
            conn.setConnectTimeout(3000);
            // Read timeout: 5 seconds.
            conn.setReadTimeout(5000);
            conn.setRequestProperty("Content-Type", "text/html;charset=utf-8");
            conn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml,application/json;");
            conn.setRequestProperty("User-Agent", USER_AGENT);
            // Send the request.
            conn.connect();
            int status = conn.getResponseCode();
            if (status != 200) {
                return "ResponseCode is Error:" + status;
            }
            try (InputStream is = conn.getInputStream()) {
                appendLines(is, sb);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return sb.toString();
    }

    /**
     * Sends a form-encoded POST request. The URL and the request content are
     * printed to standard output before the request is made.
     *
     * @param url            address to post to
     * @param requestContent request body, written as UTF-8 bytes
     * @return the response body (lines terminated by {@code "\n"}) when the
     *         status is 200; otherwise the text
     *         {@code "ResponseCode is Error:" + status}; empty or partial if
     *         an {@link IOException} occurred
     */
    public static String Post(String url, String requestContent) {
        System.out.println("请求的URL:"+url);
        System.out.println("请求的参数:"+requestContent);
        StringBuilder sb = new StringBuilder();
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(url).openConnection();
            conn.setRequestProperty("Connection","keep-alive");
            conn.setRequestMethod("POST");
            conn.setDoInput(true);
            conn.setDoOutput(true);
            conn.setConnectTimeout(3000);
            conn.setReadTimeout(15000);
            conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded;charset=utf-8");
            conn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml,application/json,application/x-www-form-urlencoded");
            conn.setRequestProperty("User-Agent", USER_AGENT);
            // Write the body and close the stream before reading the response.
            try (OutputStream os = conn.getOutputStream()) {
                os.write(requestContent.getBytes(CHARSET));
                os.flush();
            }
            int status = conn.getResponseCode();
            if (status != 200) {
                return "ResponseCode is Error:" + status;
            }
            try (InputStream is = conn.getInputStream()) {
                appendLines(is, sb);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return sb.toString();
    }

    /**
     * Downloads {@code url} via {@link #Get}, extracts every film entry that
     * matches the list-item pattern and returns them as pretty-printed JSON.
     *
     * <p>If the download fails or nothing matches (including the case where
     * {@code Get} returns its error text), the result is an empty JSON array.
     *
     * @param url address of the film list page
     * @return JSON array of {@code FilmInfo} objects
     */
    public static String getData(String url){
        String sourceCode = Get(url);
        Matcher m = FILM_ITEM.matcher(sourceCode);
        Gson gson = new GsonBuilder().setPrettyPrinting().create();
        ArrayList<FilmInfo> list = new ArrayList<FilmInfo>();
        while (m.find()) {
            // Constructor order: name, url, poster, isPaid, year, score, star.
            FilmInfo fi = new FilmInfo(m.group(9),"http://www.360kan.com/"+m.group(1),m.group(3),"付费".equals(m.group(5)),m.group(6),m.group(8),m.group(10));
            list.add(fi);
        }
        return gson.toJson(list);
    }

    /**
     * Reads {@code in} line by line as UTF-8 and appends each line followed
     * by {@code '\n'} to {@code out}. Lines appended before a failure stay in
     * {@code out}. The reader (and therefore {@code in}) is closed on exit.
     */
    private static void appendLines(InputStream in, StringBuilder out) throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, CHARSET))) {
            String line;
            while ((line = reader.readLine()) != null) {
                out.append(line).append('\n');
            }
        }
    }
}

Java实现原生的get和post请求,我在之前的博客中发过了,不懂的朋友可以回看我之前的博客,我就不做解释了,这里解释一下getData方法,这个方法主要是使用get发送请求获取指定页面源码,再通过正则表达式拿到我们所需要的数据,使用gson封装为Json数据返回,没有什么技术含量
注意:为了取到详细信息,此处的正则表达式比较长。建议把这种较长的正则表达式放到配置文件中读取,避免代码中语句过长、问题排查困难;我这里只是写博客做记录,所以直接放在了变量里

四、随便写个前端页面

1. JSP部分

<%-- Film list page: calls HttpUtils.getData for one list URL, parses the returned JSON back into FilmInfo objects and renders one <li> card per film. --%>
<%@ page import="com.zhiyuan.utils.HttpUtils" %>
<%@ page import="com.google.gson.JsonArray" %>
<%@ page import="com.google.gson.JsonParser" %>
<%@ page import="com.zhiyuan.bean.FilmInfo" %>
<%@ page import="java.util.ArrayList" %>
<%@ page import="com.google.gson.Gson" %>
<%@ page import="com.google.gson.JsonElement" %>
<%@ page contentType="text/html;charset=UTF-8" language="java" pageEncoding="UTF-8" %>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>360Video</title>
    <link rel="icon" href="https://www.360kan.com/favicon.ico">
    <link rel="stylesheet" href="./css/style.css?v=<%=Math.random()%>">
    <script src="./js/jquery-3.1.1.js"></script>
</head>
<body>
<div class="header">
    <div class="nav">360影视采集</div>
</div>
<div class="container">
    <ul class="list">
    <%
        // Scrape the hard-coded list page (pageno=2); the result is a JSON array string.
        String data = HttpUtils.getData("https://www.360kan.com/dianying/list.php?rank=rankhot&cat=all&area=all&act=all&year=all&pageno=2&from=dianying_list");
        // Parse the JSON text and convert every element back into a FilmInfo bean.
        JsonArray jsonElements = new JsonParser().parse(data).getAsJsonArray();
        ArrayList<FilmInfo> filmInfos = new ArrayList<>();
        Gson gson = new Gson();
        for (JsonElement filmInfo : jsonElements) {
            filmInfos.add(gson.fromJson(filmInfo,FilmInfo.class));
        }
        // Emit one card per film; the loop body is the markup below and closes in the next scriptlet.
        // NOTE(review): the scraped values are written into the page without escaping - confirm the source is trusted.
        for (FilmInfo filmInfo : filmInfos) {
    %>
        <li>
            <div class="poster">
                <img src="<%=filmInfo.getPoster()%>">
                <div class="paid"><%=filmInfo.isPaid()?"付费":"免费"%></div>
                <div class="info">
                    <div class="year"><%=filmInfo.getYear()%></div>
                    <div class="score"><%=filmInfo.getScore()!=null?filmInfo.getScore():""%></div>
                </div>
            </div>
            <div class="detail">
                <div class="name"><%=filmInfo.getName()%></div>
                <div class="star"><%=filmInfo.getStar().length()>12?filmInfo.getStar().substring(0,12)+"..." : filmInfo.getStar()%></div>
            </div>
        </li>
    <%
        // End of the per-film loop.
        }
    %>
    </ul>
</div>
</body>
</html>

2. css部分

/* ---------- Reset ---------- */
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}

body,
html {
    background-color: #f2f2f2;
}

a {
    text-decoration: none;
}

li {
    list-style: none;
}

/* ---------- Page header bar ---------- */
.header {
    width: 100%;
    height: 50px;
    line-height: 50px;
    text-align: center;
    color: #228b22;
    font-weight: bold;
    background-color: #fff;
    box-shadow: rgba(0, 0, 0, 0.2) 1px 1px 8px 1px;
}

/* ---------- Fixed-width centered content area ---------- */
.container {
    width: 1190px;
    background-color: #fff;
    margin: 20px auto;
}

/* ---------- Film grid: each card is a vertical flex column ---------- */
.list > li {
    display: flex;
    flex-direction: column;
    align-items: center;
    width: 155px;
    height: 250px;
    margin-bottom: 15px;
}

/* The list itself wraps the cards into rows */
.list {
    display: flex;
    flex-direction: row;
    flex-wrap: wrap;
    justify-content: space-between;
}

/* ---------- Poster area (positioning context for the overlays) ---------- */
li > .poster {
    width: 155px;
    height: 212px;
    border-radius: 5px;
    overflow: hidden;
    position: relative;
}

.poster > img {
    width: 100%;
    height: 100%;
}

/* Bottom overlay strip holding year and score */
.poster .info {
    position: absolute;
    width: 155px;
    height: 38px;
    line-height: 38px;
    font-size: 12px;
    color: #000;
    bottom: 0;
    background-color: rgba(0, 0, 0, .12);
    display: flex;
    flex-direction: row;
    justify-content: space-between;
    padding: 0 10px;
}

/* Paid/free badge in the top-left corner of the poster */
.poster .paid {
    position: absolute;
    left: 5px;
    top: 5px;
    width: 40px;
    height: 20px;
    line-height: 20px;
    font-size: 12px;
    color: #fff;
    background-color: goldenrod;
    text-align: center;
    border-radius: 5px;
}

.info .score {
    color: goldenrod;
}

.info .year {
    color: #fff;
}

/* ---------- Text block below the poster: title and cast ---------- */
li > .detail {
    width: 155px;
    height: 38px;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
    cursor: pointer;
}

.detail > .name {
    width: 155px;
    height: 14px;
    line-height: 14px;
    font-size: 14px;
    color: #000;
    padding-left: 5px;
}

.detail > .star {
    width: 155px;
    height: 12px;
    line-height: 12px;
    font-size: 12px;
    color: #666666;
    margin-top: 5px;
    padding-left: 5px;
}

五、运行起来看一下效果

这里我只是采集了1页数据,当然采集其他的数据不过是重复这个操作,没有什么技术含量,就不说了

原文作者:絷缘
作者邮箱:zhiyuanworkemail@163.com
原文地址:https://zhiyuandnc.github.io/360Video/
版权声明:本文为博主原创文章,转载请注明原文链接作者信息