书签导入功能

This commit is contained in:
WangHao
2020-08-27 21:51:33 +08:00
parent 5e67a19e04
commit 96672852ae
9 changed files with 1407 additions and 676 deletions

View File

@ -125,6 +125,18 @@
<version>2.1.5</version>
</dependency>
<!--jsoup解析html-->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
<!--常用工具包-->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.5</version>
</dependency>
</dependencies>

View File

@ -0,0 +1,38 @@
package com.ruoyi.common.utils.BookmarkHtml;
import org.springframework.stereotype.Component;
/**
 * Holder for shared string/int settings used by the bookmark-import utilities.
 * All fields are public, static and non-final, so any code may reassign them.
 *
 * @author Wang
 * Created: 2020/08/22 23:03
 */
@Component
public class Const {
// Base path prefix. Nothing in this class assigns it (the setter at the bottom
// is commented out), so it stays null unless other code sets it.
public static String BASE_PATH;
// Presumably the session attribute name for the logged-in user - confirm against callers.
public static String LOGIN_SESSION_KEY = "Favorites_user";
// NOTE(review): hard-coded key material committed to source; its use is not
// visible in this file. Consider loading it from configuration instead.
public static String PASSWORD_KEY = "@#$%^&*()OPG#$%^&*(HG";
// NOTE(review): same concern as PASSWORD_KEY - presumably a 3DES key, confirm against callers.
public static String DES3_KEY = "9964DYByKL967c3308imytCB";
// Relative path of the default logo image.
public static String default_logo="img/logo.jpg";
// Short user-agent token; ImportHtml.getCollectFromUrl passes it to Jsoup's userAgent(...).
public static String userAgent="Mozilla";
// NOTE(review): evaluated once during class initialization, when BASE_PATH is
// still null, so the value is the literal "null/img/logo.jpg". It is not
// recomputed if BASE_PATH is assigned later.
public static String default_Profile=BASE_PATH+"/img/logo.jpg";
// Key name for the last referer; where it is stored is not visible here.
public static String LAST_REFERER = "LAST_REFERER";
// 30*24*60*60 = 2,592,000; presumably seconds, i.e. 30 days - confirm against the cookie code.
public static int COOKIE_TIMEOUT= 30*24*60*60;
// Disabled setter that would have injected BASE_PATH from the
// "favorites.base.path" property; kept for reference.
// @Autowired(required = true)
// public void setBasePath(@Value("${favorites.base.path}")String basePath) {
// Const.BASE_PATH = basePath;
// }
}

View File

@ -0,0 +1,363 @@
package com.ruoyi.common.utils.BookmarkHtml;
import com.ruoyi.common.utils.StringUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
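/**
 * Static helpers for importing bookmarks: parsing bookmark HTML with jsoup
 * and fetching basic metadata (title, charset, description) for a URL.
 *
 * @author Wang
 * Created: 2020/08/22 22:56
 */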
public class ImportHtml {
public static Logger logger = LoggerFactory.getLogger(ImportHtml.class);
/**
* @param url
* @return
*/
// public static String getImge(String url){
// String logo="";
// logo=getPageImg(url);
// if(StringUtils.isBlank(logo) || logo.length()>300){
// logo=Const.BASE_PATH + Const.default_logo;
// }
// return logo;
// }
/**
* @param url
* @return
*/
// public static String getPageImg(String url){
// String imgUrl="";
// Document doc;
// try {
// doc = Jsoup.connect(url).userAgent(Const.userAgent).get();
// Elements images = doc.select("img[src~=(?i)\\.(png|jpe?g|gif)]");
// for(Element image : images){
// imgUrl=image.attr("src");
// if(StringUtils.isNotBlank(imgUrl) ){
// if(imgUrl.startsWith("//")){
// imgUrl = "http:" + imgUrl;
// }else if(!imgUrl.startsWith("http") && !imgUrl.startsWith("/")){
// imgUrl=URLUtil.getDomainUrl(url) + "/" + imgUrl;
// }else if(!imgUrl.startsWith("http")){
// imgUrl=URLUtil.getDomainUrl(url)+imgUrl;
// }
// }
// // 判断图片大小
// String fileUrl = download(imgUrl);
// if(fileUrl!=null){
// File picture = new File(fileUrl);
// FileInputStream in = new FileInputStream(picture);
// BufferedImage sourceImg = ImageIO.read(in);
// String weight = String.format("%.1f",picture.length()/1024.0);
// int width = sourceImg.getWidth();
// int height = sourceImg.getHeight();
// // 删除临时文件
// if(picture.exists()){
// in.close();
// picture.delete();
// }
// if(Double.parseDouble(weight) <= 0 || width <=0 || height <= 0){
// logger.info("当前图片大小为0继续获取图片链接");
// imgUrl="";
// }else{
// break;
// }
// }
// }
// } catch (Exception e) {
//
// logger.warn("getPageImg 失败,url:"+url,e.getMessage());
// }
// return imgUrl;
// }
/**
 * Fetches the page at {@code url} and collects basic metadata about it.
 *
 * <p>Keys written to the result: {@code "url"} (echoing the argument),
 * {@code "title"} (when the page title is not blank), {@code "charset"} (when a
 * charset could be determined) and {@code "description"} (content of each
 * {@code meta} element in the head whose name is "description"; the last one wins).
 *
 * <p>Any failure (invalid URL, network or parse error) is logged and swallowed;
 * the entries gathered up to that point are returned.
 *
 * @param url address of the page to fetch
 * @return a mutable map holding the metadata described above, never {@code null}
 */
public static Map<String, String> getCollectFromUrl(String url){
    Map<String, String> result = new HashMap<String, String>();
    try {
        result.put("url", url);
        Connection connection = Jsoup.connect(url).userAgent(Const.userAgent);
        connection.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        connection.header("Accept-Encoding", "gzip, deflate, sdch");
        connection.header("Accept-Language", "zh-CN,zh;q=0.8");
        // Setting the header explicitly replaces the short user agent configured above.
        connection.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
        Document doc = connection.get();

        String title = doc.title();
        if (StringUtils.isNotBlank(title)) {
            result.put("title", title);
        }

        String charset = doc.charset().name();
        if (StringUtils.isBlank(charset)) {
            // Fall back to the charset declared in a Content-Type meta tag. The markup
            // is passed through unchanged (it used to be upper-cased, which could never
            // match a lower-case "charset=" pattern) and the first hit is kept.
            for (Element meta : doc.select("meta[http-equiv=Content-Type]")) {
                String declared = matchCharset(meta.outerHtml());
                if (StringUtils.isNotBlank(declared)) {
                    charset = declared;
                    break;
                }
            }
        }
        // Was inverted: the charset used to be stored only when it was blank.
        if (StringUtils.isNotBlank(charset)) {
            result.put("charset", charset);
        }

        Elements metas = doc.head().select("meta");
        for (Element meta : metas) {
            if ("description".equalsIgnoreCase(meta.attr("name"))) {
                result.put("description", meta.attr("content"));
            }
        }
        //result.put("logoUrl", getImge(url));
    } catch (Exception e) {
        // Best-effort lookup: report the problem and return what was collected so far.
        logger.error("文章解析出错:",e);
    }
    return result;
}
//
/**
 * Flat import: collects every link of the document into a single map.
 *
 * <p>The stream is parsed as UTF-8 HTML. Each {@code a} element whose
 * {@code href} starts with "http" contributes one entry, href -> link text;
 * a repeated href keeps the text of its last occurrence. Parse failures are
 * logged and whatever was collected so far is returned.
 *
 * @param in stream containing the bookmark HTML
 * @return a mutable map from URL to link text, never {@code null}
 */
public static Map<String, String> parseHtmlOne(InputStream in){
    Map<String, String> links = new HashMap<String, String>();
    try {
        Elements anchors = Jsoup.parse(in, "UTF-8", "").select("a");
        Iterator<Element> it = anchors.iterator();
        while (it.hasNext()) {
            Element anchor = it.next();
            String href = anchor.attr("href");
            if (!href.startsWith("http")) {
                // Skip non-http(s) links.
                continue;
            }
            links.put(href, anchor.text());
        }
    } catch (Exception e) {
        logger.error("解析html 文件异常:",e);
    }
    return links;
}
/**
 * Two-level import: folder name -> (URL -> link text).
 *
 * <p>Every {@code dt} element of the document is inspected. Its {@code h3}
 * child supplies the folder name, and each {@code dl} child supplies the links
 * found as the first child of that list's {@code dt} entries. Only hrefs
 * starting with "http" are kept. Folders without a name or without links are
 * skipped. Folders sharing a name are merged instead of the later one
 * replacing the earlier one.
 *
 * <p>Parse failures are logged and the folders collected so far are returned.
 *
 * @param HTML stream containing the bookmark HTML, read as UTF-8
 * @return a mutable map from folder name to its links, never {@code null}
 */
public static Map<String, Map<String, String>> parseHtml(InputStream HTML){
    Map<String, Map<String, String>> resultMap = new HashMap<String, Map<String, String>>();
    try {
        Document doc = Jsoup.parse(HTML, "UTF-8", "");
        for (Element folder : doc.select("dt")) {
            String favoritesName = "";
            Map<String, String> links = new HashMap<String, String>();
            for (Element child : folder.children()) {
                String tag = child.nodeName();
                if ("h3".equalsIgnoreCase(tag)) {
                    favoritesName = child.text();
                } else if ("dl".equalsIgnoreCase(tag)) {
                    collectDirectLinks(child, links);
                }
            }
            if (StringUtils.isNotBlank(favoritesName) && !links.isEmpty()) {
                Map<String, String> existing = resultMap.get(favoritesName);
                if (existing == null) {
                    resultMap.put(favoritesName, links);
                } else {
                    // Same folder name seen before: keep both sets of links.
                    existing.putAll(links);
                }
            }
        }
    } catch (Exception e) {
        logger.error("解析html文件异常",e);
    }
    return resultMap;
}

/**
 * Adds the http(s) links that sit directly inside the {@code dt} entries of {@code dl}.
 * Entries with no child element, or whose first child is not an anchor, are ignored
 * (previously an empty entry raised an exception that aborted the whole import).
 */
private static void collectDirectLinks(Element dl, Map<String, String> links){
    for (Element entry : dl.children()) {
        if (!"dt".equalsIgnoreCase(entry.nodeName())) {
            continue;
        }
        Element first = entry.children().first();
        if (first == null || !"a".equalsIgnoreCase(first.nodeName())) {
            continue;
        }
        String url = first.attr("href");
        if (url.startsWith("http")) {
            links.put(url, first.text());
        }
    }
}
/**
 * Structured import (work in progress, see the original TODO): walks every
 * top-level {@code dl} list under {@code body} and lets
 * {@link #parseElements(Element, Map)} accumulate the folder structure.
 *
 * <p>Parse failures are logged and whatever was accumulated so far is returned.
 *
 * @param in stream containing the bookmark HTML, read as UTF-8
 * @return the map built by {@code parseElements}, never {@code null}
 */
public static Map<String, List<Map>> importHtmlMore(InputStream in){
    Map<String, List<Map>> resultMap = new HashMap<String, List<Map>>();
    try {
        Document doc = Jsoup.parse(in, "UTF-8", "");
        for (Element section : doc.child(0).children()) {
            if (!"body".equalsIgnoreCase(section.nodeName())) {
                continue;
            }
            for (Element dl : section.children()) {
                if ("dl".equalsIgnoreCase(dl.nodeName())) {
                    resultMap = parseElements(dl,resultMap);
                    // Diagnostic dump goes to the logger instead of stdout.
                    logger.debug("resultMap:{}", resultMap);
                }
            }
        }
    } catch (Exception e) {
        logger.error("解析html文件异常",e);
    }
    return resultMap;
}
/**
 * Recursively processes one {@code dl} list of a bookmark document.
 *
 * <p>For each {@code dt} entry of {@code element}: anchors whose href starts
 * with "http" are gathered into a URL -> text map, and nested {@code dl} lists
 * are processed recursively. If at least one such anchor was found, the
 * gathered links are filed under the text of the first child of
 * {@code element}'s parent. That single-entry map is then appended to the list
 * stored in {@code resultMap} under the text of the first child of
 * {@code element}'s great-grandparent, but only when that child is an
 * {@code h3}.
 *
 * @param element   the {@code dl} element to process
 * @param resultMap accumulator, modified in place
 * @return the same {@code resultMap} instance
 */
public static Map<String, List<Map>> parseElements(Element element,Map<String, List<Map>> resultMap){
    Map<String, Map> favoritesMap = new HashMap<String, Map>();
    Map<String, String> urlMap = new HashMap<String, String>();
    String favoritesName = "";

    // The folder title lives in the first child of the list's parent. For every
    // anchor reached below, a.parent().parent().parent() is exactly element.parent(),
    // so the lookup is done once here instead of once per link.
    Element holder = element.parent();
    String folderTitle = (holder == null) ? "" : holder.child(0).text();

    for (Element dt : element.children()) {
        if (!"dt".equalsIgnoreCase(dt.nodeName())) {
            continue;
        }
        for (Element child : dt.children()) {
            if ("a".equalsIgnoreCase(child.nodeName())) {
                String url = child.attr("href");
                if (url.startsWith("http")) {
                    urlMap.put(url, child.text());
                    favoritesName = folderTitle;
                }
            } else if ("dl".equalsIgnoreCase(child.nodeName())) {
                resultMap = parseElements(child,resultMap);
            }
        }
    }

    if (StringUtils.isNotBlank(favoritesName)) {
        // Trace output: parameterized and at debug level. The previous call logged
        // at ERROR and dropped the name because the message had no placeholder.
        logger.debug("目录:{}", favoritesName);
        favoritesMap.put(favoritesName, urlMap);
    }

    // Walk three levels up without assuming every ancestor exists.
    Element owner = element;
    for (int level = 0; level < 3 && owner != null; level++) {
        owner = owner.parent();
    }
    Element heading = (owner == null) ? null : owner.children().first();
    if (heading != null && "h3".equalsIgnoreCase(heading.nodeName())) {
        String name = heading.text();
        List<Map> mapList = resultMap.get(name);
        if (mapList == null) {
            mapList = new ArrayList<Map>();
            resultMap.put(name, mapList);
        }
        mapList.add(favoritesMap);
    }
    return resultMap;
}
// public static StringBuilder exportHtml(String title,StringBuilder body){
// StringBuilder sb = new StringBuilder();
// sb.append("<HTML>");
// sb.append("<HEAD>");
// sb.append("<TITLE>"+title+"</TITLE>");
// sb.append("<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=utf-8\" />");
// sb.append("</HEAD>");
// sb.append("<BODY><H1>"+title+"</H1>");
// sb.append(body);
// sb.append("</BODY>");
//
// return sb;
// }
//
/**
 * Matches {@code charset=<name>} case-insensitively, with an optional quote
 * after the equals sign. The name ends at the first quote, whitespace,
 * semicolon, slash or closing angle bracket.
 */
private static final Pattern CHARSET_PATTERN =
        Pattern.compile("charset\\s*=\\s*[\"']?([^\"'\\s;/>]+)", Pattern.CASE_INSENSITIVE);

/**
 * Extracts the charset name declared in a piece of markup or a Content-Type value.
 *
 * <p>Unlike the earlier greedy pattern, the result stops at the end of the
 * charset token instead of running to the last quote in the text, the keyword
 * is matched regardless of case, and a trailing quote is not required.
 *
 * @param content text to search, may be {@code null}
 * @return the charset name exactly as written in {@code content}, or
 *         {@code null} when {@code content} is {@code null} or declares none
 */
public static String matchCharset(String content) {
    if (content == null) {
        return null;
    }
    Matcher m = CHARSET_PATTERN.matcher(content);
    return m.find() ? m.group(1) : null;
}
//
// // 图片下载
// private static String download(String url) {
// try {
// String imageName = url.substring(url.lastIndexOf("/") + 1,
// url.length());
//
// URL uri = new URL(url);
// InputStream in = uri.openStream();
// String dirName = "static/temp/";
// File dirFile = new File(dirName);
// if(!dirFile.isDirectory()){
// dirFile.mkdir();
// }
// String fileName = dirName+imageName;
// File file = new File(dirFile,imageName);
// FileOutputStream fo = new FileOutputStream(file);
// byte[] buf = new byte[1024];
// int length = 0;
// while ((length = in.read(buf, 0, buf.length)) != -1) {
// fo.write(buf, 0, length);
// }
// in.close();
// fo.close();
// return fileName;
// } catch (Exception e) {
// e.printStackTrace();
// }
// return null;
// }
//
// /**
// * 判断链接是否失效
// * @param url
// * @return
// */
// public static boolean isConnect(String url){
// HttpURLConnection connection;
// int counts = 0;
// boolean flag = false;
// if (url == null || url.length() <= 0) {
// return flag;
// }
// while (counts < 5) {
// try {
// connection = (HttpURLConnection) new URL(url).openConnection();
// int state = connection.getResponseCode();
// if (state == 200) {
// flag = true;
// }
// break;
// } catch (Exception e) {
// counts++;
// continue;
// }
// }
// return flag;
// }
/**
 * Returns the host part of a bookmark URL, i.e. the site the bookmark belongs to.
 *
 * @param url the bookmark URL; must not be {@code null}
 * @return the value of {@link URL#getHost()} for {@code url}
 * @throws MalformedURLException declared in the signature; nothing in this
 *         method's body raises it
 */
public static String Urlutils(URL url) throws MalformedURLException {
    return url.getHost();
}
}