书签导入功能

2020-08-27 21:51:33 +08:00
parent 5e67a19e04
commit 96672852ae
9 changed files with 1407 additions and 676 deletions
--- a/ruoyi-common/src/main/java/com/ruoyi/common/utils/BookmarkHtml/Const.java
+++ b/ruoyi-common/src/main/java/com/ruoyi/common/utils/BookmarkHtml/Const.java
@@ -0,0 +1,38 @@
+package com.ruoyi.common.utils.BookmarkHtml;
+
+import org.springframework.stereotype.Component;
+
+/**
+ * Static constants shared by the BookmarkHtml utilities.
+ *
+ * <p>Every field is a non-final {@code public static} variable, so any code can
+ * reassign it at runtime.
+ *
+ * Author: Wang
+ * Date: 2020/08/22 23:03
+ */
+@Component
+public class Const {
+
+    // Has no initializer and is never assigned inside this class: the only setter
+    // (see the bottom of the class) is commented out.
+    public static String BASE_PATH;
+
+    // NOTE(review): presumably the session attribute name of the logged-in user — confirm against callers.
+    public static String LOGIN_SESSION_KEY = "Favorites_user";
+
+    // NOTE(review): looks like secret key material hard-coded in source — confirm how it is
+    // used and consider moving it to external configuration.
+    public static String PASSWORD_KEY = "@#$%^&*()OPG#$%^&*(HG";
+
+    // NOTE(review): 24 characters, presumably a 3DES key; same concern as PASSWORD_KEY.
+    public static String DES3_KEY = "9964DYByKL967c3308imytCB";
+
+    // Relative path (no leading slash) of the default logo image.
+    public static String default_logo="img/logo.jpg";
+
+    // Passed to Jsoup as the user agent by ImportHtml.getCollectFromUrl.
+    public static String userAgent="Mozilla";
+
+    // Evaluated once, during class initialization, from the value BASE_PATH has at that moment.
+    // BASE_PATH is still null then, so this string is "null/img/logo.jpg"; assigning
+    // BASE_PATH later does not update it.
+    // NOTE(review): probably unintended — confirm and compute lazily if a real path is needed.
+    public static String default_Profile=BASE_PATH+"/img/logo.jpg";
+
+    // NOTE(review): presumably a session/cookie key for the last referer URL — confirm against callers.
+    public static String LAST_REFERER = "LAST_REFERER";
+
+    // 30*24*60*60 = 2592000; presumably seconds, i.e. 30 days — TODO confirm the unit.
+    public static int COOKIE_TIMEOUT= 30*24*60*60;
+
+
+//	  @Autowired(required = true)
+//	  public void setBasePath(@Value("${favorites.base.path}")String basePath) {
+//		  Const.BASE_PATH = basePath;
+//	  }
+
+
+}
--- a/ruoyi-common/src/main/java/com/ruoyi/common/utils/BookmarkHtml/ImportHtml.java
+++ b/ruoyi-common/src/main/java/com/ruoyi/common/utils/BookmarkHtml/ImportHtml.java
@@ -0,0 +1,363 @@
+package com.ruoyi.common.utils.BookmarkHtml;
+
+
+import com.ruoyi.common.utils.StringUtils;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * @Auther: Wang
+ * @Date: 2020/08/22 22:56
+ * 功能描述:
+ */
+public class ImportHtml {
+
+    public static Logger logger =  LoggerFactory.getLogger(ImportHtml.class);
+    /**
+     * @param url
+     * @return
+     */
+//	public static String getImge(String url){
+//		String logo="";
+//		logo=getPageImg(url);
+//		if(StringUtils.isBlank(logo) || logo.length()>300){
+//			logo=Const.BASE_PATH + Const.default_logo;
+//		}
+//		return logo;
+//	}
+
+    /**
+     * @param url
+     * @return
+     */
+//	public static String getPageImg(String url){
+//		String imgUrl="";
+//		Document doc;
+//		try {
+//			doc = Jsoup.connect(url).userAgent(Const.userAgent).get();
+//			Elements images = doc.select("img[src~=(?i)\\.(png|jpe?g|gif)]");
+//			for(Element image : images){
+//				imgUrl=image.attr("src");
+//				if(StringUtils.isNotBlank(imgUrl) ){
+//					if(imgUrl.startsWith("//")){
+//						imgUrl = "http:" + imgUrl;
+//					}else if(!imgUrl.startsWith("http") && !imgUrl.startsWith("/")){
+//						imgUrl=URLUtil.getDomainUrl(url) + "/" + imgUrl;
+//					}else if(!imgUrl.startsWith("http")){
+//						imgUrl=URLUtil.getDomainUrl(url)+imgUrl;
+//					}
+//				}
+//				// 判断图片大小
+//				String fileUrl = download(imgUrl);
+//				if(fileUrl!=null){
+//					File picture = new File(fileUrl);
+//					FileInputStream in = new FileInputStream(picture);
+//					BufferedImage sourceImg = ImageIO.read(in);
+//					String weight = String.format("%.1f",picture.length()/1024.0);
+//					int width = sourceImg.getWidth();
+//					int height = sourceImg.getHeight();
+//					// 删除临时文件
+//					if(picture.exists()){
+//						in.close();
+//						picture.delete();
+//					}
+//					if(Double.parseDouble(weight) <= 0 || width <=0 || height <= 0){
+//						logger.info("当前图片大小为0，继续获取图片链接");
+//						imgUrl="";
+//					}else{
+//						break;
+//					}
+//				}
+//			}
+//		} catch (Exception e) {
+//
+//			logger.warn("getPageImg  失败,url:"+url,e.getMessage());
+//		}
+//		return imgUrl;
+//	}
+    /**
+     * Fetches a page and collects basic metadata about it.
+     *
+     * <p>The returned map contains {@code "url"} and, when the page could be
+     * fetched, may additionally contain {@code "title"}, {@code "charset"} and
+     * {@code "description"}. Any exception is logged and whatever was collected
+     * up to that point is returned.
+     *
+     * Author: Wang
+     * Date: 2020/02/14 15:35
+     *
+     * @param url address of the page to inspect
+     * @return map of metadata keys to values, never {@code null}
+     */
+    public static Map<String, String> getCollectFromUrl(String url){
+        Map<String, String> result = new HashMap<String, String>();
+        try {
+            result.put("url", url);
+
+            Connection connection = Jsoup.connect(url).userAgent(Const.userAgent);
+            connection.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
+            connection.header("Accept-Encoding", "gzip, deflate, sdch");
+            connection.header("Accept-Language", "zh-CN,zh;q=0.8");
+            connection.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
+            Document doc = connection.get();
+            String title = doc.title();
+            if(StringUtils.isNotBlank(title)){
+                result.put("title", title);
+            }
+            String charset = doc.charset().name();
+            if(StringUtils.isBlank(charset)){
+                // Fall back to the first Content-Type meta tag that declares a charset.
+                // The markup is passed as-is: upper-casing it would hide a lower-case
+                // "charset=" from a case-sensitive pattern.
+                Elements eles = doc.select("meta[http-equiv=Content-Type]");
+                for (Element ele : eles) {
+                    String declared = matchCharset(ele.toString());
+                    if(StringUtils.isNotBlank(declared)){
+                        charset = declared;
+                        break;
+                    }
+                }
+            }
+            // Only a charset that was actually determined is reported.
+            if(StringUtils.isNotBlank(charset)){
+                result.put("charset", charset);
+            }
+            Elements metas = doc.head().select("meta");
+            for (Element meta : metas) {
+                String content = meta.attr("content");
+                if ("description".equalsIgnoreCase(meta.attr("name"))) {
+                    result.put("description", content);
+                }
+            }
+            //result.put("logoUrl", getImge(url));
+        } catch (Exception e) {
+            logger.error("文章解析出错：",e);
+        }
+        return result;
+    }
+//
+    /**
+     * Flat (single-level) import: maps every bookmark URL to its link text.
+     *
+     * <p>The stream is parsed as UTF-8 and only anchors whose {@code href}
+     * starts with {@code "http"} are kept. If parsing fails the error is logged
+     * and the entries gathered so far are returned.
+     *
+     * @param in bookmark HTML to read
+     * @return map from URL to link text, never {@code null}
+     */
+    public static Map<String, String> parseHtmlOne(InputStream in){
+        final Map<String, String> titlesByUrl = new HashMap<String, String>();
+        try {
+            final Elements anchors = Jsoup.parse(in, "UTF-8", "").select("a");
+            for (int i = 0; i < anchors.size(); i++) {
+                final Element anchor = anchors.get(i);
+                final String href = anchor.attr("href");
+                if (!href.startsWith("http")) {
+                    continue;
+                }
+                titlesByUrl.put(href, anchor.text());
+            }
+        } catch (Exception e) {
+            logger.error("解析html 文件异常：",e);
+        }
+        return titlesByUrl;
+    }
+
+    /**
+     * Two-level import: maps each folder name to the bookmarks directly inside it.
+     *
+     * <p>For every {@code dt} in the document, an {@code h3} child supplies the
+     * folder name and the {@code dt} entries of a {@code dl} child supply the
+     * links. Only links whose {@code href} starts with {@code "http"} are kept,
+     * and folders without a name or without links are skipped. Folders that
+     * share a name are merged into one entry. The stream is parsed as UTF-8.
+     * If parsing fails the error is logged and the folders gathered so far are
+     * returned.
+     *
+     * @param HTML bookmark HTML to read
+     * @return map from folder name to (URL to link text), never {@code null}
+     */
+    public static Map<String, Map<String, String>> parseHtml(InputStream HTML){
+        Map<String, Map<String, String>> resultMap = new HashMap<String, Map<String, String>>();
+        try {
+            Document doc = Jsoup.parse(HTML, "UTF-8", "");
+            Elements metasdts = doc.select("dt");
+            for(Element dt : metasdts){
+                String favoritesName = "";
+                Map<String, String> map = new HashMap<String, String>();
+                for(Element dt1 : dt.children()){
+                    if("h3".equalsIgnoreCase(dt1.nodeName())){
+                        favoritesName = dt1.text();
+                    }else if("dl".equalsIgnoreCase(dt1.nodeName())){
+                        collectFolderLinks(dt1, map);
+                    }
+                }
+                if(StringUtils.isNotBlank(favoritesName) && map.size() > 0){
+                    // Merge instead of overwrite so that a second folder with the
+                    // same name does not discard the links of the first one.
+                    Map<String, String> existing = resultMap.get(favoritesName);
+                    if(existing == null){
+                        resultMap.put(favoritesName, map);
+                    }else{
+                        existing.putAll(map);
+                    }
+                }
+            }
+        } catch (Exception e) {
+            logger.error("解析html文件异常：",e);
+        }
+        return resultMap;
+    }
+
+    /** Adds the http links found in the direct {@code dt} entries of {@code dl} to {@code links}. */
+    private static void collectFolderLinks(Element dl, Map<String, String> links){
+        for(Element entry : dl.children()){
+            // A dt without child elements has no link; child(0) would throw on it
+            // and abort the whole import.
+            if(!"dt".equals(entry.nodeName()) || entry.children().isEmpty()){
+                continue;
+            }
+            Element first = entry.child(0);
+            if("a".equals(first.nodeName())){
+                String url = first.attr("href");
+                if(url.startsWith("http")){
+                    links.put(url, first.text());
+                }
+            }
+        }
+    }
+
+	/**
+	 * Imports a bookmark HTML export following its nested structure (marked TODO by the author).
+	 *
+	 * <p>The stream is parsed as UTF-8. Every {@code dl} that is a direct child
+	 * of {@code body} is handed to {@link #parseElements(Element, Map)}, whose
+	 * return value becomes the current result. If parsing fails the error is
+	 * logged and the result built so far is returned.
+	 *
+	 * @param in bookmark HTML to read
+	 * @return the result map as last returned by {@code parseElements}
+	 */
+	public static Map<String, List<Map>> importHtmlMore(InputStream in){
+		Map<String, List<Map>> resultMap = new HashMap<String, List<Map>>();
+		try {
+			Document doc = Jsoup.parse(in, "UTF-8", "");
+			Elements bodys = doc.child(0).children();
+
+			for(Element body : bodys){
+				if(!"body".equalsIgnoreCase(body.nodeName())){
+					continue;
+				}
+				for(Element dl : body.children()){
+					if("dl".equalsIgnoreCase(dl.nodeName())){
+						resultMap = parseElements(dl,resultMap);
+						// Diagnostic output goes through the logger rather than stdout.
+						logger.debug("resultMap:{}", resultMap);
+					}
+				}
+			}
+		} catch (Exception e) {
+			logger.error("解析html文件异常：",e);
+		}
+		return resultMap;
+	}
+
+	/**
+	 * Processes one {@code dl} element and records what it finds in {@code resultMap}.
+	 *
+	 * <p>For each {@code dt} child of {@code element}: every {@code a} child whose
+	 * {@code href} starts with {@code "http"} is added to a URL-to-text map, and the
+	 * folder name is taken from the text of the first child of the anchor's
+	 * great-grandparent; every {@code dl} child is processed recursively. Afterwards,
+	 * if the first child of {@code element}'s great-grandparent is an {@code h3}, a
+	 * map (folder name to URL map, empty when no folder name was found) is appended
+	 * to the list stored under that {@code h3}'s text.
+	 *
+	 * <p>NOTE(review): the ancestor look-ups assume {@code element} has at least
+	 * three ancestors — confirm for callers other than {@code importHtmlMore}.
+	 *
+	 * @param element   the {@code dl} element to process
+	 * @param resultMap accumulator that is updated in place
+	 * @return the same {@code resultMap} instance
+	 */
+	public static Map<String, List<Map>> parseElements(Element element,Map<String, List<Map>> resultMap){
+		Map<String, Map> favoritesMap = new HashMap<String, Map>();
+		Map<String, String> urlMap = new HashMap<String, String>();
+		String favoritesName = "";
+		Elements dts = element.children();
+		for(Element dt : dts){
+			if("dt".equalsIgnoreCase(dt.nodeName())){
+				Elements dtas = dt.children();
+				for(Element a : dtas){
+					if("a".equalsIgnoreCase(a.nodeName())){
+						String url = a.attr("href");
+						if(url.startsWith("http")){
+							urlMap.put(url, a.text());
+							favoritesName=a.parent().parent().parent().child(0).text();
+							// Routine trace output: debug level, with a placeholder so
+							// the folder name is actually written to the log.
+							logger.debug("目录：{}", favoritesName);
+						}
+					}else if("dl".equalsIgnoreCase(a.nodeName())){
+						resultMap =  parseElements(a,resultMap);
+					}
+				}
+
+			}
+		}
+		if(StringUtils.isNotBlank(favoritesName)){
+			favoritesMap.put(favoritesName, urlMap);
+		}
+		Element parment = element.parent().parent().parent().child(0);
+		if("h3".equalsIgnoreCase(parment.nodeName())){
+			String name = parment.text();
+			List<Map> mapList = resultMap.get(name);
+			if(mapList == null){
+				mapList = new ArrayList<Map>();
+			}
+			mapList.add(favoritesMap);
+			resultMap.put(name, mapList);
+		}
+		return resultMap;
+	}
+
+//	public static StringBuilder exportHtml(String title,StringBuilder body){
+//		StringBuilder sb = new StringBuilder();
+//		sb.append("<HTML>");
+//		sb.append("<HEAD>");
+//		sb.append("<TITLE>"+title+"</TITLE>");
+//		sb.append("<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=utf-8\" />");
+//		sb.append("</HEAD>");
+//		sb.append("<BODY><H1>"+title+"</H1>");
+//		sb.append(body);
+//		sb.append("</BODY>");
+//
+//		return sb;
+//	}
+//
+    /**
+     * Matches {@code charset=<value>} in any letter case, with an optional opening
+     * quote; the value ends at whitespace, a quote, {@code ;} or {@code >}.
+     * Compiled once because {@link Pattern} is immutable and reusable.
+     */
+    private static final Pattern CHARSET_PATTERN =
+            Pattern.compile("charset\\s*=\\s*[\"']?([^\\s\"';>]+)", Pattern.CASE_INSENSITIVE);
+
+    /**
+     * Extracts the charset name declared in a piece of markup or in a
+     * Content-Type value, e.g. {@code GB2312} from
+     * {@code CONTENT="TEXT/HTML; CHARSET=GB2312"}.
+     *
+     * <p>The match is case-insensitive and stops at the end of the charset
+     * token, so attributes that follow it are not included in the result. The
+     * value is returned in the letter case in which it appears in the input.
+     *
+     * @param content text to search; may be {@code null}
+     * @return the first declared charset name, or {@code null} if none is found
+     */
+    public static String matchCharset(String content) {
+        if (content == null) {
+            return null;
+        }
+        Matcher m = CHARSET_PATTERN.matcher(content);
+        return m.find() ? m.group(1) : null;
+    }
+//
+//	// 图片下载
+//	private static String download(String url) {
+//			try {
+//				String imageName = url.substring(url.lastIndexOf("/") + 1,
+//						url.length());
+//
+//				URL uri = new URL(url);
+//				InputStream in = uri.openStream();
+//				String dirName = "static/temp/";
+//				File dirFile = new File(dirName);
+//				if(!dirFile.isDirectory()){
+//					dirFile.mkdir();
+//				}
+//				String fileName = dirName+imageName;
+//				File file = new File(dirFile,imageName);
+//				FileOutputStream fo = new FileOutputStream(file);
+//				byte[] buf = new byte[1024];
+//				int length = 0;
+//				while ((length = in.read(buf, 0, buf.length)) != -1) {
+//					fo.write(buf, 0, length);
+//				}
+//				in.close();
+//				fo.close();
+//				return fileName;
+//			} catch (Exception e) {
+//				e.printStackTrace();
+//			}
+//			return null;
+//	}
+//
+//	/**
+//	 * 判断链接是否失效
+//	 * @param url
+//	 * @return
+//	 */
+//	public static boolean isConnect(String url){
+//		HttpURLConnection connection;
+//		int counts = 0;
+//		boolean flag = false;
+//		if (url == null || url.length() <= 0) {
+//			return flag;
+//		}
+//		while (counts < 5) {
+//			try {
+//				connection = (HttpURLConnection) new URL(url).openConnection();
+//				int state = connection.getResponseCode();
+//				if (state == 200) {
+//					flag = true;
+//				}
+//				break;
+//			} catch (Exception e) {
+//				counts++;
+//				continue;
+//			}
+//		}
+//		return flag;
+//	}
+
+
+    /**
+     * Returns the host part of a bookmark URL (the site's host name).
+     *
+     * Author: Wang
+     * Date: 2020/02/15 14:44
+     *
+     * @param url the URL to inspect
+     * @return the value of {@link URL#getHost()} for {@code url}
+     * @throws MalformedURLException declared in the signature; the body itself does not throw it
+     */
+    public static String Urlutils(URL url) throws MalformedURLException {
+        return url.getHost();
+    }
+}