feature(地址拆分):地址清洗开发

1. 常规地址
2. 连号(结尾、中间)
3. 多号
4. 包含小区名称和路名
5. 调整一些类型的地址
This commit is contained in:
purple
2020-08-06 18:31:22 +08:00
parent 0bdfcc8c75
commit 226041bc05
31 changed files with 205 additions and 1930 deletions

View File

@ -2,57 +2,25 @@ package com.ruoyi;
import com.ruoyi.common.utils.LoadUtil;
import com.ruoyi.project.tool.address.*;
import com.ruoyi.project.tool.address.model.CleanAddress;
import com.ruoyi.project.tool.address.model.LianHaoNode;
import com.ruoyi.project.tool.address.parse.ZParse;
import com.ruoyi.project.tool.address.utils.DefaultAddressBuilder;
import com.ruoyi.project.tool.address.utils.AddressUtil;
import org.junit.Assert;
import org.junit.Test;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Base64;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class CleanUtilTests {
public class AddressUtilTests {
/**
* 地址清洗测试
*/
@Test
public void printStreet() {
    // Dumps the entries of jie.dict that pass the keyword filter below.
    // NOTE(review): String.contains("") is true for every string, so as written
    // the filter can never match and nothing is printed. The two empty literals
    // look like they lost their characters -- confirm the intended keywords.
    List<String> streets = LoadUtil.loadList("jie.dict");
    for (String street : streets) {
        boolean keep = !street.contains("") && street.contains("") && !street.contains("街坊");
        if (keep) {
            System.out.println(street);
        }
    }
}
@Test
public void printNoRoad() {
public void testClear() {
List<String> textList = LoadUtil.loadList("icbc.dict");
textList.forEach(item -> {
if (!item.contains("")) {
System.out.println(item);
}
});
}
/**
 * Loads temp.dict, orders its entries from longest to shortest and prints them
 * one per line.
 */
@Test
public void testSort() {
    List<String> entries = LoadUtil.loadList("temp.dict");
    // Descending by length: the right-hand operand is compared first.
    entries.sort((left, right) -> Integer.compare(right.length(), left.length()));
    for (String entry : entries) {
        System.out.println(entry);
    }
}
@Test
public void testClear3() {
List<String> textList = LoadUtil.loadList("test.dict");
textList.forEach(item -> {
AddressResult addressResult = AddressUtil.clear(item);
if (null == addressResult.getCleanAddress()) {
@ -64,6 +32,7 @@ public class CleanUtilTests {
});
}
/**
* 中间连号
*/
@ -78,6 +47,21 @@ public class CleanUtilTests {
Assert.assertEquals(addressResult.getDistrict(), "浦东");
}
/**
 * Building / house-number / room ("幢号室") address.
 *
 * <p>Cleans one address that carries a building number (89幢), a house number
 * (578号) and a room number (402室), then checks the first cleaned address and
 * the district reported by {@code AddressUtil.clear}.
 */
@Test
public void testZhuangHaoShi() {
    AddressResult addressResult = AddressUtil.clear("奉贤区南桥镇江海新村(B)89幢578号402室");
    // A missing result must fail the test; returning early here would report a
    // pass without checking anything.
    Assert.assertNotNull("no clean address produced", addressResult.getCleanAddress());
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes
    // failure messages read correctly.
    Assert.assertEquals("江海新村578号402室", addressResult.getCleanAddress().get(0));
    Assert.assertEquals("奉贤", addressResult.getDistrict());
}
/**
* 尾部连号
*/
@ -107,19 +91,6 @@ public class CleanUtilTests {
Assert.assertEquals(addressResult.getDistrict(), "松江");
}
/**
 * Prints each run of digits found in the sample text, one per line.
 */
@Test
public void testWhileMatch() {
    Matcher digitRuns = Pattern.compile("\\d+").matcher("24、25号");
    boolean found = digitRuns.find();
    while (found) {
        System.out.println(digitRuns.group());
        found = digitRuns.find();
    }
}
/**
* 地下
*/
@ -129,48 +100,28 @@ public class CleanUtilTests {
if (null == addressResult.getCleanAddress()) {
return;
}
addressResult.getCleanAddress().forEach(adr -> {
System.out.println(String.format("%s\t%s\t", addressResult.getDistrict(), adr));
});
Assert.assertEquals("北华路168弄35号1002室", addressResult.getCleanAddress().get(0));
Assert.assertEquals(addressResult.getDistrict(), "闵行");
}
/**
* 层结尾
*/
@Test
public void testClear2() {
    // Runs every line of icbc.dict through DefaultAddressBuilder and prints each
    // element of the returned result list.
    List<String> rawAddresses = LoadUtil.loadList("icbc.dict");
    for (String rawAddress : rawAddresses) {
        // A fresh builder is created for every input line.
        StandardAddress parsed = new DefaultAddressBuilder().clear(rawAddress);
        List<AddressContent> parts = parsed.getResult();
        for (AddressContent part : parts) {
            System.out.println(part);
        }
    }
}
@Test
public void testClear() {
List<String> textList = LoadUtil.loadList("icbc.dict");
textList.forEach(item -> {
CleanAddress cleanAddress = CleanUtil.clear(item);
System.out.println(cleanAddress);
});
// Assert.assertEquals(cleanAddress.getDistrict(), "");
// Assert.assertEquals(cleanAddress.getDistrict(), "");
// Assert.assertEquals(cleanAddress.getDistrict(), "");
// Assert.assertEquals(cleanAddress.getDistrict(), "");
// Assert.assertEquals(cleanAddress.getDistrict(), "");
// Assert.assertEquals(cleanAddress.getDistrict(), "");
public void testTailCeng() {
AddressResult addressResult = AddressUtil.clear("浦东新区成山路1488弄119号1-4层");
if (null == addressResult.getCleanAddress()) {
return;
}
Assert.assertEquals("成山路1488弄119号", addressResult.getCleanAddress().get(0));
Assert.assertEquals(addressResult.getDistrict(), "浦东");
}
/**
* base64保存
*
* @throws IOException
*/
@Test
public void testBase64() throws IOException {
Base64.Decoder decoder = Base64.getDecoder();
@ -180,6 +131,47 @@ public class CleanUtilTests {
out.write(buffer);
out.close();
}
/**
 * Prints each run of digits found in the sample text, one per line.
 */
@Test
public void testWhileMatch() {
    Matcher digitRuns = Pattern.compile("\\d+").matcher("24、25号");
    boolean found = digitRuns.find();
    while (found) {
        System.out.println(digitRuns.group());
        found = digitRuns.find();
    }
}
/**
 * Dumps the entries of jie.dict that pass the keyword filter below.
 *
 * <p>NOTE(review): {@code String.contains("")} is true for every string, so as
 * written the filter can never match and nothing is printed. The two empty
 * literals look like they lost their characters -- confirm the intended keywords.
 */
@Test
public void printStreet() {
    List<String> streets = LoadUtil.loadList("jie.dict");
    for (String street : streets) {
        boolean keep = !street.contains("") && street.contains("") && !street.contains("街坊");
        if (keep) {
            System.out.println(street);
        }
    }
}
/**
 * Dumps the entries of icbc.dict that do not contain the filter keyword.
 *
 * <p>NOTE(review): {@code String.contains("")} is true for every string, so as
 * written no entry passes the filter and nothing is printed. The empty literal
 * looks like it lost its character -- confirm the intended keyword.
 */
@Test
public void printNoRoad() {
    List<String> addresses = LoadUtil.loadList("icbc.dict");
    addresses.stream()
            .filter(address -> !address.contains(""))
            .forEach(address -> System.out.println(address));
}
/**
 * Loads temp.dict, orders its entries from longest to shortest and prints them
 * one per line.
 */
@Test
public void testSort() {
    List<String> entries = LoadUtil.loadList("temp.dict");
    // Descending by length: the right-hand operand is compared first.
    entries.sort((left, right) -> Integer.compare(right.length(), left.length()));
    for (String entry : entries) {
        System.out.println(entry);
    }
}
}