feature(地址拆分):地址清洗开发
1. 常规地址 2. 连号(结尾、中间) 3. 多号 4. 包含小区名称和路名 5. 调整一些类型的地址
This commit is contained in:
parent
0bdfcc8c75
commit
226041bc05
@ -1,16 +0,0 @@
|
||||
package com.ruoyi.project.tool.address;
|
||||
|
||||
/**
|
||||
* 地址构建
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public interface AddressBuilder {
|
||||
|
||||
/**
|
||||
* 清洗
|
||||
* @param text
|
||||
* @return
|
||||
*/
|
||||
StandardAddress clear(String text);
|
||||
}
|
@ -1,38 +0,0 @@
|
||||
package com.ruoyi.project.tool.address;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.AddressType;
|
||||
|
||||
/**
|
||||
* 地址
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class AddressContent {
|
||||
private AddressType addressType;
|
||||
private StringBuilder sb;
|
||||
|
||||
public AddressContent(AddressType addressType) {
|
||||
this.addressType = addressType;
|
||||
sb = new StringBuilder();
|
||||
}
|
||||
|
||||
public AddressType getAddressType() {
|
||||
return addressType;
|
||||
}
|
||||
|
||||
public void appendContent(String addressNodeContent) {
|
||||
sb.append(addressNodeContent);
|
||||
}
|
||||
|
||||
public String getResult() {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "AddressContent{" +
|
||||
"addressType=" + addressType +
|
||||
", sb=" + sb +
|
||||
'}';
|
||||
}
|
||||
}
|
@ -1,59 +0,0 @@
|
||||
package com.ruoyi.project.tool.address;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.AddressType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* context
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class AddressContext {
|
||||
/**
|
||||
* 有序列表
|
||||
* 优化成队列,FIFO
|
||||
*/
|
||||
private ArrayList<AddressNode> nodeList = new ArrayList<>();
|
||||
private StringBuilder stringBuilder;
|
||||
private Integer startIndex = 0;
|
||||
|
||||
public AddressContext(String text) {
|
||||
this.stringBuilder = new StringBuilder(text);
|
||||
}
|
||||
|
||||
public StringBuilder getContent() {
|
||||
return this.stringBuilder;
|
||||
}
|
||||
|
||||
public void addAddressNode(AddressNode node) {
|
||||
startIndex = node.getEndIndex() - 1;
|
||||
this.nodeList.add(node);
|
||||
}
|
||||
|
||||
public Integer getStartIndex() {
|
||||
return startIndex;
|
||||
}
|
||||
|
||||
public StandardAddress getResult() {
|
||||
StandardAddress standardAddress = new StandardAddress(this.stringBuilder.toString());
|
||||
nodeList.forEach(node -> {
|
||||
AddressContent addressContent = new AddressContent(AddressType.CONDO);
|
||||
if (node.dataNode()) {
|
||||
addressContent.appendContent(node.getContent());
|
||||
} else {
|
||||
if (1 == node.getContentList().size()) {
|
||||
addressContent.appendContent(node.getContentList().get(0).getContent());
|
||||
} else {
|
||||
// 多个先组合,再拆分
|
||||
}
|
||||
}
|
||||
|
||||
standardAddress.addAddressContent(addressContent);
|
||||
});
|
||||
|
||||
return standardAddress;
|
||||
}
|
||||
}
|
@ -1,80 +0,0 @@
|
||||
package com.ruoyi.project.tool.address;
|
||||
|
||||
import com.ruoyi.common.utils.StringUtils;
|
||||
|
||||
import java.util.AbstractList;
|
||||
import java.util.Collection;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 地址节点
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class AddressNode {
|
||||
|
||||
private AddressNodeType nodeType;
|
||||
private Integer startIndex;
|
||||
private Integer endIndex;
|
||||
private String content;
|
||||
private List<AddressNode> contentList = new LinkedList<>();
|
||||
|
||||
public AddressNode(AddressNodeType addressNodeType) {
|
||||
this.nodeType = addressNodeType;
|
||||
}
|
||||
|
||||
public AddressNode(AddressNodeType addressNodeType, String content) {
|
||||
this.nodeType = addressNodeType;
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
public AddressNodeType getNodeType() {
|
||||
return nodeType;
|
||||
}
|
||||
|
||||
public void setNodeType(AddressNodeType nodeType) {
|
||||
this.nodeType = nodeType;
|
||||
}
|
||||
|
||||
public Integer getStartIndex() {
|
||||
return startIndex;
|
||||
}
|
||||
|
||||
public void setStartIndex(Integer startIndex) {
|
||||
this.startIndex = startIndex;
|
||||
}
|
||||
|
||||
public Integer getEndIndex() {
|
||||
return endIndex;
|
||||
}
|
||||
|
||||
public void setEndIndex(Integer endIndex) {
|
||||
this.endIndex = endIndex;
|
||||
}
|
||||
|
||||
public String getContent() {
|
||||
return content;
|
||||
}
|
||||
|
||||
/**
|
||||
* 数据节点
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Boolean dataNode() {
|
||||
return StringUtils.isNotEmpty(this.content);
|
||||
}
|
||||
|
||||
public void setContent(String content) {
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
public void addChildNode(AddressNode addressNode) {
|
||||
contentList.add(addressNode);
|
||||
}
|
||||
|
||||
public List<AddressNode> getContentList() {
|
||||
return contentList;
|
||||
}
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
package com.ruoyi.project.tool.address;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.AdrNode;
|
||||
import com.ruoyi.project.tool.address.utils.ParseContext;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
@ -1,33 +0,0 @@
|
||||
package com.ruoyi.project.tool.address;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.CleanAddress;
|
||||
import com.ruoyi.project.tool.address.model.CleanAddressBuilder;
|
||||
import com.ruoyi.project.tool.address.service.impl.NoSignalAddressHandler;
|
||||
import com.ruoyi.project.tool.address.service.impl.SignalAddressHandler;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 地址清洗
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class CleanUtil {
|
||||
|
||||
/**
|
||||
* 清洗
|
||||
*
|
||||
* @param rawAddress
|
||||
* @return
|
||||
*/
|
||||
public static CleanAddress clear(String rawAddress) {
|
||||
CleanAddress cleanAddress = new CleanAddress(rawAddress);
|
||||
if (cleanAddress.getContainsSpecialChar()) {
|
||||
new SignalAddressHandler().clear(cleanAddress);
|
||||
} else {
|
||||
new NoSignalAddressHandler().clear(cleanAddress);
|
||||
}
|
||||
return cleanAddress;
|
||||
}
|
||||
}
|
@ -1,29 +0,0 @@
|
||||
package com.ruoyi.project.tool.address;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.AddressType;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* context
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class StandardAddress {
|
||||
|
||||
private String rawAddress;
|
||||
private List<AddressContent> children = new LinkedList<>();
|
||||
|
||||
public StandardAddress(String text) {
|
||||
this.rawAddress = text;
|
||||
}
|
||||
|
||||
public void addAddressContent(AddressContent addressContent) {
|
||||
children.add(addressContent);
|
||||
}
|
||||
|
||||
public List<AddressContent> getResult() {
|
||||
return children;
|
||||
}
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 室地址
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class BuildingAddress extends PartialAddress {
|
||||
|
||||
public BuildingAddress(String address) {
|
||||
super(address);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AddressType getAddressType() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean multiAddress() {
|
||||
return null;
|
||||
}
|
||||
}
|
@ -1,215 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.model;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 清洗地址
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class CleanAddress {
|
||||
/**
|
||||
* 原地址
|
||||
*/
|
||||
private String rawAddress;
|
||||
/**
|
||||
* 待处理的地址
|
||||
*/
|
||||
private StringBuilder boundAddress;
|
||||
/**
|
||||
* 区域
|
||||
*/
|
||||
private String district;
|
||||
/**
|
||||
* 镇
|
||||
*/
|
||||
private String region;
|
||||
/**
|
||||
* 板块
|
||||
*/
|
||||
private String block;
|
||||
/**
|
||||
* 路名(可能交叉路)
|
||||
*/
|
||||
private String road;
|
||||
/**
|
||||
* 小区名称
|
||||
*/
|
||||
private String communityName;
|
||||
/**
|
||||
* 楼层
|
||||
* (和地下有关系)
|
||||
*/
|
||||
private String floor;
|
||||
/**
|
||||
* 单套地址
|
||||
*/
|
||||
private PartialAddress condoAddress;
|
||||
/**
|
||||
* 物业(普通、车位)
|
||||
*/
|
||||
private String propertyType;
|
||||
/**
|
||||
* 独栋
|
||||
*/
|
||||
private Boolean independent;
|
||||
/**
|
||||
* 多个地址
|
||||
*/
|
||||
private List<PartialAddress> addressList = new LinkedList<>();
|
||||
|
||||
/**
|
||||
* 是否包含特殊字符
|
||||
*/
|
||||
private Boolean containsSpecialChar;
|
||||
|
||||
private static List<String> specialChar = new LinkedList<>();
|
||||
|
||||
static {
|
||||
specialChar.add("。");
|
||||
specialChar.add(".");
|
||||
specialChar.add(",");
|
||||
specialChar.add(",");
|
||||
specialChar.add("-");
|
||||
specialChar.add("——");
|
||||
specialChar.add("_");
|
||||
specialChar.add("、");
|
||||
specialChar.add("(");
|
||||
specialChar.add(")");
|
||||
specialChar.add("(");
|
||||
specialChar.add(")");
|
||||
specialChar.add("《");
|
||||
specialChar.add("》");
|
||||
}
|
||||
|
||||
public CleanAddress(String rawAddress) {
|
||||
this.rawAddress = rawAddress;
|
||||
|
||||
}
|
||||
|
||||
public String getRawAddress() {
|
||||
return rawAddress;
|
||||
}
|
||||
|
||||
public void setRawAddress(String rawAddress) {
|
||||
this.rawAddress = rawAddress;
|
||||
}
|
||||
|
||||
public String getDistrict() {
|
||||
return district;
|
||||
}
|
||||
|
||||
public void setDistrict(String district) {
|
||||
this.district = district;
|
||||
}
|
||||
|
||||
public String getRegion() {
|
||||
return region;
|
||||
}
|
||||
|
||||
public void setRegion(String region) {
|
||||
this.region = region;
|
||||
}
|
||||
|
||||
public String getBlock() {
|
||||
return block;
|
||||
}
|
||||
|
||||
public void setBlock(String block) {
|
||||
this.block = block;
|
||||
}
|
||||
|
||||
public String getRoad() {
|
||||
return road;
|
||||
}
|
||||
|
||||
public void setRoad(String road) {
|
||||
this.road = road;
|
||||
}
|
||||
|
||||
public String getCommunityName() {
|
||||
return communityName;
|
||||
}
|
||||
|
||||
public void setCommunityName(String communityName) {
|
||||
this.communityName = communityName;
|
||||
}
|
||||
|
||||
public String getFloor() {
|
||||
return floor;
|
||||
}
|
||||
|
||||
public void setFloor(String floor) {
|
||||
this.floor = floor;
|
||||
}
|
||||
|
||||
public PartialAddress getCondoAddress() {
|
||||
return condoAddress;
|
||||
}
|
||||
|
||||
public void setCondoAddress(PartialAddress condoAddress) {
|
||||
this.condoAddress = condoAddress;
|
||||
}
|
||||
|
||||
public String getPropertyType() {
|
||||
return propertyType;
|
||||
}
|
||||
|
||||
public void setPropertyType(String propertyType) {
|
||||
this.propertyType = propertyType;
|
||||
}
|
||||
|
||||
public List<PartialAddress> getAddressList() {
|
||||
return addressList;
|
||||
}
|
||||
|
||||
public void addAddress(PartialAddress partialAddress) {
|
||||
this.addressList.add(partialAddress);
|
||||
}
|
||||
|
||||
public Boolean getIndependent() {
|
||||
return independent;
|
||||
}
|
||||
|
||||
public void setIndependent(Boolean independent) {
|
||||
this.independent = independent;
|
||||
}
|
||||
|
||||
public StringBuilder getBoundAddress() {
|
||||
return boundAddress;
|
||||
}
|
||||
|
||||
public Boolean getContainsSpecialChar() {
|
||||
String todoAddress = rawAddress.trim()
|
||||
.replace("\t", "")
|
||||
.replace(" ", "");
|
||||
|
||||
this.boundAddress = new StringBuilder(rawAddress);
|
||||
for (int i = 0; i < specialChar.size(); i++) {
|
||||
if (todoAddress.contains(specialChar.get(i))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "CleanAddress{" +
|
||||
"rawAddress='" + rawAddress + '\'' +
|
||||
", district='" + district + '\'' +
|
||||
", region='" + region + '\'' +
|
||||
", block='" + block + '\'' +
|
||||
", road='" + road + '\'' +
|
||||
", communityName='" + communityName + '\'' +
|
||||
", floor=" + floor +
|
||||
", condoAddress=" + condoAddress +
|
||||
", propertyType='" + propertyType + '\'' +
|
||||
", independent=" + independent +
|
||||
", addressList=" + addressList +
|
||||
", containsSpecialChar=" + containsSpecialChar +
|
||||
'}';
|
||||
}
|
||||
}
|
@ -1,222 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.model;
|
||||
|
||||
import com.ruoyi.common.utils.LoadUtil;
|
||||
import com.ruoyi.common.utils.StringUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* 清洗地址构建
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class CleanAddressBuilder {
|
||||
|
||||
private Map<String, String> districtMap = LoadUtil.loadDict("address-dict/district.dict");
|
||||
private List<String> regionList = LoadUtil.loadList("address-dict/region.dict");
|
||||
private List<String> blockList = LoadUtil.loadList("address-dict/block.dict");
|
||||
private List<String> roadList = LoadUtil.loadList("address-dict/road.dict");
|
||||
private CleanAddress cleanAddress;
|
||||
private static final int DISTRICT_LENGTH = 8;
|
||||
private static final String SHANGHAI_SHI = "上海市";
|
||||
private static final String SHANGHAI = "上海";
|
||||
private static final String SHANG = "上";
|
||||
private static final String HAO = "号";
|
||||
private static final String SHANG_SHANG = "上上";
|
||||
private static final String SHI_PATTERN = "([\\dA-Za-z]+)(室?)$";
|
||||
private static final String CHINESE_FLOOR_PATTERN = "([一二三四五六七八九十]+)层";
|
||||
private static final String NUMBER_FLOOR_PATTERN = "(\\d+)层";
|
||||
private static final String HAO_PATTERN = "([\\dA-Za-z]+)号";
|
||||
|
||||
private CleanAddressBuilder(CleanAddress cleanAddress) {
|
||||
this.cleanAddress = cleanAddress;
|
||||
}
|
||||
|
||||
public static CleanAddressBuilder builder(CleanAddress cleanAddress) {
|
||||
return new CleanAddressBuilder(cleanAddress);
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析区域
|
||||
* 前面7个字符
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public CleanAddressBuilder parseDistrict() {
|
||||
String shanghaiAndDistrict = null;
|
||||
if (cleanAddress.getBoundAddress().length() >= DISTRICT_LENGTH) {
|
||||
shanghaiAndDistrict = (SHANG + cleanAddress.getBoundAddress().substring(0, 7)).replace(SHANG_SHANG, SHANG);
|
||||
} else {
|
||||
shanghaiAndDistrict = (SHANG + cleanAddress.getBoundAddress()).replace(SHANG_SHANG, SHANG);
|
||||
}
|
||||
|
||||
//
|
||||
if (!shanghaiAndDistrict.startsWith(SHANGHAI_SHI) && !shanghaiAndDistrict.startsWith(SHANGHAI) && shanghaiAndDistrict.startsWith(SHANG)) {
|
||||
shanghaiAndDistrict = shanghaiAndDistrict.substring(1, shanghaiAndDistrict.length() - 1);
|
||||
}
|
||||
|
||||
for (Map.Entry<String, String> district : districtMap.entrySet()) {
|
||||
if (shanghaiAndDistrict.startsWith(district.getKey())) {
|
||||
cleanAddress.setDistrict(district.getValue());
|
||||
break;
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析镇
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public CleanAddressBuilder parseRegion() {
|
||||
for (int i = 0; i < regionList.size(); i++) {
|
||||
if (-1 != cleanAddress.getBoundAddress().indexOf(regionList.get(i))) {
|
||||
cleanAddress.setRegion(regionList.get(i));
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析板块
|
||||
*/
|
||||
public CleanAddressBuilder parseBlock() {
|
||||
for (int i = 0; i < blockList.size(); i++) {
|
||||
if (-1 != cleanAddress.getBoundAddress().indexOf(blockList.get(i))) {
|
||||
cleanAddress.setBlock(regionList.get(i));
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析路(可能缺失“路”)
|
||||
*/
|
||||
public CleanAddressBuilder parseRoad() {
|
||||
for (int i = 0; i < roadList.size(); i++) {
|
||||
if (-1 != cleanAddress.getBoundAddress().indexOf(roadList.get(i))) {
|
||||
cleanAddress.setRoad(roadList.get(i));
|
||||
}
|
||||
}
|
||||
//
|
||||
if (StringUtils.isEmpty(cleanAddress.getRoad())) {
|
||||
for (int i = 0; i < roadList.size(); i++) {
|
||||
// 砍掉“路”
|
||||
String noRoad = roadList.get(i);
|
||||
if (noRoad.endsWith("路")) {
|
||||
noRoad = noRoad.substring(0, noRoad.length() - 1);
|
||||
}
|
||||
|
||||
if (-1 != cleanAddress.getBoundAddress().indexOf(noRoad)) {
|
||||
cleanAddress.setRoad(noRoad);
|
||||
}
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析小区名称(路名和小区名称重定义)
|
||||
*/
|
||||
public CleanAddressBuilder parseCommunityName() {
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 是否独栋?“全”“全幢”
|
||||
* 别墅?
|
||||
*/
|
||||
public CleanAddressBuilder parseIndependent() {
|
||||
Pattern shiPattern = Pattern.compile(SHI_PATTERN);
|
||||
Matcher matcher = shiPattern.matcher(cleanAddress.getBoundAddress());
|
||||
if (matcher.find()) {
|
||||
getCondoAddress();
|
||||
cleanAddress.setIndependent(Boolean.FALSE);
|
||||
} else {
|
||||
cleanAddress.setIndependent(Boolean.TRUE);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析地址
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public void parseAddress() {
|
||||
getCondoAddress();
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取楼层
|
||||
*/
|
||||
private void getFloorText() {
|
||||
Pattern pattern = Pattern.compile(CHINESE_FLOOR_PATTERN);
|
||||
Matcher matcher = pattern.matcher(cleanAddress.getBoundAddress());
|
||||
if (matcher.find()) {
|
||||
cleanAddress.setFloor(matcher.group(1));
|
||||
return;
|
||||
}
|
||||
pattern = Pattern.compile(NUMBER_FLOOR_PATTERN);
|
||||
matcher = pattern.matcher(cleanAddress.getBoundAddress());
|
||||
if (matcher.find()) {
|
||||
cleanAddress.setFloor(matcher.group(1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 小区地址
|
||||
*/
|
||||
private void getCommunityAddress(String buildingAddressText) {
|
||||
// 只有号、号号,弄(支弄)号
|
||||
Pattern pattern = Pattern.compile(HAO_PATTERN);
|
||||
Matcher matcher = pattern.matcher(buildingAddressText);
|
||||
if (matcher.find()) {
|
||||
String communityText = buildingAddressText.replace(matcher.group(0), "");
|
||||
CommunityAddress communityAddress = new CommunityAddress(communityText);
|
||||
this.cleanAddress.addAddress(communityAddress);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取清洗地址
|
||||
*/
|
||||
private void getCondoAddress() {
|
||||
getFloorText();
|
||||
// 有路
|
||||
if (!StringUtils.isEmpty(cleanAddress.getRoad())) {
|
||||
int roadIndex = cleanAddress.getBoundAddress().indexOf(cleanAddress.getRoad());
|
||||
String condoAddressText =
|
||||
cleanAddress.getBoundAddress().substring(roadIndex);
|
||||
|
||||
// 单套
|
||||
CondoAddress condoAddress = new CondoAddress(condoAddressText);
|
||||
cleanAddress.setCondoAddress(condoAddress);
|
||||
cleanAddress.addAddress(condoAddress);
|
||||
|
||||
int haoIndex = condoAddressText.lastIndexOf(HAO);
|
||||
String buildingAddressText = condoAddressText.substring(0, haoIndex + 1);
|
||||
BuildingAddress buildingAddress = new BuildingAddress(buildingAddressText);
|
||||
// 楼栋
|
||||
condoAddress.addPartialAddress(buildingAddress);
|
||||
cleanAddress.addAddress(condoAddress);
|
||||
getCommunityAddress(buildingAddressText);
|
||||
} else if (!StringUtils.isEmpty(cleanAddress.getCommunityName())) {
|
||||
|
||||
} else {
|
||||
// 没有路,没有小区
|
||||
}
|
||||
|
||||
// 从路找到最后面。
|
||||
// 把室号去掉
|
||||
// 把号去掉
|
||||
// 单套地址、楼栋地址、小区地址
|
||||
}
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 小区地址
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class CommunityAddress extends PartialAddress {
|
||||
|
||||
public CommunityAddress(String address) {
|
||||
super(address);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AddressType getAddressType() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean multiAddress() {
|
||||
return null;
|
||||
}
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.model;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* 单套地址
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class CondoAddress extends PartialAddress {
|
||||
|
||||
public CondoAddress(String address) {
|
||||
super(address);
|
||||
this.shi = parseShi();
|
||||
this.floor = parseFloor();
|
||||
this.hao = parseHAO();
|
||||
}
|
||||
|
||||
@Override
|
||||
public AddressType getAddressType() {
|
||||
return AddressType.CONDO;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean multiAddress() {
|
||||
return childrenAddress.size() > 0;
|
||||
}
|
||||
|
||||
public String getHao() {
|
||||
return hao;
|
||||
}
|
||||
|
||||
public String getShi() {
|
||||
return shi;
|
||||
}
|
||||
|
||||
public Integer getFloor() {
|
||||
return floor;
|
||||
}
|
||||
|
||||
}
|
@ -1,121 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.model;
|
||||
|
||||
import com.ruoyi.common.exception.CustomException;
|
||||
import com.ruoyi.common.utils.StringUtils;
|
||||
import io.swagger.models.auth.In;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* 清洗地址
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public abstract class PartialAddress {
|
||||
protected String address;
|
||||
protected String hao;
|
||||
protected String shi;
|
||||
protected Integer floor;
|
||||
|
||||
protected final int HUNDRED = 100;
|
||||
protected final int TEN_THOUSAND = 10 * 1000;
|
||||
protected final static String SHI_PATTERN = "([\\dA-Za-z]+)室$";
|
||||
protected final static String HAO_PATTERN = "([\\dA-Za-z]+)(甲乙丙丁戊己庚辛仍亏)?号";
|
||||
protected final static String NUMBER_PATTERN = "\\d+";
|
||||
|
||||
|
||||
public PartialAddress(String address) {
|
||||
this.address = address;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
public abstract AddressType getAddressType();
|
||||
|
||||
|
||||
/**
|
||||
* 获取地址(单套、楼栋、小区)
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
protected List<PartialAddress> childrenAddress = new LinkedList<>();
|
||||
|
||||
public abstract Boolean multiAddress();
|
||||
|
||||
/**
|
||||
* 室解析
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
protected String parseShi() {
|
||||
Pattern pattern = Pattern.compile(SHI_PATTERN);
|
||||
Matcher matcher = pattern.matcher(this.address);
|
||||
// 这个需要优化,地下层没有考虑
|
||||
if (matcher.find()) {
|
||||
return matcher.group(1);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 楼层解析
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
protected Integer parseFloor() {
|
||||
if (StringUtils.isEmpty(this.shi)) {
|
||||
return null;
|
||||
}
|
||||
Pattern pattern = Pattern.compile(NUMBER_PATTERN);
|
||||
Matcher matcher = pattern.matcher(this.shi);
|
||||
// 这个需要优化,地下层没有考虑
|
||||
if (matcher.find()) {
|
||||
Integer num = new Integer(matcher.group(0));
|
||||
if (num <= HUNDRED) {
|
||||
return num / 10;
|
||||
} else if (num > HUNDRED && num <= TEN_THOUSAND) {
|
||||
return num / 100;
|
||||
} else {
|
||||
throw new CustomException("室号太大");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 号
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
protected String parseHAO() {
|
||||
Pattern pattern = Pattern.compile(HAO_PATTERN);
|
||||
Matcher matcher = pattern.matcher(this.address);
|
||||
// 这个需要优化,地下层没有考虑
|
||||
if (matcher.find()) {
|
||||
return matcher.group(1);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 增加地址
|
||||
*
|
||||
* @param partialAddress
|
||||
*/
|
||||
protected void addPartialAddress(PartialAddress partialAddress) {
|
||||
this.childrenAddress.add(partialAddress);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "PartialAddress{" +
|
||||
"hao='" + hao + '\'' +
|
||||
", shi='" + shi + '\'' +
|
||||
", floor=" + floor +
|
||||
'}';
|
||||
}
|
||||
}
|
@ -1,103 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.model;
|
||||
|
||||
/**
 * Address split into segments: lane, house number, floor, building and unit,
 * each with a flag telling whether several values are present.
 */
public class SegmentAddress {

    /** Lane or branch lane (弄、支弄). */
    private String nong;

    /** House number (号). */
    private String hao;

    /** Whether there are several house numbers. */
    private Boolean multiHao;

    /** Floor, including duplex. */
    private String floor;

    /** Whether there are several floors. */
    private Boolean multiFloor;

    /** Building (栋、幢、座). */
    private String dong;

    /** Unit number (室). */
    private String shi;

    /** Whether there are several unit numbers. */
    private Boolean multiShi;

    // ---- lane --------------------------------------------------------------

    public String getNong() {
        return this.nong;
    }

    public void setNong(String nong) {
        this.nong = nong;
    }

    // ---- house number ------------------------------------------------------

    public String getHao() {
        return this.hao;
    }

    public void setHao(String hao) {
        this.hao = hao;
    }

    public Boolean getMultiHao() {
        return this.multiHao;
    }

    public void setMultiHao(Boolean multiHao) {
        this.multiHao = multiHao;
    }

    // ---- floor -------------------------------------------------------------

    public String getFloor() {
        return this.floor;
    }

    public void setFloor(String floor) {
        this.floor = floor;
    }

    public Boolean getMultiFloor() {
        return this.multiFloor;
    }

    public void setMultiFloor(Boolean multiFloor) {
        this.multiFloor = multiFloor;
    }

    // ---- building ----------------------------------------------------------

    public String getDong() {
        return this.dong;
    }

    public void setDong(String dong) {
        this.dong = dong;
    }

    // ---- unit --------------------------------------------------------------

    public String getShi() {
        return this.shi;
    }

    public void setShi(String shi) {
        this.shi = shi;
    }

    public Boolean getMultiShi() {
        return this.multiShi;
    }

    public void setMultiShi(Boolean multiShi) {
        this.multiShi = multiShi;
    }
}
|
@ -1,7 +1,7 @@
|
||||
package com.ruoyi.project.tool.address.parse;
|
||||
|
||||
import com.ruoyi.common.utils.LoadUtil;
|
||||
import com.ruoyi.project.tool.address.ParseContext;
|
||||
import com.ruoyi.project.tool.address.utils.ParseContext;
|
||||
import com.ruoyi.project.tool.address.model.RoadNode;
|
||||
|
||||
import java.util.List;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.ruoyi.project.tool.address.parse;
|
||||
|
||||
import com.ruoyi.project.tool.address.ParseContext;
|
||||
import com.ruoyi.project.tool.address.utils.ParseContext;
|
||||
import com.ruoyi.project.tool.address.model.*;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
|
@ -1,10 +1,8 @@
|
||||
package com.ruoyi.project.tool.address.parse;
|
||||
|
||||
import com.ruoyi.project.tool.address.ParseContext;
|
||||
import com.ruoyi.project.tool.address.model.AdrNode;
|
||||
import com.ruoyi.project.tool.address.model.CengNode;
|
||||
import com.ruoyi.project.tool.address.model.HaoNode;
|
||||
import com.ruoyi.project.tool.address.model.ShiNode;
|
||||
import com.ruoyi.common.utils.StringUtils;
|
||||
import com.ruoyi.project.tool.address.model.*;
|
||||
import com.ruoyi.project.tool.address.utils.ParseContext;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
@ -21,8 +19,10 @@ import java.util.regex.Pattern;
|
||||
public class ZParse {
|
||||
private static final String FU_SHI = "复式";
|
||||
private static final String NUMBER_PATTERN = "\\d+";
|
||||
private static final String DEFAULT_SHI_PATTERN1 = "([\\dA-Za-z\\_\\-\\.\\——\\、,]+)(室?)$";
|
||||
private static final String DEFAULT_SHI_PATTERN = "([\\dA-Za-z\\-\\_\\.,]+)室";
|
||||
private static final String LIAN_SHI_PATTERN = "(\\d+)[、\\.\\-\\_](\\d)(室)?$";
|
||||
private static final String DEFAULT_SHI_PATTERN = "[\\dA-Za-z]+室";
|
||||
private static final String STRICT_SHI_PATTERN = DEFAULT_SHI_PATTERN + "$";
|
||||
private static final String MULTI_SHI_PATTERN = "[号](\\d+(室)?[、\\.\\-\\_,,]?)+(室)?$";
|
||||
private static final String CHINESE_FLOOR_PATTERN = "([一二三四五六七八九十]+)层";
|
||||
private static final String DEFAULT_FLOOR_PATTERN = "地下?([\\d一二三四五六七八九十])层";
|
||||
private static final String TAIL_FLOOR_PATTERN = "(\\d[\\-\\_\\——\\—]?\\d)(层)?$";
|
||||
@ -31,6 +31,7 @@ public class ZParse {
|
||||
private AdrNode rootNode;
|
||||
private String floor;
|
||||
private Integer startIndex;
|
||||
private Boolean isPark = Boolean.FALSE;
|
||||
|
||||
public ZParse(AdrNode adrNode, ParseContext context, Integer index) {
|
||||
this.context = context;
|
||||
@ -39,10 +40,10 @@ public class ZParse {
|
||||
}
|
||||
|
||||
public void parse() {
|
||||
tag();
|
||||
parseCeng();
|
||||
parseZhuang();
|
||||
parseShi();
|
||||
tag();
|
||||
}
|
||||
|
||||
private void parseZhuang() {
|
||||
@ -50,34 +51,109 @@ public class ZParse {
|
||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.startIndex);
|
||||
HaoNode haoNode = new HaoNode(matcher.group(0), index, index + matcher.group(0).length());
|
||||
this.rootNode.addNode(haoNode);
|
||||
startIndex = haoNode.getEndIndex();
|
||||
if (-1 != index) {
|
||||
HaoNode haoNode = new HaoNode(matcher.group(), index, index + matcher.group().length());
|
||||
this.rootNode.addNode(haoNode);
|
||||
startIndex = haoNode.getEndIndex();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 复式
|
||||
*/
|
||||
private void parseFUSHI() {
|
||||
String content = this.context.getContent().substring(this.startIndex);
|
||||
Pattern shiPattern = Pattern.compile(NUMBER_PATTERN);
|
||||
Matcher matcher = shiPattern.matcher(content);
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.startIndex);
|
||||
ShiNode shiNode = new ShiNode(matcher.group(0) + "室", index, index + matcher.group(0).length());
|
||||
this.rootNode.addNode(shiNode);
|
||||
startIndex = shiNode.getEndIndex();
|
||||
this.context.addFeature("复式");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 连室号
|
||||
* 连着的 102.3室
|
||||
*
|
||||
* @param shi1
|
||||
* @param shi2
|
||||
*/
|
||||
private void parseLianShi(String shi1, String shi2) {
|
||||
|
||||
int startIndex = this.context.getContent().indexOf(shi1, this.startIndex);
|
||||
ShiNode shiNode1 = new ShiNode(shi1 + "室", startIndex, startIndex + shi1.length());
|
||||
this.rootNode.addNode(shiNode1);
|
||||
startIndex = shiNode1.getEndIndex();
|
||||
|
||||
startIndex = this.context.getContent().indexOf(shi2, startIndex);
|
||||
ShiNode shiNode2 = new ShiNode(shi1.substring(0, shi1.length() - shi2.length()) + shi2 + "室", startIndex,
|
||||
startIndex + shi2.length());
|
||||
this.rootNode.addNode(shiNode2);
|
||||
}
|
||||
|
||||
/**
|
||||
* 多室号
|
||||
*
|
||||
* @param matcherText 室字符文本
|
||||
*/
|
||||
private void parseMultiShi(String matcherText) {
|
||||
Pattern numberPattern = Pattern.compile(NUMBER_PATTERN);
|
||||
Matcher matcher = numberPattern.matcher(matcherText);
|
||||
int startIndex = this.startIndex;
|
||||
while (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(), startIndex);
|
||||
if (-1 == index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ShiNode shiNode = new ShiNode(matcher.group() + "室", index, index + matcher.group().length());
|
||||
this.rootNode.addNode(shiNode);
|
||||
startIndex = shiNode.getEndIndex();
|
||||
}
|
||||
}
|
||||
|
||||
private void parseShi() {
|
||||
String text = this.context.getContent().substring(this.startIndex);
|
||||
// 复式
|
||||
if (-1 != this.context.getContent().indexOf(FU_SHI)) {
|
||||
String content = this.context.getContent().substring(this.startIndex);
|
||||
Pattern shiPattern = Pattern.compile(NUMBER_PATTERN);
|
||||
Matcher matcher = shiPattern.matcher(content);
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.startIndex);
|
||||
ShiNode shiNode = new ShiNode(matcher.group(0) + "室", index, index + matcher.group(0).length());
|
||||
this.rootNode.addNode(shiNode);
|
||||
startIndex = shiNode.getEndIndex();
|
||||
}
|
||||
} else {
|
||||
Pattern shiPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
|
||||
Matcher matcher = shiPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.startIndex);
|
||||
ShiNode shiNode = new ShiNode(matcher.group(0), index, index + matcher.group(0).length());
|
||||
this.rootNode.addNode(shiNode);
|
||||
startIndex = shiNode.getEndIndex();
|
||||
}
|
||||
if (-1 != text.indexOf(FU_SHI)) {
|
||||
parseFUSHI();
|
||||
return;
|
||||
}
|
||||
// 连室
|
||||
Pattern lianShiPattern = Pattern.compile(LIAN_SHI_PATTERN);
|
||||
Matcher matcher = lianShiPattern.matcher(text);
|
||||
if (matcher.find()) {
|
||||
parseLianShi(matcher.group(1), matcher.group(2));
|
||||
return;
|
||||
}
|
||||
// 多室
|
||||
Pattern shiPattern = Pattern.compile(MULTI_SHI_PATTERN);
|
||||
matcher = shiPattern.matcher(text);
|
||||
if (matcher.find()) {
|
||||
parseMultiShi(matcher.group());
|
||||
return;
|
||||
}
|
||||
// 单室
|
||||
String matcherText = null;
|
||||
if (this.isPark) {
|
||||
shiPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
|
||||
} else {
|
||||
shiPattern = Pattern.compile(STRICT_SHI_PATTERN);
|
||||
}
|
||||
matcher = shiPattern.matcher(text);
|
||||
if (matcher.find()) {
|
||||
matcherText = matcher.group();
|
||||
}
|
||||
if (StringUtils.isNotEmpty(matcherText)) {
|
||||
int index = this.context.getContent().indexOf(matcherText, startIndex);
|
||||
ShiNode shiNode = new ShiNode(matcherText, index, index + matcherText.length());
|
||||
this.rootNode.addNode(shiNode);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void tag() {
|
||||
@ -85,15 +161,25 @@ public class ZParse {
|
||||
if (-1 != this.context.getContent().indexOf("车位") || -1 != this.context.getContent().indexOf("车库")
|
||||
|| -1 != this.context.getContent().indexOf("停车")) {
|
||||
this.context.addFeature("车位");
|
||||
this.isPark = Boolean.TRUE;
|
||||
}
|
||||
|
||||
if (-1 != this.context.getContent().indexOf("地块")) {
|
||||
this.context.addFeature("地块");
|
||||
}
|
||||
|
||||
if (-1 != this.context.getContent().indexOf("别墅")) {
|
||||
this.context.addFeature("别墅");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断这个层是否可取?
|
||||
*
|
||||
* @param floorIndex
|
||||
* @return
|
||||
*/
|
||||
private Boolean invalidCeng(int floorIndex) {
|
||||
// 判断这个层是否可取?
|
||||
Pattern shiPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
|
||||
Matcher shiMatcher = shiPattern.matcher(this.context.getContent());
|
||||
if (shiMatcher.find()) {
|
||||
@ -136,9 +222,10 @@ public class ZParse {
|
||||
.replace("四", "4")
|
||||
.replace("五", "5");
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.startIndex);
|
||||
if (invalidCeng(index)) {
|
||||
if (invalidCeng(index) || this.isPark) {
|
||||
return;
|
||||
}
|
||||
// 并且是否包含车位?如果有包含车位,忽略
|
||||
CengNode cengNode = new CengNode("-" + num, index, index + matcher.group(0).length());
|
||||
this.rootNode.addNode(cengNode);
|
||||
rootNode.addNode(cengNode);
|
||||
|
@ -1,81 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.service;
|
||||
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Builder-pattern contract for extracting the individual components of an address.
 *
 * @author lihe
 */
public interface AddressBuilder {

    /**
     * Extracts the district.
     *
     * @return the district
     */
    String parseDistrict();

    /**
     * Extracts the town.
     *
     * @return the town
     */
    String parseRegion();

    /**
     * Extracts the block (sector).
     *
     * @return the block
     */
    String parseBlock();

    /**
     * Extracts the road(s).
     *
     * @return the roads
     */
    List<String> parseRoad();

    /**
     * Extracts the residential community name.
     *
     * @return the community name
     */
    String parseCommunityName();

    /**
     * Extracts the lane ("弄").
     *
     * @return the lane
     */
    String parseNONG();

    /**
     * Extracts the generalized house numbers ("号", "幢", "座").
     *
     * @return the house numbers
     */
    List<String> parseHao();

    /**
     * Extracts the room ("室").
     *
     * @return the room
     */
    String parseShi();

    /**
     * Extracts the floor (including underground floors).
     *
     * @return the floor
     */
    String parseFloor();

    /**
     * Tells whether the address is a standalone building.
     *
     * @return whether the address is a standalone building
     */
    Boolean parseIndependent();
}
|
@ -1,52 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.service;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.CleanAddress;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 地址清洗
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class AddressCleanUtils {
|
||||
private AddressBuilder builder;
|
||||
|
||||
private static List<String> specialChar = new LinkedList<>();
|
||||
|
||||
static {
|
||||
specialChar.add("。");
|
||||
specialChar.add(".");
|
||||
specialChar.add(",");
|
||||
specialChar.add(",");
|
||||
specialChar.add("-");
|
||||
specialChar.add("——");
|
||||
specialChar.add("_");
|
||||
specialChar.add("、");
|
||||
specialChar.add("(");
|
||||
specialChar.add(")");
|
||||
specialChar.add("(");
|
||||
specialChar.add(")");
|
||||
specialChar.add("《");
|
||||
specialChar.add("》");
|
||||
}
|
||||
|
||||
/**
|
||||
* @param text
|
||||
* @return
|
||||
*/
|
||||
public List<CleanAddress> clear(String text) {
|
||||
String todoAddress = text.trim()
|
||||
.replace("\t", "")
|
||||
.replace(" ", "");
|
||||
|
||||
for (int i = 0; i < specialChar.size(); i++) {
|
||||
if (todoAddress.contains(specialChar.get(i))) {
|
||||
builder = new DefaultAddressBuilder(todoAddress);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
@ -1,19 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.service;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.CleanAddress;
|
||||
|
||||
/**
|
||||
* 地址处理方法
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public interface AddressHandler {
|
||||
|
||||
/**
|
||||
* 地址清洗
|
||||
*
|
||||
* @param cleanAddress
|
||||
*/
|
||||
void clear(CleanAddress cleanAddress);
|
||||
|
||||
}
|
@ -1,45 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.service;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Field-only model of the components of an address. The class declares no accessors.
 */
public abstract class AddressModel {

    private String district;
    private String region;
    private String block;
    private String road;
    private String communityName;
    private String nong;
    private String complexHao;

    /**
     * Generalized house number ("号", "幢", "座", or a consecutive range).
     */
    private String hao;

    /**
     * Whether the address has several house numbers. Interpretations listed by the author:
     * x号y号 = x弄y号;
     * x号y幢 = x号;
     * x幢y号 = y号;
     * x幢y号 = x幢.
     */
    private Boolean multiHao;

    /**
     * Floor information, if any (duplex, multi-floor).
     */
    private String floor;

    private String shi;

    /**
     * Whether the address is a standalone building.
     */
    private Boolean dependency;

    /**
     * Hierarchy link; each level has exactly one (one-to-one).
     */
    private AddressModel hierarchy;

    // A list of sibling nodes (List<AddressModel> sibling) was sketched here but left disabled.
}
|
@ -1,192 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.service;
|
||||
|
||||
import com.ruoyi.common.utils.LoadUtil;
|
||||
import com.ruoyi.project.tool.address.model.AddressType;
|
||||
import com.ruoyi.project.tool.address.model.CleanAddress;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* 地址构建基类
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public abstract class BaseAddressBuilder implements AddressBuilder {
|
||||
|
||||
private Map<String, String> districtMap = LoadUtil.loadDict("address-dict/district.dict");
|
||||
private List<String> regionList = LoadUtil.loadList("address-dict/region.dict");
|
||||
private List<String> blockList = LoadUtil.loadList("address-dict/block.dict");
|
||||
private List<String> roadList = LoadUtil.loadList("address-dict/road.dict");
|
||||
private static final int DISTRICT_LENGTH = 8;
|
||||
private static final String SHANGHAI_SHI = "上海市";
|
||||
private static final String SHANGHAI = "上海";
|
||||
private static final String SHANG_SHANG = "上上";
|
||||
private static final String SHANG = "上";
|
||||
private static final String HAO = "号";
|
||||
|
||||
private static final String CHINESE_FLOOR_PATTERN = "([一二三四五六七八九十]+)层";
|
||||
private static final String NUMBER_FLOOR_PATTERN = "(\\d+)层";
|
||||
private static final String DEFAULT_HAO_PATTERN = "([\\dA-Za-z]+)号";
|
||||
private static final String DEFAULT_SHI_PATTERN = "([\\dA-Za-z]+)(室?)$";
|
||||
private static final String DEFAULT_NONG_PATTERN = "[\\d一二三四五六七八九十]+弄([\\d一二三四五六七八九十]+支弄)?";
|
||||
protected CleanContext context;
|
||||
|
||||
public BaseAddressBuilder(String text) {
|
||||
this.context = new CleanContext(text);
|
||||
}
|
||||
|
||||
/**
|
||||
* 地址类别(单套、楼栋、小区)
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public abstract AddressType getAddressType();
|
||||
|
||||
|
||||
@Override
|
||||
public String parseDistrict() {
|
||||
String shanghaiAndDistrict = null;
|
||||
if (this.context.getBoundAddress().length() >= DISTRICT_LENGTH) {
|
||||
shanghaiAndDistrict = (SHANG + this.context.getBoundAddress().substring(0, 7)).replace(SHANG_SHANG, SHANG);
|
||||
} else {
|
||||
shanghaiAndDistrict = (SHANG + this.context.getBoundAddress()).replace(SHANG_SHANG, SHANG);
|
||||
}
|
||||
|
||||
if (!shanghaiAndDistrict.startsWith(SHANGHAI_SHI) && !shanghaiAndDistrict.startsWith(SHANGHAI) && shanghaiAndDistrict.startsWith(SHANG)) {
|
||||
shanghaiAndDistrict = shanghaiAndDistrict.substring(1, shanghaiAndDistrict.length() - 1);
|
||||
}
|
||||
|
||||
for (Map.Entry<String, String> district : districtMap.entrySet()) {
|
||||
if (shanghaiAndDistrict.startsWith(district.getKey())) {
|
||||
return district.getValue();
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String parseRegion() {
|
||||
for (int i = 0; i < regionList.size(); i++) {
|
||||
if (-1 != this.context.getBoundAddress().indexOf(regionList.get(i))) {
|
||||
return regionList.get(i);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String parseBlock() {
|
||||
for (int i = 0; i < blockList.size(); i++) {
|
||||
if (-1 != this.context.getBoundAddress().indexOf(blockList.get(i))) {
|
||||
return regionList.get(i);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> parseRoad() {
|
||||
List<String> roadSegmentList = new LinkedList<>();
|
||||
|
||||
for (int i = 0; i < roadList.size(); i++) {
|
||||
if (-1 != this.context.getBoundAddress().indexOf(roadList.get(i))) {
|
||||
roadSegmentList.add(roadList.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
if (0 != roadSegmentList.size()) {
|
||||
return roadSegmentList;
|
||||
}
|
||||
for (int i = 0; i < roadList.size(); i++) {
|
||||
// 砍掉“路”
|
||||
String noRoad = roadList.get(i);
|
||||
if (noRoad.endsWith("路")) {
|
||||
noRoad = noRoad.substring(0, noRoad.length() - 1);
|
||||
}
|
||||
|
||||
if (-1 != this.context.getBoundAddress().indexOf(noRoad)) {
|
||||
roadSegmentList.add(noRoad);
|
||||
}
|
||||
}
|
||||
|
||||
return roadSegmentList;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String parseCommunityName() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String parseNONG() {
|
||||
Pattern shiPattern = Pattern.compile(DEFAULT_NONG_PATTERN);
|
||||
Matcher matcher = shiPattern.matcher(this.context.getBoundAddress());
|
||||
if (matcher.find()) {
|
||||
return matcher.group(0);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> parseHao() {
|
||||
List<String> haoSegmentList = new LinkedList<>();
|
||||
Pattern shiPattern = Pattern.compile(DEFAULT_HAO_PATTERN);
|
||||
Matcher matcher = shiPattern.matcher(this.context.getBoundAddress());
|
||||
while (matcher.find()) {
|
||||
haoSegmentList.add(matcher.group());
|
||||
}
|
||||
if (0 != haoSegmentList.size()) {
|
||||
return haoSegmentList;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String parseShi() {
|
||||
Pattern shiPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
|
||||
Matcher matcher = shiPattern.matcher(this.context.getBoundAddress());
|
||||
if (matcher.find()) {
|
||||
return matcher.group(1);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String parseFloor() {
|
||||
Pattern pattern = Pattern.compile(CHINESE_FLOOR_PATTERN);
|
||||
Matcher matcher = pattern.matcher(this.context.getBoundAddress());
|
||||
if (matcher.find()) {
|
||||
return matcher.group(1);
|
||||
}
|
||||
|
||||
pattern = Pattern.compile(NUMBER_FLOOR_PATTERN);
|
||||
matcher = pattern.matcher(this.context.getBoundAddress());
|
||||
if (matcher.find()) {
|
||||
return matcher.group(1);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean parseIndependent() {
|
||||
Pattern shiPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
|
||||
Matcher matcher = shiPattern.matcher(this.context.getBoundAddress());
|
||||
if (matcher.find()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public List<CleanAddress> getResult() {
|
||||
return this.context.getResult();
|
||||
}
|
||||
}
|
@ -1,53 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.service;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.CleanAddress;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 地址清洗
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class CleanContext {
|
||||
private StringBuilder boundAddress;
|
||||
private Boolean multiHao;
|
||||
private String address;
|
||||
private List<CleanAddress> result;
|
||||
|
||||
public CleanContext(String text) {
|
||||
this.boundAddress = new StringBuilder(text);
|
||||
}
|
||||
|
||||
public StringBuilder getBoundAddress() {
|
||||
return boundAddress;
|
||||
}
|
||||
|
||||
public void setBoundAddress(StringBuilder boundAddress) {
|
||||
this.boundAddress = boundAddress;
|
||||
}
|
||||
|
||||
public Boolean getMultiHao() {
|
||||
return multiHao;
|
||||
}
|
||||
|
||||
public void setMultiHao(Boolean multiHao) {
|
||||
this.multiHao = multiHao;
|
||||
}
|
||||
|
||||
public String getAddress() {
|
||||
return address;
|
||||
}
|
||||
|
||||
public void setAddress(String address) {
|
||||
this.address = address;
|
||||
}
|
||||
|
||||
public List<CleanAddress> getResult() {
|
||||
return result;
|
||||
}
|
||||
|
||||
public void setResult(List<CleanAddress> result) {
|
||||
this.result = result;
|
||||
}
|
||||
}
|
@ -1,22 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.service;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.AddressType;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 地址构建基类
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class DefaultAddressBuilder extends BaseAddressBuilder {
|
||||
|
||||
public DefaultAddressBuilder(String text) {
|
||||
super(text);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AddressType getAddressType() {
|
||||
return AddressType.CONDO;
|
||||
}
|
||||
}
|
@ -1,24 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.service.impl;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.CleanAddress;
|
||||
import com.ruoyi.project.tool.address.model.CleanAddressBuilder;
|
||||
import com.ruoyi.project.tool.address.service.AddressHandler;
|
||||
|
||||
/**
|
||||
* 没有特殊符号的地址清洗
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class NoSignalAddressHandler implements AddressHandler {
|
||||
|
||||
@Override
|
||||
public void clear(CleanAddress cleanAddress) {
|
||||
CleanAddressBuilder.builder(cleanAddress)
|
||||
.parseDistrict()
|
||||
.parseRegion()
|
||||
.parseBlock()
|
||||
.parseRoad()
|
||||
.parseIndependent()
|
||||
.parseAddress();
|
||||
}
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.service.impl;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.CleanAddress;
|
||||
import com.ruoyi.project.tool.address.service.AddressHandler;
|
||||
|
||||
/**
|
||||
* 带有字符的地址
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class SignalAddressHandler implements AddressHandler {
|
||||
|
||||
|
||||
@Override
|
||||
public void clear(CleanAddress cleanAddress) {
|
||||
|
||||
}
|
||||
}
|
@ -1,255 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.utils;
|
||||
|
||||
import com.ruoyi.common.utils.LoadUtil;
|
||||
import com.ruoyi.project.tool.address.AddressContext;
|
||||
import com.ruoyi.project.tool.address.AddressNode;
|
||||
import com.ruoyi.project.tool.address.AddressNodeType;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* 地址段分析
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class AddressNodeParse {
|
||||
|
||||
private Map<String, String> districtMap = LoadUtil.loadDict("address-dict/district.dict");
|
||||
private List<String> regionList = LoadUtil.loadList("address-dict/region.dict");
|
||||
private List<String> blockList = LoadUtil.loadList("address-dict/block.dict");
|
||||
private List<String> roadList = LoadUtil.loadList("address-dict/road.dict");
|
||||
private static final int DISTRICT_LENGTH = 8;
|
||||
private static final String SHANGHAI_SHI = "上海市";
|
||||
private static final String SHANGHAI = "上海";
|
||||
private static final String SHANG = "上";
|
||||
private static final String HAO = "号";
|
||||
private static final String SHANG_SHANG = "上上";
|
||||
private static final String DEFAULT_FLOOR_PATTERN = "([\\d一二三四五六七八九十]+)层";
|
||||
private static final String NUMBER_FLOOR_PATTERN = "(\\d+)层";
|
||||
private static final String DEFAULT_HAO_PATTERN = "([\\dA-Za-z]+)号";
|
||||
private static final String DEFAULT_ZHUANG_PATTERN = "([\\dA-Za-z]+)幢";
|
||||
private static final String DEFAULT_CENG_PATTERN = "([\\d])层";
|
||||
private static final String DEFAULT_SHI_PATTERN = "([\\dA-Za-z]+)(室?)$";
|
||||
private static final String DEFAULT_NONG_PATTERN = "[\\d一二三四五六七八九十]+弄([\\d一二三四五六七八九十]+支弄)?";
|
||||
private static List<String> specialChar = new LinkedList<>();
|
||||
private AddressContext context;
|
||||
|
||||
static {
|
||||
specialChar.add("。");
|
||||
specialChar.add(".");
|
||||
specialChar.add(",");
|
||||
specialChar.add(",");
|
||||
specialChar.add("-");
|
||||
specialChar.add("——");
|
||||
specialChar.add("_");
|
||||
specialChar.add("、");
|
||||
specialChar.add("(");
|
||||
specialChar.add(")");
|
||||
specialChar.add("(");
|
||||
specialChar.add(")");
|
||||
specialChar.add("《");
|
||||
specialChar.add("》");
|
||||
}
|
||||
|
||||
public AddressNodeParse(AddressContext addressContext) {
|
||||
this.context = addressContext;
|
||||
}
|
||||
|
||||
/**
|
||||
* 区域
|
||||
*/
|
||||
public void parseDistrict() {
|
||||
String shanghaiAndDistrict = null;
|
||||
if (this.context.getContent().length() >= DISTRICT_LENGTH) {
|
||||
shanghaiAndDistrict = (SHANG + this.context.getContent().substring(0, 7)).replace(SHANG_SHANG, SHANG);
|
||||
} else {
|
||||
shanghaiAndDistrict = (SHANG + this.context.getContent()).replace(SHANG_SHANG, SHANG);
|
||||
}
|
||||
|
||||
if (!shanghaiAndDistrict.startsWith(SHANGHAI_SHI) && !shanghaiAndDistrict.startsWith(SHANGHAI) && shanghaiAndDistrict.startsWith(SHANG)) {
|
||||
shanghaiAndDistrict = shanghaiAndDistrict.substring(1, shanghaiAndDistrict.length() - 1);
|
||||
}
|
||||
|
||||
for (Map.Entry<String, String> district : districtMap.entrySet()) {
|
||||
int index = shanghaiAndDistrict.indexOf(district.getKey(), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.DISTRICT, district.getValue());
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + district.getKey().length() + 1);
|
||||
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 镇
|
||||
*/
|
||||
public void parseRegion() {
|
||||
for (int i = 0; i < regionList.size(); i++) {
|
||||
int index = this.context.getContent().indexOf(regionList.get(i), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.REGION, regionList.get(i));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + regionList.get(i).length() + 1);
|
||||
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 板块
|
||||
*/
|
||||
public void parseBlock() {
|
||||
for (int i = 0; i < blockList.size(); i++) {
|
||||
int index = this.context.getContent().indexOf(blockList.get(i));
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.BLOCK, blockList.get(i));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + blockList.get(i).length() + 1);
|
||||
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 路
|
||||
*/
|
||||
public void parseRoad() {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.ROAD);
|
||||
for (int i = 0; i < roadList.size(); i++) {
|
||||
int index = this.context.getContent().indexOf(roadList.get(i), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
if (null == addressNode.getStartIndex()) {
|
||||
addressNode.setStartIndex(index);
|
||||
}
|
||||
AddressNode childAddressNode = new AddressNode(AddressNodeType.ROAD, roadList.get(i));
|
||||
childAddressNode.setStartIndex(index);
|
||||
childAddressNode.setEndIndex(index + roadList.get(i).length() + 1);
|
||||
addressNode.setEndIndex(index + roadList.get(i).length() + 1);
|
||||
addressNode.addChildNode(childAddressNode);
|
||||
}
|
||||
}
|
||||
|
||||
if (null != addressNode.getStartIndex()) {
|
||||
this.context.addAddressNode(addressNode);
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < roadList.size(); i++) {
|
||||
// 砍掉“路”
|
||||
String noRoad = roadList.get(i);
|
||||
if (noRoad.endsWith("路")) {
|
||||
noRoad = noRoad.substring(0, noRoad.length() - 1);
|
||||
}
|
||||
int index = this.context.getContent().indexOf(noRoad);
|
||||
if (-1 != index) {
|
||||
if (null == addressNode.getStartIndex()) {
|
||||
addressNode.setStartIndex(index);
|
||||
}
|
||||
AddressNode childAddressNode = new AddressNode(AddressNodeType.ROAD, noRoad);
|
||||
childAddressNode.setStartIndex(index);
|
||||
childAddressNode.setEndIndex(index + roadList.get(i).length() + 1);
|
||||
addressNode.setEndIndex(index + roadList.get(i).length() + 1);
|
||||
addressNode.addChildNode(childAddressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 弄
|
||||
*/
|
||||
public void parseNONG() {
|
||||
Pattern nongPattern = Pattern.compile(DEFAULT_NONG_PATTERN);
|
||||
Matcher matcher = nongPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.NONG, matcher.group(0));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||
addressNode.addChildNode(addressNode);
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 号
|
||||
*/
|
||||
public void parseHao() {
|
||||
Pattern haoPattern = Pattern.compile(DEFAULT_HAO_PATTERN);
|
||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.HAO, matcher.group(0));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||
addressNode.addChildNode(addressNode);
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 幢
|
||||
*/
|
||||
public void parseZhuang() {
|
||||
Pattern haoPattern = Pattern.compile(DEFAULT_ZHUANG_PATTERN);
|
||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.ZHUANG, matcher.group(0));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||
addressNode.addChildNode(addressNode);
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 层
|
||||
*/
|
||||
public void parseCeng() {
|
||||
Pattern haoPattern = Pattern.compile(DEFAULT_FLOOR_PATTERN);
|
||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.CENG, matcher.group(0));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||
addressNode.addChildNode(addressNode);
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 室
|
||||
*/
|
||||
public void parseShi() {
|
||||
Pattern haoPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
|
||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0),this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.SHI, matcher.group(0));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||
addressNode.addChildNode(addressNode);
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
package com.ruoyi.project.tool.address;
|
||||
package com.ruoyi.project.tool.address.utils;
|
||||
|
||||
import com.ruoyi.project.tool.address.AddressResult;
|
||||
import com.ruoyi.project.tool.address.parse.XParse;
|
||||
|
||||
/**
|
@ -1,36 +0,0 @@
|
||||
package com.ruoyi.project.tool.address.utils;
|
||||
|
||||
import com.ruoyi.project.tool.address.AddressBuilder;
|
||||
import com.ruoyi.project.tool.address.AddressContext;
|
||||
import com.ruoyi.project.tool.address.AddressNode;
|
||||
import com.ruoyi.project.tool.address.StandardAddress;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 默认地址构建
|
||||
*
|
||||
* @author lihe
|
||||
*/
|
||||
public class DefaultAddressBuilder implements AddressBuilder {
|
||||
private List<AddressNode> addressNodeList;
|
||||
private AddressContext addressContext;
|
||||
|
||||
@Override
|
||||
public StandardAddress clear(String text) {
|
||||
this.addressContext = new AddressContext(text);
|
||||
|
||||
AddressNodeParse addressNodeParse = new AddressNodeParse(this.addressContext);
|
||||
// 找到区域
|
||||
addressNodeParse.parseDistrict();
|
||||
addressNodeParse.parseRegion();
|
||||
addressNodeParse.parseBlock();
|
||||
addressNodeParse.parseRoad();
|
||||
addressNodeParse.parseNONG();
|
||||
addressNodeParse.parseHao();
|
||||
addressNodeParse.parseCeng();
|
||||
addressNodeParse.parseShi();
|
||||
|
||||
return addressContext.getResult();
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package com.ruoyi.project.tool.address;
|
||||
package com.ruoyi.project.tool.address.utils;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.AdrNode;
|
||||
|
@ -2,57 +2,25 @@ package com.ruoyi;
|
||||
|
||||
import com.ruoyi.common.utils.LoadUtil;
|
||||
import com.ruoyi.project.tool.address.*;
|
||||
import com.ruoyi.project.tool.address.model.CleanAddress;
|
||||
import com.ruoyi.project.tool.address.model.LianHaoNode;
|
||||
import com.ruoyi.project.tool.address.parse.ZParse;
|
||||
import com.ruoyi.project.tool.address.utils.DefaultAddressBuilder;
|
||||
import com.ruoyi.project.tool.address.utils.AddressUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Base64;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class CleanUtilTests {
|
||||
public class AddressUtilTests {
|
||||
|
||||
/**
|
||||
* 地址清洗测试
|
||||
*/
|
||||
@Test
|
||||
public void printStreet() {
|
||||
List<String> textList = LoadUtil.loadList("jie.dict");
|
||||
textList.forEach(item -> {
|
||||
if (!item.contains("路") && item.contains("街") && !item.contains("街坊")) {
|
||||
System.out.println(item);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void printNoRoad() {
|
||||
public void testClear() {
|
||||
List<String> textList = LoadUtil.loadList("icbc.dict");
|
||||
textList.forEach(item -> {
|
||||
if (!item.contains("路")) {
|
||||
System.out.println(item);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testSort() {
|
||||
List<String> textList = LoadUtil.loadList("temp.dict");
|
||||
textList.sort((x, y) -> y.length() - x.length());
|
||||
textList.forEach(item -> {
|
||||
System.out.println(item);
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClear3() {
|
||||
List<String> textList = LoadUtil.loadList("test.dict");
|
||||
textList.forEach(item -> {
|
||||
AddressResult addressResult = AddressUtil.clear(item);
|
||||
if (null == addressResult.getCleanAddress()) {
|
||||
@ -64,6 +32,7 @@ public class CleanUtilTests {
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 中间连号
|
||||
*/
|
||||
@ -78,6 +47,21 @@ public class CleanUtilTests {
|
||||
Assert.assertEquals(addressResult.getDistrict(), "浦东");
|
||||
}
|
||||
|
||||
/**
|
||||
* 幢号室
|
||||
*/
|
||||
@Test
|
||||
public void testZhuangHaoShi() {
|
||||
AddressResult addressResult = AddressUtil.clear("奉贤区南桥镇江海新村(B)89幢578号402室");
|
||||
if (null == addressResult.getCleanAddress()) {
|
||||
return;
|
||||
}
|
||||
|
||||
Assert.assertEquals(addressResult.getCleanAddress().get(0), "江海新村578号402室");
|
||||
Assert.assertEquals(addressResult.getDistrict(), "奉贤");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 尾部连号
|
||||
*/
|
||||
@ -107,19 +91,6 @@ public class CleanUtilTests {
|
||||
Assert.assertEquals(addressResult.getDistrict(), "松江");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWhileMatch() {
|
||||
Pattern numberPattern = Pattern.compile("\\d+");
|
||||
Matcher matcher = numberPattern.matcher("24、25号");
|
||||
|
||||
while (matcher.find()) {
|
||||
|
||||
System.out.println(matcher.group());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 室、地下
|
||||
*/
|
||||
@ -129,48 +100,28 @@ public class CleanUtilTests {
|
||||
if (null == addressResult.getCleanAddress()) {
|
||||
return;
|
||||
}
|
||||
addressResult.getCleanAddress().forEach(adr -> {
|
||||
System.out.println(String.format("%s\t%s\t", addressResult.getDistrict(), adr));
|
||||
});
|
||||
Assert.assertEquals("北华路168弄35号1002室", addressResult.getCleanAddress().get(0));
|
||||
Assert.assertEquals(addressResult.getDistrict(), "闵行");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 层结尾
|
||||
*/
|
||||
@Test
|
||||
public void testClear2() {
|
||||
//
|
||||
List<String> textList = LoadUtil.loadList("icbc.dict");
|
||||
|
||||
textList.forEach(item -> {
|
||||
DefaultAddressBuilder defaultAddressBuilder = new DefaultAddressBuilder();
|
||||
StandardAddress standardAddress = defaultAddressBuilder.clear(item);
|
||||
List<AddressContent> list = standardAddress.getResult();
|
||||
list.forEach(x -> {
|
||||
System.out.println(x);
|
||||
});
|
||||
//
|
||||
// CleanAddress cleanAddress = CleanUtil.clear(item);
|
||||
// System.out.println(cleanAddress);
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClear() {
|
||||
List<String> textList = LoadUtil.loadList("icbc.dict");
|
||||
|
||||
textList.forEach(item -> {
|
||||
CleanAddress cleanAddress = CleanUtil.clear(item);
|
||||
System.out.println(cleanAddress);
|
||||
});
|
||||
|
||||
|
||||
// Assert.assertEquals(cleanAddress.getDistrict(), "");
|
||||
// Assert.assertEquals(cleanAddress.getDistrict(), "");
|
||||
// Assert.assertEquals(cleanAddress.getDistrict(), "");
|
||||
// Assert.assertEquals(cleanAddress.getDistrict(), "");
|
||||
// Assert.assertEquals(cleanAddress.getDistrict(), "");
|
||||
// Assert.assertEquals(cleanAddress.getDistrict(), "");
|
||||
public void testTailCeng() {
|
||||
AddressResult addressResult = AddressUtil.clear("浦东新区成山路1488弄119号1-4层");
|
||||
if (null == addressResult.getCleanAddress()) {
|
||||
return;
|
||||
}
|
||||
Assert.assertEquals("成山路1488弄119号", addressResult.getCleanAddress().get(0));
|
||||
Assert.assertEquals(addressResult.getDistrict(), "浦东");
|
||||
}
|
||||
|
||||
/**
|
||||
* base64保存
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testBase64() throws IOException {
|
||||
Base64.Decoder decoder = Base64.getDecoder();
|
||||
@ -180,6 +131,47 @@ public class CleanUtilTests {
|
||||
out.write(buffer);
|
||||
out.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWhileMatch() {
|
||||
Pattern numberPattern = Pattern.compile("\\d+");
|
||||
Matcher matcher = numberPattern.matcher("24、25号");
|
||||
|
||||
while (matcher.find()) {
|
||||
System.out.println(matcher.group());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void printStreet() {
|
||||
List<String> textList = LoadUtil.loadList("jie.dict");
|
||||
textList.forEach(item -> {
|
||||
if (!item.contains("路") && item.contains("街") && !item.contains("街坊")) {
|
||||
System.out.println(item);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void printNoRoad() {
|
||||
List<String> textList = LoadUtil.loadList("icbc.dict");
|
||||
textList.forEach(item -> {
|
||||
if (!item.contains("路")) {
|
||||
System.out.println(item);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSort() {
|
||||
List<String> textList = LoadUtil.loadList("temp.dict");
|
||||
textList.sort((x, y) -> y.length() - x.length());
|
||||
textList.forEach(item -> {
|
||||
System.out.println(item);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user