feature(地址拆分):地址清洗开发

1. 常规地址
2. 连号(结尾、中间)
3. 多号
4. 包含小区名称和路名
5. 调整一些类型的地址
This commit is contained in:
purple 2020-08-06 18:31:22 +08:00
parent 0bdfcc8c75
commit 226041bc05
31 changed files with 205 additions and 1930 deletions

View File

@ -1,16 +0,0 @@
package com.ruoyi.project.tool.address;
/**
* Address builder: turns raw address text into a {@link StandardAddress}.
*
* @author lihe
*/
public interface AddressBuilder {
/**
* Cleans the given address text.
*
* @param text raw address text to clean
* @return the cleaned address
*/
StandardAddress clear(String text);
}

View File

@ -1,38 +0,0 @@
package com.ruoyi.project.tool.address;
import com.ruoyi.project.tool.address.model.AddressType;
/**
 * One piece of address content: an {@link AddressType} tag plus the text
 * accumulated for it through {@link #appendContent(String)}.
 *
 * @author lihe
 */
public class AddressContent {
    private AddressType addressType;
    private StringBuilder sb;

    /**
     * Creates an empty content holder tagged with the given type.
     *
     * @param addressType type recorded for this content
     */
    public AddressContent(AddressType addressType) {
        this.sb = new StringBuilder();
        this.addressType = addressType;
    }

    /**
     * @return the type given at construction time
     */
    public AddressType getAddressType() {
        return this.addressType;
    }

    /**
     * Appends a fragment to the accumulated text.
     *
     * @param addressNodeContent fragment to append
     */
    public void appendContent(String addressNodeContent) {
        this.sb.append(addressNodeContent);
    }

    /**
     * @return everything appended so far, as a single string
     */
    public String getResult() {
        return String.valueOf(this.sb);
    }

    @Override
    public String toString() {
        StringBuilder out = new StringBuilder("AddressContent{");
        out.append("addressType=").append(this.addressType);
        out.append(", sb=").append(this.sb);
        out.append('}');
        return out.toString();
    }
}

View File

@ -1,59 +0,0 @@
package com.ruoyi.project.tool.address;
import com.ruoyi.project.tool.address.model.AddressType;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
/**
 * Parsing context for one address string: holds the text being parsed, the
 * nodes added so far (in insertion order) and the index recorded from the most
 * recently added node.
 *
 * @author lihe
 */
public class AddressContext {
    /**
     * Nodes in the order they were added.
     * Original author's note: could be optimised into a FIFO queue.
     */
    private final List<AddressNode> nodeList = new ArrayList<>();
    private final StringBuilder stringBuilder;
    private Integer startIndex = 0;

    /**
     * @param text address text to parse
     */
    public AddressContext(String text) {
        this.stringBuilder = new StringBuilder(text);
    }

    /**
     * @return the mutable buffer holding the address text
     */
    public StringBuilder getContent() {
        return this.stringBuilder;
    }

    /**
     * Appends a node and moves the start index to {@code node.getEndIndex() - 1}.
     * A node whose end index was never set leaves the start index unchanged;
     * previously that case failed with a NullPointerException on unboxing.
     *
     * @param node node to append
     */
    public void addAddressNode(AddressNode node) {
        Integer endIndex = node.getEndIndex();
        if (endIndex != null) {
            startIndex = endIndex - 1;
        }
        this.nodeList.add(node);
    }

    /**
     * @return the index recorded from the last added node (0 initially)
     */
    public Integer getStartIndex() {
        return startIndex;
    }

    /**
     * Builds a {@link StandardAddress} with one {@link AddressContent} per node.
     * Every content is currently tagged {@link AddressType#CONDO}.
     *
     * @return the assembled address
     */
    public StandardAddress getResult() {
        StandardAddress standardAddress = new StandardAddress(this.stringBuilder.toString());
        for (AddressNode node : nodeList) {
            AddressContent addressContent = new AddressContent(AddressType.CONDO);
            if (node.dataNode()) {
                addressContent.appendContent(node.getContent());
            } else if (1 == node.getContentList().size()) {
                addressContent.appendContent(node.getContentList().get(0).getContent());
            }
            // TODO (original author): nodes with several children should be combined
            // first and then split; until then they yield an empty content.
            standardAddress.addAddressContent(addressContent);
        }
        return standardAddress;
    }
}

View File

@ -1,80 +0,0 @@
package com.ruoyi.project.tool.address;
import com.ruoyi.common.utils.StringUtils;
import java.util.AbstractList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
/**
 * A node of a parsed address. It either carries text of its own (a "data
 * node") or groups child nodes in {@link #getContentList()}.
 *
 * @author lihe
 */
public class AddressNode {
    private AddressNodeType nodeType;
    private String content;
    private Integer startIndex;
    private Integer endIndex;
    private List<AddressNode> contentList = new LinkedList<>();

    /**
     * Creates a node without text.
     *
     * @param addressNodeType type of the node
     */
    public AddressNode(AddressNodeType addressNodeType) {
        this.nodeType = addressNodeType;
    }

    /**
     * Creates a node carrying the given text.
     *
     * @param addressNodeType type of the node
     * @param content         text of the node
     */
    public AddressNode(AddressNodeType addressNodeType, String content) {
        this(addressNodeType);
        this.content = content;
    }

    public AddressNodeType getNodeType() {
        return this.nodeType;
    }

    public void setNodeType(AddressNodeType nodeType) {
        this.nodeType = nodeType;
    }

    public Integer getStartIndex() {
        return this.startIndex;
    }

    public void setStartIndex(Integer startIndex) {
        this.startIndex = startIndex;
    }

    public Integer getEndIndex() {
        return this.endIndex;
    }

    public void setEndIndex(Integer endIndex) {
        this.endIndex = endIndex;
    }

    public String getContent() {
        return this.content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    /**
     * Tells whether this is a data node, i.e. its own text is not empty
     * according to {@code StringUtils.isNotEmpty}.
     *
     * @return the result of the emptiness check on the node text
     */
    public Boolean dataNode() {
        return StringUtils.isNotEmpty(content);
    }

    /**
     * Appends a child node.
     *
     * @param addressNode child to add
     */
    public void addChildNode(AddressNode addressNode) {
        this.contentList.add(addressNode);
    }

    /**
     * @return the live list of child nodes
     */
    public List<AddressNode> getContentList() {
        return this.contentList;
    }
}

View File

@ -1,6 +1,7 @@
package com.ruoyi.project.tool.address;
import com.ruoyi.project.tool.address.model.AdrNode;
import com.ruoyi.project.tool.address.utils.ParseContext;
import java.util.LinkedList;
import java.util.List;

View File

@ -1,33 +0,0 @@
package com.ruoyi.project.tool.address;
import com.ruoyi.project.tool.address.model.CleanAddress;
import com.ruoyi.project.tool.address.model.CleanAddressBuilder;
import com.ruoyi.project.tool.address.service.impl.NoSignalAddressHandler;
import com.ruoyi.project.tool.address.service.impl.SignalAddressHandler;
import java.util.LinkedList;
import java.util.List;
/**
 * Entry point for address cleaning.
 *
 * @author lihe
 */
public class CleanUtil {
    /**
     * Wraps the raw text in a {@link CleanAddress} and runs the handler that
     * matches it: {@link SignalAddressHandler} when the address reports special
     * characters, {@link NoSignalAddressHandler} otherwise.
     *
     * @param rawAddress raw address text
     * @return the address object after the chosen handler has processed it
     */
    public static CleanAddress clear(String rawAddress) {
        final CleanAddress result = new CleanAddress(rawAddress);
        final boolean hasSpecialChar = result.getContainsSpecialChar();
        if (!hasSpecialChar) {
            new NoSignalAddressHandler().clear(result);
            return result;
        }
        new SignalAddressHandler().clear(result);
        return result;
    }
}

View File

@ -1,29 +0,0 @@
package com.ruoyi.project.tool.address;
import com.ruoyi.project.tool.address.model.AddressType;
import java.util.LinkedList;
import java.util.List;
/**
 * Holds the raw address text together with the ordered list of
 * {@link AddressContent} items added for it.
 *
 * @author lihe
 */
public class StandardAddress {
    private List<AddressContent> children = new LinkedList<>();
    private String rawAddress;

    /**
     * @param text the raw address text
     */
    public StandardAddress(String text) {
        this.rawAddress = text;
    }

    /**
     * Appends one content item.
     *
     * @param addressContent item to add
     */
    public void addAddressContent(AddressContent addressContent) {
        this.children.add(addressContent);
    }

    /**
     * @return the live list of content items, in insertion order
     */
    public List<AddressContent> getResult() {
        return this.children;
    }
}

View File

@ -1,25 +0,0 @@
package com.ruoyi.project.tool.address.model;
import java.util.List;
/**
 * Building-level partial address.
 * NOTE(review): the original header comment read "room address", which looks
 * copy-pasted from another class — confirm.
 *
 * @author lihe
 */
public class BuildingAddress extends PartialAddress {
    /**
     * @param address text of the building address
     */
    public BuildingAddress(String address) {
        super(address);
    }

    /**
     * @return always {@code null}; no address type is assigned here yet.
     *         NOTE(review): probably should return a building-level
     *         {@link AddressType} constant — confirm which one exists.
     */
    @Override
    public AddressType getAddressType() {
        return null;
    }

    /**
     * Reports whether child addresses have been attached, using the same rule
     * as {@code CondoAddress}. The previous implementation returned
     * {@code null}, which fails with a NullPointerException as soon as a
     * caller unboxes the result.
     *
     * @return {@code true} when at least one child address is present
     */
    @Override
    public Boolean multiAddress() {
        return !childrenAddress.isEmpty();
    }
}

View File

@ -1,215 +0,0 @@
package com.ruoyi.project.tool.address.model;
import java.util.LinkedList;
import java.util.List;
/**
 * Address being cleaned: the raw text plus the parts extracted from it.
 *
 * @author lihe
 */
public class CleanAddress {
    /**
     * Characters whose presence marks an address as containing special characters.
     * NOTE(review): several entries are empty strings in this copy of the file,
     * probably full-width punctuation lost in transcoding. An empty entry makes
     * {@code String.contains} always true — restore the intended characters.
     */
    private static final List<String> specialChar = new LinkedList<>();

    static {
        specialChar.add("");
        specialChar.add(".");
        specialChar.add("");
        specialChar.add(",");
        specialChar.add("-");
        specialChar.add("——");
        specialChar.add("_");
        specialChar.add("");
        specialChar.add("");
        specialChar.add("");
        specialChar.add("(");
        specialChar.add(")");
        specialChar.add("");
        specialChar.add("");
    }

    /**
     * Original address text.
     */
    private String rawAddress;
    /**
     * Address text still to be processed; created by {@link #getContainsSpecialChar()}.
     */
    private StringBuilder boundAddress;
    /**
     * District.
     */
    private String district;
    /**
     * Region. NOTE(review): presumably the town; the original comment is empty.
     */
    private String region;
    /**
     * Block (plate).
     */
    private String block;
    /**
     * Road name; may be a road intersection.
     */
    private String road;
    /**
     * Residential community name.
     */
    private String communityName;
    /**
     * Floor; related to underground levels.
     */
    private String floor;
    /**
     * Single-unit (condo) address.
     */
    private PartialAddress condoAddress;
    /**
     * Property type, e.g. ordinary parking space.
     */
    private String propertyType;
    /**
     * Whether this is a standalone building.
     */
    private Boolean independent;
    /**
     * All partial addresses found.
     */
    private List<PartialAddress> addressList = new LinkedList<>();
    /**
     * Result of the most recent {@link #getContainsSpecialChar()} call;
     * {@code null} until that method has been called.
     */
    private Boolean containsSpecialChar;

    /**
     * @param rawAddress original address text
     */
    public CleanAddress(String rawAddress) {
        this.rawAddress = rawAddress;
    }

    public String getRawAddress() {
        return rawAddress;
    }

    public void setRawAddress(String rawAddress) {
        this.rawAddress = rawAddress;
    }

    public String getDistrict() {
        return district;
    }

    public void setDistrict(String district) {
        this.district = district;
    }

    public String getRegion() {
        return region;
    }

    public void setRegion(String region) {
        this.region = region;
    }

    public String getBlock() {
        return block;
    }

    public void setBlock(String block) {
        this.block = block;
    }

    public String getRoad() {
        return road;
    }

    public void setRoad(String road) {
        this.road = road;
    }

    public String getCommunityName() {
        return communityName;
    }

    public void setCommunityName(String communityName) {
        this.communityName = communityName;
    }

    public String getFloor() {
        return floor;
    }

    public void setFloor(String floor) {
        this.floor = floor;
    }

    public PartialAddress getCondoAddress() {
        return condoAddress;
    }

    public void setCondoAddress(PartialAddress condoAddress) {
        this.condoAddress = condoAddress;
    }

    public String getPropertyType() {
        return propertyType;
    }

    public void setPropertyType(String propertyType) {
        this.propertyType = propertyType;
    }

    /**
     * @return the live list of partial addresses
     */
    public List<PartialAddress> getAddressList() {
        return addressList;
    }

    /**
     * Appends a partial address to the list.
     *
     * @param partialAddress address to add
     */
    public void addAddress(PartialAddress partialAddress) {
        this.addressList.add(partialAddress);
    }

    public Boolean getIndependent() {
        return independent;
    }

    public void setIndependent(Boolean independent) {
        this.independent = independent;
    }

    /**
     * @return the working buffer, or {@code null} if
     *         {@link #getContainsSpecialChar()} has not been called yet
     */
    public StringBuilder getBoundAddress() {
        return boundAddress;
    }

    /**
     * Checks whether the raw address, after trimming and removing tabs and
     * spaces, contains any of the configured special characters.
     * <p>
     * Side effects: replaces the working buffer with a fresh copy of the raw
     * address, and stores the outcome in the {@code containsSpecialChar} field
     * so that {@link #toString()} reports it (the field was never assigned
     * before and always printed as {@code null}).
     * <p>
     * NOTE(review): the buffer is built from the raw text, not from the
     * whitespace-stripped text computed here — confirm that is intended.
     *
     * @return {@code true} if a special character is present
     * @throws NullPointerException if the raw address is {@code null}
     */
    public Boolean getContainsSpecialChar() {
        String todoAddress = rawAddress.trim()
                .replace("\t", "")
                .replace(" ", "");
        this.boundAddress = new StringBuilder(rawAddress);
        boolean found = false;
        for (String special : specialChar) {
            if (todoAddress.contains(special)) {
                found = true;
                break;
            }
        }
        this.containsSpecialChar = found;
        return found;
    }

    @Override
    public String toString() {
        return "CleanAddress{" +
                "rawAddress='" + rawAddress + '\'' +
                ", district='" + district + '\'' +
                ", region='" + region + '\'' +
                ", block='" + block + '\'' +
                ", road='" + road + '\'' +
                ", communityName='" + communityName + '\'' +
                ", floor=" + floor +
                ", condoAddress=" + condoAddress +
                ", propertyType='" + propertyType + '\'' +
                ", independent=" + independent +
                ", addressList=" + addressList +
                ", containsSpecialChar=" + containsSpecialChar +
                '}';
    }
}

View File

@ -1,222 +0,0 @@
package com.ruoyi.project.tool.address.model;
import com.ruoyi.common.utils.LoadUtil;
import com.ruoyi.common.utils.StringUtils;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Fluent builder that fills a {@link CleanAddress} by parsing its bound
 * address text: district, region, block, road, floor and the condo / building /
 * community partial addresses. The dictionaries are loaded through
 * {@link LoadUtil} each time a builder is created.
 *
 * @author lihe
 */
public class CleanAddressBuilder {
    private Map<String, String> districtMap = LoadUtil.loadDict("address-dict/district.dict");
    private List<String> regionList = LoadUtil.loadList("address-dict/region.dict");
    private List<String> blockList = LoadUtil.loadList("address-dict/block.dict");
    private List<String> roadList = LoadUtil.loadList("address-dict/road.dict");
    private CleanAddress cleanAddress;
    private static final int DISTRICT_LENGTH = 8;
    private static final String SHANGHAI_SHI = "上海市";
    private static final String SHANGHAI = "上海";
    private static final String SHANG = "";
    private static final String HAO = "";
    private static final String SHANG_SHANG = "上上";
    private static final String SHI_PATTERN = "([\\dA-Za-z]+)(室?)$";
    private static final String CHINESE_FLOOR_PATTERN = "([一二三四五六七八九十]+)层";
    private static final String NUMBER_FLOOR_PATTERN = "(\\d+)层";
    private static final String HAO_PATTERN = "([\\dA-Za-z]+)号";

    // Compiled once; the expressions never change.
    private static final Pattern SHI_REGEX = Pattern.compile(SHI_PATTERN);
    private static final Pattern CHINESE_FLOOR_REGEX = Pattern.compile(CHINESE_FLOOR_PATTERN);
    private static final Pattern NUMBER_FLOOR_REGEX = Pattern.compile(NUMBER_FLOOR_PATTERN);
    private static final Pattern HAO_REGEX = Pattern.compile(HAO_PATTERN);

    private CleanAddressBuilder(CleanAddress cleanAddress) {
        this.cleanAddress = cleanAddress;
    }

    /**
     * Creates a builder working on the given address.
     *
     * @param cleanAddress address to fill; its bound address must already be set
     * @return a new builder
     */
    public static CleanAddressBuilder builder(CleanAddress cleanAddress) {
        return new CleanAddressBuilder(cleanAddress);
    }

    /**
     * Parses the district from the first 7 characters of the bound address
     * (or the whole address when it is shorter than {@code DISTRICT_LENGTH}).
     * <p>
     * The text is prefixed with {@code SHANG}, doubled prefixes are collapsed,
     * and when the result does not start with the city name the leading
     * character is removed again. The removal used to be
     * {@code substring(1, length - 1)}, which also cut the last character (so a
     * short address such as a bare two-character district never matched) and
     * threw {@link StringIndexOutOfBoundsException} for inputs of at most one
     * character; it now removes only the leading character.
     *
     * @return this builder
     */
    public CleanAddressBuilder parseDistrict() {
        StringBuilder bound = cleanAddress.getBoundAddress();
        String head = bound.length() >= DISTRICT_LENGTH ? bound.substring(0, 7) : bound.toString();
        String shanghaiAndDistrict = (SHANG + head).replace(SHANG_SHANG, SHANG);
        if (!shanghaiAndDistrict.isEmpty()
                && !shanghaiAndDistrict.startsWith(SHANGHAI_SHI)
                && !shanghaiAndDistrict.startsWith(SHANGHAI)
                && shanghaiAndDistrict.startsWith(SHANG)) {
            shanghaiAndDistrict = shanghaiAndDistrict.substring(1);
        }
        for (Map.Entry<String, String> district : districtMap.entrySet()) {
            if (shanghaiAndDistrict.startsWith(district.getKey())) {
                cleanAddress.setDistrict(district.getValue());
                break;
            }
        }
        return this;
    }

    /**
     * Parses the region (town): every dictionary entry found in the bound
     * address is applied in turn, so the last matching entry wins.
     *
     * @return this builder
     */
    public CleanAddressBuilder parseRegion() {
        for (String region : regionList) {
            if (-1 != cleanAddress.getBoundAddress().indexOf(region)) {
                cleanAddress.setRegion(region);
            }
        }
        return this;
    }

    /**
     * Parses the block: the last block dictionary entry found in the bound
     * address wins.
     * <p>
     * The matched block entry itself is stored. The previous code stored
     * {@code regionList.get(i)}, i.e. an unrelated region at the same index
     * (or an IndexOutOfBoundsException when the region list is shorter).
     *
     * @return this builder
     */
    public CleanAddressBuilder parseBlock() {
        for (String block : blockList) {
            if (-1 != cleanAddress.getBoundAddress().indexOf(block)) {
                cleanAddress.setBlock(block);
            }
        }
        return this;
    }

    /**
     * Parses the road, which may be missing. First every full dictionary entry
     * is tried (last match wins); if none matched, each entry is tried again
     * with its final character removed when it ends with the configured suffix.
     *
     * @return this builder
     */
    public CleanAddressBuilder parseRoad() {
        for (String road : roadList) {
            if (-1 != cleanAddress.getBoundAddress().indexOf(road)) {
                cleanAddress.setRoad(road);
            }
        }
        if (StringUtils.isEmpty(cleanAddress.getRoad())) {
            for (String road : roadList) {
                // Cut off the trailing suffix character before searching again.
                String noRoad = road;
                if (noRoad.endsWith("")) {
                    noRoad = noRoad.substring(0, noRoad.length() - 1);
                }
                if (-1 != cleanAddress.getBoundAddress().indexOf(noRoad)) {
                    cleanAddress.setRoad(noRoad);
                }
            }
        }
        return this;
    }

    /**
     * Parses the community name (road name and community name to be
     * redefined). Not implemented yet.
     *
     * @return this builder
     */
    public CleanAddressBuilder parseCommunityName() {
        return this;
    }

    /**
     * Decides whether the address is a standalone building (whole building,
     * villa): when the bound address ends with a room number the condo address
     * is parsed and the flag is set to false, otherwise the flag is set to true.
     *
     * @return this builder
     */
    public CleanAddressBuilder parseIndependent() {
        Matcher matcher = SHI_REGEX.matcher(cleanAddress.getBoundAddress());
        if (matcher.find()) {
            getCondoAddress();
            cleanAddress.setIndependent(Boolean.FALSE);
        } else {
            cleanAddress.setIndependent(Boolean.TRUE);
        }
        return this;
    }

    /**
     * Parses the partial addresses (condo, building, community).
     */
    public void parseAddress() {
        getCondoAddress();
    }

    /**
     * Extracts the floor: a Chinese-numeral floor is preferred, a digit floor
     * is the fallback. Leaves the floor untouched when neither is present.
     */
    private void getFloorText() {
        Matcher matcher = CHINESE_FLOOR_REGEX.matcher(cleanAddress.getBoundAddress());
        if (!matcher.find()) {
            matcher = NUMBER_FLOOR_REGEX.matcher(cleanAddress.getBoundAddress());
            if (!matcher.find()) {
                return;
            }
        }
        cleanAddress.setFloor(matcher.group(1));
    }

    /**
     * Derives the community address by removing the first house-number token
     * from the building address text, and adds it to the clean address.
     * Does nothing when the text holds no house number.
     *
     * @param buildingAddressText text of the building address
     */
    private void getCommunityAddress(String buildingAddressText) {
        // Only the "number number number (branch lane)" shape is handled.
        Matcher matcher = HAO_REGEX.matcher(buildingAddressText);
        if (matcher.find()) {
            String communityText = buildingAddressText.replace(matcher.group(0), "");
            CommunityAddress communityAddress = new CommunityAddress(communityText);
            this.cleanAddress.addAddress(communityAddress);
        }
    }

    /**
     * Builds the condo, building and community addresses from the bound
     * address. Only the "road is known" case is implemented: the text from the
     * road onwards is the condo address, the part up to the last house-number
     * marker is the building address, and the community address is derived
     * from the building address.
     * <p>
     * Each of the three is added to the clean address once. The previous code
     * added the condo address twice and never added the building address.
     */
    private void getCondoAddress() {
        getFloorText();
        if (!StringUtils.isEmpty(cleanAddress.getRoad())) {
            // A road is known: everything from the road onwards is the unit address.
            int roadIndex = cleanAddress.getBoundAddress().indexOf(cleanAddress.getRoad());
            String condoAddressText = cleanAddress.getBoundAddress().substring(roadIndex);
            // Single unit
            CondoAddress condoAddress = new CondoAddress(condoAddressText);
            cleanAddress.setCondoAddress(condoAddress);
            cleanAddress.addAddress(condoAddress);
            // Building: cut after the last house-number marker.
            int haoIndex = condoAddressText.lastIndexOf(HAO);
            String buildingAddressText = condoAddressText.substring(0, haoIndex + 1);
            BuildingAddress buildingAddress = new BuildingAddress(buildingAddressText);
            condoAddress.addPartialAddress(buildingAddress);
            cleanAddress.addAddress(buildingAddress);
            getCommunityAddress(buildingAddressText);
        } else if (!StringUtils.isEmpty(cleanAddress.getCommunityName())) {
            // No road but a community name: not implemented yet.
        } else {
            // Neither road nor community name: not implemented yet.
        }
        // Original author's outline: search from the road to the end, strip the
        // room number, strip the house number, yielding unit / building /
        // community addresses.
    }
}

View File

@ -1,25 +0,0 @@
package com.ruoyi.project.tool.address.model;
import java.util.List;
/**
 * Community-level (residential compound) partial address.
 *
 * @author lihe
 */
public class CommunityAddress extends PartialAddress {
    /**
     * @param address text of the community address
     */
    public CommunityAddress(String address) {
        super(address);
    }

    /**
     * @return always {@code null}; no address type is assigned here yet.
     *         NOTE(review): probably should return a community-level
     *         {@link AddressType} constant — confirm which one exists.
     */
    @Override
    public AddressType getAddressType() {
        return null;
    }

    /**
     * Reports whether child addresses have been attached, using the same rule
     * as {@code CondoAddress}. The previous implementation returned
     * {@code null}, which fails with a NullPointerException as soon as a
     * caller unboxes the result.
     *
     * @return {@code true} when at least one child address is present
     */
    @Override
    public Boolean multiAddress() {
        return !childrenAddress.isEmpty();
    }
}

View File

@ -1,43 +0,0 @@
package com.ruoyi.project.tool.address.model;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Single-unit (condo) address. Room, floor and house number are parsed from
 * the address text as soon as the object is created.
 *
 * @author lihe
 */
public class CondoAddress extends PartialAddress {
    /**
     * @param address text of the unit address
     */
    public CondoAddress(String address) {
        super(address);
        // The room must be parsed first: parseFloor() reads this.shi.
        this.shi = this.parseShi();
        this.floor = this.parseFloor();
        this.hao = this.parseHAO();
    }

    @Override
    public AddressType getAddressType() {
        return AddressType.CONDO;
    }

    /**
     * @return {@code true} when at least one child address has been attached
     */
    @Override
    public Boolean multiAddress() {
        return !this.childrenAddress.isEmpty();
    }

    public String getHao() {
        return this.hao;
    }

    public String getShi() {
        return this.shi;
    }

    public Integer getFloor() {
        return this.floor;
    }
}

View File

@ -1,121 +0,0 @@
package com.ruoyi.project.tool.address.model;
import com.ruoyi.common.exception.CustomException;
import com.ruoyi.common.utils.StringUtils;
import io.swagger.models.auth.In;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Base class for a part of a cleaned address (unit, building, community).
 * Holds the text of the part and offers parsers for the room number, the
 * floor derived from it, and the house number.
 *
 * @author lihe
 */
public abstract class PartialAddress {
    protected String address;
    protected String hao;
    protected String shi;
    protected Integer floor;
    protected final int HUNDRED = 100;
    protected final int TEN_THOUSAND = 10 * 1000;
    protected final static String SHI_PATTERN = "([\\dA-Za-z]+)室$";
    /**
     * House number, optionally followed by ONE heavenly-stem suffix character.
     * The suffix used to be written as a literal group, which only matched the
     * whole ten-character sequence, so a number with a single suffix character
     * was never recognised; it also contained two typos for the last two
     * stems. It is now a character class; the capturing groups are unchanged.
     */
    protected final static String HAO_PATTERN = "([\\dA-Za-z]+)([甲乙丙丁戊己庚辛壬癸])?号";
    protected final static String NUMBER_PATTERN = "\\d+";

    // Compiled once; the expressions never change.
    private static final Pattern SHI_REGEX = Pattern.compile(SHI_PATTERN);
    private static final Pattern HAO_REGEX = Pattern.compile(HAO_PATTERN);
    private static final Pattern NUMBER_REGEX = Pattern.compile(NUMBER_PATTERN);

    /**
     * Child addresses attached to this one (unit, building, community).
     */
    protected List<PartialAddress> childrenAddress = new LinkedList<>();

    /**
     * @param address text of this address part
     */
    public PartialAddress(String address) {
        this.address = address;
    }

    /**
     * @return the type of this address part
     */
    public abstract AddressType getAddressType();

    /**
     * @return whether this address has child addresses
     */
    public abstract Boolean multiAddress();

    /**
     * Parses the room number: the alphanumeric run directly before the room
     * marker at the very end of the address.
     * Original author's note: needs improvement, underground levels are not handled.
     *
     * @return the room number, or {@code null} when the address does not end with one
     */
    protected String parseShi() {
        Matcher matcher = SHI_REGEX.matcher(this.address);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Derives the floor from the first digit run of the room number: values up
     * to 100 are divided by 10, values up to 10000 are divided by 100.
     * Original author's note: needs improvement, underground levels are not handled.
     *
     * @return the floor, or {@code null} when there is no room number or it
     *         contains no digits
     * @throws CustomException when the number exceeds 10000, including digit
     *                         runs too long for an {@code int} (these used to
     *                         surface as a NumberFormatException)
     */
    protected Integer parseFloor() {
        if (StringUtils.isEmpty(this.shi)) {
            return null;
        }
        Matcher matcher = NUMBER_REGEX.matcher(this.shi);
        if (!matcher.find()) {
            return null;
        }
        int num;
        try {
            num = Integer.parseInt(matcher.group());
        } catch (NumberFormatException overflow) {
            // Only digits can reach this point, so the value is simply too large
            // for an int; report it like any other oversized room number.
            throw new CustomException("室号太大");
        }
        if (num <= HUNDRED) {
            return num / 10;
        }
        if (num <= TEN_THOUSAND) {
            return num / 100;
        }
        throw new CustomException("室号太大");
    }

    /**
     * Parses the house number: the alphanumeric run of the first house-number
     * token in the address, without the optional suffix character.
     *
     * @return the house number, or {@code null} when none is present
     */
    protected String parseHAO() {
        Matcher matcher = HAO_REGEX.matcher(this.address);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Attaches a child address.
     *
     * @param partialAddress child to attach
     */
    protected void addPartialAddress(PartialAddress partialAddress) {
        this.childrenAddress.add(partialAddress);
    }

    @Override
    public String toString() {
        return "PartialAddress{" +
                "hao='" + hao + '\'' +
                ", shi='" + shi + '\'' +
                ", floor=" + floor +
                '}';
    }
}

View File

@ -1,103 +0,0 @@
package com.ruoyi.project.tool.address.model;
/**
 * Address split into segments. Plain value holder: every property is kept
 * next to its accessors.
 */
public class SegmentAddress {

    /** Branch lane. */
    private String nong;

    public String getNong() {
        return this.nong;
    }

    public void setNong(String nong) {
        this.nong = nong;
    }

    /** House number. NOTE(review): the original comment is empty; meaning taken from the field name. */
    private String hao;

    public String getHao() {
        return this.hao;
    }

    public void setHao(String hao) {
        this.hao = hao;
    }

    /** Whether there are several house numbers. */
    private Boolean multiHao;

    public Boolean getMultiHao() {
        return this.multiHao;
    }

    public void setMultiHao(Boolean multiHao) {
        this.multiHao = multiHao;
    }

    /** Floor, including duplex units. */
    private String floor;

    public String getFloor() {
        return this.floor;
    }

    public void setFloor(String floor) {
        this.floor = floor;
    }

    /** Whether there are several floors. */
    private Boolean multiFloor;

    public Boolean getMultiFloor() {
        return this.multiFloor;
    }

    public void setMultiFloor(Boolean multiFloor) {
        this.multiFloor = multiFloor;
    }

    /** Building. NOTE(review): the original comment is empty; meaning taken from the field name. */
    private String dong;

    public String getDong() {
        return this.dong;
    }

    public void setDong(String dong) {
        this.dong = dong;
    }

    /** Room number. */
    private String shi;

    public String getShi() {
        return this.shi;
    }

    public void setShi(String shi) {
        this.shi = shi;
    }

    /** Whether there are several room numbers. */
    private Boolean multiShi;

    public Boolean getMultiShi() {
        return this.multiShi;
    }

    public void setMultiShi(Boolean multiShi) {
        this.multiShi = multiShi;
    }
}

View File

@ -1,7 +1,7 @@
package com.ruoyi.project.tool.address.parse;
import com.ruoyi.common.utils.LoadUtil;
import com.ruoyi.project.tool.address.ParseContext;
import com.ruoyi.project.tool.address.utils.ParseContext;
import com.ruoyi.project.tool.address.model.RoadNode;
import java.util.List;

View File

@ -1,6 +1,6 @@
package com.ruoyi.project.tool.address.parse;
import com.ruoyi.project.tool.address.ParseContext;
import com.ruoyi.project.tool.address.utils.ParseContext;
import com.ruoyi.project.tool.address.model.*;
import java.util.regex.Matcher;

View File

@ -1,10 +1,8 @@
package com.ruoyi.project.tool.address.parse;
import com.ruoyi.project.tool.address.ParseContext;
import com.ruoyi.project.tool.address.model.AdrNode;
import com.ruoyi.project.tool.address.model.CengNode;
import com.ruoyi.project.tool.address.model.HaoNode;
import com.ruoyi.project.tool.address.model.ShiNode;
import com.ruoyi.common.utils.StringUtils;
import com.ruoyi.project.tool.address.model.*;
import com.ruoyi.project.tool.address.utils.ParseContext;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -21,8 +19,10 @@ import java.util.regex.Pattern;
public class ZParse {
private static final String FU_SHI = "复式";
private static final String NUMBER_PATTERN = "\\d+";
private static final String DEFAULT_SHI_PATTERN1 = "([\\dA-Za-z\\_\\-\\.\\——\\、,]+)(室?)$";
private static final String DEFAULT_SHI_PATTERN = "([\\dA-Za-z\\-\\_\\.]+)室";
private static final String LIAN_SHI_PATTERN = "(\\d+)[、\\.\\-\\_](\\d)(室)?$";
private static final String DEFAULT_SHI_PATTERN = "[\\dA-Za-z]+室";
private static final String STRICT_SHI_PATTERN = DEFAULT_SHI_PATTERN + "$";
private static final String MULTI_SHI_PATTERN = "[号](\\d+(室)?[、\\.\\-\\_,]?)+(室)?$";
private static final String CHINESE_FLOOR_PATTERN = "([一二三四五六七八九十]+)层";
private static final String DEFAULT_FLOOR_PATTERN = "地下?([\\d一二三四五六七八九十])层";
private static final String TAIL_FLOOR_PATTERN = "(\\d[\\-\\_\\——\\—]?\\d)(层)?$";
@ -31,6 +31,7 @@ public class ZParse {
private AdrNode rootNode;
private String floor;
private Integer startIndex;
private Boolean isPark = Boolean.FALSE;
public ZParse(AdrNode adrNode, ParseContext context, Integer index) {
this.context = context;
@ -39,10 +40,10 @@ public class ZParse {
}
public void parse() {
tag();
parseCeng();
parseZhuang();
parseShi();
tag();
}
private void parseZhuang() {
@ -50,34 +51,109 @@ public class ZParse {
Matcher matcher = haoPattern.matcher(this.context.getContent());
if (matcher.find()) {
int index = this.context.getContent().indexOf(matcher.group(0), this.startIndex);
HaoNode haoNode = new HaoNode(matcher.group(0), index, index + matcher.group(0).length());
this.rootNode.addNode(haoNode);
startIndex = haoNode.getEndIndex();
if (-1 != index) {
HaoNode haoNode = new HaoNode(matcher.group(), index, index + matcher.group().length());
this.rootNode.addNode(haoNode);
startIndex = haoNode.getEndIndex();
}
}
}
/**
 * Duplex unit: takes the first digit run found at or after the current start
 * index as the room, adds it as a {@code ShiNode}, advances the start index
 * to the end of that node and tags the context with the duplex feature.
 * Does nothing when no digits follow the start index.
 */
private void parseFUSHI() {
    String remaining = this.context.getContent().substring(this.startIndex);
    Matcher digits = Pattern.compile(NUMBER_PATTERN).matcher(remaining);
    if (!digits.find()) {
        return;
    }
    String number = digits.group(0);
    int from = this.context.getContent().indexOf(number, this.startIndex);
    ShiNode shiNode = new ShiNode(number + "", from, from + number.length());
    this.rootNode.addNode(shiNode);
    this.startIndex = shiNode.getEndIndex();
    this.context.addFeature("复式");
}
/**
 * Joined room numbers written in short form, e.g. "102.3" meaning rooms 102
 * and 103. Adds one {@code ShiNode} for the first number and one for the
 * second, whose text is the first number with its tail replaced by
 * {@code shi2}.
 *
 * @param shi1 full first room number
 * @param shi2 trailing digits of the second room number
 */
private void parseLianShi(String shi1, String shi2) {
    int firstStart = this.context.getContent().indexOf(shi1, this.startIndex);
    ShiNode first = new ShiNode(shi1 + "", firstStart, firstStart + shi1.length());
    this.rootNode.addNode(first);

    // The second number is searched for after the end of the first node.
    int afterFirst = first.getEndIndex();
    int secondStart = this.context.getContent().indexOf(shi2, afterFirst);
    String sharedPrefix = shi1.substring(0, shi1.length() - shi2.length());
    ShiNode second = new ShiNode(sharedPrefix + shi2 + "", secondStart, secondStart + shi2.length());
    this.rootNode.addNode(second);
}
/**
 * Several room numbers: adds one {@code ShiNode} per digit run of the matched
 * text. Each number is located in the context content starting after the
 * previously added node; a number that cannot be located is skipped.
 *
 * @param matcherText text matched by the multi-room pattern
 */
private void parseMultiShi(String matcherText) {
    Matcher numbers = Pattern.compile(NUMBER_PATTERN).matcher(matcherText);
    int searchFrom = this.startIndex;
    while (numbers.find()) {
        String roomNumber = numbers.group();
        int at = this.context.getContent().indexOf(roomNumber, searchFrom);
        if (-1 != at) {
            ShiNode shiNode = new ShiNode(roomNumber + "", at, at + roomNumber.length());
            this.rootNode.addNode(shiNode);
            searchFrom = shiNode.getEndIndex();
        }
    }
}
private void parseShi() {
String text = this.context.getContent().substring(this.startIndex);
// 复式
if (-1 != this.context.getContent().indexOf(FU_SHI)) {
String content = this.context.getContent().substring(this.startIndex);
Pattern shiPattern = Pattern.compile(NUMBER_PATTERN);
Matcher matcher = shiPattern.matcher(content);
if (matcher.find()) {
int index = this.context.getContent().indexOf(matcher.group(0), this.startIndex);
ShiNode shiNode = new ShiNode(matcher.group(0) + "", index, index + matcher.group(0).length());
this.rootNode.addNode(shiNode);
startIndex = shiNode.getEndIndex();
}
} else {
Pattern shiPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
Matcher matcher = shiPattern.matcher(this.context.getContent());
if (matcher.find()) {
int index = this.context.getContent().indexOf(matcher.group(0), this.startIndex);
ShiNode shiNode = new ShiNode(matcher.group(0), index, index + matcher.group(0).length());
this.rootNode.addNode(shiNode);
startIndex = shiNode.getEndIndex();
}
if (-1 != text.indexOf(FU_SHI)) {
parseFUSHI();
return;
}
// 连室
Pattern lianShiPattern = Pattern.compile(LIAN_SHI_PATTERN);
Matcher matcher = lianShiPattern.matcher(text);
if (matcher.find()) {
parseLianShi(matcher.group(1), matcher.group(2));
return;
}
// 多室
Pattern shiPattern = Pattern.compile(MULTI_SHI_PATTERN);
matcher = shiPattern.matcher(text);
if (matcher.find()) {
parseMultiShi(matcher.group());
return;
}
// 单室
String matcherText = null;
if (this.isPark) {
shiPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
} else {
shiPattern = Pattern.compile(STRICT_SHI_PATTERN);
}
matcher = shiPattern.matcher(text);
if (matcher.find()) {
matcherText = matcher.group();
}
if (StringUtils.isNotEmpty(matcherText)) {
int index = this.context.getContent().indexOf(matcherText, startIndex);
ShiNode shiNode = new ShiNode(matcherText, index, index + matcherText.length());
this.rootNode.addNode(shiNode);
}
}
private void tag() {
@ -85,15 +161,25 @@ public class ZParse {
if (-1 != this.context.getContent().indexOf("车位") || -1 != this.context.getContent().indexOf("车库")
|| -1 != this.context.getContent().indexOf("停车")) {
this.context.addFeature("车位");
this.isPark = Boolean.TRUE;
}
if (-1 != this.context.getContent().indexOf("地块")) {
this.context.addFeature("地块");
}
if (-1 != this.context.getContent().indexOf("别墅")) {
this.context.addFeature("别墅");
}
}
/**
* 判断这个层是否可取
*
* @param floorIndex
* @return
*/
private Boolean invalidCeng(int floorIndex) {
// 判断这个层是否可取
Pattern shiPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
Matcher shiMatcher = shiPattern.matcher(this.context.getContent());
if (shiMatcher.find()) {
@ -136,9 +222,10 @@ public class ZParse {
.replace("", "4")
.replace("", "5");
int index = this.context.getContent().indexOf(matcher.group(0), this.startIndex);
if (invalidCeng(index)) {
if (invalidCeng(index) || this.isPark) {
return;
}
// 并且是否包含车位如果有包含车位忽略
CengNode cengNode = new CengNode("-" + num, index, index + matcher.group(0).length());
this.rootNode.addNode(cengNode);
rootNode.addNode(cengNode);

View File

@ -1,81 +0,0 @@
package com.ruoyi.project.tool.address.service;
import java.util.List;
/**
* Address builder (builder pattern): one parse method per address component.
* @author lihe
*/
public interface AddressBuilder {
/**
* Parses the district.
*
* @return the district
*/
String parseDistrict();
/**
* Parses the region.
* NOTE(review): the original comment is empty; presumably the town — confirm.
*
* @return the region
*/
String parseRegion();
/**
* Parses the block (plate).
*
* @return the block
*/
String parseBlock();
/**
* Parses the road.
* NOTE(review): the original comment is empty; a list is returned, presumably
* because an address may name more than one road — confirm.
*
* @return the road names
*/
List<String> parseRoad();
/**
* Parses the residential community name.
*
* @return the community name
*/
String parseCommunityName();
/**
* Parses the lane.
* NOTE(review): the original comment is empty; meaning taken from the method name.
*
* @return the lane
*/
String parseNONG();
/**
* Parses the house numbers.
* NOTE(review): the original comment is empty; meaning taken from the method name.
*
* @return the house numbers
*/
List<String> parseHao();
/**
* Parses the room number.
* NOTE(review): the original comment is empty; meaning taken from the method name.
*
* @return the room number
*/
String parseShi();
/**
* Parses the floor, including underground levels.
*
* @return the floor
*/
String parseFloor();
/**
* Tells whether the address is a standalone building.
* @return whether the address is a standalone building
*/
Boolean parseIndependent();
}

View File

@ -1,52 +0,0 @@
package com.ruoyi.project.tool.address.service;
import com.ruoyi.project.tool.address.model.CleanAddress;
import java.util.LinkedList;
import java.util.List;
/**
 * Address cleaning helper. Work in progress: {@link #clear(String)} only picks
 * a builder and does not produce a result yet.
 *
 * @author lihe
 */
public class AddressCleanUtils {
    private AddressBuilder builder;
    private static List<String> specialChar = new LinkedList<>();

    static {
        // Same entries, same order, as the characters that mark a "special" address.
        String[] markers = {
                "", ".", "", ",", "-", "——", "_",
                "", "", "", "(", ")", "", ""
        };
        for (String marker : markers) {
            specialChar.add(marker);
        }
    }

    /**
     * Strips surrounding whitespace, tabs and spaces from the text and creates
     * a {@code DefaultAddressBuilder} for it once per special character it
     * contains.
     *
     * @param text raw address text
     * @return always {@code null}; the cleaning result is not built yet
     */
    public List<CleanAddress> clear(String text) {
        String trimmed = text.trim();
        String withoutTabs = trimmed.replace("\t", "");
        String todoAddress = withoutTabs.replace(" ", "");
        for (String marker : specialChar) {
            if (!todoAddress.contains(marker)) {
                continue;
            }
            builder = new DefaultAddressBuilder(todoAddress);
        }
        return null;
    }
}

View File

@ -1,19 +0,0 @@
package com.ruoyi.project.tool.address.service;
import com.ruoyi.project.tool.address.model.CleanAddress;
/**
* Address handling strategy.
*
* @author lihe
*/
public interface AddressHandler {
/**
* Cleans the given address.
*
* @param cleanAddress address to clean
*/
void clear(CleanAddress cleanAddress);
}

View File

@ -1,45 +0,0 @@
package com.ruoyi.project.tool.address.service;
import java.util.LinkedList;
import java.util.List;
/**
* Base model of a parsed address. Declares the address components as private
* fields only; no accessors or behaviour are defined here.
*/
public abstract class AddressModel {
private String district;
private String region;
private String block;
private String road;
private String communityName;
private String nong;
private String complexHao;
/**
* Generalised house number; may be a run of consecutive numbers.
*/
private String hao;
/**
* Whether there are several house numbers. Rules noted by the original author
* (hao = house number, nong = lane, zhuang = building):
* x-hao y-hao = x-nong y-hao
* x-hao y-zhuang = x-hao
* x-zhuang y-hao = y-hao
* x-zhuang y-hao = x-zhuang
* NOTE(review): the last two rules contradict each other as written — confirm.
*/
private Boolean multiHao;
/**
* Whether a floor is included.
* Duplex, multiple floors.
*/
private String floor;
private String shi;
/**
* Whether this is a standalone building.
*/
private Boolean dependency;
/**
* Hierarchy: each level holds exactly one entry (one-to-one).
*/
private AddressModel hierarchy;
// /**
// * Sibling nodes
// */
// private List<AddressModel> sibling = new LinkedList<>();
}

View File

@ -1,192 +0,0 @@
package com.ruoyi.project.tool.address.service;
import com.ruoyi.common.utils.LoadUtil;
import com.ruoyi.project.tool.address.model.AddressType;
import com.ruoyi.project.tool.address.model.CleanAddress;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* 地址构建基类
*
* @author lihe
*/
public abstract class BaseAddressBuilder implements AddressBuilder {
    /** District dictionary: a key is matched against the start of the address, its value is returned. */
    private final Map<String, String> districtMap = LoadUtil.loadDict("address-dict/district.dict");
    /** Region names searched for anywhere in the address. */
    private final List<String> regionList = LoadUtil.loadList("address-dict/region.dict");
    /** Block names searched for anywhere in the address. */
    private final List<String> blockList = LoadUtil.loadList("address-dict/block.dict");
    /** Road names searched for anywhere in the address. */
    private final List<String> roadList = LoadUtil.loadList("address-dict/road.dict");

    /** Addresses at least this long are cut to their first {@code DISTRICT_LENGTH - 1} characters for district lookup. */
    private static final int DISTRICT_LENGTH = 8;
    private static final String SHANGHAI_SHI = "上海市";
    private static final String SHANGHAI = "上海";
    private static final String SHANG_SHANG = "上上";
    // NOTE(review): this literal (and HAO / ROAD_SUFFIX below) was empty in the reviewed copy of the
    // file. It is restored here from SHANG_SHANG = "上上" and from how the constant is used; confirm
    // against the dictionary-driven behaviour expected by callers.
    private static final String SHANG = "上";
    private static final String HAO = "号";
    /** Trailing character removed from dictionary road names in the {@link #parseRoad()} fallback. */
    private static final String ROAD_SUFFIX = "路";

    // Patterns are compiled once instead of on every parse call.
    private static final Pattern CHINESE_FLOOR_PATTERN = Pattern.compile("([一二三四五六七八九十]+)层");
    private static final Pattern NUMBER_FLOOR_PATTERN = Pattern.compile("(\\d+)层");
    private static final Pattern DEFAULT_HAO_PATTERN = Pattern.compile("([\\dA-Za-z]+)号");
    private static final Pattern DEFAULT_SHI_PATTERN = Pattern.compile("([\\dA-Za-z]+)(室?)$");
    private static final Pattern DEFAULT_NONG_PATTERN =
            Pattern.compile("[\\d一二三四五六七八九十]+弄([\\d一二三四五六七八九十]+支弄)?");

    /** Cleaning state for the address handed to the constructor. */
    protected CleanContext context;

    /**
     * Creates a builder working on a fresh {@link CleanContext} for the given text.
     *
     * @param text raw address text
     */
    public BaseAddressBuilder(String text) {
        this.context = new CleanContext(text);
    }

    /**
     * Address category (single unit, building, or community).
     *
     * @return the category this builder produces
     */
    public abstract AddressType getAddressType();

    /**
     * Looks up the district from the beginning of the address.
     *
     * <p>The first characters of the address are prefixed with {@code SHANG}, a doubled prefix is
     * collapsed, and the prefix is removed again when the text does not start with
     * {@code SHANGHAI}. The result is then compared against every dictionary key.
     *
     * @return the dictionary value of the first key the normalised text starts with, or
     *         {@code null} when no key matches
     */
    @Override
    public String parseDistrict() {
        StringBuilder address = this.context.getBoundAddress();
        String head = address.length() >= DISTRICT_LENGTH
                ? address.substring(0, DISTRICT_LENGTH - 1)
                : address.toString();
        String shanghaiAndDistrict = (SHANG + head).replace(SHANG_SHANG, SHANG);
        if (!shanghaiAndDistrict.startsWith(SHANGHAI_SHI)
                && !shanghaiAndDistrict.startsWith(SHANGHAI)
                && shanghaiAndDistrict.startsWith(SHANG)) {
            // Remove only the artificial prefix. The previous substring(1, length - 1) also dropped
            // the last character and threw StringIndexOutOfBoundsException for an empty address.
            shanghaiAndDistrict = shanghaiAndDistrict.substring(1);
        }
        for (Map.Entry<String, String> district : districtMap.entrySet()) {
            if (shanghaiAndDistrict.startsWith(district.getKey())) {
                return district.getValue();
            }
        }
        return null;
    }

    /**
     * @return the first region dictionary entry contained in the address, or {@code null}
     */
    @Override
    public String parseRegion() {
        return firstContained(regionList);
    }

    /**
     * @return the first block dictionary entry contained in the address, or {@code null}
     */
    @Override
    public String parseBlock() {
        // Previously returned regionList.get(i): the wrong dictionary, and a possible
        // IndexOutOfBoundsException when the block list is longer than the region list.
        return firstContained(blockList);
    }

    /**
     * Collects every dictionary road name contained in the address. When none is found, the
     * lookup is repeated with the trailing {@code ROAD_SUFFIX} removed from each dictionary entry.
     *
     * @return the matched road names in dictionary order; empty when nothing matches
     */
    @Override
    public List<String> parseRoad() {
        String address = this.context.getBoundAddress().toString();
        List<String> roadSegmentList = new LinkedList<>();
        for (String road : roadList) {
            if (address.contains(road)) {
                roadSegmentList.add(road);
            }
        }
        if (!roadSegmentList.isEmpty()) {
            return roadSegmentList;
        }
        for (String road : roadList) {
            String noRoad = road.endsWith(ROAD_SUFFIX) ? road.substring(0, road.length() - 1) : road;
            // An empty needle is contained in every string, so it must not count as a match.
            if (!noRoad.isEmpty() && address.contains(noRoad)) {
                roadSegmentList.add(noRoad);
            }
        }
        return roadSegmentList;
    }

    /**
     * @return always {@code null} in this base implementation
     */
    @Override
    public String parseCommunityName() {
        return null;
    }

    /**
     * @return the first lane ("弄", optionally followed by a "支弄" sub-lane) segment, or {@code null}
     */
    @Override
    public String parseNONG() {
        Matcher matcher = DEFAULT_NONG_PATTERN.matcher(this.context.getBoundAddress());
        return matcher.find() ? matcher.group(0) : null;
    }

    /**
     * @return every "…号" segment in order of appearance, or {@code null} when there is none
     */
    @Override
    public List<String> parseHao() {
        List<String> haoSegmentList = new LinkedList<>();
        Matcher matcher = DEFAULT_HAO_PATTERN.matcher(this.context.getBoundAddress());
        while (matcher.find()) {
            haoSegmentList.add(matcher.group());
        }
        // Callers receive null, not an empty list, when nothing matched (unchanged contract).
        return haoSegmentList.isEmpty() ? null : haoSegmentList;
    }

    /**
     * @return the alphanumeric run at the very end of the address (before an optional "室"),
     *         or {@code null} when the address does not end that way
     */
    @Override
    public String parseShi() {
        Matcher matcher = DEFAULT_SHI_PATTERN.matcher(this.context.getBoundAddress());
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * @return the floor written before "层": a Chinese-numeral floor is preferred, then a digit
     *         floor; {@code null} when neither is present
     */
    @Override
    public String parseFloor() {
        Matcher matcher = CHINESE_FLOOR_PATTERN.matcher(this.context.getBoundAddress());
        if (matcher.find()) {
            return matcher.group(1);
        }
        matcher = NUMBER_FLOOR_PATTERN.matcher(this.context.getBoundAddress());
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * @return {@code false} when the address ends with a room segment (see {@link #parseShi()}),
     *         {@code true} otherwise
     */
    @Override
    public Boolean parseIndependent() {
        return !DEFAULT_SHI_PATTERN.matcher(this.context.getBoundAddress()).find();
    }

    /**
     * @return the result list currently stored in the context
     */
    public List<CleanAddress> getResult() {
        return this.context.getResult();
    }

    /** Returns the first entry of {@code dictionary} that occurs in the address, or {@code null}. */
    private String firstContained(List<String> dictionary) {
        String address = this.context.getBoundAddress().toString();
        for (String entry : dictionary) {
            if (address.contains(entry)) {
                return entry;
            }
        }
        return null;
    }
}

View File

@ -1,53 +0,0 @@
package com.ruoyi.project.tool.address.service;
import com.ruoyi.project.tool.address.model.CleanAddress;
import java.util.List;
/**
 * State holder for address cleaning: the working address text plus the values
 * set while cleaning it.
 *
 * @author lihe
 */
public class CleanContext {
// Working copy of the address text; created from the constructor argument.
private StringBuilder boundAddress;
// NOTE(review): presumably flags an address that contains several "号" numbers; confirm with callers.
private Boolean multiHao;
// Not initialised in this class; only reachable through getAddress/setAddress.
private String address;
// Cleaning output; remains null until setResult is called.
private List<CleanAddress> result;
// Wraps the raw text in a new StringBuilder; throws NullPointerException when text is null.
public CleanContext(String text) {
this.boundAddress = new StringBuilder(text);
}
// Returns the live StringBuilder (not a copy), so callers can modify the working text in place.
public StringBuilder getBoundAddress() {
return boundAddress;
}
public void setBoundAddress(StringBuilder boundAddress) {
this.boundAddress = boundAddress;
}
// May return null: the flag is a boxed Boolean with no default assigned here.
public Boolean getMultiHao() {
return multiHao;
}
public void setMultiHao(Boolean multiHao) {
this.multiHao = multiHao;
}
public String getAddress() {
return address;
}
public void setAddress(String address) {
this.address = address;
}
// Returns the stored list itself (no defensive copy); null when no result has been set.
public List<CleanAddress> getResult() {
return result;
}
public void setResult(List<CleanAddress> result) {
this.result = result;
}
}

View File

@ -1,22 +0,0 @@
package com.ruoyi.project.tool.address.service;
import com.ruoyi.project.tool.address.model.AddressType;
import java.util.List;
/**
 * Concrete {@link BaseAddressBuilder} that adds no parsing of its own and
 * reports {@code AddressType.CONDO} as its address category.
 *
 * @author lihe
 */
public class DefaultAddressBuilder extends BaseAddressBuilder {
// Passes the raw address text straight to the base-class constructor.
public DefaultAddressBuilder(String text) {
super(text);
}
// Always CONDO, regardless of the address text.
@Override
public AddressType getAddressType() {
return AddressType.CONDO;
}
}

View File

@ -1,24 +0,0 @@
package com.ruoyi.project.tool.address.service.impl;
import com.ruoyi.project.tool.address.model.CleanAddress;
import com.ruoyi.project.tool.address.model.CleanAddressBuilder;
import com.ruoyi.project.tool.address.service.AddressHandler;
/**
 * Cleaning handler for addresses that contain no special symbols.
 *
 * @author lihe
 */
public class NoSignalAddressHandler implements AddressHandler {
// Runs the CleanAddressBuilder steps in a fixed order: district, region, block, road,
// independent flag, then address. The chain's return value is discarded.
// NOTE(review): presumably each step writes its finding into cleanAddress; confirm in CleanAddressBuilder.
@Override
public void clear(CleanAddress cleanAddress) {
CleanAddressBuilder.builder(cleanAddress)
.parseDistrict()
.parseRegion()
.parseBlock()
.parseRoad()
.parseIndependent()
.parseAddress();
}
}

View File

@ -1,18 +0,0 @@
package com.ruoyi.project.tool.address.service.impl;
import com.ruoyi.project.tool.address.model.CleanAddress;
import com.ruoyi.project.tool.address.service.AddressHandler;
/**
 * Cleaning handler for addresses that contain symbol characters.
 *
 * @author lihe
 */
public class SignalAddressHandler implements AddressHandler {
// Intentionally or not, this is currently a no-op: cleanAddress is left untouched.
@Override
public void clear(CleanAddress cleanAddress) {
}
}

View File

@ -1,255 +0,0 @@
package com.ruoyi.project.tool.address.utils;
import com.ruoyi.common.utils.LoadUtil;
import com.ruoyi.project.tool.address.AddressContext;
import com.ruoyi.project.tool.address.AddressNode;
import com.ruoyi.project.tool.address.AddressNodeType;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Address segment analysis: finds district, region, block, road, lane, number, building,
 * floor and room segments in the text held by an {@link AddressContext} and registers each
 * finding with that context as an {@link AddressNode}.
 *
 * @author lihe
 */
public class AddressNodeParse {
    /** District dictionary: keys are searched for in the address head, values become node content. */
    private final Map<String, String> districtMap = LoadUtil.loadDict("address-dict/district.dict");
    private final List<String> regionList = LoadUtil.loadList("address-dict/region.dict");
    private final List<String> blockList = LoadUtil.loadList("address-dict/block.dict");
    private final List<String> roadList = LoadUtil.loadList("address-dict/road.dict");

    /** Addresses at least this long are cut to their first {@code DISTRICT_LENGTH - 1} characters for district lookup. */
    private static final int DISTRICT_LENGTH = 8;
    private static final String SHANGHAI_SHI = "上海市";
    private static final String SHANGHAI = "上海";
    // NOTE(review): this literal (and HAO / ROAD_SUFFIX below) was empty in the reviewed copy of the
    // file. It is restored here from SHANG_SHANG = "上上" and from how the constant is used; confirm.
    private static final String SHANG = "上";
    private static final String HAO = "号";
    private static final String SHANG_SHANG = "上上";
    /** Trailing character removed from dictionary road names in the {@link #parseRoad()} fallback. */
    private static final String ROAD_SUFFIX = "路";

    // Patterns are compiled once instead of on every parse call.
    private static final Pattern DEFAULT_FLOOR_PATTERN = Pattern.compile("([\\d一二三四五六七八九十]+)层");
    private static final Pattern NUMBER_FLOOR_PATTERN = Pattern.compile("(\\d+)层");
    private static final Pattern DEFAULT_HAO_PATTERN = Pattern.compile("([\\dA-Za-z]+)号");
    private static final Pattern DEFAULT_ZHUANG_PATTERN = Pattern.compile("([\\dA-Za-z]+)幢");
    private static final Pattern DEFAULT_CENG_PATTERN = Pattern.compile("([\\d])层");
    private static final Pattern DEFAULT_SHI_PATTERN = Pattern.compile("([\\dA-Za-z]+)(室?)$");
    private static final Pattern DEFAULT_NONG_PATTERN =
            Pattern.compile("[\\d一二三四五六七八九十]+弄([\\d一二三四五六七八九十]+支弄)?");

    private static List<String> specialChar = new LinkedList<>();
    private AddressContext context;

    static {
        // NOTE(review): several entries below are empty strings in the reviewed copy; they look like
        // punctuation characters that were lost and are reproduced unchanged. The list is not read
        // anywhere in this class.
        specialChar.add("");
        specialChar.add(".");
        specialChar.add("");
        specialChar.add(",");
        specialChar.add("-");
        specialChar.add("——");
        specialChar.add("_");
        specialChar.add("");
        specialChar.add("");
        specialChar.add("");
        specialChar.add("(");
        specialChar.add(")");
        specialChar.add("");
        specialChar.add("");
    }

    /**
     * @param addressContext context that supplies the address text and receives the parsed nodes
     */
    public AddressNodeParse(AddressContext addressContext) {
        this.context = addressContext;
    }

    /**
     * District: normalises the head of the address (prefix it with {@code SHANG}, collapse a
     * doubled prefix, drop the prefix again unless the text starts with {@code SHANGHAI}) and adds
     * a DISTRICT node for every dictionary key found at or after the context's start index.
     */
    public void parseDistrict() {
        StringBuilder content = this.context.getContent();
        String head = content.length() >= DISTRICT_LENGTH
                ? content.substring(0, DISTRICT_LENGTH - 1)
                : content.toString();
        String shanghaiAndDistrict = (SHANG + head).replace(SHANG_SHANG, SHANG);
        if (!shanghaiAndDistrict.startsWith(SHANGHAI_SHI)
                && !shanghaiAndDistrict.startsWith(SHANGHAI)
                && shanghaiAndDistrict.startsWith(SHANG)) {
            // Remove only the artificial prefix. The previous substring(1, length - 1) also dropped
            // the last character and threw StringIndexOutOfBoundsException for an empty address.
            shanghaiAndDistrict = shanghaiAndDistrict.substring(1);
        }
        for (Map.Entry<String, String> district : districtMap.entrySet()) {
            int index = shanghaiAndDistrict.indexOf(district.getKey(), this.context.getStartIndex());
            if (-1 != index) {
                AddressNode addressNode = new AddressNode(AddressNodeType.DISTRICT, district.getValue());
                addressNode.setStartIndex(index);
                addressNode.setEndIndex(index + district.getKey().length() + 1);
                this.context.addAddressNode(addressNode);
            }
        }
    }

    /**
     * Region: adds a REGION node for every dictionary entry found at or after the context's
     * current start index.
     */
    public void parseRegion() {
        for (String region : regionList) {
            int index = this.context.getContent().indexOf(region, this.context.getStartIndex());
            if (-1 != index) {
                AddressNode addressNode = new AddressNode(AddressNodeType.REGION, region);
                addressNode.setStartIndex(index);
                addressNode.setEndIndex(index + region.length() + 1);
                this.context.addAddressNode(addressNode);
            }
        }
    }

    /**
     * Block: adds a BLOCK node for every dictionary entry found anywhere in the address (the
     * context's start index is not applied here).
     */
    public void parseBlock() {
        for (String block : blockList) {
            int index = this.context.getContent().indexOf(block);
            if (-1 != index) {
                AddressNode addressNode = new AddressNode(AddressNodeType.BLOCK, block);
                addressNode.setStartIndex(index);
                addressNode.setEndIndex(index + block.length() + 1);
                this.context.addAddressNode(addressNode);
            }
        }
    }

    /**
     * Road: gathers every dictionary road found at or after the context's start index as children
     * of a single ROAD node. When none is found, the search is repeated over the whole address with
     * the trailing {@code ROAD_SUFFIX} removed from each dictionary entry. The ROAD node is
     * registered with the context only when at least one road matched.
     */
    public void parseRoad() {
        AddressNode addressNode = new AddressNode(AddressNodeType.ROAD);
        for (String road : roadList) {
            int index = this.context.getContent().indexOf(road, this.context.getStartIndex());
            if (-1 != index) {
                appendRoad(addressNode, road, index);
            }
        }
        if (null != addressNode.getStartIndex()) {
            this.context.addAddressNode(addressNode);
            return;
        }
        for (String road : roadList) {
            String noRoad = road.endsWith(ROAD_SUFFIX) ? road.substring(0, road.length() - 1) : road;
            if (noRoad.isEmpty()) {
                // An empty needle is found at index 0 of every string; it is not a real match.
                continue;
            }
            int index = this.context.getContent().indexOf(noRoad);
            if (-1 != index) {
                appendRoad(addressNode, noRoad, index);
            }
        }
        // The fallback used to build the node and then discard it; register it like the primary path.
        if (null != addressNode.getStartIndex()) {
            this.context.addAddressNode(addressNode);
        }
    }

    /**
     * Lane ("弄", optionally followed by a "支弄" sub-lane).
     */
    public void parseNONG() {
        addFirstMatch(DEFAULT_NONG_PATTERN, AddressNodeType.NONG);
    }

    /**
     * Number ("号").
     */
    public void parseHao() {
        addFirstMatch(DEFAULT_HAO_PATTERN, AddressNodeType.HAO);
    }

    /**
     * Building ("幢").
     */
    public void parseZhuang() {
        addFirstMatch(DEFAULT_ZHUANG_PATTERN, AddressNodeType.ZHUANG);
    }

    /**
     * Floor ("层").
     */
    public void parseCeng() {
        addFirstMatch(DEFAULT_FLOOR_PATTERN, AddressNodeType.CENG);
    }

    /**
     * Room: the alphanumeric run at the very end of the address, with an optional "室".
     */
    public void parseShi() {
        addFirstMatch(DEFAULT_SHI_PATTERN, AddressNodeType.SHI);
    }

    /**
     * Adds one road match as a child of {@code parent}; the parent keeps the start of its first
     * child and the end of its latest child. The end index is derived from the text that was
     * actually matched (the fallback used to apply the untrimmed dictionary entry's length).
     */
    private void appendRoad(AddressNode parent, String matchedText, int index) {
        if (null == parent.getStartIndex()) {
            parent.setStartIndex(index);
        }
        int endIndex = index + matchedText.length() + 1;
        AddressNode childAddressNode = new AddressNode(AddressNodeType.ROAD, matchedText);
        childAddressNode.setStartIndex(index);
        childAddressNode.setEndIndex(endIndex);
        parent.setEndIndex(endIndex);
        parent.addChildNode(childAddressNode);
    }

    /**
     * Registers a node of the given type for the first regex match, provided the matched text also
     * occurs at or after the context's current start index.
     */
    private void addFirstMatch(Pattern pattern, AddressNodeType type) {
        Matcher matcher = pattern.matcher(this.context.getContent());
        if (!matcher.find()) {
            return;
        }
        String segment = matcher.group(0);
        int index = this.context.getContent().indexOf(segment, this.context.getStartIndex());
        if (-1 == index) {
            return;
        }
        AddressNode addressNode = new AddressNode(type, segment);
        addressNode.setStartIndex(index);
        addressNode.setEndIndex(index + segment.length() + 1);
        // NOTE(review): the node is added as its own child, exactly as before. Confirm that
        // AddressNode/AddressContext rely on this and never recurse through children.
        addressNode.addChildNode(addressNode);
        this.context.addAddressNode(addressNode);
    }
}

View File

@ -1,5 +1,6 @@
package com.ruoyi.project.tool.address;
package com.ruoyi.project.tool.address.utils;
import com.ruoyi.project.tool.address.AddressResult;
import com.ruoyi.project.tool.address.parse.XParse;
/**

View File

@ -1,36 +0,0 @@
package com.ruoyi.project.tool.address.utils;
import com.ruoyi.project.tool.address.AddressBuilder;
import com.ruoyi.project.tool.address.AddressContext;
import com.ruoyi.project.tool.address.AddressNode;
import com.ruoyi.project.tool.address.StandardAddress;
import java.util.List;
/**
* 默认地址构建
*
* @author lihe
*/
public class DefaultAddressBuilder implements AddressBuilder {
private List<AddressNode> addressNodeList;
private AddressContext addressContext;
@Override
public StandardAddress clear(String text) {
this.addressContext = new AddressContext(text);
AddressNodeParse addressNodeParse = new AddressNodeParse(this.addressContext);
// 找到区域
addressNodeParse.parseDistrict();
addressNodeParse.parseRegion();
addressNodeParse.parseBlock();
addressNodeParse.parseRoad();
addressNodeParse.parseNONG();
addressNodeParse.parseHao();
addressNodeParse.parseCeng();
addressNodeParse.parseShi();
return addressContext.getResult();
}
}

View File

@ -1,4 +1,4 @@
package com.ruoyi.project.tool.address;
package com.ruoyi.project.tool.address.utils;
import com.ruoyi.project.tool.address.model.AdrNode;

View File

@ -2,57 +2,25 @@ package com.ruoyi;
import com.ruoyi.common.utils.LoadUtil;
import com.ruoyi.project.tool.address.*;
import com.ruoyi.project.tool.address.model.CleanAddress;
import com.ruoyi.project.tool.address.model.LianHaoNode;
import com.ruoyi.project.tool.address.parse.ZParse;
import com.ruoyi.project.tool.address.utils.DefaultAddressBuilder;
import com.ruoyi.project.tool.address.utils.AddressUtil;
import org.junit.Assert;
import org.junit.Test;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Base64;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class CleanUtilTests {
public class AddressUtilTests {
/**
* 地址清洗测试
*/
@Test
public void printStreet() {
List<String> textList = LoadUtil.loadList("jie.dict");
textList.forEach(item -> {
if (!item.contains("") && item.contains("") && !item.contains("街坊")) {
System.out.println(item);
}
});
}
@Test
public void printNoRoad() {
public void testClear() {
List<String> textList = LoadUtil.loadList("icbc.dict");
textList.forEach(item -> {
if (!item.contains("")) {
System.out.println(item);
}
});
}
@Test
public void testSort() {
List<String> textList = LoadUtil.loadList("temp.dict");
textList.sort((x, y) -> y.length() - x.length());
textList.forEach(item -> {
System.out.println(item);
});
}
@Test
public void testClear3() {
List<String> textList = LoadUtil.loadList("test.dict");
textList.forEach(item -> {
AddressResult addressResult = AddressUtil.clear(item);
if (null == addressResult.getCleanAddress()) {
@ -64,6 +32,7 @@ public class CleanUtilTests {
});
}
/**
* 中间连号
*/
@ -78,6 +47,21 @@ public class CleanUtilTests {
Assert.assertEquals(addressResult.getDistrict(), "浦东");
}
/**
 * Building ("幢") + number ("号") + room ("室"): the cleaned address is expected to keep the
 * community name, number and room, and the district is expected to be "奉贤".
 */
@Test
public void testZhuangHaoShi() {
    AddressResult addressResult = AddressUtil.clear("奉贤区南桥镇江海新村(B)89幢578号402室");
    // NOTE(review): a null clean-address list makes this test pass without checking anything.
    if (null == addressResult.getCleanAddress()) {
        return;
    }
    // assertEquals takes (expected, actual); the previous order produced misleading failure messages.
    Assert.assertEquals("江海新村578号402室", addressResult.getCleanAddress().get(0));
    Assert.assertEquals("奉贤", addressResult.getDistrict());
}
/**
* 尾部连号
*/
@ -107,19 +91,6 @@ public class CleanUtilTests {
Assert.assertEquals(addressResult.getDistrict(), "松江");
}
@Test
public void testWhileMatch() {
Pattern numberPattern = Pattern.compile("\\d+");
Matcher matcher = numberPattern.matcher("24、25号");
while (matcher.find()) {
System.out.println(matcher.group());
}
}
/**
* 地下
*/
@ -129,48 +100,28 @@ public class CleanUtilTests {
if (null == addressResult.getCleanAddress()) {
return;
}
addressResult.getCleanAddress().forEach(adr -> {
System.out.println(String.format("%s\t%s\t", addressResult.getDistrict(), adr));
});
Assert.assertEquals("北华路168弄35号1002室", addressResult.getCleanAddress().get(0));
Assert.assertEquals(addressResult.getDistrict(), "闵行");
}
/**
* 层结尾
*/
@Test
public void testClear2() {
//
List<String> textList = LoadUtil.loadList("icbc.dict");
textList.forEach(item -> {
DefaultAddressBuilder defaultAddressBuilder = new DefaultAddressBuilder();
StandardAddress standardAddress = defaultAddressBuilder.clear(item);
List<AddressContent> list = standardAddress.getResult();
list.forEach(x -> {
System.out.println(x);
});
//
// CleanAddress cleanAddress = CleanUtil.clear(item);
// System.out.println(cleanAddress);
});
}
@Test
public void testClear() {
List<String> textList = LoadUtil.loadList("icbc.dict");
textList.forEach(item -> {
CleanAddress cleanAddress = CleanUtil.clear(item);
System.out.println(cleanAddress);
});
// Assert.assertEquals(cleanAddress.getDistrict(), "");
// Assert.assertEquals(cleanAddress.getDistrict(), "");
// Assert.assertEquals(cleanAddress.getDistrict(), "");
// Assert.assertEquals(cleanAddress.getDistrict(), "");
// Assert.assertEquals(cleanAddress.getDistrict(), "");
// Assert.assertEquals(cleanAddress.getDistrict(), "");
public void testTailCeng() {
AddressResult addressResult = AddressUtil.clear("浦东新区成山路1488弄119号1-4层");
if (null == addressResult.getCleanAddress()) {
return;
}
Assert.assertEquals("成山路1488弄119号", addressResult.getCleanAddress().get(0));
Assert.assertEquals(addressResult.getDistrict(), "浦东");
}
/**
* base64保存
*
* @throws IOException
*/
@Test
public void testBase64() throws IOException {
Base64.Decoder decoder = Base64.getDecoder();
@ -180,6 +131,47 @@ public class CleanUtilTests {
out.write(buffer);
out.close();
}
/**
 * Prints every run of digits found in a sample "number" segment, one per line.
 */
@Test
public void testWhileMatch() {
    Matcher digitRuns = Pattern.compile("\\d+").matcher("24、25号");
    while (digitRuns.find()) {
        String run = digitRuns.group();
        System.out.println(run);
    }
}
// Prints the entries of jie.dict that pass the filter below; it asserts nothing.
// NOTE(review): two of the contains(...) arguments are empty strings in this copy. Because every
// string contains "", the first condition is always false and nothing is printed. The original
// single-character literals appear to have been lost; restore them from version control.
@Test
public void printStreet() {
List<String> textList = LoadUtil.loadList("jie.dict");
textList.forEach(item -> {
if (!item.contains("") && item.contains("") && !item.contains("街坊")) {
System.out.println(item);
}
});
}
// Prints the entries of icbc.dict that pass the filter below; it asserts nothing.
// NOTE(review): the contains(...) argument is an empty string in this copy, so the condition is
// always false and nothing is printed. Judging by the method name the literal was presumably the
// road character; confirm against version control.
@Test
public void printNoRoad() {
List<String> textList = LoadUtil.loadList("icbc.dict");
textList.forEach(item -> {
if (!item.contains("")) {
System.out.println(item);
}
});
}
/**
 * Loads temp.dict, orders its entries from longest to shortest (entries of equal length keep
 * their relative order) and prints them one per line.
 */
@Test
public void testSort() {
    List<String> entries = LoadUtil.loadList("temp.dict");
    entries.sort((left, right) -> Integer.compare(right.length(), left.length()));
    for (String entry : entries) {
        System.out.println(entry);
    }
}
}