feature(地址清洗)重构
This commit is contained in:
parent
daca46924b
commit
de0a56880c
@ -27,4 +27,12 @@ public class AddressContent {
|
||||
public String getResult() {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "AddressContent{" +
|
||||
"addressType=" + addressType +
|
||||
", sb=" + sb +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
package com.ruoyi.project.tool.address;
|
||||
|
||||
import com.ruoyi.project.tool.address.model.AddressType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
@ -16,6 +18,7 @@ public class AddressContext {
|
||||
*/
|
||||
private ArrayList<AddressNode> nodeList = new ArrayList<>();
|
||||
private StringBuilder stringBuilder;
|
||||
private Integer startIndex = 0;
|
||||
|
||||
public AddressContext(String text) {
|
||||
this.stringBuilder = new StringBuilder(text);
|
||||
@ -26,16 +29,30 @@ public class AddressContext {
|
||||
}
|
||||
|
||||
public void addAddressNode(AddressNode node) {
|
||||
startIndex = node.getEndIndex() - 1;
|
||||
this.nodeList.add(node);
|
||||
}
|
||||
|
||||
public Integer getStartIndex() {
|
||||
return startIndex;
|
||||
}
|
||||
|
||||
public StandardAddress getResult() {
|
||||
StandardAddress standardAddress = new StandardAddress(this.stringBuilder.toString());
|
||||
// nodeList.forEach(node->{
|
||||
//// AddressContent addressContent = new AddressContent();
|
||||
//
|
||||
//// standardAddress.addAddressContent();
|
||||
// });
|
||||
nodeList.forEach(node -> {
|
||||
AddressContent addressContent = new AddressContent(AddressType.CONDO);
|
||||
if (node.dataNode()) {
|
||||
addressContent.appendContent(node.getContent());
|
||||
} else {
|
||||
if (1 == node.getContentList().size()) {
|
||||
addressContent.appendContent(node.getContentList().get(0).getContent());
|
||||
} else {
|
||||
// 多个先组合,再拆分
|
||||
}
|
||||
}
|
||||
|
||||
standardAddress.addAddressContent(addressContent);
|
||||
});
|
||||
|
||||
return standardAddress;
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
package com.ruoyi.project.tool.address;
|
||||
|
||||
import com.ruoyi.common.utils.StringUtils;
|
||||
|
||||
import java.util.AbstractList;
|
||||
import java.util.Collection;
|
||||
import java.util.LinkedList;
|
||||
@ -55,6 +57,15 @@ public class AddressNode {
|
||||
return content;
|
||||
}
|
||||
|
||||
/**
|
||||
* 数据节点
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Boolean dataNode() {
|
||||
return StringUtils.isNotEmpty(this.content);
|
||||
}
|
||||
|
||||
public void setContent(String content) {
|
||||
this.content = content;
|
||||
}
|
||||
@ -62,4 +73,8 @@ public class AddressNode {
|
||||
public void addChildNode(AddressNode addressNode) {
|
||||
contentList.add(addressNode);
|
||||
}
|
||||
|
||||
public List<AddressNode> getContentList() {
|
||||
return contentList;
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
package com.ruoyi.project.tool.address.service;
|
||||
|
||||
import com.sun.xml.internal.fastinfoset.algorithm.BooleanEncodingAlgorithm;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
@ -75,7 +75,7 @@ public class AddressNodeParse {
|
||||
}
|
||||
|
||||
for (Map.Entry<String, String> district : districtMap.entrySet()) {
|
||||
int index = shanghaiAndDistrict.indexOf(district.getKey());
|
||||
int index = shanghaiAndDistrict.indexOf(district.getKey(), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.DISTRICT, district.getValue());
|
||||
addressNode.setStartIndex(index);
|
||||
@ -92,7 +92,7 @@ public class AddressNodeParse {
|
||||
*/
|
||||
public void parseRegion() {
|
||||
for (int i = 0; i < regionList.size(); i++) {
|
||||
int index = this.context.getContent().indexOf(regionList.get(i));
|
||||
int index = this.context.getContent().indexOf(regionList.get(i), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.REGION, regionList.get(i));
|
||||
addressNode.setStartIndex(index);
|
||||
@ -124,9 +124,8 @@ public class AddressNodeParse {
|
||||
*/
|
||||
public void parseRoad() {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.ROAD);
|
||||
|
||||
for (int i = 0; i < roadList.size(); i++) {
|
||||
int index = this.context.getContent().indexOf(roadList.get(i));
|
||||
int index = this.context.getContent().indexOf(roadList.get(i), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
if (null == addressNode.getStartIndex()) {
|
||||
addressNode.setStartIndex(index);
|
||||
@ -171,12 +170,13 @@ public class AddressNodeParse {
|
||||
Pattern nongPattern = Pattern.compile(DEFAULT_NONG_PATTERN);
|
||||
Matcher matcher = nongPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0));
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.NONG, matcher.group(0));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||
addressNode.addChildNode(addressNode);
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -188,12 +188,13 @@ public class AddressNodeParse {
|
||||
Pattern haoPattern = Pattern.compile(DEFAULT_HAO_PATTERN);
|
||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0));
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.HAO, matcher.group(0));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||
addressNode.addChildNode(addressNode);
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -205,12 +206,13 @@ public class AddressNodeParse {
|
||||
Pattern haoPattern = Pattern.compile(DEFAULT_ZHUANG_PATTERN);
|
||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0));
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.ZHUANG, matcher.group(0));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||
addressNode.addChildNode(addressNode);
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -222,12 +224,13 @@ public class AddressNodeParse {
|
||||
Pattern haoPattern = Pattern.compile(DEFAULT_FLOOR_PATTERN);
|
||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0));
|
||||
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.CENG, matcher.group(0));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||
addressNode.addChildNode(addressNode);
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -239,12 +242,13 @@ public class AddressNodeParse {
|
||||
Pattern haoPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
|
||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||
if (matcher.find()) {
|
||||
int index = this.context.getContent().indexOf(matcher.group(0));
|
||||
int index = this.context.getContent().indexOf(matcher.group(0),this.context.getStartIndex());
|
||||
if (-1 != index) {
|
||||
AddressNode addressNode = new AddressNode(AddressNodeType.SHI, matcher.group(0));
|
||||
addressNode.setStartIndex(index);
|
||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||
addressNode.addChildNode(addressNode);
|
||||
this.context.addAddressNode(addressNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,11 @@
|
||||
package com.ruoyi;
|
||||
|
||||
import com.ruoyi.common.utils.LoadUtil;
|
||||
import com.ruoyi.project.tool.address.AddressContent;
|
||||
import com.ruoyi.project.tool.address.CleanUtil;
|
||||
import com.ruoyi.project.tool.address.StandardAddress;
|
||||
import com.ruoyi.project.tool.address.model.CleanAddress;
|
||||
import com.ruoyi.project.tool.address.utils.DefaultAddressBuilder;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
@ -24,6 +27,24 @@ public class CleanUtilTests {
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClear2() {
|
||||
//
|
||||
List<String> textList = LoadUtil.loadList("icbc.dict");
|
||||
|
||||
textList.forEach(item -> {
|
||||
DefaultAddressBuilder defaultAddressBuilder = new DefaultAddressBuilder();
|
||||
StandardAddress standardAddress = defaultAddressBuilder.clear(item);
|
||||
List<AddressContent> list = standardAddress.getResult();
|
||||
list.forEach(x -> {
|
||||
System.out.println(x);
|
||||
});
|
||||
//
|
||||
// CleanAddress cleanAddress = CleanUtil.clear(item);
|
||||
// System.out.println(cleanAddress);
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClear() {
|
||||
List<String> textList = LoadUtil.loadList("icbc.dict");
|
||||
|
Loading…
x
Reference in New Issue
Block a user