feature(地址清洗)重构
This commit is contained in:
parent
daca46924b
commit
de0a56880c
@ -27,4 +27,12 @@ public class AddressContent {
|
|||||||
public String getResult() {
|
public String getResult() {
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "AddressContent{" +
|
||||||
|
"addressType=" + addressType +
|
||||||
|
", sb=" + sb +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
package com.ruoyi.project.tool.address;
|
package com.ruoyi.project.tool.address;
|
||||||
|
|
||||||
|
import com.ruoyi.project.tool.address.model.AddressType;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -16,6 +18,7 @@ public class AddressContext {
|
|||||||
*/
|
*/
|
||||||
private ArrayList<AddressNode> nodeList = new ArrayList<>();
|
private ArrayList<AddressNode> nodeList = new ArrayList<>();
|
||||||
private StringBuilder stringBuilder;
|
private StringBuilder stringBuilder;
|
||||||
|
private Integer startIndex = 0;
|
||||||
|
|
||||||
public AddressContext(String text) {
|
public AddressContext(String text) {
|
||||||
this.stringBuilder = new StringBuilder(text);
|
this.stringBuilder = new StringBuilder(text);
|
||||||
@ -26,16 +29,30 @@ public class AddressContext {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void addAddressNode(AddressNode node) {
|
public void addAddressNode(AddressNode node) {
|
||||||
|
startIndex = node.getEndIndex() - 1;
|
||||||
this.nodeList.add(node);
|
this.nodeList.add(node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Integer getStartIndex() {
|
||||||
|
return startIndex;
|
||||||
|
}
|
||||||
|
|
||||||
public StandardAddress getResult() {
|
public StandardAddress getResult() {
|
||||||
StandardAddress standardAddress = new StandardAddress(this.stringBuilder.toString());
|
StandardAddress standardAddress = new StandardAddress(this.stringBuilder.toString());
|
||||||
// nodeList.forEach(node->{
|
nodeList.forEach(node -> {
|
||||||
//// AddressContent addressContent = new AddressContent();
|
AddressContent addressContent = new AddressContent(AddressType.CONDO);
|
||||||
//
|
if (node.dataNode()) {
|
||||||
//// standardAddress.addAddressContent();
|
addressContent.appendContent(node.getContent());
|
||||||
// });
|
} else {
|
||||||
|
if (1 == node.getContentList().size()) {
|
||||||
|
addressContent.appendContent(node.getContentList().get(0).getContent());
|
||||||
|
} else {
|
||||||
|
// 多个先组合,再拆分
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
standardAddress.addAddressContent(addressContent);
|
||||||
|
});
|
||||||
|
|
||||||
return standardAddress;
|
return standardAddress;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
package com.ruoyi.project.tool.address;
|
package com.ruoyi.project.tool.address;
|
||||||
|
|
||||||
|
import com.ruoyi.common.utils.StringUtils;
|
||||||
|
|
||||||
import java.util.AbstractList;
|
import java.util.AbstractList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
@ -55,6 +57,15 @@ public class AddressNode {
|
|||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 数据节点
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public Boolean dataNode() {
|
||||||
|
return StringUtils.isNotEmpty(this.content);
|
||||||
|
}
|
||||||
|
|
||||||
public void setContent(String content) {
|
public void setContent(String content) {
|
||||||
this.content = content;
|
this.content = content;
|
||||||
}
|
}
|
||||||
@ -62,4 +73,8 @@ public class AddressNode {
|
|||||||
public void addChildNode(AddressNode addressNode) {
|
public void addChildNode(AddressNode addressNode) {
|
||||||
contentList.add(addressNode);
|
contentList.add(addressNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<AddressNode> getContentList() {
|
||||||
|
return contentList;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
package com.ruoyi.project.tool.address.service;
|
package com.ruoyi.project.tool.address.service;
|
||||||
|
|
||||||
import com.sun.xml.internal.fastinfoset.algorithm.BooleanEncodingAlgorithm;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -75,7 +75,7 @@ public class AddressNodeParse {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (Map.Entry<String, String> district : districtMap.entrySet()) {
|
for (Map.Entry<String, String> district : districtMap.entrySet()) {
|
||||||
int index = shanghaiAndDistrict.indexOf(district.getKey());
|
int index = shanghaiAndDistrict.indexOf(district.getKey(), this.context.getStartIndex());
|
||||||
if (-1 != index) {
|
if (-1 != index) {
|
||||||
AddressNode addressNode = new AddressNode(AddressNodeType.DISTRICT, district.getValue());
|
AddressNode addressNode = new AddressNode(AddressNodeType.DISTRICT, district.getValue());
|
||||||
addressNode.setStartIndex(index);
|
addressNode.setStartIndex(index);
|
||||||
@ -92,7 +92,7 @@ public class AddressNodeParse {
|
|||||||
*/
|
*/
|
||||||
public void parseRegion() {
|
public void parseRegion() {
|
||||||
for (int i = 0; i < regionList.size(); i++) {
|
for (int i = 0; i < regionList.size(); i++) {
|
||||||
int index = this.context.getContent().indexOf(regionList.get(i));
|
int index = this.context.getContent().indexOf(regionList.get(i), this.context.getStartIndex());
|
||||||
if (-1 != index) {
|
if (-1 != index) {
|
||||||
AddressNode addressNode = new AddressNode(AddressNodeType.REGION, regionList.get(i));
|
AddressNode addressNode = new AddressNode(AddressNodeType.REGION, regionList.get(i));
|
||||||
addressNode.setStartIndex(index);
|
addressNode.setStartIndex(index);
|
||||||
@ -124,9 +124,8 @@ public class AddressNodeParse {
|
|||||||
*/
|
*/
|
||||||
public void parseRoad() {
|
public void parseRoad() {
|
||||||
AddressNode addressNode = new AddressNode(AddressNodeType.ROAD);
|
AddressNode addressNode = new AddressNode(AddressNodeType.ROAD);
|
||||||
|
|
||||||
for (int i = 0; i < roadList.size(); i++) {
|
for (int i = 0; i < roadList.size(); i++) {
|
||||||
int index = this.context.getContent().indexOf(roadList.get(i));
|
int index = this.context.getContent().indexOf(roadList.get(i), this.context.getStartIndex());
|
||||||
if (-1 != index) {
|
if (-1 != index) {
|
||||||
if (null == addressNode.getStartIndex()) {
|
if (null == addressNode.getStartIndex()) {
|
||||||
addressNode.setStartIndex(index);
|
addressNode.setStartIndex(index);
|
||||||
@ -171,12 +170,13 @@ public class AddressNodeParse {
|
|||||||
Pattern nongPattern = Pattern.compile(DEFAULT_NONG_PATTERN);
|
Pattern nongPattern = Pattern.compile(DEFAULT_NONG_PATTERN);
|
||||||
Matcher matcher = nongPattern.matcher(this.context.getContent());
|
Matcher matcher = nongPattern.matcher(this.context.getContent());
|
||||||
if (matcher.find()) {
|
if (matcher.find()) {
|
||||||
int index = this.context.getContent().indexOf(matcher.group(0));
|
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||||
if (-1 != index) {
|
if (-1 != index) {
|
||||||
AddressNode addressNode = new AddressNode(AddressNodeType.NONG, matcher.group(0));
|
AddressNode addressNode = new AddressNode(AddressNodeType.NONG, matcher.group(0));
|
||||||
addressNode.setStartIndex(index);
|
addressNode.setStartIndex(index);
|
||||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||||
addressNode.addChildNode(addressNode);
|
addressNode.addChildNode(addressNode);
|
||||||
|
this.context.addAddressNode(addressNode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -188,12 +188,13 @@ public class AddressNodeParse {
|
|||||||
Pattern haoPattern = Pattern.compile(DEFAULT_HAO_PATTERN);
|
Pattern haoPattern = Pattern.compile(DEFAULT_HAO_PATTERN);
|
||||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||||
if (matcher.find()) {
|
if (matcher.find()) {
|
||||||
int index = this.context.getContent().indexOf(matcher.group(0));
|
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||||
if (-1 != index) {
|
if (-1 != index) {
|
||||||
AddressNode addressNode = new AddressNode(AddressNodeType.HAO, matcher.group(0));
|
AddressNode addressNode = new AddressNode(AddressNodeType.HAO, matcher.group(0));
|
||||||
addressNode.setStartIndex(index);
|
addressNode.setStartIndex(index);
|
||||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||||
addressNode.addChildNode(addressNode);
|
addressNode.addChildNode(addressNode);
|
||||||
|
this.context.addAddressNode(addressNode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -205,12 +206,13 @@ public class AddressNodeParse {
|
|||||||
Pattern haoPattern = Pattern.compile(DEFAULT_ZHUANG_PATTERN);
|
Pattern haoPattern = Pattern.compile(DEFAULT_ZHUANG_PATTERN);
|
||||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||||
if (matcher.find()) {
|
if (matcher.find()) {
|
||||||
int index = this.context.getContent().indexOf(matcher.group(0));
|
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||||
if (-1 != index) {
|
if (-1 != index) {
|
||||||
AddressNode addressNode = new AddressNode(AddressNodeType.ZHUANG, matcher.group(0));
|
AddressNode addressNode = new AddressNode(AddressNodeType.ZHUANG, matcher.group(0));
|
||||||
addressNode.setStartIndex(index);
|
addressNode.setStartIndex(index);
|
||||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||||
addressNode.addChildNode(addressNode);
|
addressNode.addChildNode(addressNode);
|
||||||
|
this.context.addAddressNode(addressNode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -222,12 +224,13 @@ public class AddressNodeParse {
|
|||||||
Pattern haoPattern = Pattern.compile(DEFAULT_FLOOR_PATTERN);
|
Pattern haoPattern = Pattern.compile(DEFAULT_FLOOR_PATTERN);
|
||||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||||
if (matcher.find()) {
|
if (matcher.find()) {
|
||||||
int index = this.context.getContent().indexOf(matcher.group(0));
|
int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
|
||||||
if (-1 != index) {
|
if (-1 != index) {
|
||||||
AddressNode addressNode = new AddressNode(AddressNodeType.CENG, matcher.group(0));
|
AddressNode addressNode = new AddressNode(AddressNodeType.CENG, matcher.group(0));
|
||||||
addressNode.setStartIndex(index);
|
addressNode.setStartIndex(index);
|
||||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||||
addressNode.addChildNode(addressNode);
|
addressNode.addChildNode(addressNode);
|
||||||
|
this.context.addAddressNode(addressNode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -239,12 +242,13 @@ public class AddressNodeParse {
|
|||||||
Pattern haoPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
|
Pattern haoPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
|
||||||
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
Matcher matcher = haoPattern.matcher(this.context.getContent());
|
||||||
if (matcher.find()) {
|
if (matcher.find()) {
|
||||||
int index = this.context.getContent().indexOf(matcher.group(0));
|
int index = this.context.getContent().indexOf(matcher.group(0),this.context.getStartIndex());
|
||||||
if (-1 != index) {
|
if (-1 != index) {
|
||||||
AddressNode addressNode = new AddressNode(AddressNodeType.SHI, matcher.group(0));
|
AddressNode addressNode = new AddressNode(AddressNodeType.SHI, matcher.group(0));
|
||||||
addressNode.setStartIndex(index);
|
addressNode.setStartIndex(index);
|
||||||
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
addressNode.setEndIndex(index + matcher.group(0).length() + 1);
|
||||||
addressNode.addChildNode(addressNode);
|
addressNode.addChildNode(addressNode);
|
||||||
|
this.context.addAddressNode(addressNode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
package com.ruoyi;
|
package com.ruoyi;
|
||||||
|
|
||||||
import com.ruoyi.common.utils.LoadUtil;
|
import com.ruoyi.common.utils.LoadUtil;
|
||||||
|
import com.ruoyi.project.tool.address.AddressContent;
|
||||||
import com.ruoyi.project.tool.address.CleanUtil;
|
import com.ruoyi.project.tool.address.CleanUtil;
|
||||||
|
import com.ruoyi.project.tool.address.StandardAddress;
|
||||||
import com.ruoyi.project.tool.address.model.CleanAddress;
|
import com.ruoyi.project.tool.address.model.CleanAddress;
|
||||||
|
import com.ruoyi.project.tool.address.utils.DefaultAddressBuilder;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@ -24,6 +27,24 @@ public class CleanUtilTests {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testClear2() {
|
||||||
|
//
|
||||||
|
List<String> textList = LoadUtil.loadList("icbc.dict");
|
||||||
|
|
||||||
|
textList.forEach(item -> {
|
||||||
|
DefaultAddressBuilder defaultAddressBuilder = new DefaultAddressBuilder();
|
||||||
|
StandardAddress standardAddress = defaultAddressBuilder.clear(item);
|
||||||
|
List<AddressContent> list = standardAddress.getResult();
|
||||||
|
list.forEach(x -> {
|
||||||
|
System.out.println(x);
|
||||||
|
});
|
||||||
|
//
|
||||||
|
// CleanAddress cleanAddress = CleanUtil.clear(item);
|
||||||
|
// System.out.println(cleanAddress);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testClear() {
|
public void testClear() {
|
||||||
List<String> textList = LoadUtil.loadList("icbc.dict");
|
List<String> textList = LoadUtil.loadList("icbc.dict");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user