feature(地址清洗)重构

This commit is contained in:
purple 2020-08-05 23:43:47 +08:00
parent daca46924b
commit de0a56880c
6 changed files with 79 additions and 15 deletions

View File

@ -27,4 +27,12 @@ public class AddressContent {
public String getResult() { public String getResult() {
return sb.toString(); return sb.toString();
} }
/**
 * Builds a debug representation of this object from its {@code addressType}
 * and {@code sb} fields.
 *
 * @return text of the form {@code AddressContent{addressType=..., sb=...}}
 */
@Override
public String toString() {
    final String typePart = "addressType=" + addressType;
    final String bufferPart = ", sb=" + sb;
    return "AddressContent{" + typePart + bufferPart + '}';
}
} }

View File

@ -1,5 +1,7 @@
package com.ruoyi.project.tool.address; package com.ruoyi.project.tool.address;
import com.ruoyi.project.tool.address.model.AddressType;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
@ -16,6 +18,7 @@ public class AddressContext {
*/ */
private ArrayList<AddressNode> nodeList = new ArrayList<>(); private ArrayList<AddressNode> nodeList = new ArrayList<>();
private StringBuilder stringBuilder; private StringBuilder stringBuilder;
private Integer startIndex = 0;
public AddressContext(String text) { public AddressContext(String text) {
this.stringBuilder = new StringBuilder(text); this.stringBuilder = new StringBuilder(text);
@ -26,16 +29,30 @@ public class AddressContext {
} }
public void addAddressNode(AddressNode node) { public void addAddressNode(AddressNode node) {
startIndex = node.getEndIndex() - 1;
this.nodeList.add(node); this.nodeList.add(node);
} }
/**
 * Returns the current start index held by this context.
 * The value starts at 0 and is reset by {@code addAddressNode} to the
 * added node's end index minus one.
 *
 * @return the current start index
 */
public Integer getStartIndex() {
    return this.startIndex;
}
public StandardAddress getResult() { public StandardAddress getResult() {
StandardAddress standardAddress = new StandardAddress(this.stringBuilder.toString()); StandardAddress standardAddress = new StandardAddress(this.stringBuilder.toString());
// nodeList.forEach(node->{ nodeList.forEach(node -> {
//// AddressContent addressContent = new AddressContent(); AddressContent addressContent = new AddressContent(AddressType.CONDO);
// if (node.dataNode()) {
//// standardAddress.addAddressContent(); addressContent.appendContent(node.getContent());
// }); } else {
if (1 == node.getContentList().size()) {
addressContent.appendContent(node.getContentList().get(0).getContent());
} else {
// 多个先组合再拆分
}
}
standardAddress.addAddressContent(addressContent);
});
return standardAddress; return standardAddress;
} }

View File

@ -1,5 +1,7 @@
package com.ruoyi.project.tool.address; package com.ruoyi.project.tool.address;
import com.ruoyi.common.utils.StringUtils;
import java.util.AbstractList; import java.util.AbstractList;
import java.util.Collection; import java.util.Collection;
import java.util.LinkedList; import java.util.LinkedList;
@ -55,6 +57,15 @@ public class AddressNode {
return content; return content;
} }
/**
 * Data node check: tells whether this node carries content of its own.
 *
 * @return the result of {@code StringUtils.isNotEmpty} applied to this node's content
 */
public Boolean dataNode() {
    final String ownContent = this.content;
    return StringUtils.isNotEmpty(ownContent);
}
public void setContent(String content) { public void setContent(String content) {
this.content = content; this.content = content;
} }
@ -62,4 +73,8 @@ public class AddressNode {
public void addChildNode(AddressNode addressNode) { public void addChildNode(AddressNode addressNode) {
contentList.add(addressNode); contentList.add(addressNode);
} }
/**
 * Returns the child nodes that were attached through {@code addChildNode}.
 * The internal list itself is returned, not a copy.
 *
 * @return this node's child list
 */
public List<AddressNode> getContentList() {
    return this.contentList;
}
} }

View File

@ -1,6 +1,5 @@
package com.ruoyi.project.tool.address.service; package com.ruoyi.project.tool.address.service;
import com.sun.xml.internal.fastinfoset.algorithm.BooleanEncodingAlgorithm;
import java.util.List; import java.util.List;

View File

@ -75,7 +75,7 @@ public class AddressNodeParse {
} }
for (Map.Entry<String, String> district : districtMap.entrySet()) { for (Map.Entry<String, String> district : districtMap.entrySet()) {
int index = shanghaiAndDistrict.indexOf(district.getKey()); int index = shanghaiAndDistrict.indexOf(district.getKey(), this.context.getStartIndex());
if (-1 != index) { if (-1 != index) {
AddressNode addressNode = new AddressNode(AddressNodeType.DISTRICT, district.getValue()); AddressNode addressNode = new AddressNode(AddressNodeType.DISTRICT, district.getValue());
addressNode.setStartIndex(index); addressNode.setStartIndex(index);
@ -92,7 +92,7 @@ public class AddressNodeParse {
*/ */
public void parseRegion() { public void parseRegion() {
for (int i = 0; i < regionList.size(); i++) { for (int i = 0; i < regionList.size(); i++) {
int index = this.context.getContent().indexOf(regionList.get(i)); int index = this.context.getContent().indexOf(regionList.get(i), this.context.getStartIndex());
if (-1 != index) { if (-1 != index) {
AddressNode addressNode = new AddressNode(AddressNodeType.REGION, regionList.get(i)); AddressNode addressNode = new AddressNode(AddressNodeType.REGION, regionList.get(i));
addressNode.setStartIndex(index); addressNode.setStartIndex(index);
@ -124,9 +124,8 @@ public class AddressNodeParse {
*/ */
public void parseRoad() { public void parseRoad() {
AddressNode addressNode = new AddressNode(AddressNodeType.ROAD); AddressNode addressNode = new AddressNode(AddressNodeType.ROAD);
for (int i = 0; i < roadList.size(); i++) { for (int i = 0; i < roadList.size(); i++) {
int index = this.context.getContent().indexOf(roadList.get(i)); int index = this.context.getContent().indexOf(roadList.get(i), this.context.getStartIndex());
if (-1 != index) { if (-1 != index) {
if (null == addressNode.getStartIndex()) { if (null == addressNode.getStartIndex()) {
addressNode.setStartIndex(index); addressNode.setStartIndex(index);
@ -171,12 +170,13 @@ public class AddressNodeParse {
Pattern nongPattern = Pattern.compile(DEFAULT_NONG_PATTERN); Pattern nongPattern = Pattern.compile(DEFAULT_NONG_PATTERN);
Matcher matcher = nongPattern.matcher(this.context.getContent()); Matcher matcher = nongPattern.matcher(this.context.getContent());
if (matcher.find()) { if (matcher.find()) {
int index = this.context.getContent().indexOf(matcher.group(0)); int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
if (-1 != index) { if (-1 != index) {
AddressNode addressNode = new AddressNode(AddressNodeType.NONG, matcher.group(0)); AddressNode addressNode = new AddressNode(AddressNodeType.NONG, matcher.group(0));
addressNode.setStartIndex(index); addressNode.setStartIndex(index);
addressNode.setEndIndex(index + matcher.group(0).length() + 1); addressNode.setEndIndex(index + matcher.group(0).length() + 1);
addressNode.addChildNode(addressNode); addressNode.addChildNode(addressNode);
this.context.addAddressNode(addressNode);
} }
} }
} }
@ -188,12 +188,13 @@ public class AddressNodeParse {
Pattern haoPattern = Pattern.compile(DEFAULT_HAO_PATTERN); Pattern haoPattern = Pattern.compile(DEFAULT_HAO_PATTERN);
Matcher matcher = haoPattern.matcher(this.context.getContent()); Matcher matcher = haoPattern.matcher(this.context.getContent());
if (matcher.find()) { if (matcher.find()) {
int index = this.context.getContent().indexOf(matcher.group(0)); int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
if (-1 != index) { if (-1 != index) {
AddressNode addressNode = new AddressNode(AddressNodeType.HAO, matcher.group(0)); AddressNode addressNode = new AddressNode(AddressNodeType.HAO, matcher.group(0));
addressNode.setStartIndex(index); addressNode.setStartIndex(index);
addressNode.setEndIndex(index + matcher.group(0).length() + 1); addressNode.setEndIndex(index + matcher.group(0).length() + 1);
addressNode.addChildNode(addressNode); addressNode.addChildNode(addressNode);
this.context.addAddressNode(addressNode);
} }
} }
} }
@ -205,12 +206,13 @@ public class AddressNodeParse {
Pattern haoPattern = Pattern.compile(DEFAULT_ZHUANG_PATTERN); Pattern haoPattern = Pattern.compile(DEFAULT_ZHUANG_PATTERN);
Matcher matcher = haoPattern.matcher(this.context.getContent()); Matcher matcher = haoPattern.matcher(this.context.getContent());
if (matcher.find()) { if (matcher.find()) {
int index = this.context.getContent().indexOf(matcher.group(0)); int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
if (-1 != index) { if (-1 != index) {
AddressNode addressNode = new AddressNode(AddressNodeType.ZHUANG, matcher.group(0)); AddressNode addressNode = new AddressNode(AddressNodeType.ZHUANG, matcher.group(0));
addressNode.setStartIndex(index); addressNode.setStartIndex(index);
addressNode.setEndIndex(index + matcher.group(0).length() + 1); addressNode.setEndIndex(index + matcher.group(0).length() + 1);
addressNode.addChildNode(addressNode); addressNode.addChildNode(addressNode);
this.context.addAddressNode(addressNode);
} }
} }
} }
@ -222,12 +224,13 @@ public class AddressNodeParse {
Pattern haoPattern = Pattern.compile(DEFAULT_FLOOR_PATTERN); Pattern haoPattern = Pattern.compile(DEFAULT_FLOOR_PATTERN);
Matcher matcher = haoPattern.matcher(this.context.getContent()); Matcher matcher = haoPattern.matcher(this.context.getContent());
if (matcher.find()) { if (matcher.find()) {
int index = this.context.getContent().indexOf(matcher.group(0)); int index = this.context.getContent().indexOf(matcher.group(0), this.context.getStartIndex());
if (-1 != index) { if (-1 != index) {
AddressNode addressNode = new AddressNode(AddressNodeType.CENG, matcher.group(0)); AddressNode addressNode = new AddressNode(AddressNodeType.CENG, matcher.group(0));
addressNode.setStartIndex(index); addressNode.setStartIndex(index);
addressNode.setEndIndex(index + matcher.group(0).length() + 1); addressNode.setEndIndex(index + matcher.group(0).length() + 1);
addressNode.addChildNode(addressNode); addressNode.addChildNode(addressNode);
this.context.addAddressNode(addressNode);
} }
} }
} }
@ -239,12 +242,13 @@ public class AddressNodeParse {
Pattern haoPattern = Pattern.compile(DEFAULT_SHI_PATTERN); Pattern haoPattern = Pattern.compile(DEFAULT_SHI_PATTERN);
Matcher matcher = haoPattern.matcher(this.context.getContent()); Matcher matcher = haoPattern.matcher(this.context.getContent());
if (matcher.find()) { if (matcher.find()) {
int index = this.context.getContent().indexOf(matcher.group(0)); int index = this.context.getContent().indexOf(matcher.group(0),this.context.getStartIndex());
if (-1 != index) { if (-1 != index) {
AddressNode addressNode = new AddressNode(AddressNodeType.SHI, matcher.group(0)); AddressNode addressNode = new AddressNode(AddressNodeType.SHI, matcher.group(0));
addressNode.setStartIndex(index); addressNode.setStartIndex(index);
addressNode.setEndIndex(index + matcher.group(0).length() + 1); addressNode.setEndIndex(index + matcher.group(0).length() + 1);
addressNode.addChildNode(addressNode); addressNode.addChildNode(addressNode);
this.context.addAddressNode(addressNode);
} }
} }
} }

View File

@ -1,8 +1,11 @@
package com.ruoyi; package com.ruoyi;
import com.ruoyi.common.utils.LoadUtil; import com.ruoyi.common.utils.LoadUtil;
import com.ruoyi.project.tool.address.AddressContent;
import com.ruoyi.project.tool.address.CleanUtil; import com.ruoyi.project.tool.address.CleanUtil;
import com.ruoyi.project.tool.address.StandardAddress;
import com.ruoyi.project.tool.address.model.CleanAddress; import com.ruoyi.project.tool.address.model.CleanAddress;
import com.ruoyi.project.tool.address.utils.DefaultAddressBuilder;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
@ -24,6 +27,24 @@ public class CleanUtilTests {
}); });
} }
/**
 * Feeds every entry loaded from {@code icbc.dict} through
 * {@code DefaultAddressBuilder.clear} and prints each resulting
 * {@code AddressContent}. Makes no assertions; the output is meant to be
 * inspected by hand.
 */
@Test
public void testClear2() {
    for (String rawAddress : LoadUtil.loadList("icbc.dict")) {
        // A fresh builder is created for every input line.
        StandardAddress parsed = new DefaultAddressBuilder().clear(rawAddress);
        for (AddressContent content : parsed.getResult()) {
            System.out.println(content);
        }
    }
}
@Test @Test
public void testClear() { public void testClear() {
List<String> textList = LoadUtil.loadList("icbc.dict"); List<String> textList = LoadUtil.loadList("icbc.dict");