test(地址拆分):地址清洗测试

This commit is contained in:
purple 2020-08-07 18:11:59 +08:00
parent 226041bc05
commit f55c0a77ec
17 changed files with 7445 additions and 2212 deletions

View File

@ -166,7 +166,13 @@ public class ExcelUtil<T> {
Cell cell = heard.getCell(i);
if (StringUtils.isNotNull(cell)) {
String value = this.getCellValue(heard, i).toString();
if (StringUtils.isNotEmpty(value)) {
String noLineValue = value.replace("\r\n", "").replace("\n", "")
.replace("\r", "");
cellMap.put(noLineValue, i);
} else {
cellMap.put(value, i);
}
} else {
cellMap.put(null, i);
}

View File

@ -0,0 +1,46 @@
package com.ruoyi.project.data.basis.domain;
/**
 * Address record of a closed-deal case: the original address text plus the
 * cleaned address and a tag, identified by the deal id.
 *
 * @author lihe
 */
public class ClosingCaseAddress {

    /** Identifier of the deal this address belongs to. */
    private String dealId;

    /** Address text as originally recorded. */
    private String address;

    /** Address text after cleaning. */
    private String cleanAddress;

    /** Free-form tag stored alongside the cleaned address. */
    private String tag;

    // ---- dealId ----

    /** @return the deal identifier, or {@code null} if not set */
    public String getDealId() {
        return this.dealId;
    }

    /** @param dealId the deal identifier to store */
    public void setDealId(String dealId) {
        this.dealId = dealId;
    }

    // ---- address ----

    /** @return the original address text, or {@code null} if not set */
    public String getAddress() {
        return this.address;
    }

    /** @param address the original address text to store */
    public void setAddress(String address) {
        this.address = address;
    }

    // ---- cleanAddress ----

    /** @return the cleaned address text, or {@code null} if not set */
    public String getCleanAddress() {
        return this.cleanAddress;
    }

    /** @param cleanAddress the cleaned address text to store */
    public void setCleanAddress(String cleanAddress) {
        this.cleanAddress = cleanAddress;
    }

    // ---- tag ----

    /** @return the tag, or {@code null} if not set */
    public String getTag() {
        return this.tag;
    }

    /** @param tag the tag to store */
    public void setTag(String tag) {
        this.tag = tag;
    }
}

View File

@ -0,0 +1,39 @@
package com.ruoyi.project.data.basis.mapper;
import com.baomidou.dynamic.datasource.annotation.DS;
import com.ruoyi.project.data.basis.domain.ClosingCaseAddress;
import java.util.List;
/**
 * Mapper for closed-deal address rows.
 * <p>
 * The statements are defined in the XML mapper registered under this
 * interface's fully-qualified name; {@code @DS("teemlink")} selects the
 * data source named "teemlink" for them.
 *
 * @author lihe
 */
@DS("teemlink")
public interface ClosingCaseAddressMapper {
/**
 * Counts all rows available for cleaning.
 *
 * @return total number of rows
 */
int selectCount();
/**
 * Reads one page of address rows.
 * <p>
 * NOTE(review): the XML statement binds {@code #{offset}} and
 * {@code #{limit}} by name and no {@code @Param} is declared here, so this
 * presumably relies on the build keeping parameter names
 * ({@code -parameters}) - confirm.
 *
 * @param offset number of rows to skip
 * @param limit  maximum number of rows to return
 * @return the rows of the requested page
 */
List<ClosingCaseAddress> selectPageList(int offset, int limit);
/**
 * Writes the cleaned address and tag of the given record back to its row,
 * matched by deal id.
 *
 * @param closingCaseAddress record carrying the deal id, cleaned address and tag
 * @return presumably the number of rows updated (MyBatis update convention) - confirm
 */
int clear(ClosingCaseAddress closingCaseAddress);
}

View File

@ -3,6 +3,7 @@ package com.ruoyi.project.data.cases.service.impl;
import com.baomidou.dynamic.datasource.annotation.DS;
import com.ruoyi.common.constant.LabelConstants;
import com.ruoyi.common.utils.DateUtils;
import com.ruoyi.common.utils.LoadUtil;
import com.ruoyi.project.data.cases.domain.OriginalNewHouseCase;
import com.ruoyi.project.data.cases.mapper.OriginalNewHouseCaseMapper;
import com.ruoyi.project.data.cases.mapper.sync.DownloadOriginalNewHouseCaseMapper;
@ -127,61 +128,17 @@ public class DownloadOriginalNewHouseCaseServiceImpl {
* @param tableRoute
*/
private void afterFirst(Integer tableRoute) {
    // The copy statement lives in a SQL template; the previous inline INSERT is
    // dropped because running both would copy every first-batch row twice.
    try {
        // tableRoute is split as <year * 100 + month> and rendered as "yyyy-MM".
        String yearMonth = String.format("%d-%02d", tableRoute / 100, tableRoute % 100);
        String sql = LoadUtil.loadContent("sql-template/copy_yishou_data.sql")
                .replace("#tableRoute#", tableRoute.toString())
                .replace("#yearMonth#", yearMonth);
        jdbcTemplate.update(sql);
    } catch (Exception e) {
        // Best effort: a failed push must not abort the caller. The logger already
        // records the stack trace, so no extra printStackTrace() is needed.
        logger.error("第一批次案例推送成交案例库异常", e);
    }
}
/**
@ -192,60 +149,15 @@ public class DownloadOriginalNewHouseCaseServiceImpl {
private void afterSecond(Integer tableRoute) {
    // The copy statement lives in a SQL template; the previous inline INSERT is
    // dropped because running both would copy every second-batch row twice.
    try {
        // tableRoute is split as <year * 100 + month> and rendered as "yyyy-MM".
        String yearMonth = String.format("%d-%02d", tableRoute / 100, tableRoute % 100);
        // The second batch reads from the "<tableRoute>_update" source table.
        String sql = LoadUtil.loadContent("sql-template/copy_yishou_data.sql")
                .replace("#tableRoute#", tableRoute.toString() + "_update")
                .replace("#yearMonth#", yearMonth);
        jdbcTemplate.update(sql);
    } catch (Exception e) {
        // Best effort: a failed push must not abort the caller. The logger already
        // records the stack trace, so no extra printStackTrace() is needed.
        logger.error("第二批次案例推送成交案例库异常", e);
    }
}
}

View File

@ -1,7 +1,7 @@
package com.ruoyi.project.tool.address;
import com.ruoyi.project.tool.address.model.AdrNode;
import com.ruoyi.project.tool.address.utils.ParseContext;
import com.ruoyi.project.tool.address.parse.ParseContext;
import java.util.LinkedList;
import java.util.List;

View File

@ -1,4 +1,4 @@
package com.ruoyi.project.tool.address.utils;
package com.ruoyi.project.tool.address.parse;
import com.ruoyi.project.tool.address.model.AdrNode;

View File

@ -1,7 +1,6 @@
package com.ruoyi.project.tool.address.parse;
import com.ruoyi.common.utils.LoadUtil;
import com.ruoyi.project.tool.address.utils.ParseContext;
import com.ruoyi.project.tool.address.model.RoadNode;
import java.util.List;

View File

@ -1,6 +1,5 @@
package com.ruoyi.project.tool.address.parse;
import com.ruoyi.project.tool.address.utils.ParseContext;
import com.ruoyi.project.tool.address.model.*;
import java.util.regex.Matcher;
@ -104,7 +103,7 @@ public class YParse {
private void parseHao() {
// 连号
String text = this.context.getContent().substring(this.context.getStartIndex());
String text = this.context.getContent().substring(this.context.getStartIndex()).replace("(临时)", "");
Pattern lianHaoPattern = Pattern.compile(LIAN_HAO_PATTERN);
Matcher matcher = lianHaoPattern.matcher(text);
if (matcher.find()) {

View File

@ -2,7 +2,6 @@ package com.ruoyi.project.tool.address.parse;
import com.ruoyi.common.utils.StringUtils;
import com.ruoyi.project.tool.address.model.*;
import com.ruoyi.project.tool.address.utils.ParseContext;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -18,6 +17,7 @@ import java.util.regex.Pattern;
*/
public class ZParse {
private static final String FU_SHI = "复式";
private static final String SHI_JI = "室及";
private static final String NUMBER_PATTERN = "\\d+";
private static final String LIAN_SHI_PATTERN = "(\\d+)[、\\.\\-\\_](\\d)(室)?$";
private static final String DEFAULT_SHI_PATTERN = "[\\dA-Za-z]+室";
@ -118,6 +118,12 @@ public class ZParse {
private void parseShi() {
String text = this.context.getContent().substring(this.startIndex);
if (this.isPark && !text.contains(SHI_JI)) {
// 表示该地址是车位无需处理室号
return;
}
// 复式
if (-1 != text.indexOf(FU_SHI)) {
parseFUSHI();

View File

@ -1,6 +1,7 @@
package com.ruoyi.project.tool.address.utils;
import com.ruoyi.project.tool.address.AddressResult;
import com.ruoyi.project.tool.address.parse.ParseContext;
import com.ruoyi.project.tool.address.parse.XParse;
/**

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.ruoyi.project.data.basis.mapper.ClosingCaseAddressMapper">
<!-- Total number of rows in closing_case; used by the caller to compute the page count. -->
<select id="selectCount" resultType="int">
select count(1) from closing_case;
</select>
<!--
  One page of rows ordered by item_dealid, using OFFSET / FETCH NEXT paging.
  NOTE(review): the alias "dealid" differs in case from the dealId property and
  presumably relies on case-insensitive auto-mapping; the offset and limit
  parameters are bound by name - confirm both against the MyBatis configuration.
-->
<select id="selectPageList" resultType="com.ruoyi.project.data.basis.domain.ClosingCaseAddress">
select
item_dealid as dealid,
ITEM_HOUSEADDRESS as address,
clean_address as cleanAddress,
tag as tag
from closing_case order by item_dealid offset #{offset} ROW FETCH NEXT #{limit}
ROWS ONLY;
</select>
<!-- Stores the cleaned address and tag on the row whose ITEM_DEALID equals the record's dealId. -->
<update id="clear" parameterType="com.ruoyi.project.data.basis.domain.ClosingCaseAddress">
update closing_case set clean_address=#{cleanAddress},tag=#{tag} where ITEM_DEALID=#{dealId};
</update>
</mapper>

View File

@ -0,0 +1,49 @@
-- Copies one month of first-hand cases from uv_compute into the deal-case form
-- table and then registers the copied rows in T_DOCUMENT.
-- The caller substitutes two placeholders before execution: the year-month
-- label and the suffix of the source table name.
--
-- Step 1: remove rows previously loaded for this month so the copy can be re-run.
-- NOTE(review): the matching T_DOCUMENT rows are not removed here; confirm that a
-- re-run cannot produce duplicate IDs in T_DOCUMENT.
delete from obpm_LianCheng_Data.dbo.TLK_成交案例 where ITEM_YEARMONTH='#yearMonth#' and ITEM_DEALTYPE=N'一手';
-- Step 2: current row count, used as the starting offset for generated deal ids.
-- The table name is fully qualified like every other reference in this script so
-- the result does not depend on the connection's default database.
-- NOTE(review): the count is taken after the delete above, so generated ids may
-- repeat ids already assigned to other months; confirm whether ITEM_DEALID must
-- be unique.
declare @count int;
select @count=count(1) from obpm_LianCheng_Data.dbo.TLK_成交案例;
-- Step 3: copy the cases. Residential type.
insert into obpm_LianCheng_Data.dbo.TLK_成交案例(LASTMODIFIED, FORMNAME, AUTHOR, AUTHOR_DEPT_INDEX, CREATED, FORMID, ISTMP, VERSIONS, APPLICATIONID, STATEINT, LASTMODIFIER, DOMAINID, AUDITORLIST, COAUDITORLIST,
ID,ITEM_DEALID,ITEM_SECTOR,ITEM_CIRCLEPOSITION,ITEM_DISTRICT,ITEM_RANAME,ITEM_HOUSEADDRESS,ITEM_AREA,ITEM_SUMPRICE,ITEM_UNITPRICE,ITEM_SIGNINGDATA,ITEM_FLOOR,ITEM_ROOMNATURE,ITEM_APARTMENT,ITEM_DEALTYPE,ITEM_CALCULATIONUNITPRICE,ITEM_CONSULTUNITPRICE,ITEM_CONSULTTOTALPRICE,ITEM_YEARMONTH,ITEM_CALCULATIONTOTALPRICE,ITEM_HOUSETYPE,case_id)
select getdate(), N'物业信息管理/成交案例', N'PNDbyK6lSFWFGlLJFXj', 'Os6qe4Pmq5viTO8lTnW_3QBGkL7jqDjm2BRk7uJ', getdate(),
N'gTn1hQuxFRdwLakei6q', 0, 1, N'Ts7TykYmuEzzZgWhXHj', 0, N'PNDbyK6lSFWFGlLJFXj', N'BclzHtmfLQoAA5ICTb5', N'{}',
N'{}', case_id,'a'+cast( (ROW_NUMBER() over(order by case_id) + @count) as nvarchar(20)),case_block,case_loop,case_county,
case_community_name,case_address,case_area,case_total_price,case_unit_price,case_signing_date,case_floor,case_house_property,case_apartment_layout,N'一手',compute_unit_price,reference_unit_price,reference_total_price,'#yearMonth#',compute_total_price,case_house_type,case_id
from uv_compute.dbo.original_new_house_case_#tableRoute#;
-- Step 4: register every first-hand row of this month in T_DOCUMENT, using the
-- row ID as both ID and MAPPINGID.
insert into obpm_LianCheng_Data.dbo.T_DOCUMENT
(
ID
, LASTMODIFIED
, FORMNAME
, AUTHOR
, AUTHOR_DEPT_INDEX
, CREATED
, FORMID
, ISTMP
, VERSIONS
, APPLICATIONID
, LASTMODIFIER
, DOMAINID
, AUDITORLIST
, COAUDITORLIST
, MAPPINGID
)
SELECT ID
, LASTMODIFIED
, FORMNAME
, AUTHOR
, AUTHOR_DEPT_INDEX
, CREATED
, FORMID
, ISTMP
, VERSIONS
, APPLICATIONID
, LASTMODIFIER
, DOMAINID
, AUDITORLIST
, COAUDITORLIST
, ID
FROM obpm_LianCheng_Data.dbo.TLK_成交案例
where ITEM_YEARMONTH='#yearMonth#' and ITEM_DEALTYPE=N'一手';

View File

@ -0,0 +1,69 @@
package com.ruoyi;
import com.baomidou.dynamic.datasource.annotation.DS;
import com.ruoyi.common.utils.LoadUtil;
import com.ruoyi.project.data.basis.domain.ClosingCaseAddress;
import com.ruoyi.project.data.basis.mapper.ClosingCaseAddressMapper;
import com.ruoyi.project.tool.address.AddressResult;
import com.ruoyi.project.tool.address.utils.AddressUtil;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.junit4.SpringRunner;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
/**
 * Database-backed run of the address cleaner over every stored closing-case address.
 * <p>
 * NOTE(review): this writes cleaned addresses back through the mapper under the
 * "uat" profile and contains no assertions; it behaves as a batch job rather
 * than a verifying test - confirm it is meant to run with the regular suite.
 */
@RunWith(SpringRunner.class)
@SpringBootTest(classes = RuoYiApplication.class)
@ActiveProfiles("uat")
public class AddressUtilDBTests {

    /** Number of rows read and cleaned per page. */
    private static final int PAGE_SIZE = 5000;

    @Autowired
    private ClosingCaseAddressMapper mapper;

    /**
     * Address cleaning test: pages through all rows and cleans the pages in parallel.
     */
    @Test
    public void testClear() {
        int total = mapper.selectCount();
        // Round up so a trailing partial page is processed as well.
        int pageCount = total / PAGE_SIZE + (total % PAGE_SIZE == 0 ? 0 : 1);
        IntStream.range(0, pageCount).parallel().forEach(this::cleanPage);
    }

    /** Loads the page with the given zero-based index and cleans each of its rows. */
    private void cleanPage(int pageIndex) {
        System.out.println(pageIndex);
        List<ClosingCaseAddress> list = mapper.selectPageList(pageIndex * PAGE_SIZE, PAGE_SIZE);
        list.forEach(this::cleanRow);
    }

    /** Cleans one row and stores the result; rows without a cleaned address are left untouched. */
    private void cleanRow(ClosingCaseAddress item) {
        try {
            AddressResult addressResult = AddressUtil.clear(item.getAddress());
            if (null == addressResult.getCleanAddress()) {
                return;
            }
            String address = String.join(",", addressResult.getCleanAddress());
            item.setCleanAddress(address);
            mapper.clear(item);
        } catch (Exception e) {
            // Keep going: one unparseable address must not stop the whole run.
            System.out.println(item.getAddress());
            e.printStackTrace();
        }
    }
}

View File

@ -102,6 +102,48 @@ public class AddressUtilTests {
}
Assert.assertEquals("北华路168弄35号1002室", addressResult.getCleanAddress().get(0));
Assert.assertEquals(addressResult.getDistrict(), "闵行");
addressResult = AddressUtil.clear("西藏北路1308弄8号地下1层车位(人防)27室");
if (null == addressResult.getCleanAddress()) {
return;
}
Assert.assertEquals("西藏北路1308弄8号", addressResult.getCleanAddress().get(0));
Assert.assertEquals(addressResult.getDistrict(), null);
}
/**
 * A building written as "...号...号楼": the cleaned address is expected to keep
 * both numbers and drop the trailing "楼".
 */
@Test
public void testHaoLou() {
    AddressResult addressResult = AddressUtil.clear("四平路283号6号楼2802室");
    // Fail instead of returning early: a missing result must not pass silently.
    Assert.assertNotNull(addressResult.getCleanAddress());
    Assert.assertEquals("四平路283号6号2802室", addressResult.getCleanAddress().get(0));
}
/**
 * Duplex marker: a room number followed by "(复式)" is expected to be cleaned
 * to the plain room ("室") form, with no district resolved.
 */
@Test
public void testFUSHI() {
    AddressResult addressResult = AddressUtil.clear("硕望路188弄21号101(复式)");
    // Fail instead of returning early: a missing result must not pass silently.
    Assert.assertNotNull(addressResult.getCleanAddress());
    Assert.assertEquals("硕望路188弄21号101室", addressResult.getCleanAddress().get(0));
    Assert.assertNull(addressResult.getDistrict());
}
/**
 * Smoke test for a town / neighbourhood-block style address: cleaning must
 * complete without throwing and must return a result object.
 */
@Test
public void testZhenJieFang() {
    AddressResult addressResult = AddressUtil.clear("南翔镇7街坊255/2丘19幢1单元1203室");
    // TODO: pin the expected cleaned address once the intended output for this format is decided.
    Assert.assertNotNull(addressResult);
}
/**
@ -144,10 +186,10 @@ public class AddressUtilTests {
@Test
public void printStreet() {
public void printStreetOrRoad() {
List<String> textList = LoadUtil.loadList("jie.dict");
textList.forEach(item -> {
if (!item.contains("") && item.contains("") && !item.contains("街坊")) {
if ((item.contains("") || item.contains("")) && !item.contains("街坊")) {
System.out.println(item);
}
});

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff