فهرست منبع

buyer增量提取固话和手机号

maxiaoshan 4 سال پیش
والد
کامیت
b4ea3233a2
3 فایلهای تغییر یافته به همراه 122 افزوده شده و 9 حذف شده
  1. 8 8
      standardata/src/config.json
  2. 19 1
      standardata/src/standarbuyer.go
  3. 95 0
      standardata/src/util.go

+ 8 - 8
standardata/src/config.json

@@ -1,14 +1,14 @@
 {
-  "mgofrom": "172.17.4.187:27083",
+  "mgofrom": "192.168.3.207:27092",
   "mgofromsize":5,
-  "mgofromdb":"qfw",
-  "mgoto": "172.17.145.163:27082",
+  "mgofromdb":"mxs",
+  "mgoto": "192.168.3.207:27092",
   "mgotosize":5,
-  "mgotodb":"extract_v3",
-  "mgoent": "172.17.145.163:27082",
+  "mgotodb":"mxs",
+  "mgoent": "192.168.3.207:27092",
   "mgoentsize":5,
-  "mgoentdb":"enterprise",
-  "extractcoll":"result_20200116",
+  "mgoentdb":"mxs",
+  "extractcoll":"test",
   "standardata":{
 	"winner":{
 		"standarent":"winner_enterprisenew",
@@ -16,7 +16,7 @@
 		"redisdb":1
 	},
     "buyer":{
-      "standarent":"buyer_agency_enterprise",
+      "standarent":"buyer_enterprise",
       "standarerr":"buyer_err",
       "redisdb":2
     },

+ 19 - 1
standardata/src/standarbuyer.go

@@ -49,6 +49,15 @@ func buyerStandarData(db string, query map[string]interface{}) {
 				ps = append(ps, v)
 				data := comHisMegerNewData(buyer, "buyer", ps)
 				if data != nil {
+					//提取固话和手机号
+					contactArr := []interface{}{}
+					contactArr = append(contactArr, v)
+					latestFixedPhone, latestMobilePhone, timesFixedPhone, timesMobilePhone := getPhone(contactArr)
+					data["latestfixedphone"] = latestFixedPhone
+					data["latestmobilephone"] = latestMobilePhone
+					data["fixedphone"] = timesFixedPhone
+					data["mobilephone"] = timesMobilePhone
+					data["institute_type"] = "企业"
 					_id := MongoTo.Save(buyerent, data)
 					redis.PutRedis("buyer", buyerbd, buyer, _id.(primitive.ObjectID).Hex(), -1)
 					savetoerr = false
@@ -215,7 +224,7 @@ func buyerStandarHistory(db string) {
 
 //企业数据整合(已有标注信息)
 func buyerMegerBuyerclass(id string, ps map[string]interface{}) map[string]interface{} {
-	tmp := MongoEnt.FindById(buyerent, id, bson.M{"buyerclass": 1})
+	tmp := MongoEnt.FindById(buyerent, id, bson.M{"buyerclass": 1, "contact": 1})
 	if len(tmp) < 1 {
 		return nil
 	}
@@ -233,5 +242,14 @@ func buyerMegerBuyerclass(id string, ps map[string]interface{}) map[string]inter
 	}
 	data["buyerclass"] = newbuyerclass
 	data["updatetime"] = time.Now().Unix()
+	//contact
+	contact := tmp["contact"].(primitive.A)
+	contact = append(contact, ps)
+	//提取固话和手机号
+	latestFixedPhone, latestMobilePhone, timesFixedPhone, timesMobilePhone := getPhone(contact)
+	data["latestfixedphone"] = latestFixedPhone
+	data["latestmobilephone"] = latestMobilePhone
+	data["fixedphone"] = timesFixedPhone
+	data["mobilephone"] = timesMobilePhone
 	return data
 }

+ 95 - 0
standardata/src/util.go

@@ -2,11 +2,31 @@ package main
 
 import (
 	"fmt"
+	qu "qfw/util"
 	"regexp"
 	"strconv"
 	"strings"
 )
 
+var (
+	// Landline (fixed-line) patterns. Each pattern is anchored with ^ and $, so it must match a whole token.
+	// NOTE(review): several character classes below list the same symbol twice (e.g. ",," and "((").
+	// Presumably one of each pair was a full-width variant that got normalized somewhere; confirm against the repository.
+	//
+	// FixedPhone1: area code, dash, 7-8 digit number, optional extensions, e.g. 0411-83622266; 020-87258495-306、301
+	FixedPhone1 = regexp.MustCompile(`^\d{2,4}-\d{7,8}([-,,、转]\d{3,5}){0,}$`)
+	// FixedPhone2: bracketed area code directly followed by the number, e.g. (0411)83622266; (020)87768198-172
+	FixedPhone2 = regexp.MustCompile(`^[((\[【]{1}\d{2,4}[))\]】]{1}\d{7,8}([-,,、转×]\d{3,5}){0,}$`)
+	// FixedPhone3: bare 6-8 digit number, optional extensions, e.g. 83622266; 87768198-818
+	FixedPhone3 = regexp.MustCompile(`^\d{6,8}([-,,、转×]\d{3,5}){0,}$`)
+	// FixedPhone4: leading 0 followed by 9-12 digits, optional extensions, e.g. 051082222549; 02037619082-805
+	FixedPhone4 = regexp.MustCompile(`^(0)\d{9,12}([-,,、转]\d{3,5}){0,}$`)
+	// Mobile patterns.
+	//
+	// MobilePhone1: 11 digits starting with 1, e.g. 15136526299
+	MobilePhone1 = regexp.MustCompile(`^(1)\d{10}$`)
+	// MobilePhone2: bracketed 2-4 digit prefix followed by 11 digits, e.g. (0411)15136526299
+	MobilePhone2 = regexp.MustCompile(`^[((\[【]{1}\d{2,4}[))\]】]{1}\d{11}$`)
+	// MobilePhone3: 2-4 digit prefix, dash, 11 digits, e.g. 0771-13878601988
+	MobilePhone3 = regexp.MustCompile(`^\d{2,4}-\d{11}$`)
+	// MobilePhone4: 3+4+4 digits separated by whitespace, e.g. 138 0565 9091
+	// The Unicode spaces are written as \x{...} because this is a raw string: the previous
+	// `\\u3000` form was read by the regexp engine as the literal characters '\', 'u', '3', '0', ...
+	// and therefore matched digits such as 3 and 0 as "separators" instead of U+3000/U+2003/U+00A0.
+	MobilePhone4 = regexp.MustCompile(`^(1)\d{2}([\s\x{3000}\x{2003}\x{00a0}]+\d{4}){2}$`)
+	// Helper patterns.
+	//
+	// RegAreaCode: a bare 2-4 digit area code.
+	RegAreaCode = regexp.MustCompile(`^\d{2,4}$`)
+	// RegSpace: a run of whitespace, including U+3000, U+2003 and U+00A0.
+	RegSpace = regexp.MustCompile("[\\s\u3000\u2003\u00a0]+")
+	// RegReplace1: a run of dash-like symbols plus any whitespace after it; used to normalize those symbols to "-".
+	RegReplace1 = regexp.MustCompile("(-|—|-|―|×){1,}[\\s\u3000\u2003\u00a0]{0,}")
+	// RegReplace2: a run of opening brackets.
+	RegReplace2 = regexp.MustCompile("[((\\[【]+")
+	// RegReplace3: a run of closing brackets.
+	RegReplace3 = regexp.MustCompile("[))\\]】]+")
+	// RegSplit: a single separator between phone numbers (whitespace, comma, 、, semicolon or slash).
+	RegSplit = regexp.MustCompile("[\\s\u3000\u2003\u00a0,,、;/]")
+)
 var (
 	regOperator, _ = regexp.Compile(`[*|+|)*)]`)
 	regNumFloat, _ = regexp.Compile(`([1-9]\d*|0)(\.\d+)?`)
@@ -30,6 +50,7 @@ var (
 	cutAllSpace, _ = regexp.Compile(`\s*`)
 	spaces         = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n"}
 )
+
 //大写数子金额转换
 func capitalMoney(data []interface{}) []interface{} {
 	nodes := []float64{}
@@ -245,3 +266,77 @@ func replaceSymbol(con string, rep []string) string {
 	return con
 }
 
+// getPhone extracts landline and mobile numbers from the "phone" value of each contact entry.
+//
+// contactArr is walked from its last element to its first, so the last element is treated as
+// the most recent contact. An entry is skipped when it is not a map[string]interface{} (the
+// previous unchecked type assertion panicked on such entries) or when qu.ObjToString returns
+// "" for its "phone" value.
+//
+// Returns:
+//   - latestFixedPhone, latestMobilePhone: the first landline / mobile number found in that walk.
+//   - timesFixedPhone, timesMobilePhone: the landline / mobile number seen most often; on a tie
+//     the number that reached the highest count first in the walk is kept.
+//
+// Each result is "" when no number of that kind was found.
+func getPhone(contactArr []interface{}) (latestFixedPhone, latestMobilePhone, timesFixedPhone, timesMobilePhone string) {
+	maxFixed, maxMobile := 0, 0                                   // highest occurrence count seen so far per kind
+	fixedCount, mobileCount := map[string]int{}, map[string]int{} // occurrences of every number per kind
+	for i := len(contactArr) - 1; i >= 0; i-- {
+		conMap, ok := contactArr[i].(map[string]interface{})
+		if !ok {
+			// A malformed entry should not abort extraction for the whole record.
+			continue
+		}
+		phone := qu.ObjToString(conMap["phone"])
+		if phone == "" {
+			continue
+		}
+		fixedPhoneArr, mobilePhoneArr, _ := PhoneStandard(phone)
+		for _, fp := range fixedPhoneArr {
+			fixedCount[fp]++
+			if fixedCount[fp] > maxFixed {
+				maxFixed = fixedCount[fp]
+				timesFixedPhone = fp
+			}
+			if latestFixedPhone == "" {
+				latestFixedPhone = fp
+			}
+		}
+		for _, mp := range mobilePhoneArr {
+			mobileCount[mp]++
+			if mobileCount[mp] > maxMobile {
+				maxMobile = mobileCount[mp]
+				timesMobilePhone = mp
+			}
+			if latestMobilePhone == "" {
+				latestMobilePhone = mp
+			}
+		}
+	}
+	return latestFixedPhone, latestMobilePhone, timesFixedPhone, timesMobilePhone
+}
+
+// PhoneStandard splits text into phone-number tokens and sorts them into landline numbers,
+// mobile numbers and everything else.
+//
+// Matches of RegReplace1 are first replaced by "-". If the result matches MobilePhone4, that
+// match is returned as the only mobile number, with RegSpace matches removed. Otherwise the
+// text is split with RegSplit and each non-empty token is tried against the mobile patterns
+// and then the landline patterns, in the order listed in rules; the first pattern that matches
+// decides the bucket. For the bracketed patterns (MobilePhone2, FixedPhone2) RegReplace2
+// matches are removed and RegReplace3 matches are replaced by "-". Tokens matching no pattern
+// are returned in Others.
+//
+// qu.Catch is deferred; presumably it recovers from panics (confirm in qfw/util).
+func PhoneStandard(text string) (FixedPhone, MobilePhone, Others []string) {
+	defer qu.Catch()
+	normalized := RegReplace1.ReplaceAllString(text, "-")
+	// Special case: the whole text is one mobile number written in separated groups.
+	if grouped := MobilePhone4.FindString(normalized); grouped != "" {
+		MobilePhone = append(MobilePhone, RegSpace.ReplaceAllString(grouped, ""))
+		return
+	}
+	// Order matters: the first matching rule wins.
+	rules := []struct {
+		pattern   *regexp.Regexp
+		isMobile  bool
+		bracketed bool
+	}{
+		{MobilePhone1, true, false},
+		{MobilePhone2, true, true},
+		{MobilePhone3, true, false},
+		{FixedPhone3, false, false},
+		{FixedPhone2, false, true},
+		{FixedPhone1, false, false},
+		{FixedPhone4, false, false},
+	}
+	for _, token := range RegSplit.Split(normalized, -1) {
+		if token == "" {
+			continue
+		}
+		classified := false
+		for _, rule := range rules {
+			number := rule.pattern.FindString(token)
+			if number == "" {
+				continue
+			}
+			if rule.bracketed {
+				number = RegReplace2.ReplaceAllString(number, "")
+				number = RegReplace3.ReplaceAllString(number, "-")
+			}
+			if rule.isMobile {
+				MobilePhone = append(MobilePhone, number)
+			} else {
+				FixedPhone = append(FixedPhone, number)
+			}
+			classified = true
+			break
+		}
+		if !classified {
+			Others = append(Others, token)
+		}
+	}
+	return
+}