|
@@ -1,43 +1,19 @@
|
|
|
package main
|
|
|
|
|
|
import (
|
|
|
+ "github.com/donnie4w/go-logger/logger"
|
|
|
+ "go.mongodb.org/mongo-driver/bson/primitive"
|
|
|
+ "gopkg.in/mgo.v2/bson"
|
|
|
"log"
|
|
|
qu "qfw/util"
|
|
|
"regexp"
|
|
|
- "strconv"
|
|
|
"strings"
|
|
|
"time"
|
|
|
- "util/mgodb"
|
|
|
-
|
|
|
- "github.com/donnie4w/go-logger/logger"
|
|
|
- "go.mongodb.org/mongo-driver/bson/primitive"
|
|
|
- bson "gopkg.in/mgo.v2/bson"
|
|
|
)
|
|
|
|
|
|
var LetterCase = regexp.MustCompile("[A-Za-z]")
|
|
|
var LetterCase2 = regexp.MustCompile("[A-Za-z0-9]")
|
|
|
var FilteReg = regexp.MustCompile("[()(){}]*")
|
|
|
-var (
|
|
|
- regNumFloat, _ = regexp.Compile(`([1-9]\d*|0)(\.\d+)?`)
|
|
|
- regStrUnit, _ = regexp.Compile(`[元|万|亿]`)
|
|
|
- regStrChar = `[〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]`
|
|
|
- moneyRegChar, _ = regexp.Compile(regStrChar)
|
|
|
-
|
|
|
- contentUnit, _ = regexp.Compile(`(万元|单位/万)`)
|
|
|
- numCapitals, _ = regexp.Compile(`([〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]{4,40})`)
|
|
|
- regQianw, _ = regexp.Compile(`\d{1,2}千万`)
|
|
|
- moneyChar = map[string]interface{}{ //"〇": "0", "零": "0",
|
|
|
- "一": float64(1), "壹": float64(1), "二": float64(2), "贰": float64(2), "三": float64(3), "叁": float64(3), "四": float64(4), "肆": float64(4), "五": float64(5), "伍": float64(5),
|
|
|
- "六": float64(6), "陆": float64(6), "七": float64(7), "柒": float64(7), "八": float64(8), "捌": float64(8), "九": float64(9), "玖": float64(9), "十": float64(10), "拾": float64(10),
|
|
|
- "百": float64(100), "佰": float64(100), "千": float64(1000), "仟": float64(1000), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
|
|
|
- "零": float64(0), "点": ".", "角": float64(0.1), "分": float64(0.01),
|
|
|
- }
|
|
|
- moneyUnit = map[string]float64{
|
|
|
- "元": float64(1), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000), //单位
|
|
|
- }
|
|
|
- cutAllSpace, _ = regexp.Compile(`\s*`)
|
|
|
- spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n"}
|
|
|
-)
|
|
|
|
|
|
//匹配方式map
|
|
|
var MatchType = map[string]interface{}{
|
|
@@ -220,7 +196,6 @@ func GetIdRange() (bson.M, bool) {
|
|
|
esquery := `{"query": {"bool": {"must": [{"range": {"id": {"gt": "` + LatestId + `" , "lte": "` + endId + `"}}}]}}, "sort": [{"comeintime": "desc"}]}`
|
|
|
if Es.Count(Index, Itype, esquery) > 0 { //有数据返回id区间
|
|
|
list := Es.Get(Index, Itype, esquery)
|
|
|
- endId = qu.ObjToString((*list)[0]["_id"])
|
|
|
tmpRange := bson.M{
|
|
|
"range": bson.M{
|
|
|
"id": bson.M{
|
|
@@ -229,7 +204,7 @@ func GetIdRange() (bson.M, bool) {
|
|
|
},
|
|
|
},
|
|
|
}
|
|
|
- LatestId = endId
|
|
|
+ LatestId = qu.ObjToString((*list)[0]["_id"])
|
|
|
return tmpRange, true
|
|
|
}
|
|
|
} else { //结束id不大于起始id 退出
|
|
@@ -248,7 +223,7 @@ func GetIdRange() (bson.M, bool) {
|
|
|
|
|
|
// query := bson.M{
|
|
|
// "_id": bson.M{
|
|
|
- // "$gt": mgodb.StringTOBsonId(LatestId),
|
|
|
+ // "$gt": mongodb.StringTOBsonId(LatestId),
|
|
|
// },
|
|
|
// }
|
|
|
// sort := bson.M{
|
|
@@ -260,7 +235,7 @@ func GetIdRange() (bson.M, bool) {
|
|
|
// //查抽取表最后一个id
|
|
|
// extData, err := MgoExt.FindByLimit(ExtColl, query, sort, fields, 0, 1)
|
|
|
// if len(extData) == 1 && err == nil {
|
|
|
- // endId := mgodb.BsonTOStringId(extData[0]["_id"])
|
|
|
+ // endId := mongodb.BsonTOStringId(extData[0]["_id"])
|
|
|
// if endId > LatestId {
|
|
|
// tmpRange := bson.M{
|
|
|
// "range": bson.M{
|
|
@@ -412,6 +387,10 @@ func MergeData(history, tmp map[string]interface{}, isTagRule, isDepartRmvRep bo
|
|
|
ruleid2 := qu.ObjToString(tmp["ruleid"])
|
|
|
history["ruleid"] = MergeField(ruleid1, ruleid2)
|
|
|
|
|
|
+ rulename1 := qu.ObjToString(history["rulename"])
|
|
|
+ rulename2 := qu.ObjToString(tmp["rulename"])
|
|
|
+ history["rulename"] = MergeField(rulename1, rulename2)
|
|
|
+
|
|
|
if isTagRule { //标签模式 tagname、tagid合并
|
|
|
tagname1 := qu.ObjToString(history["tagname"])
|
|
|
tagname2 := qu.ObjToString(tmp["tagname"])
|
|
@@ -429,6 +408,9 @@ func MergeData(history, tmp map[string]interface{}, isTagRule, isDepartRmvRep bo
|
|
|
departname1 := qu.ObjToString(history["departname"])
|
|
|
departname2 := qu.ObjToString(tmp["departname"])
|
|
|
history["departname"] = MergeField(departname1, departname2)
|
|
|
+ departid1 := qu.ObjToString(history["departid"])
|
|
|
+ departid2 := qu.ObjToString(tmp["departid"])
|
|
|
+ history["departid"] = MergeField(departid1, departid2)
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -563,218 +545,6 @@ func SearchEnterpriseInfo(tmp map[string]interface{}) {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-//金额转换
|
|
|
-func ObjToMoney(text string) float64 {
|
|
|
- isfindUnit := true
|
|
|
- ret := capitalMoney(text)
|
|
|
- if ret < float64(10000) || ret > float64(50000000000) {
|
|
|
- ret2, b := numMoney(text)
|
|
|
- isfindUnit = b
|
|
|
- if ret2 > ret {
|
|
|
- ret = ret2
|
|
|
- }
|
|
|
- }
|
|
|
- f, _ := strconv.ParseFloat(strconv.FormatFloat(ret, 'f', 4, 64), 64)
|
|
|
- if f < 1 {
|
|
|
- f = 0
|
|
|
- }
|
|
|
- //如果金额小于50,全文检索单位:万
|
|
|
- if f < 50 && f > 0 && isfindUnit {
|
|
|
- rep := contentUnit.FindAllStringIndex(text, -1)
|
|
|
- if len(rep) > 0 {
|
|
|
- f = f * 10000
|
|
|
- }
|
|
|
- }
|
|
|
- return f
|
|
|
-}
|
|
|
-func capitalMoney(text string) float64 {
|
|
|
- nodes := []float64{}
|
|
|
- node := float64(0)
|
|
|
- tmp := float64(0)
|
|
|
- decimals := 0.0
|
|
|
- ishaspoint := false //是否含小数点
|
|
|
- fnum := float64(0)
|
|
|
- end := false
|
|
|
- //str := fmt.Sprint(data[0])
|
|
|
- //提取第一个大写信息
|
|
|
- strmatch := numCapitals.FindAllStringSubmatch(text, -1)
|
|
|
- if len(strmatch) > 0 {
|
|
|
- text = strmatch[0][0]
|
|
|
- }
|
|
|
- suffixUnit := float64(1)
|
|
|
- if strings.HasSuffix(text, "万") || strings.HasSuffix(text, "万元") || strings.HasSuffix(text, "万元整") {
|
|
|
- index := strings.LastIndex(text, "万")
|
|
|
- text = text[0:index]
|
|
|
- suffixUnit = float64(10000)
|
|
|
- }
|
|
|
- moneyRegChar.ReplaceAllStringFunc(text, func(key string) string {
|
|
|
- if key == "元" || key == "圆" || key == "点" {
|
|
|
- ishaspoint = true
|
|
|
- }
|
|
|
- if v, ok := moneyChar[key].(float64); ok && !end {
|
|
|
- if ishaspoint && v > 10 { //排除后面有其他的单位
|
|
|
- return ""
|
|
|
- }
|
|
|
- //fmt.Println(key, v, fnum)
|
|
|
- if v < 10 && v >= 0 {
|
|
|
- if ishaspoint { //小数部分
|
|
|
- if v >= 1 {
|
|
|
- fnum = v
|
|
|
- } else if v < 1 && v > 0 {
|
|
|
- decimals += fnum * v
|
|
|
- }
|
|
|
- } else {
|
|
|
- if tmp != float64(0) {
|
|
|
- node += tmp
|
|
|
- }
|
|
|
- tmp = float64(v)
|
|
|
- }
|
|
|
- } else if v == 10000 || v == 100000000 { //单位万、亿
|
|
|
- if tmp != float64(0) {
|
|
|
- node += tmp
|
|
|
- tmp = float64(0)
|
|
|
- }
|
|
|
- nodes = append(nodes, node*float64(v))
|
|
|
- node = float64(0)
|
|
|
- } else {
|
|
|
- if v == 10 && tmp == 0 {
|
|
|
- tmp = 1
|
|
|
- }
|
|
|
- tmp = tmp * float64(v)
|
|
|
- node += tmp
|
|
|
- tmp = float64(0)
|
|
|
- }
|
|
|
- }
|
|
|
- if key == "整" || key == "正" || key == "分" {
|
|
|
- end = true
|
|
|
- }
|
|
|
- return ""
|
|
|
- })
|
|
|
- nodes = append(nodes, node, tmp)
|
|
|
- ret := float64(0)
|
|
|
- for _, v := range nodes {
|
|
|
- ret += v
|
|
|
- }
|
|
|
- return (ret + decimals) * suffixUnit
|
|
|
-}
|
|
|
-
|
|
|
-//数字金额转换
|
|
|
-func numMoney(text string) (moneyFloat float64, flag bool) {
|
|
|
- //tmp := fmt.Sprintf("%f", data[0])
|
|
|
- repUnit := float64(1)
|
|
|
- if regQianw.MatchString(text) {
|
|
|
- text = strings.Replace(text, "千万", "万", -1)
|
|
|
- repUnit = float64(1000)
|
|
|
- }
|
|
|
- text = replaceSymbol(text, []string{",", ",", "(", ")", "(", ")", ":", "\n"})
|
|
|
- text = replaceString(text, []string{"万元", "亿元", "."}, []string{"万", "亿", "."})
|
|
|
- text = CutAllSpace(text)
|
|
|
- rets := regNumFloat.FindAllString(text, -1)
|
|
|
- fnums := []float64{}
|
|
|
- unitstrs := []string{}
|
|
|
- if len(rets) > 0 {
|
|
|
- pindex := 0 //单位前置
|
|
|
- for k, v := range rets {
|
|
|
- f, err := strconv.ParseFloat(v, 64)
|
|
|
- if err == nil {
|
|
|
- fnums = append(fnums, f)
|
|
|
- index := strings.Index(text, v)
|
|
|
- //单位后置
|
|
|
- start := index + len(v)
|
|
|
- end := start + 3
|
|
|
- //log.Println("vvv", tmp, v, pindex, index, start)
|
|
|
- if k > 0 {
|
|
|
- if start >= pindex+3 {
|
|
|
- pstart := pindex + 3
|
|
|
- if pstart >= index {
|
|
|
- pstart = index
|
|
|
- }
|
|
|
- if len(text) > end {
|
|
|
- unitstrs = append(unitstrs, text[pstart:index]+text[start:end])
|
|
|
- } else {
|
|
|
- unitstrs = append(unitstrs, text[pstart:index]+text[start:])
|
|
|
- }
|
|
|
- } else {
|
|
|
- if len(text) > end {
|
|
|
- unitstrs = append(unitstrs, text[start:end])
|
|
|
- } else {
|
|
|
- unitstrs = append(unitstrs, text[start:])
|
|
|
- }
|
|
|
- }
|
|
|
- } else {
|
|
|
- if len(text) > end {
|
|
|
- if index-3 >= 0 {
|
|
|
- unitstrs = append(unitstrs, text[index-3:index]+text[start:end])
|
|
|
- } else {
|
|
|
- unitstrs = append(unitstrs, text[start:end])
|
|
|
- }
|
|
|
- } else {
|
|
|
- if index-3 >= 0 {
|
|
|
- unitstrs = append(unitstrs, text[index-3:index]+text[start:])
|
|
|
- } else {
|
|
|
- unitstrs = append(unitstrs, text[start:])
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- pindex = start
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- //log.Println("unitstrs", fnums, unitstrs)
|
|
|
- unit := float64(0)
|
|
|
- fnum := float64(0)
|
|
|
- for k, v := range fnums {
|
|
|
- fnum = v
|
|
|
- units := regStrUnit.FindAllString(unitstrs[k], -1)
|
|
|
- for _, v := range units {
|
|
|
- if moneyUnit[v] != 0 {
|
|
|
- unit = moneyUnit[v]
|
|
|
- break
|
|
|
- }
|
|
|
- }
|
|
|
- if unit != float64(0) { //取第一个
|
|
|
- break
|
|
|
- }
|
|
|
- }
|
|
|
- fnum = fnum * repUnit
|
|
|
- if unit == float64(0) {
|
|
|
- moneyFloat = fnum
|
|
|
- } else {
|
|
|
- moneyFloat = fnum * unit
|
|
|
- }
|
|
|
- if unit == 10000 {
|
|
|
- flag = false
|
|
|
- } else {
|
|
|
- flag = true
|
|
|
- }
|
|
|
- return
|
|
|
-}
|
|
|
-
|
|
|
-//清理所有空白符
|
|
|
-func CutAllSpace(text string) string {
|
|
|
- tmp := cutAllSpace.ReplaceAllString(text, "")
|
|
|
- tmp = replaceSymbol(tmp, spaces)
|
|
|
- return tmp
|
|
|
-}
|
|
|
-
|
|
|
-//符号替换
|
|
|
-func replaceString(con string, ret, rep []string) string {
|
|
|
- for k, v := range ret {
|
|
|
- if len(rep) > k {
|
|
|
- con = strings.Replace(con, v, rep[k], -1)
|
|
|
- }
|
|
|
- }
|
|
|
- return con
|
|
|
-}
|
|
|
-
|
|
|
-//过滤符号
|
|
|
-func replaceSymbol(con string, rep []string) string {
|
|
|
- for _, v := range rep {
|
|
|
- con = strings.Replace(con, v, "", -1)
|
|
|
- }
|
|
|
- return con
|
|
|
-}
|
|
|
-
|
|
|
//数据存库
|
|
|
func SaveMgo() {
|
|
|
log.Println("Mgo Save...")
|
|
@@ -897,124 +667,6 @@ func PrintLog(cus *Customer) {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-//查mongo程序测试
|
|
|
-func MgoDataTest(sr *SearchRule, dm *Department, c *Customer) {
|
|
|
- qu.Debug("开始数据测试...")
|
|
|
- data, _ := MgoTag.Find("test", nil, nil, nil)
|
|
|
- for _, tmp := range data {
|
|
|
- id := mgodb.BsonTOStringId(tmp["_id"])
|
|
|
- matchKey := map[string]bool{} //记录所有匹配上的关键词
|
|
|
- matchKeyType := map[string]bool{} //记录关键词对应的匹配方式
|
|
|
- //先获取用到的所有字段值
|
|
|
- fieldText := map[string]interface{}{}
|
|
|
- for field, _ := range sr.Fields {
|
|
|
- text := qu.ObjToString(tmp[field])
|
|
|
- text = ProcessData(text) //处理文本(字母转大写,删除一些符号)
|
|
|
- fieldText[field] = text
|
|
|
- }
|
|
|
- //清理词清理
|
|
|
- for _, cwm := range sr.GCW.MatchType {
|
|
|
- if text := qu.ObjToString(fieldText[cwm]); text != "" {
|
|
|
- for _, gcw_reg := range sr.GCW.KeyReg {
|
|
|
- text = gcw_reg.ReplaceAllString(text, "")
|
|
|
- }
|
|
|
- fieldText[cwm] = text
|
|
|
- }
|
|
|
- }
|
|
|
- /*
|
|
|
- 因为要记录所有匹配上的关键词,所有优先匹配附加词,在匹配关键词
|
|
|
- */
|
|
|
- //1.附加词匹配
|
|
|
- IsMatch := false
|
|
|
- for i, aw := range sr.AW {
|
|
|
- qu.Debug("-------------------------开始附加词匹配--------------------------")
|
|
|
- IsMatchAddKey := RegMatchTest(fieldText, aw.MatchType, aw.KeyReg, nil, nil, false, true)
|
|
|
- qu.Debug(IsMatchAddKey, "------------------------------------------------------------")
|
|
|
-
|
|
|
- //2.关键词匹配
|
|
|
- if IsMatchAddKey {
|
|
|
- kw := sr.KW[i]
|
|
|
- qu.Debug("-------------------------开始关键词匹配--------------------------")
|
|
|
- IsMatchKey := RegMatchTest(fieldText, kw.MatchType, kw.KeyReg, matchKey, matchKeyType, true, false)
|
|
|
- qu.Debug(IsMatchKey, "------------------------------------------------------------")
|
|
|
- if IsMatchKey {
|
|
|
- IsMatch = true
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- qu.Debug(IsMatch, matchKey)
|
|
|
- if IsMatch { //匹配成功,数据上新增规则id,matchKey,item并临时保存数据
|
|
|
- tmpMatchKey := MapDataToArr(matchKey)
|
|
|
- tmpMatchKeyType := MapDataToArr(matchKeyType)
|
|
|
- tmp["matchkey"] = strings.Join(tmpMatchKey, ",")
|
|
|
- tmp["matchtype"] = strings.Join(tmpMatchKeyType, ",")
|
|
|
- tmp["ruleid"] = sr.ID
|
|
|
- //item
|
|
|
- switch c.PushModel {
|
|
|
- case 0:
|
|
|
- tmp["item"] = "数据"
|
|
|
- case 1:
|
|
|
- tmp["item"] = dm.Name
|
|
|
- case 2:
|
|
|
- tmp["item"] = sr.Name
|
|
|
- case 3:
|
|
|
- tmp["item"] = dm.Name + "_" + sr.Name
|
|
|
- case 4:
|
|
|
- tmp["item"] = sr.ID
|
|
|
- }
|
|
|
- //开始打标签
|
|
|
- qu.Debug("++++++++++++++++++++++++++++开始打标签+++++++++++++++++++++++++++++++")
|
|
|
- if c.IsTagRule {
|
|
|
- tagNameMap := map[string]bool{}
|
|
|
- tagIdMap := map[string]bool{}
|
|
|
- qu.Debug("c.TagRules---", len(c.TagRules))
|
|
|
- for _, tr := range c.TagRules {
|
|
|
- if tr.DepartRuleIds[sr.ID] {
|
|
|
- //先获取用到的所有字段值
|
|
|
- for field, _ := range tr.Fields {
|
|
|
- if fieldText[field] == nil { //补充fieldText
|
|
|
- text := qu.ObjToString(tmp[field])
|
|
|
- text = ProcessData(text) //处理文本(字母转大写,删除一些符号)
|
|
|
- fieldText[field] = text
|
|
|
- }
|
|
|
- }
|
|
|
- qu.Debug("-------------------------开始排除词匹配--------------------------")
|
|
|
- qu.Debug("tr.NW---", len(tr.NW))
|
|
|
- for j, tag_nw := range tr.NW { //排除词匹配
|
|
|
- IsMatchNotKey := RegMatchTest(fieldText, tag_nw.MatchType, tag_nw.KeyReg, nil, nil, false, false)
|
|
|
- qu.Debug(IsMatchNotKey, "------------------------------------------------------------")
|
|
|
- if !IsMatchNotKey { //排除词未匹配,匹配附加词关键词
|
|
|
- if RegMatchTest(fieldText, tr.AW[j].MatchType, tr.AW[j].KeyReg, nil, nil, false, true) && RegMatchTest(fieldText, tr.KW[j].MatchType, tr.KW[j].KeyReg, nil, nil, false, false) {
|
|
|
- tagname := tr.TagNames[j]
|
|
|
- qu.Debug("tagname-----", tagname)
|
|
|
- tagNameMap[tagname] = true
|
|
|
- tagIdMap[tr.ID] = true
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- //tagname
|
|
|
- tagNameArr := MapDataToArr(tagNameMap)
|
|
|
- tagIDArr := MapDataToArr(tagIdMap)
|
|
|
- if len(tagNameArr) > 0 {
|
|
|
- tmp["tagname"] = strings.Join(tagNameArr, ",")
|
|
|
- tmp["tagid"] = strings.Join(tagIDArr, ",")
|
|
|
- }
|
|
|
- }
|
|
|
- //appid
|
|
|
- tmp["appid"] = c.AppId
|
|
|
- //客户名称
|
|
|
- tmp["departname"] = dm.Name
|
|
|
- //存储数据
|
|
|
- dm.DataLock.Lock()
|
|
|
- qu.Debug("tmp---", tmp)
|
|
|
- } else {
|
|
|
- qu.Debug("未匹配数据---", id)
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
//匹配
|
|
|
func RegMatchTest(fieldText map[string]interface{}, matchType []string, matchReg *Reg, matchKey map[string]bool, matchKeyType map[string]bool, goon, isAddWord bool) (match bool) {
|
|
|
defer qu.Catch()
|
|
@@ -1097,3 +749,12 @@ func RegMatchTest(fieldText map[string]interface{}, matchType []string, matchReg
|
|
|
}
|
|
|
return
|
|
|
}
|
|
|
+
|
|
|
+// SkipData reports whether tmp["publishtime"] is later than tmp["comeintime"] minus seven days (both read with qu.Int64All).
+// NOTE(review): the original note said records published outside the 7-day range are unwanted, so true appears to mean
+// "inside the window" despite the name — confirm how callers interpret the result; timestamps are presumably Unix seconds.
+func SkipData(tmp map[string]interface{}) bool {
+	comeIn := qu.Int64All(tmp["comeintime"])
+	publish := qu.Int64All(tmp["publishtime"])
+	return publish > comeIn-7*24*60*60 // window is 7 days expressed in seconds
+}
|