util.go 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811
  1. package main
  2. import (
  3. "fmt"
  4. "log"
  5. qu "qfw/util"
  6. "regexp"
  7. "strings"
  8. "sync"
  9. "time"
  10. "github.com/donnie4w/go-logger/logger"
  11. "go.mongodb.org/mongo-driver/bson/primitive"
  12. "gopkg.in/mgo.v2/bson"
  13. )
  14. var LetterCase = regexp.MustCompile("[A-Za-z]")
  15. var LetterCase2 = regexp.MustCompile("[A-Za-z0-9]")
  16. var FilteReg = regexp.MustCompile("[()(){}]*")
  17. //匹配方式map
  18. var MatchType = map[string]interface{}{
  19. "1": "title",
  20. "2": "detail",
  21. "3": "purchasing",
  22. "4": "filetext",
  23. "5": "projectname",
  24. "6": "buyer",
  25. "7": "s_winner",
  26. }
  27. //加载排除词、附加词、关键词
  28. func GetNotkeyAndKeyAddWord(list primitive.A, fieldMap map[string]interface{}, hasNotKey bool) (kws []*KeyWord, aws []*AddWord, nkws []*NotKeyWord, tagnames []string) {
  29. defer qu.Catch()
  30. for _, rules := range list {
  31. ruleMap := rules.(map[string]interface{})
  32. if hasNotKey { //是否处理排除词
  33. //排除词匹配方式
  34. nkm := qu.ObjToString(ruleMap["s_notkeymatch"])
  35. nkmArr := []string{}
  36. for _, nv := range strings.Split(nkm, ",") {
  37. if field := qu.ObjToString(MatchType[nv]); field != "" {
  38. nkmArr = append(nkmArr, field)
  39. fieldMap[field] = true
  40. }
  41. }
  42. //排除词
  43. nkw := &NotKeyWord{}
  44. nkw.MatchType = nkmArr
  45. notkeyword := qu.ObjToString(ruleMap["s_notkey"])
  46. nkw_commaArr := strings.Split(notkeyword, ",")
  47. nkw_reg := &Reg{}
  48. for _, comma := range nkw_commaArr {
  49. nkw_addArr := strings.Split(comma, "&&")
  50. if len(nkw_addArr) == 1 { //,
  51. tmp_nkw := nkw_addArr[0]
  52. if tmp_nkw != "" {
  53. cr := &CommonReg{}
  54. cr.CsVal = tmp_nkw //记录原值
  55. if LetterCase.MatchString(tmp_nkw) { //判断附加词中是否有英文
  56. tmp_nkw = strings.ToUpper(tmp_nkw) //附加词中有英文全部转为大写
  57. cr.IsLetter = true //含字母
  58. }
  59. cr.CrVal = regexp.MustCompile(tmp_nkw) //记录reg值
  60. nkw_reg.CReg = append(nkw_reg.CReg, cr)
  61. }
  62. } else { //&&
  63. arp := &AndRegPre{}
  64. arp.CsVal = comma //记录原值
  65. for _, and := range nkw_addArr {
  66. if and != "" {
  67. ar := &AndReg{}
  68. if LetterCase.MatchString(and) { //判断附加词中是否有英文
  69. and = strings.ToUpper(and) //附加词中有英文全部转为大写
  70. ar.IsLetter = true //含字母
  71. }
  72. ar.CrVal = regexp.MustCompile(and)
  73. arp.AndRegKid = append(arp.AndRegKid, ar)
  74. }
  75. }
  76. nkw_reg.AReg = append(nkw_reg.AReg, arp)
  77. }
  78. }
  79. nkw.KeyReg = nkw_reg
  80. nkws = append(nkws, nkw)
  81. //获取每组关键词的标签名称
  82. tagname := qu.ObjToString(ruleMap["s_tagname"])
  83. tagnames = append(tagnames, tagname)
  84. }
  85. //附加词匹配方式
  86. awm := qu.ObjToString(ruleMap["s_addkeymatch"])
  87. awmArr := []string{}
  88. for _, av := range strings.Split(awm, ",") {
  89. if field := qu.ObjToString(MatchType[av]); field != "" {
  90. awmArr = append(awmArr, field)
  91. fieldMap[field] = true
  92. }
  93. }
  94. //附加词
  95. aw := &AddWord{}
  96. aw.MatchType = awmArr
  97. aw_reg := &Reg{}
  98. addword := qu.ObjToString(ruleMap["s_addkey"])
  99. aw_commaArr := strings.Split(addword, ",")
  100. for _, comma := range aw_commaArr {
  101. aw_addArr := strings.Split(comma, "&&")
  102. if len(aw_addArr) == 1 { //,
  103. tmp_aw := aw_addArr[0]
  104. if tmp_aw != "" {
  105. cr := &CommonReg{}
  106. cr.CsVal = tmp_aw //记录原值
  107. if LetterCase.MatchString(tmp_aw) { //判断附加词中是否有英文
  108. tmp_aw = strings.ToUpper(tmp_aw) //附加词中有英文全部转为大写
  109. cr.IsLetter = true //含字母
  110. }
  111. cr.CrVal = regexp.MustCompile(tmp_aw) //记录reg值
  112. aw_reg.CReg = append(aw_reg.CReg, cr)
  113. }
  114. } else { //&&
  115. arp := &AndRegPre{}
  116. arp.CsVal = comma //记录原值
  117. for _, and := range aw_addArr {
  118. if and != "" {
  119. ar := &AndReg{}
  120. if LetterCase.MatchString(and) { //判断附加词中是否有英文
  121. and = strings.ToUpper(and) //附加词中有英文全部转为大写
  122. ar.IsLetter = true //含字母
  123. }
  124. ar.CrVal = regexp.MustCompile(and)
  125. arp.AndRegKid = append(arp.AndRegKid, ar)
  126. }
  127. }
  128. aw_reg.AReg = append(aw_reg.AReg, arp)
  129. }
  130. }
  131. aw.KeyReg = aw_reg
  132. aws = append(aws, aw)
  133. //关键词匹配方式
  134. kwm := qu.ObjToString(ruleMap["s_keymatch"])
  135. kwmArr := []string{}
  136. for _, kv := range strings.Split(kwm, ",") {
  137. if field := qu.ObjToString(MatchType[kv]); field != "" {
  138. kwmArr = append(kwmArr, field)
  139. fieldMap[field] = true
  140. }
  141. }
  142. //关键词
  143. kw := &KeyWord{}
  144. kw.MatchType = kwmArr
  145. kw_reg := &Reg{}
  146. keyword := qu.ObjToString(ruleMap["s_matchkey"])
  147. kw_commaArr := strings.Split(keyword, ",")
  148. for _, comma := range kw_commaArr {
  149. kw_addArr := strings.Split(comma, "&&")
  150. if len(kw_addArr) == 1 { //,
  151. tmp_kw := kw_addArr[0]
  152. if tmp_kw != "" {
  153. cr := &CommonReg{}
  154. cr.CsVal = tmp_kw //记录原值
  155. if LetterCase.MatchString(tmp_kw) {
  156. tmp_kw = strings.ToUpper(tmp_kw)
  157. cr.IsLetter = true //含字母
  158. }
  159. cr.CrVal = regexp.MustCompile(tmp_kw) //记录reg值
  160. kw_reg.CReg = append(kw_reg.CReg, cr)
  161. }
  162. } else { //&&
  163. arp := &AndRegPre{}
  164. arp.CsVal = comma //记录原值
  165. for _, and := range kw_addArr {
  166. if and != "" {
  167. ar := &AndReg{}
  168. if LetterCase.MatchString(and) {
  169. and = strings.ToUpper(and) //附加词中有英文全部转为大写
  170. ar.IsLetter = true //含字母
  171. }
  172. ar.CrVal = regexp.MustCompile(and)
  173. arp.AndRegKid = append(arp.AndRegKid, ar)
  174. }
  175. }
  176. kw_reg.AReg = append(kw_reg.AReg, arp)
  177. }
  178. }
  179. kw.KeyReg = kw_reg
  180. kws = append(kws, kw)
  181. }
  182. return
  183. }
  184. //根据时间获取起始和终止ID范围
  185. func GetIdRange() (bson.M, bool) {
  186. defer qu.Catch()
  187. now := time.Now().Unix()
  188. for { //当前时间一直向前推半小时,直到取到数据
  189. now = now - 600 //10分钟前
  190. endTime := time.Unix(now, 0)
  191. endId := bson.NewObjectIdWithTime(endTime).Hex()
  192. if endId > LatestId {
  193. esquery := `{"query": {"bool": {"must": [{"range": {"id": {"gt": "` + LatestId + `" , "lte": "` + endId + `"}}}]}}, "sort": [{"comeintime": "desc"}]}`
  194. if Es.Count(Index, Itype, esquery) > 0 { //有数据返回id区间
  195. list := Es.Get(Index, Itype, esquery)
  196. tmpRange := bson.M{
  197. "range": bson.M{
  198. "id": bson.M{
  199. "lte": endId,
  200. "gt": LatestId,
  201. },
  202. },
  203. }
  204. LatestId = qu.ObjToString((*list)[0]["_id"])
  205. return tmpRange, true
  206. }
  207. } else { //结束id不大于起始id 退出
  208. logger.Debug("Search End ID Range Error. Sid:", LatestId, "Eid:", endId)
  209. break
  210. }
  211. }
  212. return bson.M{}, false
  213. // now := time.Now()
  214. // end := now.Unix() - int64(60*now.Minute()) - int64(now.Second())
  215. // start := end - TaskTime*3600
  216. // endTime := time.Unix(end, 0)
  217. // startTime := time.Unix(start, 0)
  218. // eid := bson.NewObjectIdWithTime(endTime).Hex()
  219. // sid := bson.NewObjectIdWithTime(startTime).Hex()
  220. // query := bson.M{
  221. // "_id": bson.M{
  222. // "$gt": mongodb.StringTOBsonId(LatestId),
  223. // },
  224. // }
  225. // sort := bson.M{
  226. // "_id": -1,
  227. // }
  228. // fields := bson.M{
  229. // "_id": 1,
  230. // }
  231. // //查抽取表最后一个id
  232. // extData, err := MgoExt.FindByLimit(ExtColl, query, sort, fields, 0, 1)
  233. // if len(extData) == 1 && err == nil {
  234. // endId := mongodb.BsonTOStringId(extData[0]["_id"])
  235. // if endId > LatestId {
  236. // tmpRange := bson.M{
  237. // "range": bson.M{
  238. // "id": bson.M{
  239. // "lte": endId,
  240. // "gt": LatestId,
  241. // },
  242. // },
  243. // }
  244. // LatestId = endId
  245. // return tmpRange, true
  246. // } else {
  247. // logger.Debug("ID Range Error,Start ID:", LatestId, "End ID:", endId)
  248. // return bson.M{}, false
  249. // }
  250. // }
  251. // logger.Debug("Search End ID No Data", query, "Error:", err)
  252. }
  253. //处理文本
  254. func ProcessData(text string) string {
  255. defer qu.Catch()
  256. text = strings.ToUpper(text) //文本中的英文全转为大写
  257. text = FilteReg.ReplaceAllString(text, "") //去除一些特殊符号
  258. return text
  259. }
  260. //校验字母
  261. func CheckLetter(text string, reg *regexp.Regexp, indexArr [][]int) (flag bool) {
  262. defer qu.Catch()
  263. for _, tmpArr := range indexArr {
  264. sIndex := tmpArr[0]
  265. eIndex := tmpArr[1]
  266. sbyte := ""
  267. ebyte := ""
  268. //log.Println("---", sIndex, eIndex)
  269. if sIndex != 0 {
  270. sbyte = text[sIndex-1 : sIndex]
  271. if eIndex != len(text) { //BAIB
  272. ebyte = text[eIndex : eIndex+1]
  273. } /*else { //BAI
  274. }*/
  275. } else {
  276. if eIndex != len(text) { //AIB
  277. ebyte = text[eIndex : eIndex+1]
  278. } /*else { //AI
  279. }*/
  280. }
  281. //log.Println("sssss", "s:", sbyte, "e:", ebyte, LetterCase2.Match([]byte(sbyte)), LetterCase2.Match([]byte(ebyte)))
  282. if !LetterCase2.Match([]byte(sbyte)) && !LetterCase2.Match([]byte(ebyte)) {
  283. flag = true
  284. break
  285. }
  286. }
  287. return
  288. }
  289. //匹配
  290. func RegMatch(fieldText map[string]interface{}, matchType []string, matchReg *Reg, matchKey map[string]bool, matchKeyType map[string]bool, goon, isAddWord bool) (match bool) {
  291. defer qu.Catch()
  292. if len(matchType) == 0 && isAddWord { //特殊处理附加词为空的情况
  293. match = true
  294. return
  295. }
  296. for _, mt := range matchType {
  297. if text := qu.ObjToString(fieldText[mt]); text != "" {
  298. for _, cr := range matchReg.CReg { //逗号分隔,任意一个匹配表示匹配成功
  299. if goon && matchKey[cr.CsVal] { //matchkey已存在不在匹配
  300. continue
  301. }
  302. if indexArr := cr.CrVal.FindAllStringIndex(text, -1); len(indexArr) > 0 { //匹配成功
  303. if !cr.IsLetter { //reg无字母
  304. if goon {
  305. matchKey[cr.CsVal] = true
  306. matchKeyType[mt] = true
  307. match = true
  308. } else {
  309. match = true
  310. return
  311. }
  312. } else if cr.IsLetter && CheckLetter(text, cr.CrVal, indexArr) { //reg有字母,判断是否是包含关系(AAAIBBB or AI){//
  313. if goon {
  314. matchKey[cr.CsVal] = true
  315. matchKeyType[mt] = true
  316. match = true
  317. } else {
  318. match = true
  319. return
  320. }
  321. }
  322. }
  323. }
  324. for _, ar := range matchReg.AReg { //&&分割,所有匹配表示匹配成功
  325. if goon && matchKey[ar.CsVal] {
  326. continue
  327. }
  328. IsAregMatch := false
  329. for n, arc := range ar.AndRegKid { //ar.AndRegKid若有值必不小于2
  330. if indexArr := arc.CrVal.FindAllStringIndex(text, -1); len(indexArr) < 1 { //匹配失败(ar.AndRegKid中任意一个未匹配则失败)
  331. break
  332. } else { //匹配成功,判断字母
  333. if arc.IsLetter && !CheckLetter(text, arc.CrVal, indexArr) { //reg有字母,判断是否是包含关系(AAAIBBB or AI)
  334. break
  335. }
  336. }
  337. if n == len(ar.AndRegKid)-1 {
  338. IsAregMatch = true
  339. }
  340. }
  341. if IsAregMatch {
  342. if goon {
  343. matchKey[ar.CsVal] = true
  344. matchKeyType[mt] = true
  345. match = true
  346. } else {
  347. match = true
  348. return
  349. }
  350. }
  351. }
  352. }
  353. }
  354. return
  355. }
  356. //map数据转数组
  357. func MapDataToArr(tmpMap map[string]bool) (tmpArr []string) {
  358. for tm, _ := range tmpMap {
  359. tmpArr = append(tmpArr, tm)
  360. }
  361. return
  362. }
  363. //合并数据
  364. func MergeData(history, tmp map[string]interface{}, isTagRule, isDepartRmvRep bool) {
  365. //matchkey、matchtype、ruleid均非空
  366. matchkey1 := qu.ObjToString(history["matchkey"])
  367. matchkey2 := qu.ObjToString(tmp["matchkey"])
  368. history["matchkey"] = MergeField(matchkey1, matchkey2)
  369. matchkeytype1 := qu.ObjToString(history["matchtype"])
  370. matchkeytype2 := qu.ObjToString(tmp["matchtype"])
  371. history["matchtype"] = MergeField(matchkeytype1, matchkeytype2)
  372. ruleid1 := qu.ObjToString(history["ruleid"])
  373. ruleid2 := qu.ObjToString(tmp["ruleid"])
  374. history["ruleid"] = MergeField(ruleid1, ruleid2)
  375. rulename1 := qu.ObjToString(history["rulename"])
  376. rulename2 := qu.ObjToString(tmp["rulename"])
  377. history["rulename"] = MergeField(rulename1, rulename2)
  378. if isTagRule { //标签模式 tagname、tagid合并
  379. tagname1 := qu.ObjToString(history["tagname"])
  380. tagname2 := qu.ObjToString(tmp["tagname"])
  381. if tagNameResult := MergeField(tagname1, tagname2); tagNameResult != "" {
  382. history["tagname"] = tagNameResult
  383. }
  384. tagid1 := qu.ObjToString(history["tagid"])
  385. tagid2 := qu.ObjToString(tmp["tagid"])
  386. if tagIdResult := MergeField(tagid1, tagid2); tagIdResult != "" {
  387. history["tagid"] = tagIdResult
  388. }
  389. }
  390. if isDepartRmvRep { //全局模式 部门合并
  391. departname1 := qu.ObjToString(history["departname"])
  392. departname2 := qu.ObjToString(tmp["departname"])
  393. history["departname"] = MergeField(departname1, departname2)
  394. departid1 := qu.ObjToString(history["departid"])
  395. departid2 := qu.ObjToString(tmp["departid"])
  396. history["departid"] = MergeField(departid1, departid2)
  397. }
  398. }
  399. //合并字段
  400. func MergeField(str1, str2 string) string {
  401. if str1 == "" {
  402. return str2
  403. } else if str2 == "" {
  404. return str1
  405. }
  406. slice1 := strings.Split(str1, ",")
  407. slice2 := strings.Split(str2, ",")
  408. m := make(map[string]int)
  409. for _, v := range slice1 {
  410. m[v]++
  411. }
  412. for _, v := range slice2 {
  413. times, _ := m[v]
  414. if times == 0 {
  415. slice1 = append(slice1, v)
  416. }
  417. }
  418. return strings.Join(slice1, ",")
  419. }
  420. //补充信息并保存
  421. func AssembelSave(tmp map[string]interface{}, IsSearchHosp, IsSearchEnps bool, appid string) bool {
  422. if IsSearchHosp { //医院信息
  423. SearchHospInfo(tmp)
  424. }
  425. if IsSearchEnps { //企业信息
  426. SearchEnterpriseInfo(tmp)
  427. }
  428. tmp["createtime"] = time.Now().Unix()
  429. id := qu.ObjToString(tmp["id"])
  430. tmp["jybxhref"] = `https://www.jianyu360.com/article/content/` + qu.CommonEncodeArticle("content", id) + `.html`
  431. if publishtime, ok := tmp["publishtime"].(float64); ok && publishtime > 0 {
  432. tmp["publishtime"] = qu.Int64All(publishtime)
  433. }
  434. if bidopentime, ok := tmp["bidopentime"].(float64); ok && bidopentime > 0 {
  435. tmp["bidopentime"] = qu.Int64All(bidopentime)
  436. }
  437. //
  438. if appid == "jyOh1XQgUJBQ5bTUlKCyZ1" {
  439. projectId := GetProjectId(id)
  440. if projectId != "" {
  441. tmp["projectId"] = projectId
  442. if _, ok := projectIdMap.Load(projectId); ok {
  443. MgoSave.Save(SaveCollProject, tmp)
  444. return false
  445. }
  446. projectIdMap.Store(projectId, true)
  447. }
  448. }
  449. //
  450. MgoSaveCache <- tmp
  451. return true
  452. }
  453. //查询第三方医院等级信息和招标客户的社会征信代码
  454. func SearchHospInfo(tmp map[string]interface{}) {
  455. if buyer := qu.ObjToString(tmp["buyer"]); buyer != "" { //buyer存在
  456. //医院等级
  457. hospData := MgoBuyer.FindOne(HospColl, bson.M{"name": buyer})
  458. if hospData != nil && len(hospData) > 0 {
  459. if rank := qu.ObjToString(hospData["rank"]); rank != "" {
  460. tmp["rank"] = hospData["rank"]
  461. }
  462. }
  463. //招标客户的社会征信代码
  464. buyerEntData := MgoBuyer.FindOne(BuyerEntColl, bson.M{"company_name": buyer})
  465. if buyerEntData != nil && len(buyerEntData) > 0 {
  466. if credit_no := qu.ObjToString(buyerEntData["credit_no"]); credit_no != "" {
  467. tmp["buyer_credit_no"] = credit_no
  468. }
  469. }
  470. }
  471. }
  472. //查询第三方中标企业信息
  473. func SearchEnterpriseInfo(tmp map[string]interface{}) {
  474. if s_winner := qu.ObjToString(tmp["s_winner"]); s_winner != "" { //buyer存在
  475. winner := strings.Split(s_winner, ",")[0] //取第一个中标单位
  476. data := MgoEnps.FindOne(EnpsColl, bson.M{"company_name": winner})
  477. if data != nil && len(data) > 0 {
  478. //中标单位联系人
  479. if legal_person := qu.ObjToString(data["legal_person"]); legal_person != "" {
  480. tmp["legal_person"] = legal_person
  481. }
  482. //中标单位的注册地址
  483. if company_address := qu.ObjToString(data["company_address"]); company_address != "" {
  484. tmp["company_address"] = company_address
  485. }
  486. //注册资金"capital" : "324 万","capital" : 124.8,
  487. if capital := qu.ObjToString(data["capital"]); capital != "" {
  488. tmp["capital"] = capital
  489. //if capitalStr, ok := data["capital"].(string); ok && capitalStr != "" {
  490. // if capital := ObjToMoney(capitalStr); capital != 0 { //金额转换
  491. // tmp["capital"] = capital
  492. // }
  493. //} else if capitalFloat, ok := data["capital"].(float64); ok && capitalFloat != 0 {
  494. // tmp["capital"] = capitalFloat
  495. //}
  496. }
  497. //注册时间"establish_date" : ISODate("1949-10-01T00:00:00.000+0000")
  498. if data["establish_date"] != nil {
  499. if establish_date, ok := data["establish_date"].(primitive.DateTime); ok {
  500. t := establish_date.Time()
  501. tmp["establish_date"] = qu.FormatDate(&t, qu.Date_Short_Layout)
  502. }
  503. }
  504. //经营范围
  505. if business_scope := qu.ObjToString(data["business_scope"]); business_scope != "" {
  506. tmp["business_scope"] = business_scope
  507. }
  508. //中标单位的社会征信代码
  509. if credit_no := qu.ObjToString(data["credit_no"]); credit_no != "" {
  510. tmp["winner_credit_no"] = credit_no
  511. }
  512. //股东名单
  513. if partners, ok := data["partners"].(primitive.A); ok && len(partners) > 0 {
  514. stock_name_arr := []string{}
  515. for _, partner := range partners {
  516. p := partner.(map[string]interface{})
  517. if stock_name := qu.ObjToString(p["stock_name"]); stock_name != "" {
  518. stock_name_arr = append(stock_name_arr, stock_name)
  519. }
  520. }
  521. if len(stock_name_arr) > 0 {
  522. tmp["stock_name"] = strings.Join(stock_name_arr, ",")
  523. }
  524. }
  525. //从最新年报中获取 中标单位联系电话、中标单位邮箱
  526. if annual_reports, ok := data["annual_reports"].(primitive.A); ok && len(annual_reports) > 0 {
  527. if anreport, ok := annual_reports[0].(map[string]interface{}); ok { //最新年报
  528. if email := qu.ObjToString(anreport["company_email"]); email != "" {
  529. tmp["company_email"] = email
  530. }
  531. if phone := qu.ObjToString(anreport["company_phone"]); phone != "" {
  532. tmp["company_phone"] = phone
  533. }
  534. }
  535. }
  536. }
  537. }
  538. }
  539. //数据存库
  540. func SaveMgo() {
  541. log.Println("Mgo Save...")
  542. arru := make([]map[string]interface{}, 500)
  543. indexu := 0
  544. for {
  545. select {
  546. case v := <-MgoSaveCache:
  547. arru[indexu] = v
  548. indexu++
  549. if indexu == 500 {
  550. SP <- true
  551. go func(arru []map[string]interface{}) {
  552. defer func() {
  553. <-SP
  554. }()
  555. MgoSave.SaveBulk(SaveColl, arru...)
  556. }(arru)
  557. arru = make([]map[string]interface{}, 500)
  558. indexu = 0
  559. }
  560. case <-time.After(1000 * time.Millisecond):
  561. if indexu > 0 {
  562. SP <- true
  563. go func(arru []map[string]interface{}) {
  564. defer func() {
  565. <-SP
  566. }()
  567. MgoSave.SaveBulk(SaveColl, arru...)
  568. }(arru[:indexu])
  569. arru = make([]map[string]interface{}, 500)
  570. indexu = 0
  571. }
  572. }
  573. }
  574. }
  575. //打印初始化信息
  576. func PrintLog(cus *Customer) {
  577. qu.Debug("----------------------打标签规则----------------------------")
  578. for i, tr := range cus.TagRules {
  579. qu.Debug("tagrules:", i, tr.ID, tr.Name, tr.CustomerId, tr.DepartRuleIds, tr.Fields)
  580. for j, kw := range tr.KW {
  581. qu.Debug("kw_matchkey---", kw.MatchType, len(kw.MatchType))
  582. qu.Debug("------------CReg--------------")
  583. for ck1, cr := range kw.KeyReg.CReg {
  584. qu.Debug("CR---", ck1, cr.CrVal, cr.CsVal, cr.IsLetter)
  585. }
  586. qu.Debug("------------AReg--------------")
  587. for ck1, ar := range kw.KeyReg.AReg {
  588. qu.Debug("AR---", ck1, ar.CsVal)
  589. for _, arc := range ar.AndRegKid {
  590. qu.Debug("ARC---", arc.CrVal, arc.IsLetter)
  591. }
  592. }
  593. qu.Debug("+++++++++++++++++++++AW++++++++++++++++++++++")
  594. qu.Debug("aw_matchkey---", tr.AW[j].MatchType, len(tr.AW[j].MatchType))
  595. qu.Debug("------------CReg--------------")
  596. for ck2, cr := range tr.AW[j].KeyReg.CReg {
  597. qu.Debug("CR---", ck2, cr.CrVal, cr.CsVal, cr.IsLetter)
  598. }
  599. qu.Debug("------------AReg--------------")
  600. for ck2, ar := range tr.AW[j].KeyReg.AReg {
  601. qu.Debug("AR---", ck2, ar.CsVal)
  602. for _, arc := range ar.AndRegKid {
  603. qu.Debug("ARC---", arc.CrVal, arc.IsLetter)
  604. }
  605. }
  606. qu.Debug("++++++++++++++++++++NW+++++++++++++++++++++++")
  607. qu.Debug("nw_matchkey---", tr.NW[j].MatchType, len(tr.NW[j].MatchType))
  608. qu.Debug("------------CReg--------------")
  609. for ck1, cr := range tr.NW[j].KeyReg.CReg {
  610. qu.Debug("CR---", ck1, cr.CrVal, cr.CsVal, cr.IsLetter)
  611. }
  612. qu.Debug("------------AReg--------------")
  613. for ck1, ar := range tr.NW[j].KeyReg.AReg {
  614. qu.Debug("AR---", ck1, ar.CsVal)
  615. for _, arc := range ar.AndRegKid {
  616. qu.Debug("ARC---", arc.CrVal, arc.IsLetter)
  617. }
  618. }
  619. qu.Debug("tagname-------------", tr.TagNames[j])
  620. qu.Debug("-------------------------------------------------")
  621. }
  622. }
  623. qu.Debug("----------------------查询规则----------------------------")
  624. for i, dm := range cus.Departments {
  625. qu.Debug("deparment:", i, dm.ID, dm.Name, dm.CustomerID)
  626. for j, sr := range dm.Rules {
  627. qu.Debug("ck---", j, sr.ID, sr.Name, sr.CustomerID, sr.DepartmentID, sr.GCW.KeyReg, len(sr.GCW.KeyReg), sr.GCW.MatchType, len(sr.GCW.MatchType), sr.Fields, len(sr.Fields), sr.EsQuery)
  628. for j, kw := range sr.KW {
  629. qu.Debug("kw_matchkey---", kw.MatchType, len(kw.MatchType))
  630. qu.Debug("------------CReg--------------")
  631. for ck1, cr := range kw.KeyReg.CReg {
  632. qu.Debug("CR---", ck1, cr.CrVal, cr.CsVal, cr.IsLetter)
  633. }
  634. qu.Debug("------------AReg--------------")
  635. for ck1, ar := range kw.KeyReg.AReg {
  636. qu.Debug("AR---", ck1, ar.CsVal)
  637. for _, arc := range ar.AndRegKid {
  638. qu.Debug("ARC---", arc.CrVal, arc.IsLetter)
  639. }
  640. }
  641. qu.Debug("+++++++++++++++++++++AW++++++++++++++++++++++")
  642. qu.Debug("aw_matchkey---", sr.AW[j].MatchType, len(sr.AW[j].MatchType))
  643. qu.Debug("------------CReg--------------")
  644. for ck2, cr := range sr.AW[j].KeyReg.CReg {
  645. qu.Debug("CR---", ck2, cr.CrVal, cr.CsVal, cr.IsLetter)
  646. }
  647. qu.Debug("------------AReg--------------")
  648. for ck2, ar := range sr.AW[j].KeyReg.AReg {
  649. qu.Debug("AR---", ck2, ar.CsVal)
  650. for _, arc := range ar.AndRegKid {
  651. qu.Debug("ARC---", arc.CrVal, arc.IsLetter)
  652. }
  653. }
  654. qu.Debug("--------------------------------------------------------------------------------------")
  655. }
  656. }
  657. }
  658. }
  659. //匹配
  660. func RegMatchTest(fieldText map[string]interface{}, matchType []string, matchReg *Reg, matchKey map[string]bool, matchKeyType map[string]bool, goon, isAddWord bool) (match bool) {
  661. defer qu.Catch()
  662. qu.Debug("matchType---", matchType)
  663. if len(matchType) == 0 && isAddWord { //特殊处理附加词为空的情况
  664. match = true
  665. return
  666. }
  667. for _, mt := range matchType {
  668. if text := qu.ObjToString(fieldText[mt]); text != "" {
  669. qu.Debug("匹配方式---", mt, "text---", text)
  670. qu.Debug("--------------开始查找逗号分隔----------------", len(matchReg.CReg))
  671. for i, cr := range matchReg.CReg { //逗号分隔,任意一个匹配表示匹配成功
  672. qu.Debug("iiii---", i, cr.CrVal, goon, matchKey)
  673. if goon && matchKey[cr.CsVal] { //matchkey已存在不在匹配
  674. continue
  675. }
  676. if indexArr := cr.CrVal.FindAllStringIndex(text, -1); len(indexArr) > 0 { //匹配成功
  677. if !cr.IsLetter { //reg无字母
  678. qu.Debug("goon---", goon)
  679. if goon {
  680. qu.Debug("key++++++++++ ++++++++++", cr.CsVal)
  681. matchKey[cr.CsVal] = true
  682. matchKeyType[mt] = true
  683. match = true
  684. } else {
  685. match = true
  686. return
  687. }
  688. } else if cr.IsLetter && CheckLetter(text, cr.CrVal, indexArr) { //reg有字母,判断是否是包含关系(AAAIBBB or AI){//
  689. qu.Debug("goon---", goon)
  690. if goon {
  691. qu.Debug("key++++++++++++++++++++", cr.CsVal)
  692. matchKey[cr.CsVal] = true
  693. matchKeyType[mt] = true
  694. match = true
  695. } else {
  696. match = true
  697. return
  698. }
  699. }
  700. }
  701. }
  702. qu.Debug("--------------开始查找&&分隔----------------", len(matchReg.AReg))
  703. for j, ar := range matchReg.AReg { //&&分割,所有匹配表示匹配成功
  704. qu.Debug("jjjj---", j, ar.CsVal, goon, matchKey)
  705. if goon && matchKey[ar.CsVal] {
  706. continue
  707. }
  708. IsAregMatch := false
  709. qu.Debug("ar.AndRegKid---", j, ar.AndRegKid, len(ar.AndRegKid))
  710. for n, arc := range ar.AndRegKid { //ar.AndRegKid若有值必不小于2
  711. qu.Debug("nnnn---", n, arc.CrVal, arc.IsLetter)
  712. if indexArr := arc.CrVal.FindAllStringIndex(text, -1); len(indexArr) < 1 { //匹配失败(ar.AndRegKid中任意一个未匹配则失败)
  713. break
  714. } else { //匹配成功,判断字母
  715. if arc.IsLetter && !CheckLetter(text, arc.CrVal, indexArr) { //reg有字母,判断是否是包含关系(AAAIBBB or AI)
  716. break
  717. }
  718. }
  719. if n == len(ar.AndRegKid)-1 {
  720. IsAregMatch = true
  721. }
  722. }
  723. qu.Debug("IsAregMatch---", IsAregMatch)
  724. if IsAregMatch {
  725. qu.Debug("goon---", goon)
  726. if goon {
  727. qu.Debug("key++++++++++++++++++++", ar.CsVal)
  728. matchKey[ar.CsVal] = true
  729. matchKeyType[mt] = true
  730. match = true
  731. } else {
  732. match = true
  733. return
  734. }
  735. }
  736. }
  737. }
  738. }
  739. return
  740. }
  741. //发布时间不在范围内(7天)不要这条数据
  742. func SkipData(tmp map[string]interface{}) bool {
  743. comeIn := qu.Int64All(tmp["comeintime"])
  744. if qu.Int64All(tmp["publishtime"]) > (comeIn - 7*24*60*60) {
  745. return true
  746. }
  747. return false
  748. }
  749. func GetProjectId(id string) string {
  750. query := `{"query": {"bool": {"must": [{"term": {"projectset.ids": "%s"}}],"must_not": [],"should": []}},"size": 1}`
  751. querys := fmt.Sprintf(query, id)
  752. projectId := ""
  753. data := Es.Get("projectset", "projectset", querys)
  754. if data != nil && *data != nil {
  755. projectId = qu.ObjToString((*data)[0]["id"])
  756. }
  757. return projectId
  758. }
  759. func InitProjectId() {
  760. session := MgoSave.GetMgoConn()
  761. count := 0
  762. defer func() {
  763. MgoSave.DestoryMongoConn(session)
  764. log.Printf("本次共取到%d个projectId\n", count)
  765. }()
  766. save := Sysconfig["save"].(map[string]interface{})
  767. query := map[string]interface{}{"appid": "jyOh1XQgUJBQ5bTUlKCyZ1"}
  768. field := map[string]interface{}{"projectId": 1}
  769. iter := session.DB(qu.ObjToString(save["db"])).C(SaveColl).Find(query).Select(field).Sort("_id").Iter()
  770. thisData := map[string]interface{}{}
  771. for {
  772. if !iter.Next(&thisData) {
  773. break
  774. }
  775. projectId := qu.ObjToString(thisData["projectId"])
  776. projectIdMap.Store(projectId, true)
  777. count++
  778. thisData = map[string]interface{}{}
  779. }
  780. }