task.go 38 KB


  1. package main
import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"regexp"
	"strings"
	"sync"
	"time"

	esv "es"
	qu "qfw/util"
	"qfw/util/redis"
	. "sqlmodel"
	mgoutil "util/mgodb"

	"github.com/antonmedv/expr"
	"github.com/cron"
	"github.com/donnie4w/go-logger/logger"
	esV7 "github.com/olivere/elastic"
	"go.mongodb.org/mongo-driver/bson/primitive"
	"gopkg.in/mgo.v2/bson"
)
  23. func TimeTask() {
  24. StartTask()
  25. c := cron.New()
  26. //cronstr := "0 */" + fmt.Sprint(TaskTime) + " * * * ?"
  27. cronstr := "0 5 */" + fmt.Sprint(TaskTime) + " * * ?" //每TaskTime小时执行一次
  28. c.AddFunc(cronstr, func() { StartTask() })
  29. c.Start()
  30. }
// StartTask performs one run of the task; at present that is a single call to
// GetCustomerData.
func StartTask() {
	GetCustomerData()
}
  34. // 加载客户
  35. func GetCustomerData() {
  36. defer qu.Catch()
  37. log.Println("Init Customer...")
  38. idRange, idRange2, ok, endTime := GetIdRange() //获取id区间
  39. if !ok {
  40. return
  41. }
  42. logger.Debug("此次任务区间:", idRange, idRange2)
  43. //查询企业库开启推送的客户
  44. customers, _ := MgoTag.Find("euser", map[string]interface{}{"i_push": 1, "b_delete": false}, nil, nil)
  45. for _, c := range customers {
  46. customerId := mgoutil.BsonTOStringId(c["_id"])
  47. customer := qu.ObjToString(c["s_name"]) //客户名称
  48. appId := qu.ObjToString(c["s_appid"]) //appid
  49. extends := qu.ObjToString(c["s_extends"]) //扩展信息
  50. pushModel := qu.IntAll(c["i_pushmodel"]) //推送模式
  51. dataSave := qu.ObjToString(c["s_dataSave"])
  52. log.Println("当前客户 ", customer)
  53. cus := &Customer{}
  54. cus.SaveDataMap = map[string]map[string]interface{}{}
  55. cus.SaveDataArr = map[string]map[string]interface{}{}
  56. cus.IdRange = idRange
  57. cus.IdRanges = idRange2
  58. cus.ID = customerId
  59. cus.Name = customer
  60. cus.PushModel = pushModel
  61. cus.AppId = appId
  62. cus.DataSave = dataSave
  63. for _, v := range strings.Split(extends, ",") {
  64. if v == "hospitalgrade" {
  65. cus.IsSearchHosp = true
  66. } else if v == "enterpise" {
  67. cus.IsSearchEnps = true
  68. }
  69. }
  70. //
  71. if projectAppidMap[appId] {
  72. start := time.Now().Unix()
  73. log.Println("加载projectId---开始")
  74. InitProjectId(appId)
  75. end := time.Now().Unix()
  76. log.Println("加载projectId---结束,耗时", end-start, "秒")
  77. } else {
  78. projectIdMap = sync.Map{}
  79. }
  80. //
  81. cus.GetTagRules() //获取客户打标签规则
  82. cus.GetDepartments("") //获取客户信息
  83. //PrintLog(cus) //打印查看初始化的信息
  84. qu.Debug("customer:", cus.ID, cus.Name, cus.PushModel, cus.AppId, cus.IsTagRule, cus.IsTagRule2, cus.IsTagRule3, cus.IsSearchHosp, cus.IsSearchEnps, len(cus.TagRules), len(cus.TagRules2), len(cus.TagRules3), len(cus.Departments))
  85. cus.GetData("") //获取数据
  86. cus.RemoveRepeatData() //数据去重
  87. cus.AssembelAndSaveData() //组装、保存数据
  88. }
  89. Sysconfig.LatestId = LatestId
  90. Sysconfig.LatestTime = endTime
  91. qu.WriteSysConfig(Sysconfig)
  92. logger.Debug("定时任务结束-endId-Sysconfig.LatestTime ", endTime)
  93. }
  94. // 获取客户打标签规则
  95. func (c *Customer) GetTagRules() {
  96. log.Println("开始加载标签规则...")
  97. defer qu.Catch()
  98. tagRules, _ := MgoTag.Find("eusertagrule", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false}, nil, nil)
  99. if len(tagRules) > 0 {
  100. for _, tr := range tagRules {
  101. tagType := qu.ObjToString(tr["tagType"])
  102. if tagType == "" || tagType == "1" {
  103. c.IsTagRule = true //查到打标签规则,表示打标签
  104. TR := &TagRule{}
  105. TR.Fields = make(map[string]interface{})
  106. TR.DepartRuleIds = make(map[string]bool)
  107. id := mgoutil.BsonTOStringId(tr["_id"])
  108. name := qu.ObjToString(tr["s_name"])
  109. TR.ID = id
  110. TR.Name = name
  111. TR.CustomerId = c.ID
  112. //部门规则id组
  113. if departRuleIds := qu.ObjToString(tr["o_departruleids"]); departRuleIds != "" {
  114. for _, drid := range strings.Split(departRuleIds, ",") {
  115. TR.DepartRuleIds[drid] = true
  116. }
  117. }
  118. //规则
  119. if o_list, ok := tr["o_list"].(primitive.A); ok && len(o_list) > 0 {
  120. TR.GetKeyAddNotKeyWord(o_list)
  121. }
  122. c.TagRules = append(c.TagRules, TR)
  123. }
  124. }
  125. }
  126. tagRules2, _ := MgoTag.Find("eusertagrule", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false, "tagType": "2"}, nil, nil)
  127. if len(tagRules2) > 0 {
  128. c.IsTagRule2 = true //查到打标签规则,表示打标签
  129. for _, tr := range tagRules2 {
  130. TR := &TagRule{}
  131. TR.Fields = make(map[string]interface{})
  132. TR.DepartRuleIds = make(map[string]bool)
  133. id := mgoutil.BsonTOStringId(tr["_id"])
  134. name := qu.ObjToString(tr["s_name"])
  135. TR.ID = id
  136. TR.Name = name
  137. TR.CustomerId = c.ID
  138. //部门规则id组
  139. if departRuleIds := qu.ObjToString(tr["o_departruleids"]); departRuleIds != "" {
  140. for _, drid := range strings.Split(departRuleIds, ",") {
  141. TR.DepartRuleIds[drid] = true
  142. }
  143. }
  144. //规则
  145. if o_list, ok := tr["o_list"].(primitive.A); ok && len(o_list) > 0 {
  146. TR.GetKeyAddNotKeyWord(o_list)
  147. }
  148. c.TagRules2 = append(c.TagRules2, TR)
  149. }
  150. }
  151. tagRules3, _ := MgoTag.Find("eusertagrule", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false, "tagType": "3"}, nil, nil)
  152. if len(tagRules3) > 0 {
  153. c.IsTagRule3 = true //查到打标签规则,表示打标签
  154. for _, tr := range tagRules3 {
  155. TR := &TagRule{}
  156. TR.Fields = make(map[string]interface{})
  157. TR.DepartRuleIds = make(map[string]bool)
  158. id := mgoutil.BsonTOStringId(tr["_id"])
  159. name := qu.ObjToString(tr["s_name"])
  160. TR.ID = id
  161. TR.Name = name
  162. TR.CustomerId = c.ID
  163. //部门规则id组
  164. if departRuleIds := qu.ObjToString(tr["o_departruleids"]); departRuleIds != "" {
  165. for _, drid := range strings.Split(departRuleIds, ",") {
  166. TR.DepartRuleIds[drid] = true
  167. }
  168. }
  169. //规则
  170. if o_list, ok := tr["o_list"].(primitive.A); ok && len(o_list) > 0 {
  171. TR.GetKeyAddNotKeyWord(o_list)
  172. }
  173. c.TagRules3 = append(c.TagRules3, TR)
  174. }
  175. }
  176. }
  177. // 获取部门信息
  178. func (c *Customer) GetDepartments(stype string) {
  179. log.Println("开始获取部门信息...")
  180. defer qu.Catch()
  181. departments, _ := MgoTag.Find("euserdepart", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false}, nil, nil)
  182. if len(departments) > 0 {
  183. for _, ds := range departments {
  184. DM := &Department{}
  185. DM.DataLock = &sync.Mutex{}
  186. DM.DepartmentData = map[string][]map[string]interface{}{}
  187. DM.SaveDataMap = map[string]map[string]interface{}{}
  188. id := mgoutil.BsonTOStringId(ds["_id"])
  189. name := qu.ObjToString(ds["s_name"])
  190. DM.ID = id
  191. DM.Name = name
  192. DM.CustomerID = c.ID
  193. DM.GetSearchRules(c.ID, stype, c.IdRange, c.IdRanges) //获取某个部门的所有规则
  194. c.Departments = append(c.Departments, DM)
  195. //qu.Debug("Departments---", DM.ID, DM.Name, DM.CustomerID, len(DM.Rules))
  196. }
  197. }
  198. }
  199. // 获取数据
  200. func (c *Customer) GetData(stype string) {
  201. log.Println("开始匹配数据...")
  202. defer qu.Catch()
  203. esConfig := Sysconfig.Es
  204. esversion := qu.ObjToString(esConfig["version"])
  205. if esversion == "v1" {
  206. } else {
  207. esCon := esv.VarEs.(*esv.EsV7)
  208. c.EsConGetDataV7(stype, esCon)
  209. }
  210. }
  211. type MySource struct {
  212. Querys string
  213. }
  214. func (m *MySource) Source() (interface{}, error) {
  215. mp := make(map[string]interface{})
  216. json.Unmarshal([]byte(m.Querys), &mp)
  217. return mp["query"], nil
  218. }
  219. func (c *Customer) EsConGetDataV7(stype string, esCon *esv.EsV7) {
  220. client := esCon.GetEsConn()
  221. defer esCon.DestoryEsConn(client)
  222. ctx, _ := context.WithTimeout(context.Background(), 30*time.Minute)
  223. for _, dm := range c.Departments {
  224. for _, sr := range dm.Rules {
  225. for {
  226. listLen := redis.GetInt("session", "es_status")
  227. if listLen == 0 {
  228. log.Println("es空闲!")
  229. break
  230. } else if listLen == 1 || listLen == 2 {
  231. log.Println("系统繁忙,请稍后再试 ", listLen)
  232. }
  233. time.Sleep(5 * time.Second)
  234. }
  235. //测试
  236. // MgoDataTest(sr, dm, c)
  237. // return
  238. // ch := make(chan bool, 10)
  239. // wg := &sync.WaitGroup{}
  240. escount := Es.Count(Index, Itype, sr.EsQuery)
  241. log.Println("查询总数:", escount, "规则ID:", sr.ID, "EsQuery:", sr.EsQuery)
  242. if escount == 0 {
  243. continue
  244. }
  245. //查询条件类型转换
  246. // var q esV7.Query
  247. //sr.EsQuery = `{"query":{"filtered":{"filter":{"bool":{"must":[{"bool":{"should":[{"terms":{"city":["上海市"]}}]}},{"terms":{"toptype":["招标","结果"]}},{"range":{"publishtime":{"gte":1588262400,"lt":1608825600}}}]}},"query":{"bool":{"must":[{"bool":{"should":[{"bool":{"must":[{"bool":{"should":[{"bool":{"must":[{"terms":{"buyer":["上海城建职业学院","上海市第一人民医院","中国银联股份有限公司","上海立信会计金融学院法规处","中国东方航空股份有限公司","上海外国语大学","上海小昆山环卫服务有限公司","国家税务总局上海市税务局","中国浦东干部学院","上海市浦东新区老港镇人民政府","咪咕视讯科技有限公司","上海交通大学医学院附属新华医院","交通运输部上海打捞局","松江区体育局","复旦大学附属中山医院","上海交通大学医学院附属瑞金医院","中国科学院上海应用物理研究所"]}}]}}]}}]}}]}}],"must_not":[{"constant_score":{"filter":{"missing":{"field":"filetext"}}}}]}}}}}`
  248. cc := &MySource{
  249. Querys: sr.EsQuery,
  250. }
  251. //游标查询,index不支持别名,只能写索引库的名称
  252. res, err := client.Scroll(Index).Query(cc).Size(200).Do(ctx) //查询一条获取游标
  253. if err == nil {
  254. numDocs := 0
  255. scrollId := res.ScrollId
  256. count := 1
  257. for {
  258. if scrollId == "" {
  259. log.Println("ScrollId Is Error")
  260. break
  261. }
  262. var searchResult *esV7.SearchResult
  263. var err error
  264. if count == 1 {
  265. searchResult = res
  266. } else {
  267. searchResult, err = client.Scroll(Index).Size(200).ScrollId(scrollId).Do(ctx) //查询
  268. if err != nil {
  269. if err.Error() == "EOS" { //迭代完毕
  270. log.Println("Es Search Data Over:", err)
  271. } else {
  272. log.Println("Es Search Data Error:", err)
  273. }
  274. break
  275. }
  276. }
  277. log.Println("此次处理条数 ", len(searchResult.Hits.Hits))
  278. if err != nil {
  279. if err.Error() == "EOS" { //迭代完毕
  280. log.Println("Es Search Data Over:", err)
  281. } else {
  282. log.Println("Es Search Data Error:", err)
  283. }
  284. break
  285. }
  286. for _, hit := range searchResult.Hits.Hits {
  287. //开始处理数据
  288. tmp := make(map[string]interface{})
  289. if json.Unmarshal(hit.Source, &tmp) == nil {
  290. if stype != "history" {
  291. if !SkipData(tmp) {
  292. qu.Debug("跳过该条数据,发布时间在入库时间7天之前,", qu.ObjToString(tmp["_id"]))
  293. continue
  294. }
  295. }
  296. id := qu.ObjToString(tmp["id"])
  297. //亚信
  298. if CheckBidOpenAppidMap[c.AppId] {
  299. if tmp["bidopentime"] != nil {
  300. bidopentime := qu.Int64All(tmp["bidopentime"])
  301. comeintime := qu.Int64All(tmp["comeintime"])
  302. if bidopentime-comeintime <= 7*24*60*60 {
  303. qu.Debug("跳过该条数据,开标时间-入库时间<=7天,", id)
  304. continue
  305. }
  306. }
  307. }
  308. //河南移动,过滤掉中国移动采购网招标数据
  309. if CheckBidHrefRuleIdMap[dm.ID] {
  310. if strings.Contains(qu.ObjToString(tmp["href"]), "b2b.10086.cn") {
  311. qu.Debug("跳过该条数据,公告原网址中包含 b2b.10086.cn,", id)
  312. continue
  313. }
  314. }
  315. isExists, err := redis.Exists("datag", c.AppId+"_"+id)
  316. if err != nil {
  317. log.Println("redis信息id判重出错 ", err)
  318. } else if isExists {
  319. log.Println("信息id重复 ", id)
  320. continue
  321. }
  322. tmp["id"] = id //记录数据原有id
  323. delete(tmp, "_id")
  324. if sr.ExtFieldType == 2 {
  325. findwinner := ""
  326. s_winner := strings.Split(qu.ObjToString(tmp["s_winner"]), ",")
  327. if len(s_winner) > 0 {
  328. for i := 0; i < len(s_winner); i++ {
  329. findwinners := strings.TrimSpace(s_winner[i])
  330. if findwinners != "" {
  331. for _, v := range Sysconfig.SWinnerFilter {
  332. strings.ReplaceAll(findwinners, v, "")
  333. }
  334. if findwinners != "" {
  335. findwinner = findwinners
  336. break
  337. }
  338. }
  339. }
  340. }
  341. // findwinner := strings.TrimSpace(qu.ObjToString(tmp["winner"]))
  342. if findwinner != "" {
  343. finddata := MgoEnps.FindOne(EnpsColl, bson.M{"company_name": findwinner})
  344. if finddata != nil {
  345. if legal_person := qu.ObjToString(finddata["legal_person"]); legal_person != "" {
  346. tmp["legal_person"] = legal_person
  347. }
  348. if email := qu.ObjToString(finddata["company_email"]); email != "" {
  349. tmp["company_email"] = email
  350. }
  351. if phone := qu.ObjToString(finddata["company_phone"]); phone != "" {
  352. tmp["company_phone"] = phone
  353. }
  354. //从最新年报中获取 中标单位联系电话、中标单位邮箱
  355. // if annual_reports, ok := finddata["annual_reports"].(primitive.A); ok && len(annual_reports) > 0 {
  356. // anreport := Sort_year_report(annual_reports)
  357. // if len(anreport) > 0 {
  358. // if email := qu.ObjToString(anreport["company_email"]); email != "" {
  359. // tmp["company_email"] = email
  360. // }
  361. // if phone := qu.ObjToString(anreport["company_phone"]); phone != "" {
  362. // tmp["company_phone"] = phone
  363. // }
  364. // }
  365. // }
  366. }
  367. }
  368. }
  369. matchKey := map[string]bool{} //记录所有匹配上的关键词
  370. matchKeyType := map[string]bool{} //记录关键词对应的匹配方式
  371. //先获取用到的所有字段值
  372. fieldText := map[string]interface{}{}
  373. for field, _ := range sr.Fields {
  374. text := qu.ObjToString(tmp[field])
  375. text = ProcessData(text) //处理文本(字母转大写,删除一些符号)
  376. fieldText[field] = text
  377. }
  378. //清理词清理
  379. for _, cwm := range sr.GCW.MatchType {
  380. if text := qu.ObjToString(fieldText[cwm]); text != "" {
  381. for _, gcw_reg := range sr.GCW.KeyReg {
  382. text = gcw_reg.ReplaceAllString(text, "")
  383. }
  384. fieldText[cwm] = text
  385. }
  386. }
  387. //精准筛选规则2022-10-19
  388. if c.Exact == 1 && sr.ExactRule != "" {
  389. nameArr := []string{}
  390. data, _ := MgoTag.Find("groups", map[string]interface{}{"ruleId": sr.ID}, nil, nil)
  391. if data != nil && len(data) > 0 {
  392. for _, v := range data {
  393. nameArr = append(nameArr, qu.ObjToString(v["name"]))
  394. }
  395. }
  396. exactResult := exactMatchs(sr.ExactRule, qu.ObjToString(tmp["title"]), qu.ObjToString(tmp["detail"]), sr.Maths, nameArr)
  397. qu.Debug("-------------------精准匹配", id, exactResult)
  398. if !exactResult {
  399. continue
  400. }
  401. }
  402. /*
  403. 因为要记录所有匹配上的关键词,所有优先匹配附加词,在匹配关键词
  404. */
  405. //1.附加词匹配
  406. IsMatch := false
  407. //qu.Debug("sr.AW---", len(sr.AW))
  408. for i, aw := range sr.AW {
  409. //qu.Debug("-------------------------开始附加词匹配--------------------------")
  410. IsMatchAddKey := RegMatch(fieldText, aw.MatchType, aw.KeyReg, nil, nil, false, true)
  411. //qu.Debug(IsMatchAddKey, "------------------------------------------------------------")
  412. //2.关键词匹配
  413. if IsMatchAddKey {
  414. kw := sr.KW[i]
  415. //qu.Debug("-------------------------开始关键词匹配--------------------------")
  416. IsMatchKey := RegMatch(fieldText, kw.MatchType, kw.KeyReg, matchKey, matchKeyType, true, false)
  417. //qu.Debug(IsMatchKey, "------------------------------------------------------------")
  418. if IsMatchKey {
  419. IsMatch = true
  420. }
  421. }
  422. }
  423. if len(sr.AW) == 0 {
  424. IsMatch = true
  425. }
  426. /*
  427. 到此已经匹配完数据
  428. */
  429. qu.Debug("---------------------", id, IsMatch, matchKey)
  430. if IsMatch { //匹配成功,数据上新增规则id,matchKey,item并临时保存数据
  431. // tmpMatchKey := MapDataToArr(matchKey)
  432. tmpMatchKeyType := MapDataToArr(matchKeyType)
  433. tmp["matchkey"] = GetMactchKeys(sr.Maths, tmp)
  434. tmp["matchtype"] = strings.Join(tmpMatchKeyType, ",")
  435. tmp["ruleid"] = sr.ID
  436. tmp["rulename"] = sr.Name
  437. tmpBuyerClass := qu.ObjToString(tmp["buyerclass"])
  438. //开始打标签
  439. //qu.Debug("c.IsTagRule+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
  440. if c.IsTagRule {
  441. tagNameMap := map[string]bool{}
  442. tagIdMap := map[string]bool{}
  443. //qu.Debug("c.TagRules---", len(c.TagRules))
  444. //log.Println(c.TagRules,"=========",)
  445. for _, tr := range c.TagRules {
  446. if tr.DepartRuleIds[sr.ID] {
  447. // log.Println(tr.TagNames, "===========打标签")
  448. //先获取用到的所有字段值
  449. for field, _ := range tr.Fields {
  450. if fieldText[field] == nil { //补充fieldText
  451. text := qu.ObjToString(tmp[field])
  452. text = ProcessData(text) //处理文本(字母转大写,删除一些符号)
  453. fieldText[field] = text
  454. }
  455. }
  456. //qu.Debug("-------------------------开始排除词匹配--------------------------")
  457. //qu.Debug("tr.NW---", len(tr.NW))
  458. matchKeyTag := map[string]bool{} //记录所有标签里的匹配上的关键词
  459. matchKeyTypeTag := map[string]bool{} //记录标签里的关键词对应的匹配方式
  460. for j, tag_nw := range tr.NW { //排除词匹配
  461. IsMatchNotKey := RegMatch(fieldText, tag_nw.MatchType, tag_nw.KeyReg, nil, nil, false, false)
  462. //qu.Debug(IsMatchNotKey, "------------------------------------------------------------")
  463. if !IsMatchNotKey { //排除词未匹配,匹配附加词关键词
  464. // log.Println(j, tr.TagNames[j])
  465. if RegMatch(fieldText, tr.AW[j].MatchType, tr.AW[j].KeyReg, nil, nil, false, true) && RegMatch(fieldText, tr.KW[j].MatchType, tr.KW[j].KeyReg, matchKeyTag, matchKeyTypeTag, true, false) {
  466. tagname := tr.TagNames[j]
  467. tagBuyerClass := tr.BuyerClass[j]
  468. if tagBuyerClass != "" {
  469. if strings.Contains(tagBuyerClass, tmpBuyerClass) {
  470. if tagname == "" {
  471. tempList := []string{}
  472. for k, _ := range matchKeyTag {
  473. tempList = append(tempList, k)
  474. }
  475. tagname = strings.Join(tempList, ",")
  476. log.Println("=====tagname为空取匹配词为标签名称", tagname)
  477. }
  478. //qu.Debug("tagname-----", tagname)
  479. tagNameMap[tagname] = true
  480. tagIdMap[tr.ID] = true
  481. }
  482. } else {
  483. if tagname == "" {
  484. tempList := []string{}
  485. for k, _ := range matchKeyTag {
  486. tempList = append(tempList, k)
  487. }
  488. tagname = strings.Join(tempList, ",")
  489. log.Println("=====tagname为空取匹配词为标签名称", tagname)
  490. }
  491. //qu.Debug("tagname-----", tagname)
  492. tagNameMap[tagname] = true
  493. tagIdMap[tr.ID] = true
  494. }
  495. }
  496. }
  497. }
  498. }
  499. }
  500. //tagname
  501. tagNameArr := MapDataToArr(tagNameMap)
  502. tagIdArr := MapDataToArr(tagIdMap)
  503. if len(tagNameArr) > 0 {
  504. tmp["tagname"] = strings.Join(tagNameArr, ",")
  505. if DisPackageAppidMap[c.AppId] {
  506. tmp["buyer_type"] = strings.Join(tagNameArr, ",")
  507. }
  508. if c.PushModel == 2 {
  509. tmp["item"] = strings.Join(tagNameArr, ",")
  510. }
  511. tmp["tagid"] = strings.Join(tagIdArr, ",")
  512. }
  513. }
  514. if c.IsTagRule2 {
  515. tagNameMap := map[string]bool{}
  516. tagIdMap := map[string]bool{}
  517. //qu.Debug("c.TagRules---", len(c.TagRules))
  518. //log.Println(c.TagRules,"=========",)
  519. for _, tr := range c.TagRules2 {
  520. if tr.DepartRuleIds[sr.ID] {
  521. // log.Println(tr.TagNames, "===========打标签")
  522. //先获取用到的所有字段值
  523. for field, _ := range tr.Fields {
  524. if fieldText[field] == nil { //补充fieldText
  525. text := qu.ObjToString(tmp[field])
  526. text = ProcessData(text) //处理文本(字母转大写,删除一些符号)
  527. fieldText[field] = text
  528. }
  529. }
  530. //qu.Debug("-------------------------开始排除词匹配--------------------------")
  531. //qu.Debug("tr.NW---", len(tr.NW))
  532. matchKeyTag := map[string]bool{} //记录所有标签里的匹配上的关键词
  533. matchKeyTypeTag := map[string]bool{} //记录标签里的关键词对应的匹配方式
  534. for j, tag_nw := range tr.NW { //排除词匹配
  535. IsMatchNotKey := RegMatch(fieldText, tag_nw.MatchType, tag_nw.KeyReg, nil, nil, false, false)
  536. //qu.Debug(IsMatchNotKey, "------------------------------------------------------------")
  537. if !IsMatchNotKey { //排除词未匹配,匹配附加词关键词
  538. // log.Println(j, tr.TagNames[j])
  539. if RegMatch(fieldText, tr.AW[j].MatchType, tr.AW[j].KeyReg, nil, nil, false, true) && RegMatch(fieldText, tr.KW[j].MatchType, tr.KW[j].KeyReg, matchKeyTag, matchKeyTypeTag, true, false) {
  540. tagname := tr.TagNames[j]
  541. tagBuyerClass := tr.BuyerClass[j]
  542. if tagBuyerClass != "" {
  543. if strings.Contains(tagBuyerClass, tmpBuyerClass) {
  544. if tagname == "" {
  545. tempList := []string{}
  546. for k, _ := range matchKeyTag {
  547. tempList = append(tempList, k)
  548. }
  549. tagname = strings.Join(tempList, ",")
  550. log.Println("=====tagname为空取匹配词为标签名称", tagname)
  551. }
  552. //qu.Debug("tagname-----", tagname)
  553. tagNameMap[tagname] = true
  554. tagIdMap[tr.ID] = true
  555. }
  556. } else {
  557. if tagname == "" {
  558. tempList := []string{}
  559. for k, _ := range matchKeyTag {
  560. tempList = append(tempList, k)
  561. }
  562. tagname = strings.Join(tempList, ",")
  563. log.Println("=====tagname为空取匹配词为标签名称", tagname)
  564. }
  565. //qu.Debug("tagname-----", tagname)
  566. tagNameMap[tagname] = true
  567. tagIdMap[tr.ID] = true
  568. }
  569. }
  570. }
  571. }
  572. }
  573. }
  574. //tagname
  575. tagNameArr := MapDataToArr(tagNameMap)
  576. tagIdArr := MapDataToArr(tagIdMap)
  577. if len(tagNameArr) > 0 {
  578. tmp["tagname2"] = strings.Join(tagNameArr, ",")
  579. if DisPackageAppidMap[c.AppId] {
  580. tmp["buyer_type2"] = strings.Join(tagNameArr, ",")
  581. }
  582. if c.PushModel == 2 {
  583. tmp["item2"] = strings.Join(tagNameArr, ",")
  584. }
  585. tmp["tagid2"] = strings.Join(tagIdArr, ",")
  586. }
  587. }
  588. if c.IsTagRule3 {
  589. tagNameMap := map[string]bool{}
  590. tagIdMap := map[string]bool{}
  591. //qu.Debug("c.TagRules---", len(c.TagRules))
  592. //log.Println(c.TagRules,"=========",)
  593. for _, tr := range c.TagRules3 {
  594. if tr.DepartRuleIds[sr.ID] {
  595. // log.Println(tr.TagNames, "===========打标签")
  596. //先获取用到的所有字段值
  597. for field, _ := range tr.Fields {
  598. if fieldText[field] == nil { //补充fieldText
  599. text := qu.ObjToString(tmp[field])
  600. text = ProcessData(text) //处理文本(字母转大写,删除一些符号)
  601. fieldText[field] = text
  602. }
  603. }
  604. //qu.Debug("-------------------------开始排除词匹配--------------------------")
  605. //qu.Debug("tr.NW---", len(tr.NW))
  606. matchKeyTag := map[string]bool{} //记录所有标签里的匹配上的关键词
  607. matchKeyTypeTag := map[string]bool{} //记录标签里的关键词对应的匹配方式
  608. for j, tag_nw := range tr.NW { //排除词匹配
  609. IsMatchNotKey := RegMatch(fieldText, tag_nw.MatchType, tag_nw.KeyReg, nil, nil, false, false)
  610. //qu.Debug(IsMatchNotKey, "------------------------------------------------------------")
  611. if !IsMatchNotKey { //排除词未匹配,匹配附加词关键词
  612. // log.Println(j, tr.TagNames[j])
  613. if RegMatch(fieldText, tr.AW[j].MatchType, tr.AW[j].KeyReg, nil, nil, false, true) && RegMatch(fieldText, tr.KW[j].MatchType, tr.KW[j].KeyReg, matchKeyTag, matchKeyTypeTag, true, false) {
  614. tagname := tr.TagNames[j]
  615. tagBuyerClass := tr.BuyerClass[j]
  616. if tagBuyerClass != "" {
  617. if strings.Contains(tagBuyerClass, tmpBuyerClass) {
  618. if tagname == "" {
  619. tempList := []string{}
  620. for k, _ := range matchKeyTag {
  621. tempList = append(tempList, k)
  622. }
  623. tagname = strings.Join(tempList, ",")
  624. log.Println("=====tagname为空取匹配词为标签名称", tagname)
  625. }
  626. //qu.Debug("tagname-----", tagname)
  627. tagNameMap[tagname] = true
  628. tagIdMap[tr.ID] = true
  629. }
  630. } else {
  631. if tagname == "" {
  632. tempList := []string{}
  633. for k, _ := range matchKeyTag {
  634. tempList = append(tempList, k)
  635. }
  636. tagname = strings.Join(tempList, ",")
  637. log.Println("=====tagname为空取匹配词为标签名称", tagname)
  638. }
  639. //qu.Debug("tagname-----", tagname)
  640. tagNameMap[tagname] = true
  641. tagIdMap[tr.ID] = true
  642. }
  643. }
  644. }
  645. }
  646. }
  647. }
  648. //tagname
  649. tagNameArr := MapDataToArr(tagNameMap)
  650. tagIdArr := MapDataToArr(tagIdMap)
  651. if len(tagNameArr) > 0 {
  652. tmp["tagname3"] = strings.Join(tagNameArr, ",")
  653. if DisPackageAppidMap[c.AppId] {
  654. tmp["buyer_type3"] = strings.Join(tagNameArr, ",")
  655. }
  656. if c.PushModel == 2 {
  657. tmp["item3"] = strings.Join(tagNameArr, ",")
  658. }
  659. tmp["tagid3"] = strings.Join(tagIdArr, ",")
  660. }
  661. }
  662. //item
  663. switch c.PushModel {
  664. case 0:
  665. tmp["item"] = "数据"
  666. case 1:
  667. tmp["item"] = dm.Name
  668. case 2:
  669. //tmp["item"] = sr.Name
  670. case 3:
  671. tmp["item"] = dm.Name + "_" + sr.Name
  672. case 4:
  673. tmp["item"] = sr.Name
  674. }
  675. //appid
  676. tmp["appid"] = c.AppId
  677. //部门名称
  678. tmp["departname"] = dm.Name
  679. tmp["departid"] = dm.ID
  680. //存储数据
  681. dm.DataLock.Lock()
  682. //qu.Debug("tmp---", tmp)
  683. tmpMap := map[string]interface{}{id: tmp}
  684. dm.DepartmentData[sr.ID] = append(dm.DepartmentData[sr.ID], tmpMap)
  685. dm.DataLock.Unlock()
  686. } else {
  687. qu.Debug("------------", id, IsMatch)
  688. }
  689. }
  690. numDocs += 1
  691. if numDocs%500 == 0 {
  692. log.Println("Current:", numDocs)
  693. }
  694. }
  695. scrollId = searchResult.ScrollId
  696. count++
  697. }
  698. // wg.Wait()
  699. client.ClearScroll().ScrollId(scrollId).Do(ctx) //清理游标
  700. log.Println("SearchRule ID", sr.ID, "Result Data Count:", numDocs)
  701. } else {
  702. log.Println("Customer:", c.Name, "Departmnet", dm.Name, "TagName", sr.Name, "Es Search Data Error,Tag ID:", sr.ID)
  703. }
  704. time.Sleep(2 * time.Second)
  705. }
  706. }
  707. }
  708. // 数据去重
  709. func (c *Customer) RemoveRepeatData() {
  710. log.Println("开始数据去重...")
  711. defer qu.Catch()
  712. for _, dm := range c.Departments {
  713. for _, dataMapArr := range dm.DepartmentData { //一个部门的所有数据
  714. for _, dataMap := range dataMapArr {
  715. for dataId, data := range dataMap {
  716. tmp := data.(map[string]interface{})
  717. if c.PushModel == 0 { //全局模式所有数据去重
  718. if c.SaveDataMap[dataId] == nil {
  719. c.SaveDataMap[dataId] = tmp
  720. } else { //数据重复
  721. cus_history := c.SaveDataMap[dataId]
  722. MergeData(cus_history, tmp, c.IsTagRule, true, c.PushModel) //合并字段
  723. //c.SaveDataMap[dataId] = cus_history
  724. }
  725. } else if c.PushModel == 2 || c.PushModel == 3 { //部门内部去重
  726. if dm.SaveDataMap[dataId] == nil {
  727. dm.SaveDataMap[dataId] = tmp
  728. } else { //数据重复
  729. dm_history := dm.SaveDataMap[dataId]
  730. MergeData(dm_history, tmp, c.IsTagRule, false, c.PushModel) //合并字段
  731. //dm.SaveDataMap[dataId] = dm_history
  732. }
  733. } else if c.PushModel == 4 { //规则模式不去重
  734. //c.SaveDataArr = append(c.SaveDataArr, tmp)
  735. if c.SaveDataArr[dataId] == nil {
  736. tmp["itemdist"] = map[string]interface{}{qu.ObjToString(tmp["item"]): qu.ObjToString(tmp["matchkey"])}
  737. c.SaveDataArr[dataId] = tmp
  738. } else { //数据重复
  739. dm_history := c.SaveDataArr[dataId]
  740. MergeDatas(dm_history, tmp, c.IsTagRule, false) //合并字段
  741. //dm.SaveDataMap[dataId] = dm_history
  742. }
  743. } else if c.PushModel == 1 {
  744. if c.SaveDataMap[dataId] == nil {
  745. tmp["itemdist"] = map[string]interface{}{qu.ObjToString(tmp["item"]): qu.ObjToString(tmp["matchkey"])}
  746. c.SaveDataMap[dataId] = tmp
  747. } else { //数据重复
  748. dm_history := c.SaveDataMap[dataId]
  749. MergeData(dm_history, tmp, c.IsTagRule, true, c.PushModel) //合并字段
  750. }
  751. }
  752. }
  753. }
  754. }
  755. //将部门数据清空
  756. dm.DepartmentData = map[string][]map[string]interface{}{}
  757. }
  758. }
  759. // 组装保存数据
  760. func (c *Customer) AssembelAndSaveData() {
  761. log.Println("开始组装保存数据...")
  762. defer qu.Catch()
  763. ch := make(chan bool, 10)
  764. wg := &sync.WaitGroup{}
  765. n := 0
  766. if (c.PushModel == 0 || c.PushModel == 1) && len(c.SaveDataMap) > 0 {
  767. for _, tmp := range c.SaveDataMap {
  768. wg.Add(1)
  769. ch <- true
  770. go func(data map[string]interface{}) {
  771. defer func() {
  772. <-ch
  773. wg.Done()
  774. }()
  775. ok := AssembelSave(data, c.IsSearchHosp, c.IsSearchEnps, c.AppId, c.DataSave)
  776. if !ok {
  777. n--
  778. }
  779. }(tmp)
  780. n++
  781. if n%500 == 0 {
  782. log.Println("Current:", n)
  783. }
  784. }
  785. wg.Wait()
  786. } else if c.PushModel == 2 || c.PushModel == 3 {
  787. for _, dm := range c.Departments {
  788. if len(dm.SaveDataMap) > 0 {
  789. for _, tmp := range dm.SaveDataMap {
  790. wg.Add(1)
  791. ch <- true
  792. go func(data map[string]interface{}) {
  793. defer func() {
  794. <-ch
  795. wg.Done()
  796. }()
  797. ok := AssembelSave(data, c.IsSearchHosp, c.IsSearchEnps, c.AppId, c.DataSave)
  798. if !ok {
  799. n--
  800. }
  801. }(tmp)
  802. n++
  803. if n%500 == 0 {
  804. log.Println("Current:", n)
  805. }
  806. }
  807. }
  808. }
  809. wg.Wait()
  810. } else if c.PushModel == 4 && len(c.SaveDataArr) > 0 {
  811. for _, tmp := range c.SaveDataArr {
  812. wg.Add(1)
  813. ch <- true
  814. go func(data map[string]interface{}) {
  815. defer func() {
  816. <-ch
  817. wg.Done()
  818. }()
  819. ok := AssembelSave(data, c.IsSearchHosp, c.IsSearchEnps, c.AppId, c.DataSave)
  820. if !ok {
  821. n--
  822. }
  823. }(tmp)
  824. n++
  825. if n%500 == 0 {
  826. log.Println("Current:", n)
  827. }
  828. }
  829. wg.Wait()
  830. }
  831. log.Println("数据保存完毕... Save Number:", n)
  832. }
  833. // 获取用户所有规则
  834. func (d *Department) GetSearchRules(cid, stype string, idRange, idRanges bson.M) {
  835. defer qu.Catch()
  836. searchRules, _ := MgoTag.Find("euserdepartrule", map[string]interface{}{"s_userid": cid, "s_departid": d.ID, "i_isuse": 1, "b_delete": false}, nil, nil)
  837. if len(searchRules) > 0 {
  838. for _, sr := range searchRules {
  839. SR := &SearchRule{}
  840. SR.Fields = make(map[string]interface{})
  841. id := mgoutil.BsonTOStringId(sr["_id"])
  842. name := qu.ObjToString(sr["s_name"])
  843. SR.ID = id
  844. SR.Name = name
  845. SR.CustomerID = cid
  846. SR.DepartmentID = d.ID
  847. SR.ExtFieldType = qu.IntAll(sr["i_extfieldstype"])
  848. //SR.RuleData = &sync.Map{}
  849. esquery := qu.ObjToString(sr["s_esquery"])
  850. if IsNewSql != 0 {
  851. esquery = qu.ObjToString(sr["s_esquery_search"])
  852. }
  853. clearKey := qu.ObjToString(sr["s_globalclearkey"])
  854. clearKeyMatch := qu.ObjToString(sr["s_globalclearkeymatch"])
  855. //获取es
  856. if stype == "history" {
  857. SR.EsQuery = esquery
  858. } else {
  859. SR.GetEs(d.Name, esquery, idRange, idRanges)
  860. }
  861. //获取关键词和附加词
  862. if o_rules, ok := sr["o_rules"].(primitive.A); ok && len(o_rules) > 0 {
  863. SR.GetKeyAddWord(o_rules)
  864. for _, v := range o_rules {
  865. orule, _ := v.(map[string]interface{})
  866. SR.Maths = append(SR.Maths, map[string]string{
  867. "s_matchkey": qu.ObjToString(orule["s_matchkey"]),
  868. "s_keymatch": qu.ObjToString(orule["s_keymatch"]),
  869. "s_group": qu.ObjToString(orule["s_group"]),
  870. })
  871. }
  872. }
  873. //获取全局清理词
  874. SR.GetClearWord(clearKey, clearKeyMatch)
  875. d.Rules = append(d.Rules, SR)
  876. }
  877. }
  878. }
  879. // 获取转换后的es语句
  880. func (sr *SearchRule) GetEs(department, esquery string, tmpRange, tmpRanges bson.M) {
  881. defer qu.Catch()
  882. query := map[string]*QueryObjecct{}
  883. if json.Unmarshal([]byte(esquery), &query) == nil {
  884. qb := query["query"]
  885. filter := qb.Bool
  886. if filter != nil { //有filter
  887. index := -1 //记录range的位置
  888. for i, m := range filter.Must {
  889. mMap := m.(map[string]interface{})
  890. if esRange, ok := mMap["range"].(map[string]interface{}); ok && esRange != nil { //有range
  891. if esRange["publishtime"] != nil {
  892. index = i
  893. break
  894. }
  895. }
  896. }
  897. if index > -1 {
  898. filter.Must[index] = tmpRange
  899. } else {
  900. filter.Must = append(filter.Must, tmpRange)
  901. }
  902. if len(tmpRanges) > 0 {
  903. filter.Must = append(filter.Must, tmpRanges)
  904. }
  905. } else { //无filter则添加
  906. bo := &BoolObject{}
  907. bo.Must = append(bo.Must, tmpRange)
  908. if len(tmpRanges) > 0 {
  909. bo.Must = append(bo.Must, tmpRanges)
  910. }
  911. // tmpFilter := &Filter{
  912. // Bool: bo,
  913. // }
  914. qb.Bool = bo
  915. }
  916. strquery, err := json.Marshal(query)
  917. if err == nil {
  918. sr.EsQuery = string(strquery)
  919. } else {
  920. log.Println("Department:", department, "Es Error,Tag ID:", sr.ID)
  921. }
  922. } else {
  923. log.Println("Department:", department, "Es Error,Tag ID:", sr.ID)
  924. }
  925. }
  926. // 全局清理词处理
  927. func (sr *SearchRule) GetClearWord(key, match string) {
  928. defer qu.Catch()
  929. //匹配方式
  930. cwmArr := []string{}
  931. for _, mv := range strings.Split(match, ",") {
  932. if field := qu.ObjToString(MatchType[mv]); field != "" {
  933. cwmArr = append(cwmArr, field)
  934. sr.Fields[field] = true
  935. }
  936. }
  937. //清理词正则
  938. cwkArr := []*regexp.Regexp{}
  939. for _, kv := range strings.Split(key, ",") {
  940. if LetterCase.MatchString(kv) { //字母转大写
  941. kv = strings.ToUpper(kv)
  942. }
  943. reg := regexp.MustCompile(kv)
  944. cwkArr = append(cwkArr, reg)
  945. }
  946. cw := &ClearWord{
  947. KeyReg: cwkArr,
  948. MatchType: cwmArr,
  949. }
  950. sr.GCW = cw
  951. }
  952. // 关键词、附加词处理
  953. func (sr *SearchRule) GetKeyAddWord(o_rules primitive.A) {
  954. defer qu.Catch()
  955. kw, aw, _, _, _ := GetNotkeyAndKeyAddWord(o_rules, sr.Fields, false)
  956. sr.KW = kw
  957. sr.AW = aw
  958. //sr.Fields = fields
  959. }
  960. // 排除词、关键词、附加词处理
  961. func (tr *TagRule) GetKeyAddNotKeyWord(o_list primitive.A) {
  962. defer qu.Catch()
  963. kw, aw, nkw, tagnames, buyerclass := GetNotkeyAndKeyAddWord(o_list, tr.Fields, true)
  964. tr.NW = nkw
  965. tr.KW = kw
  966. tr.AW = aw
  967. //tr.Fields = fields
  968. tr.TagNames = tagnames
  969. tr.BuyerClass = buyerclass
  970. }
  971. func GetMactchKeys(match []map[string]string, data map[string]interface{}) string {
  972. keyWord := []string{}
  973. for _, keys := range match {
  974. types := keys["s_keymatch"]
  975. key := keys["s_matchkey"]
  976. if strings.Contains(types, "1") {
  977. title := qu.ObjToString(data["title"])
  978. keyWord = KeyWordToDatas(types, title, key, keyWord)
  979. }
  980. if strings.Contains(types, "2") {
  981. detail := qu.ObjToString(data["detail"])
  982. keyWord = KeyWordToDatas(types, detail, key, keyWord)
  983. }
  984. if strings.Contains(types, "3") {
  985. purchasing := qu.ObjToString(data["purchasing"])
  986. keyWord = KeyWordToDatas(types, purchasing, key, keyWord)
  987. }
  988. if strings.Contains(types, "4") {
  989. filetext := qu.ObjToString(data["filetext"])
  990. keyWord = KeyWordToDatas(types, filetext, key, keyWord)
  991. }
  992. if strings.Contains(types, "5") {
  993. projectname := qu.ObjToString(data["projectname"])
  994. keyWord = KeyWordToDatas(types, projectname, key, keyWord)
  995. }
  996. if strings.Contains(types, "6") || strings.Contains(types, "8") {
  997. buyer := qu.ObjToString(data["buyer"])
  998. keyWord = KeyWordToDatas(types, buyer, key, keyWord)
  999. }
  1000. if strings.Contains(types, "7") || strings.Contains(types, "9") {
  1001. winner := qu.ObjToString(data["s_winner"])
  1002. keyWord = KeyWordToDatas(types, winner, key, keyWord)
  1003. }
  1004. }
  1005. keyMap := map[string]bool{}
  1006. keyArr := []string{}
  1007. for _, key := range keyWord {
  1008. keyMap[key] = true
  1009. }
  1010. for k, _ := range keyMap {
  1011. keyArr = append(keyArr, k)
  1012. }
  1013. return strings.Join(keyArr, ",")
  1014. }
  // KeyWordToDatas appends to keyWord every entry of key that occurs in item
  // and returns the extended slice.
  //
  // key is a comma-separated keyword list. A plain entry matches when item
  // contains it. An entry containing "&&" is a combination: it matches only
  // when item contains every non-empty part. Comparison is case-insensitive
  // (both sides are upper-cased). Matching entries are appended in their
  // original spelling, including any "&&".
  //
  // Empty entries (empty key, doubled or trailing commas) and combinations
  // without any non-empty part (for example "&&") never match; they would
  // otherwise match every item because every string contains "".
  //
  // types is unused and kept for signature compatibility.
  func KeyWordToDatas(types, item, key string, keyWord []string) []string {
  	upperItem := strings.ToUpper(item) // hoisted: identical for every keyword
  	for _, mk := range strings.Split(key, ",") {
  		if mk == "" {
  			continue
  		}
  		if !strings.Contains(mk, "&&") {
  			if strings.Contains(upperItem, strings.ToUpper(mk)) {
  				keyWord = append(keyWord, mk)
  			}
  			continue
  		}
  		// Combination keyword: every non-empty part has to be present.
  		parts, allFound := 0, true
  		for _, s := range strings.Split(mk, "&&") {
  			if s == "" {
  				continue
  			}
  			parts++
  			if !strings.Contains(upperItem, strings.ToUpper(s)) {
  				allFound = false
  				break
  			}
  		}
  		if allFound && parts > 0 {
  			keyWord = append(keyWord, mk)
  		}
  	}
  	return keyWord
  }
  1038. func exactMatchs(rule, title, detail string, match []map[string]string, nameArr []string) bool {
  1039. realdata := map[string]float64{}
  1040. for _, v := range nameArr {
  1041. realdata["title_"+v] = 0
  1042. realdata["content_"+v] = 0
  1043. }
  1044. mapping := map[string]string{
  1045. " and ": " && ",
  1046. " or ": " || ",
  1047. " not ": " ! ",
  1048. }
  1049. for k, v := range mapping {
  1050. rule = strings.ReplaceAll(rule, k, v)
  1051. }
  1052. //可以将编译后的表达式,存放在缓存中
  1053. program, err := expr.Compile(rule, expr.Env(realdata))
  1054. if err != nil {
  1055. log.Println("表达式错误 ", err)
  1056. return false
  1057. }
  1058. for _, keys := range match {
  1059. types := keys["s_keymatch"] // 1,2,3
  1060. key := keys["s_matchkey"] //软件,工程
  1061. group := keys["s_group"]
  1062. if strings.Contains(types, "1") {
  1063. for _, v := range strings.Split(key, ",") {
  1064. if strings.Contains(strings.ToUpper(title), strings.ToUpper(v)) {
  1065. realdata["title_"+group] = realdata["title_"+group] + 1
  1066. }
  1067. }
  1068. }
  1069. if strings.Contains(types, "2") {
  1070. for _, v := range strings.Split(key, ",") {
  1071. if strings.Contains(strings.ToUpper(detail), strings.ToUpper(v)) {
  1072. realdata["content_"+group] = realdata["content_"+group] + 1
  1073. }
  1074. }
  1075. }
  1076. }
  1077. log.Println("匹配结果 ", realdata)
  1078. output, err := expr.Run(program, realdata)
  1079. if err != nil {
  1080. log.Println("表达式执行错误 ", err)
  1081. return false
  1082. }
  1083. return output.(bool)
  1084. }