|
@@ -0,0 +1,121 @@
|
|
|
+package main
|
|
|
+
|
|
|
+import (
|
|
|
+ "log"
|
|
|
+ "regexp"
|
|
|
+ "time"
|
|
|
+ //"qfw/util"
|
|
|
+ "sync"
|
|
|
+
|
|
|
+ . "gopkg.in/mgo.v2/bson"
|
|
|
+)
|
|
|
+
|
|
|
var (
	// wg and lock start out nil; UdpTask points them at a fresh WaitGroup
	// and Mutex each time it runs.
	wg   *sync.WaitGroup
	lock *sync.Mutex

	// letter matches text that begins with one or more ASCII letters,
	// optionally followed by '&'. The pattern is not anchored at the end,
	// so anything may follow the matched prefix.
	letter = regexp.MustCompile(`^[a-zA-Z]+&?`)
)
|
|
|
+
|
|
|
+func InitBrand() {
|
|
|
+ //初始化db
|
|
|
+ sess := brandMgo.GetMgoConn()
|
|
|
+ defer brandMgo.DestoryMongoConn(sess)
|
|
|
+
|
|
|
+ BrandDFA = &DFA{
|
|
|
+ Link: make(map[string]interface{}),
|
|
|
+ }
|
|
|
+ //查品牌库品牌
|
|
|
+ var res []M
|
|
|
+ //c, _ := sess.DB("spider").C("JD_commodity").Count()
|
|
|
+ sess.DB(brandDbname).C(brandCollname).Pipe([]M{M{"$group": M{"_id": "$brand"}}}).All(&res)
|
|
|
+ n := 0
|
|
|
+ for _, b := range res {
|
|
|
+ brand := b["_id"].(string)
|
|
|
+ if len(brand) > 50 || len(brand) == 1 || (len(brand) == 3 && !letter.MatchString(brand)) {
|
|
|
+ log.Println("err brand:", brand)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ n++
|
|
|
+ BrandDFA.AddWord(brand) //将品牌库加入DFA
|
|
|
+ }
|
|
|
+ log.Println("brand num :", n)
|
|
|
+}
|
|
|
+
|
|
|
+func UdpTask(sid, eid string) {
|
|
|
+ t := time.Now()
|
|
|
+ log.Println("执行任务")
|
|
|
+ query := M{"_id": M{"$gte": ObjectIdHex(sid), "$lte": ObjectIdHex(eid)}}
|
|
|
+ //附件库
|
|
|
+ sess := appendixMgo.GetMgoConn()
|
|
|
+ defer appendixMgo.DestoryMongoConn(sess)
|
|
|
+
|
|
|
+ poolSize := make(chan bool, pool)
|
|
|
+ wg = &sync.WaitGroup{}
|
|
|
+ lock = &sync.Mutex{} //控制读写
|
|
|
+ update := [][]map[string]interface{}{} //批量更新的数据
|
|
|
+
|
|
|
+ data := sess.DB(appendixDbname).C(appendixCollname).Find(query).Sort("_id").Iter()
|
|
|
+ sum := 0
|
|
|
+ for tmp := make(map[string]interface{}); data.Next(tmp); sum++ {
|
|
|
+ if sum%100 == 0 {
|
|
|
+ log.Println("current:", sum)
|
|
|
+ }
|
|
|
+ poolSize <- true
|
|
|
+ wg.Add(1)
|
|
|
+ go func(d map[string]interface{}) {
|
|
|
+ defer func() {
|
|
|
+ <-poolSize
|
|
|
+ wg.Done()
|
|
|
+ }()
|
|
|
+
|
|
|
+ brandArr := GetBrand(d)
|
|
|
+ if len(brandArr) > 0 { //匹配到品牌再处理
|
|
|
+ tmpArr := []map[string]interface{}{} //存储某条数据的id和要更新内容
|
|
|
+ _id := map[string]interface{}{
|
|
|
+ "_id": d["_id"],
|
|
|
+ }
|
|
|
+ tmpArr = append(tmpArr, _id)
|
|
|
+ // pushAll := map[string]interface{}{
|
|
|
+ // "$pushAll": map[string]interface{}{
|
|
|
+ // "conbrand": brandArr,
|
|
|
+ // },
|
|
|
+ // }
|
|
|
+ addToSet := map[string]interface{}{
|
|
|
+ "$addToSet": map[string]interface{}{
|
|
|
+ "conbrand": map[string]interface{}{
|
|
|
+ "$each": brandArr,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ tmpArr = append(tmpArr, addToSet)
|
|
|
+ lock.Lock()
|
|
|
+ update = append(update, tmpArr)
|
|
|
+ if len(update) > savesize {
|
|
|
+ appendixMgo.UpdateBulk(appendixCollname, update...)
|
|
|
+ update = [][]map[string]interface{}{} //更新后把数据置空
|
|
|
+ }
|
|
|
+ lock.Unlock()
|
|
|
+ }
|
|
|
+ }(tmp)
|
|
|
+ tmp = make(map[string]interface{})
|
|
|
+ }
|
|
|
+ wg.Wait()
|
|
|
+ lock.Lock()
|
|
|
+ if len(update) > 0 {
|
|
|
+ appendixMgo.UpdateBulk(appendixCollname, update...)
|
|
|
+ update = [][]map[string]interface{}{} //更新后把数据置空
|
|
|
+ }
|
|
|
+ lock.Unlock()
|
|
|
+ log.Println("--task over--", time.Since(t).Seconds())
|
|
|
+}
|
|
|
+
|
|
|
+func GetBrand(data map[string]interface{}) (brandArr []string) {
|
|
|
+ if projectinfo, ok := data["projectinfo"].(map[string]interface{}); ok {
|
|
|
+ attachments := projectinfo["attachments"].(map[string]interface{})
|
|
|
+ for _, m := range attachments {
|
|
|
+ val := m.(map[string]interface{})
|
|
|
+ if content, ok := val["content"].(string); ok { //附件文本
|
|
|
+ brandArr = append(brandArr, BrandDFA.CheckSensitiveWord(content)...)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|