package main import ( "github.com/go-xweb/log" qu "qfw/util" "regexp" "unicode/utf8" ) var effective *regexp.Regexp = regexp.MustCompile("^(([A-Za-z]+)?[\u4e00-\u9fa5]+)$") func resourceCleanWordsInfo() { datas := *MysqlDevTool.Find("seo_resource", nil, "", "id", -1, -1) log.Println("最终清洗阶段~", len(datas)) isok := 0 /* 1、保留全汉字的标的物 2、保留标的物开头为英文字母的词,比如dsp信号发生器、abs管材、B超 3、长度均在2-6 */ for k, v := range datas { if k%1000 == 0 { log.Println("cur index ", k, "~", isok) } name := qu.ObjToString(v["name"]) letter := qu.ObjToString(v["letter"]) l := utf8.RuneCountInString(name) if l < 2 || l > 6 || letter == "" { continue } //是否全中文 if !effective.MatchString(name) { continue } isok++ info := v delete(info, "id") InsertMysqlDevData("seo_resource_copy", info, qu.ObjToString(v["id"])) Source_Mgo.Save("seo_resource_words", info) } log.Println("is over ", len(datas), "~", isok) }