duxin cách đây 1 năm
mục cha
commit
aa368a663a
7 tập tin đã thay đổi với 347 bổ sung và 146 xóa
  1. 1 2
      config.json
  2. 82 0
      config/config.go
  3. 110 0
      exclusionary.txt
  4. 2 5
      go.mod
  5. 3 4
      main.go
  6. 77 135
      timedTask/timed.go
  7. 72 0
      util/match.go

+ 1 - 2
config.json

@@ -85,12 +85,11 @@
   }
  ],
 
- "userMail": "duxin@topnet.net.cn",
+ "userMail": "wangchaojun@topnet.net.cn",
 
  "subWordLimit": 10,
  "everydayWordLimit": 2,
  "userTimeLimit": 1,
  "wordDuration": 3
-
 }
 

+ 82 - 0
config/config.go

@@ -0,0 +1,82 @@
+package config
+
+import (
+	qutil "app.yhyue.com/moapp/jybase/common"
+	"app.yhyue.com/moapp/jybase/mail"
+	mg "app.yhyue.com/moapp/jybase/mongodb"
+)
+
+type config struct {
+	Mail []struct {
+		Addr string `json:"addr"`
+		Port int    `json:"port"`
+		Pwd  string `json:"pwd"`
+		User string `json:"user"`
+	} `json:"mail"`
+
+	UserTimeLimit     int `json:"userTimeLimit"`
+	EverydayWordLimit int `json:"everydayWordLimit"`
+	Mongodb           struct {
+		Main struct {
+			Address string `json:"address"`
+			Size    int    `json:"size"`
+			DbName  string `json:"dbName"`
+		} `json:"main"`
+		Log struct {
+			Address  string `json:"address"`
+			Size     int    `json:"size"`
+			DbName   string `json:"dbName"`
+			UserName string `json:"userName"`
+			Password string `json:"password"`
+		} `json:"log"`
+	} `json:"mongodb"`
+	Redis struct {
+		Main struct {
+			Address string `json:"address"`
+		} `json:"main"`
+	} `json:"redis"`
+	SubWordLimit int `json:"subWordLimit"`
+	WordDuration int `json:"wordDuration"`
+
+	UserMail string `json:"userMail"`
+}
+
+var (
+	Mgo    *mg.MongodbSim
+	MgoLog *mg.MongodbSim
+
+	Config *config
+
+	GmailAuth []*mail.GmailAuth
+)
+
+func init() {
+	qutil.ReadConfig(&Config)
+	Mgo = &mg.MongodbSim{
+		MongodbAddr: Config.Mongodb.Main.Address,
+		Size:        Config.Mongodb.Main.Size,
+		DbName:      Config.Mongodb.Main.DbName,
+		UserName:    "",
+		Password:    "",
+		ReplSet:     "",
+	}
+	MgoLog = &mg.MongodbSim{
+		MongodbAddr: Config.Mongodb.Log.Address,
+		Size:        Config.Mongodb.Log.Size,
+		DbName:      Config.Mongodb.Log.DbName,
+		UserName:    Config.Mongodb.Log.UserName,
+		Password:    Config.Mongodb.Log.Password,
+		ReplSet:     "",
+	}
+	MgoLog.InitPool()
+	Mgo.InitPool()
+
+	for _, v := range Config.Mail {
+		GmailAuth = append(GmailAuth, &mail.GmailAuth{
+			SmtpHost: v.Addr,
+			SmtpPort: v.Port,
+			User:     v.User,
+			Pwd:      v.Pwd,
+		})
+	}
+}

+ 110 - 0
exclusionary.txt

@@ -0,0 +1,110 @@
+服务
+项目
+综合
+公司
+关于
+有限公司
+信息
+一期
+二期
+三期
+招标
+及安装
+及配套
+及附属
+及相关
+及设备
+及维修
+及其他
+及维护
+及配送
+及安全
+及室外
+及绿化
+及设施
+及系统
+及周边
+及附件
+及运营
+及综合
+及调试
+及消防
+及监控
+及保养
+及网络
+及辅助
+及验收
+及应用
+及部分
+及环境
+及集成
+及设计
+活动
+名称
+工作
+2022年
+2023年
+2024年
+关于为
+专项
+专用
+时间
+公共
+单位
+一次
+二次
+三次
+第二次
+第三次
+年度
+相关
+人员
+服务项目
+等设备
+普通
+总成
+及其
+中央
+分类
+有限责任
+公告
+与安装
+高效
+重大
+残疾人
+第二批
+全国
+的服务
+等工程
+等服务
+等维修
+委员会
+室设备
+内容
+性租赁
+类设备
+等材料
+等物资
+查询
+基于
+效果
+项目名称
+与管理
+类物资
+与应用
+发展有限公司
+情况
+与服务
+材料及
+问题
+以及
+编号
+品类
+以下
+与节水
+的询价
+总价
+入口
+小修
+超低
+有限

+ 2 - 5
go.mod

@@ -6,14 +6,12 @@ require (
 	app.yhyue.com/moapp/jybase v0.0.0-20231120053339-0b7406056861
 	github.com/robfig/cron v1.2.0
 	github.com/tealeg/xlsx v1.0.5
+	gopkg.in/yaml.v2 v2.4.0
 )
 
 require (
-	github.com/go-sql-driver/mysql v1.6.0 // indirect
 	github.com/go-stack/stack v1.8.0 // indirect
 	github.com/golang/snappy v0.0.4 // indirect
-	github.com/jinzhu/inflection v1.0.0 // indirect
-	github.com/jinzhu/now v1.1.1 // indirect
 	github.com/klauspost/compress v1.13.6 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/xdg-go/pbkdf2 v1.0.0 // indirect
@@ -24,6 +22,5 @@ require (
 	golang.org/x/crypto v0.0.0-20210920023735-84f357641f63 // indirect
 	golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect
 	golang.org/x/text v0.3.7 // indirect
-	gorm.io/driver/mysql v1.0.5 // indirect
-	gorm.io/gorm v1.21.3 // indirect
+	gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect
 )

+ 3 - 4
main.go

@@ -10,7 +10,6 @@ func main() {
 	c.AddFunc("0 0 23 * * *", func() {
 		//每日搜索统计
 		timedTask.DailySearchWordStatistics()
-		timedTask.WordXlsx()
 	})
 
 	c.AddFunc("0 0 0 1 * *", func() {
@@ -20,12 +19,12 @@ func main() {
 		timedTask.SearchWordStatistics()
 	})
 
-	c.AddFunc("0 0 8 * * *", func() {
-		//每日发送邮件
+	c.AddFunc("0 0 9 * * 1", func() {
+		timedTask.WordXlsx()
+		//每周一发送邮件
 		timedTask.SendMail()
 	})
 	c.Start()
 	defer c.Stop()
 	select {}
-
 }

+ 77 - 135
timedTask/timed.go

@@ -3,10 +3,10 @@ package timedTask
 import (
 	qutil "app.yhyue.com/moapp/jybase/common"
 	"app.yhyue.com/moapp/jybase/mail"
-	mg "app.yhyue.com/moapp/jybase/mongodb"
 	"encoding/json"
 	"fmt"
 	"github.com/tealeg/xlsx"
+	. "hostkword/config"
 	"hostkword/util"
 	"io"
 	"log"
@@ -18,41 +18,6 @@ import (
 	"unicode/utf8"
 )
 
-type config struct {
-	Mail []struct {
-		Addr string `json:"addr"`
-		Port int    `json:"port"`
-		Pwd  string `json:"pwd"`
-		User string `json:"user"`
-	} `json:"mail"`
-
-	UserTimeLimit     int `json:"userTimeLimit"`
-	EverydayWordLimit int `json:"everydayWordLimit"`
-	Mongodb           struct {
-		Main struct {
-			Address string `json:"address"`
-			Size    int    `json:"size"`
-			DbName  string `json:"dbName"`
-		} `json:"main"`
-		Log struct {
-			Address  string `json:"address"`
-			Size     int    `json:"size"`
-			DbName   string `json:"dbName"`
-			UserName string `json:"userName"`
-			Password string `json:"password"`
-		} `json:"log"`
-	} `json:"mongodb"`
-	Redis struct {
-		Main struct {
-			Address string `json:"address"`
-		} `json:"main"`
-	} `json:"redis"`
-	SubWordLimit int `json:"subWordLimit"`
-	WordDuration int `json:"wordDuration"`
-
-	UserMail string `json:"userMail"`
-}
-
 type keyAllData struct {
 	AItems []struct {
 		AKey []struct {
@@ -63,50 +28,47 @@ type keyAllData struct {
 	} `json:"a_items"`
 }
 
-var (
-	Mgo    *mg.MongodbSim
-	MgoLog *mg.MongodbSim
+type UserKeyWord struct {
+	OMemberJy struct {
+		AItems []struct {
+			AKey []struct {
+				Key       []string `json:"key"`
+				Appendkey []string `json:"appendkey"`
+			} `json:"a_key"`
+		} `json:"a_items"`
+	} `json:"o_member_jy"`
+	OVipjy struct {
+		AItems []struct {
+			AKey []struct {
+				Appendkey []string `json:"appendkey"`
+				Key       []string `json:"key"`
+			} `json:"a_key"`
+		} `json:"a_items"`
+	} `json:"o_vipjy"`
+	OJy struct {
+		AKey []struct {
+			Appendkey []string `json:"appendkey"`
+			Key       []string `json:"key"`
+		} `json:"a_key"`
+	} `json:"o_jy"`
+}
 
-	Config        *config
+var (
 	entWordNumber map[string]int
 	chineseRegex  = regexp.MustCompile(`\p{Han}`)
+)
 
-	GmailAuth []*mail.GmailAuth
+const (
+	filePath = "./hotWordFile/jyHotWord.xlsx"
 )
 
 func init() {
 	entWordNumber = make(map[string]int)
-	qutil.ReadConfig(&Config)
-	Mgo = &mg.MongodbSim{
-		MongodbAddr: Config.Mongodb.Main.Address,
-		Size:        Config.Mongodb.Main.Size,
-		DbName:      Config.Mongodb.Main.DbName,
-		UserName:    "",
-		Password:    "",
-		ReplSet:     "",
-	}
-	MgoLog = &mg.MongodbSim{
-		MongodbAddr: Config.Mongodb.Log.Address,
-		Size:        Config.Mongodb.Log.Size,
-		DbName:      Config.Mongodb.Log.DbName,
-		UserName:    Config.Mongodb.Log.UserName,
-		Password:    Config.Mongodb.Log.Password,
-		ReplSet:     "",
-	}
-	MgoLog.InitPool()
-	Mgo.InitPool()
 	//每月用户订阅词
 	UserWord()
 	//每月搜索统计搜索词
 	SearchWordStatistics()
-	for _, v := range Config.Mail {
-		GmailAuth = append(GmailAuth, &mail.GmailAuth{
-			SmtpHost: v.Addr,
-			SmtpPort: v.Port,
-			User:     v.User,
-			Pwd:      v.Pwd,
-		})
-	}
+	//注册过滤词
 }
 
 func timeFmt() (int64, int64) {
@@ -123,17 +85,17 @@ func timeFmt() (int64, int64) {
 // 每月1号统计新搜索词频
 func SearchWordStatistics() {
 	tm := time.Now()
-	startTime := tm.AddDate(0, -Config.WordDuration, 0)
-	currentTime := startTime
-	participleMap := make(map[string]int)
-	glossaryMap := make(map[string]int)
 	if MgoLog.Count("jy_hot_word", map[string]interface{}{
 		"statistical_type": 1,
-		"year":             time.Now().Year(),
-		"month":            int(time.Now().Month()),
+		"year":             tm.Year(),
+		"month":            int(tm.Month()),
 	}) > 0 { //已统计过当月数据
 		return
-	}
+	} // 避免本月重复统计
+	startTime := tm.AddDate(0, -Config.WordDuration, 0)
+	currentTime := startTime
+	participleMap := make(map[string]int)
+	glossaryMap := make(map[string]int)
 	pool := make(chan bool, 10)
 	wait := &sync.WaitGroup{}
 	var lock sync.Mutex
@@ -453,36 +415,15 @@ func UserMonthWord() {
 	}
 }
 
-type UserKeyWord struct {
-	OMemberJy struct {
-		AItems []struct {
-			AKey []struct {
-				Key       []string `json:"key"`
-				Appendkey []string `json:"appendkey"`
-			} `json:"a_key"`
-		} `json:"a_items"`
-	} `json:"o_member_jy"`
-	OVipjy struct {
-		AItems []struct {
-			AKey []struct {
-				Appendkey []string `json:"appendkey"`
-				Key       []string `json:"key"`
-			} `json:"a_key"`
-		} `json:"a_items"`
-	} `json:"o_vipjy"`
-	OJy struct {
-		AKey []struct {
-			Appendkey []string `json:"appendkey"`
-			Key       []string `json:"key"`
-		} `json:"a_key"`
-	} `json:"o_jy"`
-}
-
 func UserWordAll() []map[string]interface{} {
+	_, lt := IsOneDay()
 	sess := Mgo.GetMgoConn()
 	defer Mgo.DestoryMongoConn(sess)
 	keyWordMap := make(map[string]int)
 	iter := sess.DB(Mgo.DbName).C("user").Find(map[string]interface{}{
+		"l_registedate": map[string]interface{}{
+			"$lt": lt.Unix(),
+		},
 		"$or": []map[string]interface{}{
 			{"o_jy.a_key": map[string]interface{}{"$exists": 1}},
 			{"o_vipjy.a_items": map[string]interface{}{"$exists": 1}},
@@ -542,13 +483,15 @@ func UserWord() {
 		userArr, entArr         []string
 		userWordArr, entWordArr []map[string]int
 	)
-
 	dataMap := make(map[string]bool)
 	dataEntMap := make(map[string]bool)
 	UserMonthWord()          //个人月词
 	EntWord()                //企业月词
 	wordAll := UserWordAll() //个人总词
-
+	var userName, entName []string
+	if len(wordAll) > 0 {
+		userName = append(userName, "词频")
+	}
 	dMap := make(map[string]int)
 	for _, v := range wordAll { //个人订阅词频
 		keyWord := qutil.InterfaceToStr(v["keyWord"])
@@ -559,20 +502,25 @@ func UserWord() {
 		}
 	}
 	userWordArr = append(userWordArr, dMap)
+	var year, month int
+	switch time.Now().Month() {
+	case time.January: //一月统计往年全部数据
+		year = time.Now().Year() - 1
+		month = 13
+	default:
+		year = time.Now().Year()
+		month = int(time.Now().Month())
+	}
 
-	tm := time.Now()
-	var userStaMon, entStaMon int
-	for i := 1; i < int(tm.Month()); i++ {
+	for i := 1; i < month; i++ {
 		data, ok := MgoLog.Find("jy_hot_word", map[string]interface{}{
-			"year":             tm.Year(),
+			"year":             year,
 			"month":            i,
 			"statistical_type": 5,
 		}, `{"number":-1}`, "", false, -1, -1)
 		if ok && len(*data) > 0 {
 			dMonMap := make(map[string]int)
-			if userStaMon == 0 {
-				userStaMon = i
-			}
+			userName = append(userName, fmt.Sprintf("%d月", i))
 			for _, v := range *data {
 				keyWord := qutil.InterfaceToStr(v["keyWord"])
 				dMonMap[keyWord] = qutil.IntAll(v["number"])
@@ -584,14 +532,12 @@ func UserWord() {
 			userWordArr = append(userWordArr, dMonMap)
 		}
 		data, ok = MgoLog.Find("jy_hot_word", map[string]interface{}{
-			"year":             tm.Year(),
+			"year":             year,
 			"month":            i,
 			"statistical_type": 6,
 		}, `{"number":-1}`, "", false, -1, -1)
 		if ok && len(*data) > 0 {
-			if entStaMon == 0 {
-				entStaMon = i
-			}
+			entName = append(entName, fmt.Sprintf("%d月", i))
 			dEntMap := make(map[string]int)
 			for _, v := range *data {
 				keyWord := qutil.InterfaceToStr(v["keyWord"])
@@ -609,7 +555,6 @@ func UserWord() {
 		file           *xlsx.File
 	)
 	//生成xlex
-	filePath := fmt.Sprintf("./hotWordFile/jyHotWord_%d%d.xlsx", time.Now().Year(), int(time.Now().Month()))
 	_, err := os.Stat(filePath)
 	sheetName1, sheetName2 := "个人订阅词", "企业订阅词"
 	if err == nil {
@@ -661,8 +606,8 @@ func UserWord() {
 		log.Println("文件校验失败:", err.Error())
 		return
 	}
-	TableDataMonth(sheet1, userArr, userWordArr, false, userStaMon) //个人订阅词
-	TableDataMonth(sheet2, entArr, entWordArr, true, entStaMon)     //企业订阅词
+	TableDataMonth(sheet1, userArr, userWordArr, userName) //个人订阅词
+	TableDataMonth(sheet2, entArr, entWordArr, entName)    //企业订阅词
 	// 保存 Excel 文件
 	err = file.Save(filePath)
 	if err != nil {
@@ -709,7 +654,6 @@ func WordXlsx() {
 		sheetName1, sheetName2 = "搜查词条", "搜索分词"
 	)
 
-	filePath := fmt.Sprintf("./hotWordFile/jyHotWord_%d%d.xlsx", time.Now().Year(), int(time.Now().Month()))
 	_, err := os.Stat(filePath)
 	if err == nil {
 		log.Printf("文件 %s 存在\n", filePath)
@@ -784,16 +728,16 @@ func WordXlsx() {
 	log.Println("Excel 文件生成成功")
 }
 
-func TableData(sheet *xlsx.Sheet, keyArr []string, data []map[string]int, staDay int) {
+func TableData(sheet *xlsx.Sheet, keyArr []string, data []map[string]int, staDay []string) {
 	// 设置表头
 	row := sheet.AddRow()
 	cell := row.AddCell()
 	cell.Value = "检索词"
 	cell = row.AddCell()
 	cell.Value = "词频"
-	for i := staDay; i <= time.Now().Day(); i++ {
+	for _, v := range staDay {
 		cell = row.AddCell()
-		cell.Value = fmt.Sprintf("%d号", i)
+		cell.Value = v
 	}
 	// 写入检索词和数据
 	for _, key := range keyArr {
@@ -807,18 +751,14 @@ func TableData(sheet *xlsx.Sheet, keyArr []string, data []map[string]int, staDay
 	}
 }
 
-func TableDataMonth(sheet *xlsx.Sheet, keyArr []string, data []map[string]int, isEnt bool, sta int) {
+func TableDataMonth(sheet *xlsx.Sheet, keyArr []string, data []map[string]int, name []string) {
 	// 设置表头
 	row := sheet.AddRow()
 	cell := row.AddCell()
 	cell.Value = "检索词"
-	if !isEnt {
-		cell = row.AddCell()
-		cell.Value = "词频"
-	}
-	for i := sta; i < int(time.Now().Month()); i++ {
+	for _, v := range name {
 		cell = row.AddCell()
-		cell.Value = fmt.Sprintf("%d月", i)
+		cell.Value = v
 	}
 	// 写入检索词和数据
 	for _, key := range keyArr {
@@ -832,17 +772,20 @@ func TableDataMonth(sheet *xlsx.Sheet, keyArr []string, data []map[string]int, i
 	}
 }
 
-func wordXlsxData(isParticiple bool) (strArr []string, statisticsMap []map[string]int, staDay int) {
+func wordXlsxData(isParticiple bool) (strArr []string, statisticsMap []map[string]int, staDay []string) {
 	dataMap := make(map[string]bool)
 	tm := time.Now()
-	for i := 0; i <= tm.Day(); i++ {
+	statistical1 := qutil.If(isParticiple, 1, 2)
+	statistical2 := qutil.If(isParticiple, 3, 4)
+
+	for i := 0; i < tm.Day(); i++ {
 		query := make(map[string]interface{})
 		if i == 0 { //首次统计词频
-			query["statistical_type"] = qutil.If(isParticiple, 1, 2)
+			query["statistical_type"] = statistical1
 			query["year"] = tm.Year()
 			query["month"] = int(tm.Month())
 		} else {
-			query["statistical_type"] = qutil.If(isParticiple, 3, 4)
+			query["statistical_type"] = statistical2
 			query["year"] = tm.Year()
 			query["month"] = int(tm.Month())
 			query["day"] = i
@@ -850,8 +793,8 @@ func wordXlsxData(isParticiple bool) (strArr []string, statisticsMap []map[strin
 		data, ok := MgoLog.Find("jy_hot_word", query, `{"number":-1}`, "", false, -1, -1)
 		if ok && len(*data) > 0 {
 			dMap := make(map[string]int)
-			if staDay == 0 && i > 0 {
-				staDay = i
+			if i > 0 {
+				staDay = append(staDay, fmt.Sprintf("%d号", i))
 			}
 			for _, v := range *data {
 				keyWord := qutil.InterfaceToStr(v["keyWord"])
@@ -870,11 +813,10 @@ func wordXlsxData(isParticiple bool) (strArr []string, statisticsMap []map[strin
 // 过滤不符合条件的词
 func HasChineseAndValidLength(str string) bool {
 	length := utf8.RuneCountInString(str)
-	return chineseRegex.MatchString(str) && length >= 2 && length < 20
+	return chineseRegex.MatchString(str) && length >= 2 && length < 20 && !util.FilterKey.Search(str)
 }
 
 func SendMail() {
-	filePath := fmt.Sprintf("./hotWordFile/jyHotWord_%d%d.xlsx", time.Now().Year(), int(time.Now().Month()))
 	f, err := os.Open(filePath)
 	if err != nil {
 		panic(err)

+ 72 - 0
util/match.go

@@ -0,0 +1,72 @@
+package util
+
import (
	"bufio"
	"log"
	"os"
	"strings"
)
+
// TrieNode is one node of a rune-keyed prefix tree (trie) that stores a set
// of words. The zero value is an empty trie ready for Insert and Search.
type TrieNode struct {
	// children maps the next rune of a word to the node it leads to. It is
	// allocated lazily by Insert and stays nil on nodes without successors.
	children map[rune]*TrieNode
	// isEnd is true when the path from the root to this node spells a
	// complete inserted word.
	isEnd bool
}

// filterWord maps a YAML document whose "keyWord" entry is a list of strings.
// Nothing in this block reads or writes it.
type filterWord struct {
	KeyWord []string `yaml:"keyWord"`
}

var (
	// FilterKey is the package-level trie that init fills from
	// ./exclusionary.txt.
	FilterKey = &TrieNode{}
)

// init fills FilterKey with the words listed in ./exclusionary.txt, one word
// per line. Lines are normalised with normalizeFilterWord and blank lines are
// skipped. If the file cannot be opened or read, the problem is logged and
// FilterKey is left empty; no partial word list is inserted.
func init() {
	file, err := os.Open("./exclusionary.txt")
	if err != nil {
		log.Println("无法打开文件:", err)
		return
	}
	defer file.Close()

	var words []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		word := normalizeFilterWord(scanner.Text())
		if word == "" {
			continue
		}
		words = append(words, word)
	}
	if err := scanner.Err(); err != nil {
		log.Println("读取文件时发生错误:", err)
		return
	}

	// Insert only after the whole file was read successfully.
	for _, word := range words {
		FilterKey.Insert(word)
	}
}

// normalizeFilterWord strips a leading UTF-8 byte order mark and surrounding
// white space from one line of the word list. It returns "" for lines that
// contain nothing else.
func normalizeFilterWord(line string) string {
	return strings.TrimSpace(strings.TrimPrefix(line, "\uFEFF"))
}

// Insert adds word to the trie rooted at nd. An empty word is ignored, because
// storing it would mark the root itself as the end of a word.
func (nd *TrieNode) Insert(word string) {
	if word == "" {
		return
	}
	node := nd
	for _, ch := range word {
		if node.children == nil {
			node.children = make(map[rune]*TrieNode)
		}
		next, ok := node.children[ch]
		if !ok {
			next = &TrieNode{}
			node.children[ch] = next
		}
		node = next
	}
	node.isEnd = true
}

// Search reports whether word was inserted into the trie rooted at nd as a
// complete word. Prefixes of stored words and words that merely start with a
// stored word do not match.
func (nd *TrieNode) Search(word string) bool {
	node := nd
	for _, ch := range word {
		// Indexing a nil map is safe and yields ok == false.
		next, ok := node.children[ch]
		if !ok {
			return false
		}
		node = next
	}
	return node.isEnd
}