123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288 |
- package main
- import (
- "bytes"
- "context"
- "encoding/json"
- "errors"
- "fmt"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
- "log"
- "net/http"
- "strings"
- "sync"
- "time"
- )
- // getBidding2 获取bidding 1.3日无二级分类数据
- func getBidding2() {
- //2024-1-3日数据
- where := map[string]interface{}{
- "comeintime": map[string]interface{}{
- "$gte": 1704211200,
- "$lte": 1704297600,
- },
- "subtype": map[string]interface{}{
- "$exists": 1,
- },
- }
- sess := MgoB.GetMgoConn()
- defer MgoB.DestoryMongoConn(sess)
- //texts := make([]string, 0)
- query := sess.DB("qfw").C("bidding").Find(where).Select(map[string]interface{}{"title": 1, "toptype": 1, "subtype": 1, "href": 1, "detail": 1, "channel": 1}).Iter()
- count := 0
- for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- if count%1000 == 0 {
- log.Println("current:", count)
- }
- if util.IntAll(tmp["extracttype"]) == -1 {
- continue
- }
- id := mongodb.BsonIdToSId(tmp["_id"])
- tmp["jyhref"] = GetJyURLByID(id)
- MgoB.SaveByOriID("wcc_bidding_20240103_subtype_exists", tmp)
- tmp = make(map[string]interface{})
- }
- log.Println("over")
- }
- // callAi 调用大模型
- func callAi() {
- sess := MgoB.GetMgoConn()
- defer MgoB.DestoryMongoConn(sess)
- //where := map[string]interface{}{
- // "subtype_a": map[string]interface{}{
- // "$exists": 0,
- // },
- //}
- query := sess.DB("qfw_data").C("wcc_bidding_20240103_subtype_exists").Find(nil).Select(nil).Iter()
- count := 0
- //ch := make(chan bool, 1)
- //wg := &sync.WaitGroup{}
- for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- if count%1000 == 0 {
- log.Println("-------- current:", count, tmp["_id"], " ---------")
- }
- //ch <- true
- //wg.Add(1)
- //go func(tmp map[string]interface{}) {
- // defer func() {
- // <-ch
- // wg.Done()
- // }()
- id := mongodb.BsonIdToSId(tmp["_id"])
- title := util.ObjToString(tmp["title"])
- detail := util.ObjToString(tmp["detail"])
- data := map[string]interface{}{
- "title": title,
- "detail": detail,
- }
- reqData := map[string]interface{}{
- "texts": []interface{}{data},
- }
- now := time.Now()
- res := send(reqData)
- log.Println(time.Since(now).Seconds(), tmp["_id"])
- subtype := res["result"].([]interface{})
- result := subtype[0]
- types := strings.Split(util.ObjToString(result), "-")
- update := make(map[string]interface{})
- if len(types) == 2 {
- update["toptype_ai"] = types[0]
- update["subtype_ai"] = types[1]
- //没有内容
- if detail == "" {
- update["data_type"] = 1
- } else {
- update["data_type"] = 0
- }
- MgoB.UpdateById("wcc_bidding_20240103_subtype_exists", id, map[string]interface{}{"$set": update})
- }
- //}(tmp)
- tmp = make(map[string]interface{})
- }
- //wg.Wait()
- log.Println("over")
- }
- // getBidding 调用分类大模型
- func getBidding() {
- //2024-1-3日数据
- where := map[string]interface{}{
- "comeintime": map[string]interface{}{
- "$gte": 1704211200,
- "$lte": 1704297600,
- },
- "subtype": map[string]interface{}{
- "$exists": 0,
- },
- }
- sess := MgoB.GetMgoConn()
- defer MgoB.DestoryMongoConn(sess)
- //texts := make([]string, 0)
- query := sess.DB("qfw").C("bidding").Find(where).Select(map[string]interface{}{"title": 1, "toptype": 1, "href": 1, "detail": 1}).Iter()
- count := 0
- ch := make(chan bool, 10)
- wg := &sync.WaitGroup{}
- for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- if count%1000 == 0 {
- log.Println("current:", count)
- }
- if util.IntAll(tmp["extracttype"]) == -1 {
- continue
- }
- ch <- true
- wg.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-ch
- wg.Done()
- }()
- id := mongodb.BsonIdToSId(tmp["_id"])
- title := util.ObjToString(tmp["title"])
- detail := util.ObjToString(tmp["detail"])
- tmp["bidding_id"] = id
- data := map[string]interface{}{
- "title": title,
- "detail": detail,
- }
- reqData := map[string]interface{}{
- "texts": []interface{}{data},
- }
- res := SendAi(reqData)
- subtype := res["result"].([]interface{})
- result := subtype[0]
- types := strings.Split(util.ObjToString(result), "-")
- if len(types) == 2 {
- tmp["new_toptype"] = types[0]
- tmp["new_subtype"] = types[1]
- }
- tmp["jyhref"] = GetJyURLByID(id)
- //没有内容
- if detail == "" {
- tmp["data_type"] = 1
- } else {
- tmp["data_type"] = 0
- }
- MgoB.Save("wcc_20240103-2", tmp)
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg.Wait()
- log.Println("over")
- }
// send posts data as a JSON body to the AI classification service and returns
// the decoded JSON response.
//
// It returns nil when data cannot be marshalled, the request fails, the
// service answers with a non-2xx status, or the response body cannot be
// decoded; the cause is printed to standard output. No overall client timeout
// is set for the request.
func send(data map[string]interface{}) (res map[string]interface{}) {
	const url = "http://192.168.3.109:16688"

	jsonData, err := json.Marshal(data)
	if err != nil {
		fmt.Println("JSON marshal error:", err)
		return nil
	}

	resp, err := http.Post(url, "application/json", bytes.NewReader(jsonData))
	if err != nil {
		fmt.Println("Request error:", err)
		return nil
	}
	defer resp.Body.Close()

	// An error reply must not be handed to callers as if it were a result.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		fmt.Println("Request error: unexpected status", resp.Status)
		return nil
	}

	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
		fmt.Println("Response decoding error:", err)
		return nil
	}
	return res
}
// SendAi posts data as a JSON body to the AI bidding-classification service
// and returns the decoded JSON response, allowing two seconds for the whole
// request.
//
// It returns nil when data cannot be marshalled, the request fails or times
// out, the service answers with a non-2xx status, or the response body cannot
// be decoded; the cause is printed to standard output.
func SendAi(data map[string]interface{}) (res map[string]interface{}) {
	const url = "http://192.168.3.109:16688"

	jsonData, err := json.Marshal(data)
	if err != nil {
		fmt.Println("JSON marshal error:", err)
		return nil
	}

	// The context bounds the entire exchange, including reading the body.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(jsonData))
	if err != nil {
		fmt.Println("Request error:", err)
		return nil
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// Report a timeout separately from other transport failures.
		if errors.Is(err, context.DeadlineExceeded) {
			fmt.Println("Request timed out")
			return nil
		}
		fmt.Println("Request error:", err)
		return nil
	}
	defer resp.Body.Close()

	// An error reply must not be handed to callers as if it were a result.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		fmt.Println("Request error: unexpected status", resp.Status)
		return nil
	}

	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
		fmt.Println("Response decoding error:", err)
		return nil
	}
	return res
}
|