Parcourir la source

1、标的物
2、oss
3、方法调整

zhengkun il y a 9 mois
Parent
commit
700e8c81ff
13 fichiers modifiés avec 378 ajouts et 108 suppressions
  1. 65 31
      extract/extract.go
  2. 40 3
      extract/full.go
  3. 23 2
      extract/test.go
  4. 8 0
      go.mod
  5. 51 0
      go.sum
  6. 46 52
      main.go
  7. 10 1
      prompt/prompt_class.go
  8. 5 1
      prompt/prompt_field.go
  9. 8 11
      tool/tool.go
  10. 0 1
      ul/global.go
  11. 49 4
      ul/init.go
  12. 1 2
      ul/md.go
  13. 72 0
      ul/oss.go

+ 65 - 31
extract/extract.go

@@ -41,7 +41,7 @@ func ExtractFieldInfo(sid string, eid string) {
 		}
 		tmpid := ul.BsonTOStringId(tmp["_id"])
 		infoformat := qu.IntAll(tmp["infoformat"])
-		if infoformat != 1 || dict[tmpid] == nil {
+		if infoformat > 1 || dict[tmpid] == nil {
 			tmp = make(map[string]interface{})
 			continue
 		}
@@ -65,52 +65,48 @@ func ExtractFieldInfo(sid string, eid string) {
 	}
 	wg_mgo.Wait()
 	log.Debug("ai is over ...", sid, "~", eid)
+
 }
 
 // 获取处理数据...
 func ResolveInfo(v map[string]interface{}) map[string]interface{} {
-	detail := qu.ObjToString(v["detail"])
-	filetext := qu.ObjToString(v["filetext"]) //此处为附件信息···
+	tmpid := ul.BsonTOStringId(v["_id"])
+	detail := getDetailText(v, tmpid) //获取正文文本
 	title := qu.ObjToString(v["title"])
-	if strings.Contains(title, "开标记录") { //开标记录舍弃
-		return map[string]interface{}{}
-	}
-	if v["jyfb_data"] != nil { //剑鱼发布舍弃qi
+	dl := utf8.RuneCountInString(detail) //文本长度
+	//过滤数据···
+	if strings.Contains(title, "开标记录") || v["jyfb_data"] != nil || dl < 20 {
 		return map[string]interface{}{}
 	}
-	fns := getpnsinfo(v) //获取附件名字
-	f_data := map[string]interface{}{}
-	if ul.IsTool && utf8.RuneCountInString(detail) < 100 {
-		detail = filetext
-	}
-	if utf8.RuneCountInString(detail) < 100 {
-		return f_data
+	//识别结构,短文本结构
+	f_data, shorText := map[string]interface{}{}, false
+	if dl < 100 {
+		shorText = true
 	}
 	//文本格式转换
 	detail = ul.HttpConvertToMarkdown(detail)
 	//获取外围字段数据
-	f_info := prompt.AcquireExtractFieldInfo(detail)
-	//获取分包信息
-	pkg := prompt.AcquireNewMultiplePackageInfo(detail)
-	if len(pkg) > 0 {
-		f_info["s_pkg"] = pkg
+	f_info := prompt.AcquireExtractFieldInfo(detail, shorText)
+
+	//非短文本
+	if !shorText {
+		//获取分包信息
+		if pkg := prompt.AcquireNewMultiplePackageInfo(detail); len(pkg) > 0 {
+			f_info["s_pkg"] = pkg
+		}
+		//获取分类字段数据
+		s_toptype, s_subtype := prompt.AcquireClassInfo(detail, title, qu.ObjToString(v["toptype"]))
+		f_info["s_toptype"] = s_toptype
+		f_info["s_subtype"] = s_subtype
 	}
 
-	//获取分类字段数据
-	s_toptype, s_subtype := "", ""
-	if qu.ObjToString(v["toptype"]) == "拟建" {
-		s_toptype, s_subtype = "拟建", "拟建"
-	} else if qu.ObjToString(v["toptype"]) == "产权" {
-		s_toptype, s_subtype = "产权", "产权"
-	} else if qu.ObjToString(v["toptype"]) == "采购意向" {
-		s_toptype, s_subtype = "采购意向", "采购意向"
-	} else {
-		s_toptype, s_subtype = prompt.AcquireClassInfo(detail, title)
+	//调用标的物识别
+	if p_list := getPurList(v, detail, f_info); len(p_list) > 0 {
+		f_info["purchasinglist"] = p_list
 	}
-	f_info["s_toptype"] = s_toptype
-	f_info["s_subtype"] = s_subtype
 
 	//字段清洗
+	fns := getpnsinfo(v) //获取附件名字
 	f_data = clean.CleanFieldInfo(f_info, fns)
 
 	//对于某些字段进行二级校验
@@ -160,6 +156,44 @@ func getpnsinfo(tmp map[string]interface{}) []string {
 	return arr
 }
 
+func getDetailText(v map[string]interface{}, tmpid string) string { // getDetailText picks the body text for record v; tmpid is only referenced by the commented-out OSS lookup below.
+	detail := qu.ObjToString(v["detail"])
+	if ul.IsTool { // Tool mode reads the "details" field instead of "detail".
+		detail = qu.ObjToString(v["details"])
+		filetext := qu.ObjToString(v["filetext"])
+		if utf8.RuneCountInString(detail) < 100 && filetext != "" { // Body under 100 runes: use the non-empty "filetext" value instead.
+			detail = filetext
+		}
+	} else {
+		//if bs := ul.OssGetObject(tmpid); bs != "" {
+		//	detail = bs
+		//}
+	}
+	return detail
+}
+
+// getPurList requests the purchasing list for record v through ul.PostPurchasingList; records whose toptype is "拟建" or "产权" are skipped and yield an empty slice.
+func getPurList(v map[string]interface{}, detail string, f_info map[string]interface{}) []map[string]interface{} {
+	if qu.ObjToString(v["toptype"]) == "拟建" || qu.ObjToString(v["toptype"]) == "产权" {
+		return []map[string]interface{}{}
+	}
+	p_data := map[string]interface{}{}
+	p_data["detail"] = detail
+	p_data["site"] = v["site"]
+	p_data["attach_text"] = v["attach_text"]
+	p_data["toptype"] = v["toptype"]
+	if f_info["s_toptype"] != nil { // An s_toptype already present in f_info overrides the record's own toptype.
+		p_data["toptype"] = f_info["s_toptype"]
+	}
+	if p_info := ul.PostPurchasingList(p_data); len(p_info) > 0 {
+		if qu.IntAll(p_info["status"]) == 200 { // Only a response whose status is 200 is used; anything else falls through to the empty result.
+			p_list := ul.IsMarkInterfaceMap(p_info["purchasinglist"])
+			return p_list
+		}
+	}
+	return []map[string]interface{}{}
+}
+
 // 暂时不启用...无限重试
 func RunResetUpdateFieldInfo(arr []string, name string, s_name string) {
 	//log.Debug("开始重置更新...", len(arr))

+ 40 - 3
extract/full.go

@@ -29,12 +29,12 @@ func getExistsInfo() map[string]interface{} {
 
 // 识别结构化字段
 func MovingFullInfo(sid string, eid string) {
-	dict := getExistsInfo()
 	q := map[string]interface{}{
 		"_id": map[string]interface{}{
 			"$lt": ul.StringTOBsonId(eid),
 		},
 	}
+	log.Debug("迁移语句:", q)
 	ul.FlashModel = "glm-4-flash"
 	pool_mgo := make(chan bool, ul.Reading)
 	wg_mgo := &sync.WaitGroup{}
@@ -47,7 +47,7 @@ func MovingFullInfo(sid string, eid string) {
 			log.Debug("cur ai index ", total, tmp["_id"])
 		}
 		tmpid := ul.BsonTOStringId(tmp["_id"])
-		if tmpid == "" || dict[tmpid] != nil { //已存在数据···不迁移
+		if tmpid == "" {
 			tmp = make(map[string]interface{})
 			continue
 		}
@@ -59,7 +59,7 @@ func MovingFullInfo(sid string, eid string) {
 				wg_mgo.Done()
 			}()
 			infoformat := qu.IntAll(tmp["infoformat"])
-			if infoformat == 1 { //正常数据处理···
+			if infoformat == 1 || infoformat == 0 { //正常数据处理···
 				data := ResolveInfo(tmp)
 				if len(data) > 0 {
 					tmp["ai_zhipu"] = data
@@ -76,6 +76,8 @@ func MovingFullInfo(sid string, eid string) {
 				}
 			}
 			//迁移数据···
+			delete(tmp, "detail")
+			delete(tmp, "contenthtml")
 			ul.SourceMgo.Save(ul.Bid_Name, tmp)
 		}(tmp)
 		tmp = make(map[string]interface{})
@@ -83,3 +85,38 @@ func MovingFullInfo(sid string, eid string) {
 	wg_mgo.Wait()
 	log.Debug("ai is over ...", total)
 }
+
+func MovingFullInfoCopy(sid string, eid string) { // MovingFullInfoCopy iterates ul.Bid_Name records with _id >= sid and passes each one, minus "detail" and "contenthtml", to ul.SourceMgo.Save under "bidding_copy"; eid is not used.
+	q := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"$gte": ul.StringTOBsonId(sid),
+		},
+	}
+
+	log.Debug("迁移语句:", q)
+	pool_mgo := make(chan bool, ul.Reading) // Buffered channel used as a limiter on concurrently running save goroutines.
+	wg_mgo := &sync.WaitGroup{}
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess)
+	total := 0
+	it := sess.DB(ul.SourceMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("_id").Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 { // Progress log every 1000 records.
+			log.Debug("cur move index ", total, tmp["_id"])
+		}
+		pool_mgo <- true // Blocks once the channel buffer is full, until a running goroutine releases its slot.
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			delete(tmp, "detail")
+			delete(tmp, "contenthtml")
+			ul.SourceMgo.Save("bidding_copy", tmp)
+		}(tmp)
+		tmp = make(map[string]interface{}) // Fresh map for the next it.Next call; the previous one was handed to the goroutine above.
+	}
+	wg_mgo.Wait()
+	log.Debug("move is over ...", total)
+}

+ 23 - 2
extract/test.go

@@ -23,13 +23,33 @@ func TestSingleFieldInfo(name string, tmpid string) {
 	}
 	data := ResolveInfo(tmp)
 	//最终结果...
-	for k, v := range data {
+	if data != nil {
+
+	}
+	log.Debug("耗时···", time.Now().Unix()-now)
+}
+
+// TestSinglePurchasingInfo looks up one record by tmpid via ul.BidMgo.FindById(name, tmpid), runs getPurList on its body text, and logs the purchasing-list entries plus the elapsed seconds.
+func TestSinglePurchasingInfo(name string, tmpid string) {
+	now := time.Now().Unix()
+	tmp := ul.BidMgo.FindById(name, tmpid)
+	if len(tmp) == 0 || tmp == nil {
+		log.Debug("未查询到数据...", tmpid)
+		return
+	}
+	detail := getDetailText(tmp, tmpid) // Resolve the body text for this record.
+	p_list := getPurList(tmp, detail, map[string]interface{}{})
+	log.Debug(p_list)
+
+	// Final result: log each entry with its index.
+	for k, v := range p_list {
 		log.Debug(k, "~", v)
 	}
+
 	log.Debug("耗时···", time.Now().Unix()-now)
 }
 
-// 验证单条数据···
+// 验证单条分包数据···
 func TestSinglePackageInfo(name string, tmpid string) {
 	now := time.Now().Unix()
 	tmp := ul.BidMgo.FindById(name, tmpid)
@@ -48,6 +68,7 @@ func TestSinglePackageInfo(name string, tmpid string) {
 	for k, v := range pkg {
 		log.Debug(k, "~", v)
 	}
+
 	log.Debug("耗时···", time.Now().Unix()-now)
 }
 

+ 8 - 0
go.mod

@@ -12,6 +12,8 @@ require (
 require (
 	github.com/JohannesKaufmann/html-to-markdown v1.6.0 // indirect
 	github.com/PuerkitoBio/goquery v1.9.2 // indirect
+	github.com/aliyun/alibaba-cloud-sdk-go v1.63.39 // indirect
+	github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible // indirect
 	github.com/andybalholm/cascadia v1.3.2 // indirect
 	github.com/aymerick/douceur v0.2.0 // indirect
 	github.com/baidubce/bce-sdk-go v0.9.164 // indirect
@@ -29,7 +31,9 @@ require (
 	github.com/gorilla/css v1.0.1 // indirect
 	github.com/hashicorp/hcl v1.0.0 // indirect
 	github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 // indirect
+	github.com/jmespath/go-jmespath v0.4.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/k3a/html2text v1.2.1 // indirect
 	github.com/klauspost/compress v1.17.0 // indirect
 	github.com/kr/pretty v0.3.1 // indirect
@@ -39,9 +43,12 @@ require (
 	github.com/mattn/go-runewidth v0.0.9 // indirect
 	github.com/microcosm-cc/bluemonday v1.0.27 // indirect
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/montanaflynn/stats v0.7.1 // indirect
 	github.com/olekukonko/tablewriter v0.0.5 // indirect
 	github.com/olivere/elastic/v7 v7.0.32 // indirect
+	github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect
 	github.com/pelletier/go-toml/v2 v2.1.0 // indirect
 	github.com/peterbourgon/diskv/v3 v3.0.1 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
@@ -74,6 +81,7 @@ require (
 	golang.org/x/sync v0.7.0 // indirect
 	golang.org/x/sys v0.21.0 // indirect
 	golang.org/x/text v0.16.0 // indirect
+	golang.org/x/time v0.5.0 // indirect
 	gopkg.in/ini.v1 v1.67.0 // indirect
 	gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect

+ 51 - 0
go.sum

@@ -1,12 +1,20 @@
 cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/BurntSushi/toml v1.2.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
+github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
+github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo=
 github.com/JohannesKaufmann/html-to-markdown v1.6.0 h1:04VXMiE50YYfCfLboJCLcgqF5x+rHJnb1ssNmqpLH/k=
 github.com/JohannesKaufmann/html-to-markdown v1.6.0/go.mod h1:NUI78lGg/a7vpEJTz/0uOcYMaibytE4BUOQS8k78yPQ=
 github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
 github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
 github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE=
 github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk=
+github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
+github.com/aliyun/alibaba-cloud-sdk-go v1.63.39 h1:zlenrBGDiSEu7YnpWiAPscKNolgIo9Z6jvM5pcWAEL4=
+github.com/aliyun/alibaba-cloud-sdk-go v1.63.39/go.mod h1:SOSDHfe1kX91v3W5QiBsWSLqeLxImobbMX1mxrFHsVQ=
+github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible h1:8psS8a+wKfiLt1iVDX79F7Y6wUM49Lcha2FMXt4UM8g=
+github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible/go.mod h1:T/Aws4fEfogEE9v+HPhhw+CntffsBHJ8nXQCwKr0/g8=
 github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
 github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
 github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
@@ -37,14 +45,18 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF
 github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
 github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
 github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
 github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
 github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
 github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
+github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
 github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
+github.com/goji/httpauth v0.0.0-20160601135302-2da839ab0f4d/go.mod h1:nnjvkQ9ptGaCkuDUx6wNykzzlUixGxvkme+H/lnzb+A=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
 github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
@@ -75,11 +87,13 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
 github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE=
 github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -90,11 +104,15 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
 github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
 github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 h1:iCHtR9CQyktQ5+f3dMVZfwD2KWJUgm7M0gdL9NGr8KA=
 github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
+github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
 github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
 github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
 github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
 github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
 github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
+github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
 github.com/k3a/html2text v1.2.1 h1:nvnKgBvBR/myqrwfLuiqecUtaK1lB9hGziIJKatNFVY=
 github.com/k3a/html2text v1.2.1/go.mod h1:ieEXykM67iT8lTvEWBh6fhpH4B23kB9OMKPdIBmgUqA=
 github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
@@ -117,6 +135,11 @@ github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwX
 github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
 github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
 github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
 github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
 github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
 github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
@@ -128,6 +151,8 @@ github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6
 github.com/olivere/elastic/v7 v7.0.32 h1:R7CXvbu8Eq+WlsLgxmKVKPox0oOwAE/2T9Si5BnvK6E=
 github.com/olivere/elastic/v7 v7.0.32/go.mod h1:c7PVmLe3Fxq77PIfY/bZmxY/TAamBhCzZ8xDOE09a9k=
 github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
+github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b h1:FfH+VrHHk6Lxt9HdVS0PXzSXFyS2NbZKXv33FYPol0A=
+github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b/go.mod h1:AC62GU6hc0BrNm+9RK9VSiwa/EUe1bkIeFORAMcHvJU=
 github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4=
 github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc=
 github.com/peterbourgon/diskv/v3 v3.0.1 h1:x06SQA46+PKIUftmEujdwSEpIx8kR+M9eLYsUxeYveU=
@@ -192,6 +217,8 @@ github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSW
 github.com/tealeg/xlsx/v3 v3.3.7 h1:clTKeQJORkctZNvQp+94juO19nj1b12snyX6PFT8iZY=
 github.com/tealeg/xlsx/v3 v3.3.7/go.mod h1:KV4FTFtvGy0TBlOivJLZu/YNZk6e0Qtk7eOSglWksuA=
 github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
+github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk=
+github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U=
 github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
 github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
 github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g=
@@ -221,6 +248,7 @@ go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI=
 go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ=
 go.uber.org/zap v1.22.0/go.mod h1:H4siCOZOrAolnUPJEkfaSjDqyP+BDS0DdDWzwcgt3+U=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
@@ -231,13 +259,23 @@ golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+
 golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
 golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
 golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
+golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY=
 golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3 h1:/RIbNt/Zr7rVhIkQhooTxCxFcdWLGIKnZA4IXNFSrvo=
 golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08=
+golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
+golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
+golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
 golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
 golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
+golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
 golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
 golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
@@ -276,6 +314,7 @@ golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
 golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -318,12 +357,17 @@ golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
 golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
+golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
+golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
+golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
 golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
 golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
@@ -332,6 +376,10 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
+gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
+gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
@@ -355,6 +403,7 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b h1:QRR6H1YWRnHb4Y/HeNFCTJLFVxaq6wH4YuVdsUOr75U=
 gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
@@ -367,9 +416,11 @@ gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
 honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
 jygit.jydev.jianyu360.cn/data_processing/common_utils v0.0.0-20240202055658-e2ef72e18b40 h1:xTeRmpFgwOdu+NbWg/YntX3MnQpttm7jj33C1+JdBTk=
 jygit.jydev.jianyu360.cn/data_processing/common_utils v0.0.0-20240202055658-e2ef72e18b40/go.mod h1:1Rp0ioZBhikjXHYYXmnzL6RNfvTDM/2XvRB+vuPLurI=
+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=

+ 46 - 52
main.go

@@ -2,79 +2,73 @@ package main
 
 import (
 	"data_ai/extract"
-	"data_ai/tool"
 	"data_ai/udp"
 	"data_ai/ul"
 	log "github.com/donnie4w/go-logger/logger"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"sync"
+	"unicode/utf8"
 )
 
 func init() {
-	//增量设置全部false
-	ul.IsTool = true
-	ul.IsFull = false
-	ul.IsLocal = true
+	ul.IsTool = false //是否工具
+	ul.IsLocal = true //是否本地
+	ul.IsFull = false //是否全量
 	if ul.IsTool {
-		log.Debug("工具版本···")
 		ul.InitToolVar()
 	} else {
 		ul.InitGlobalVar()
 		if !ul.IsFull {
-			log.Debug("正常版本···")
 			udp.InitProcessVar()
-		} else {
-			log.Debug("全量版本···")
 		}
 	}
+	ul.InitOss(ul.IsLocal)
 }
 
 func main() {
 	if ul.IsTool {
-		tool.StartToolInfo()
+		//tool.StartToolUpdateInfo()
 		return
-	} else {
-		if !ul.IsFull {
-			extract.TestSingleFieldInfo("bidding", "670cc446b25c3e1deb887d52")
-			return
-		}
 	}
+
+	extract.TestSinglePurchasingInfo("bidding", "664a2a4066cf0db42a39fb02")
+
 	lock := make(chan bool)
 	<-lock
 }
 
-//func test() {
-//	log.Debug("开始大模型验证内存数据···")
-//	q := map[string]interface{}{}
-//	pool_mgo := make(chan bool, ul.Reading)
-//	wg_mgo := &sync.WaitGroup{}
-//	sess := ul.SourceMgo.GetMgoConn()
-//	defer ul.SourceMgo.DestoryMongoConn(sess)
-//	total, isok := 0, 0
-//	it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Sort("_id").Iter()
-//	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-//		if total%100 == 0 {
-//			log.Debug("cur index ", total)
-//		}
-//		isok++
-//		pool_mgo <- true
-//		wg_mgo.Add(1)
-//		go func(tmp map[string]interface{}) {
-//			defer func() {
-//				<-pool_mgo
-//				wg_mgo.Done()
-//			}()
-//			detail := qu.ObjToString(tmp["detail"])
-//			filetext := qu.ObjToString(tmp["filetext"]) //此处为附件信息···
-//			if utf8.RuneCountInString(detail) < 100 {
-//				detail = filetext
-//			}
-//			detail = ul.HttpConvertToMarkdown(detail)
-//			//最终结果...
-//			if detail != "" {
-//
-//			}
-//		}(tmp)
-//		tmp = make(map[string]interface{})
-//	}
-//	wg_mgo.Wait()
-//	log.Debug("ai is over ...")
-//}
+func test() { // test iterates every ul.Bid_Name record and, for those whose "detail" is under 100 runes, runs extract.ResolveInfo and saves non-empty results under "zktest_1031".
+	log.Debug("开始大模型验证内存数据···")
+	q := map[string]interface{}{}
+	pool_mgo := make(chan bool, 500) // Buffered channel used as a limiter on concurrently running goroutines (capacity 500).
+	wg_mgo := &sync.WaitGroup{}
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess)
+	total, isok := 0, 0 // NOTE(review): isok is incremented below but never read.
+	it := sess.DB(ul.SourceMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("-_id").Iter() // Sort key "-_id": presumably descending _id order — confirm against the driver.
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 {
+			log.Debug("cur index ", total)
+		}
+		isok++
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			detail := qu.ObjToString(tmp["detail"])
+			if utf8.RuneCountInString(detail) < 100 { // Only records with a body under 100 runes are processed here.
+				data := extract.ResolveInfo(tmp)
+				if len(data) > 0 {
+					tmp["ai_zhipu"] = data
+					ul.SourceMgo.Save("zktest_1031", tmp)
+				}
+			}
+		}(tmp)
+		tmp = make(map[string]interface{}) // Fresh map for the next it.Next call; the previous one was handed to the goroutine above.
+	}
+	wg_mgo.Wait()
+	log.Debug("ai is over ...")
+}

+ 10 - 1
prompt/prompt_class.go

@@ -30,7 +30,16 @@ JSON格式:
 结果JSON:`
 
 // 获取分类信息...
-func AcquireClassInfo(detail string, title string) (string, string) {
+func AcquireClassInfo(detail string, title string, top string) (string, string) {
+	if top == "拟建" {
+		return "拟建", "拟建"
+	} else if top == "产权" {
+		return "产权", "产权"
+	} else if top == "采购意向" {
+		return "采购意向", "采购意向"
+	} else {
+
+	}
 	top_content := PromptToptypeFieldText(detail, title)
 	top_zp, ok := ai.PostZhiPuClassInfo(top_content)
 	if !ok {

+ 5 - 1
prompt/prompt_field.go

@@ -7,6 +7,10 @@ import (
 	"unicode/utf8"
 )
 
+var pmt_field_prefix = `
+
+`
+
 var pmt_field = `
 你是一个文本处理模型,专门用于分析文本提取信息,你具备以下能力:
 1、实体识别抽取
@@ -40,7 +44,7 @@ var pmt_field = `
 正文内容:` + "\n" + `%s` + "\n结果JSON:"
 
 // 获取外围抽取字段
-func AcquireExtractFieldInfo(detail string) map[string]interface{} {
+func AcquireExtractFieldInfo(detail string, shorText bool) map[string]interface{} {
 	content := PromptFieldText(detail)
 	zp := ai.PostZhiPuInfo(content)
 	return zp

+ 8 - 11
tool/tool.go

@@ -94,20 +94,17 @@ func StartToolUpdateInfo() {
 				tmp["ai_zhipu"] = ai_zhipu
 				update_info := make(map[string]interface{}, 0)
 				is_unset := ul.ChooseCheckDataAI(tmp, &update_info)
-
 				update_info["ai_zhipu"] = ai_zhipu
 				//清洗与记录
-				if len(update_info) > 0 {
-					//$set
-					//ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
-					//	"$set": update_info,
-					//})
+				if len(update_info) > 0 { //$set
+					ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
+						"$set": update_info,
+					})
 				}
-				if is_unset {
-					//"$unset"
-					//ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
-					//	"$unset": ul.Unset_Check,
-					//})
+				if is_unset { //"$unset"
+					ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
+						"$unset": ul.Unset_Check,
+					})
 				}
 			}
 		}(tmp)

+ 0 - 1
ul/global.go

@@ -19,7 +19,6 @@ func ChooseCheckDataAI(tmp map[string]interface{}, update_info *map[string]inter
 	s_toptype, s_subtype := ChooseTheBestClassField(ai_zhipu, tmp, update_info, &ext_ai_record)
 	//选取字段
 	ChooseTheBestCoreField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, &ext_ai_record)
-
 	//选取分包
 	ChooseTheBestPackageField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, &ext_ai_record)
 

+ 49 - 4
ul/init.go

@@ -9,6 +9,7 @@ import (
 	"io/ioutil"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"net/http"
+	"time"
 )
 
 func InitGlobalVar() {
@@ -264,17 +265,25 @@ func PostMarkDownText(html string) string {
 		url = "http://172.17.0.11:8888/md"
 	}
 	// 创建请求数据
-	data := map[string]interface{}{"html": html}
-	jsonData, err := json.Marshal(data)
+	jsonData, err := json.Marshal(map[string]interface{}{"html": html})
+	if err != nil {
+		return ""
+	}
+	// 创建HTTP请求
+	req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
 	if err != nil {
 		return ""
 	}
-	// 发送POST请求
-	resp, err := http.Post(url, "application/json", bytes.NewBuffer(jsonData))
+	// 设置请求头
+	req.Header.Set("Content-Type", "application/json")
+	client := &http.Client{}
+	client.Timeout = 60 * time.Second
+	resp, err := client.Do(req)
 	if err != nil {
 		return ""
 	}
 	defer resp.Body.Close()
+
 	// 读取响应体
 	body, err := ioutil.ReadAll(resp.Body)
 	if err != nil {
@@ -287,3 +296,39 @@ func PostMarkDownText(html string) string {
 	}
 	return qu.ObjToString(info["result"])
 }
+
+// PostPurchasingList sends data as a JSON body in a POST request to the
+// purchasing-list service and returns the JSON object decoded from the
+// response.
+//
+// Every failure — marshalling, request construction, transport error or
+// timeout, a non-2xx status, an unreadable body, or a body that is not a JSON
+// object — yields an empty, non-nil map, so callers can read and write the
+// result without a nil check.
+func PostPurchasingList(data map[string]interface{}) map[string]interface{} {
+	// The same address serves both the local and the deployed setup.
+	url := "http://172.17.0.11:18787/"
+	jsonData, err := json.Marshal(data)
+	if err != nil {
+		return map[string]interface{}{}
+	}
+	// Build the HTTP request.
+	req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return map[string]interface{}{}
+	}
+	req.Header.Set("Content-Type", "application/json")
+	// Bound the whole exchange so a stalled service cannot block the caller forever.
+	client := &http.Client{Timeout: 180 * time.Second}
+	resp, err := client.Do(req)
+	if err != nil {
+		return map[string]interface{}{}
+	}
+	defer resp.Body.Close()
+	// An error status may still carry a JSON body; do not hand it back as data.
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return map[string]interface{}{}
+	}
+	// Read and decode the response body.
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return map[string]interface{}{}
+	}
+	info := map[string]interface{}{}
+	if err = json.Unmarshal(body, &info); err != nil {
+		return map[string]interface{}{}
+	}
+	// A literal JSON null resets the map to nil; keep the non-nil contract.
+	if info == nil {
+		return map[string]interface{}{}
+	}
+	return info
+}

+ 1 - 2
ul/md.go

@@ -9,7 +9,6 @@ var md_reg1 = regexp.MustCompile("([ *]+)")
 var md_reg2 = regexp.MustCompile("([\\n]+)")
 var md_reg3 = regexp.MustCompile("([-]{3,})")
 
-// 有缺陷---内存增大···不知原因
 func HttpConvertToMarkdown(html string) string {
 	if text := PostMarkDownText(html); text != "" {
 		return text
@@ -17,7 +16,7 @@ func HttpConvertToMarkdown(html string) string {
 	return html
 }
 
-// 有问题,内存泄漏
+// 有缺陷---内存增大···不知原因
 func ConvertToMarkdown(html string) string {
 	markdown, err := html2text.FromString(html, html2text.Options{PrettyTables: true})
 	if err != nil {

+ 72 - 0
ul/oss.go

@@ -0,0 +1,72 @@
+package ul
+
+import (
+	"bytes"
+	"fmt"
+	"github.com/aliyun/aliyun-oss-go-sdk/oss"
+	"io"
+	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"log"
+	"os"
+)
+
+// OSS connection settings and the package-level client assigned by InitOss.
+// ossEndpoint is the default endpoint; InitOss overwrites it with
+// ossEndpointTest when called with isTest set.
+//
+// NOTE(review): the access key ID and secret below are hard-coded in source
+// and therefore live in version-control history. They should be rotated and
+// loaded from the environment or a secrets store instead; left unchanged here
+// because replacing them would alter runtime behaviour.
+var (
+	ossEndpoint        = "oss-cn-beijing-internal.aliyuncs.com" //http://oss-cn-beijing.aliyuncs.com"
+	ossEndpointTest    = "oss-cn-beijing.aliyuncs.com"
+	ossAccessKeyId     = "LTAI4G5x9aoZx8dDamQ7vfZi"
+	ossAccessKeySecret = "Bk98FsbPYXcJe72n1bG3Ssf73acuNh"
+	ossBucketName      = "jy-datadetail"
+	ossclient          *oss.Client
+)
+
+// InitOss builds the package-level OSS client from the configured endpoint
+// and credentials. When isTest is true, ossEndpoint is first overwritten with
+// ossEndpointTest. If the client cannot be created, the error is logged and
+// the process exits with status -1.
+func InitOss(isTest bool) {
+	if isTest {
+		// Test server: switch to the alternate Aliyun endpoint.
+		ossEndpoint = ossEndpointTest
+	}
+	var err error
+	if ossclient, err = oss.New(ossEndpoint, ossAccessKeyId, ossAccessKeySecret); err != nil {
+		log.Println("Error:", err)
+		os.Exit(-1)
+	}
+}
+
+// OssGetObject downloads objectName from the ossBucketName bucket and returns
+// its content as a string.
+//
+// It returns "" when the bucket handle cannot be obtained, the download
+// fails, or the body cannot be read; each such error is printed to stdout.
+func OssGetObject(objectName string) string {
+	// Catch has to run as a deferred call: presumably it is a recover-based
+	// panic handler (TODO confirm), and recover only takes effect inside a
+	// deferred function. OssPutObject already defers it the same way.
+	defer util.Catch()
+	// Obtain the bucket handle.
+	bucket, err := ossclient.Bucket(ossBucketName)
+	if err != nil {
+		fmt.Println("Error:", err)
+		return ""
+	}
+	// Download the object as a stream.
+	body, err := bucket.GetObject(objectName)
+	if err != nil {
+		fmt.Println("Error:", err)
+		return ""
+	}
+	defer body.Close()
+	data, err := io.ReadAll(body)
+	if err != nil {
+		fmt.Println("Error:", err)
+		return ""
+	}
+	return string(data)
+}
+
+// OssPutObject uploads text as the object objectKey in the bucket named
+// bucketName. It returns (true, nil) on success; if the bucket handle cannot
+// be obtained or the upload fails, the error is logged and (false, err) is
+// returned.
+func OssPutObject(bucketName, objectKey, text string) (bool, error) {
+	defer util.Catch()
+	// Obtain the bucket handle, then upload the text as a byte stream.
+	target, err := ossclient.Bucket(bucketName)
+	if err == nil {
+		payload := bytes.NewReader([]byte(text))
+		err = target.PutObject(objectKey, payload)
+	}
+	if err != nil {
+		log.Println("Error:", err)
+		return false, err
+	}
+	return true, nil
+}