Explorar o código

平替方法
html2text.FromString(html, html2text.Options{PrettyTables: true})
python部署服务

zhengkun hai 9 meses
pai
achega
a891e6add8
Modificáronse 14 ficheiros con 294 adicións e 188 borrados
  1. 2 2
      config.json
  2. 1 1
      extract/extract.go
  3. 1 1
      extract/full.go
  4. 1 1
      extract/test.go
  5. 11 4
      go.mod
  6. 44 0
      go.sum
  7. 46 2
      main.go
  8. 0 33
      mainT.go
  9. 4 4
      tool.json
  10. 5 5
      tool/tool.go
  11. 15 15
      ul/attr.go
  12. 121 119
      ul/global.go
  13. 34 0
      ul/init.go
  14. 9 1
      ul/md.go

+ 2 - 2
config.json

@@ -1,7 +1,7 @@
 {
   "udpport": ":1791",
   "bid_name": "bidding",
-  "ext_name": "zzzzzkkk",
+  "ext_name": "zktest_package_1011",
   "reading": 500,
   "udp_max": 10000,
   "smail": {
@@ -12,7 +12,7 @@
     "local": true,
     "l_addr": "127.0.0.1:12005",
     "addr": "172.17.189.140:27080,172.17.189.141:27081",
-    "dbname" : "qfw_ai",
+    "dbname" : "qfw",
     "username": "zhengkun",
     "password": "zk@123123"
   },

+ 1 - 1
extract/extract.go

@@ -85,7 +85,7 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	if utf8.RuneCountInString(detail) < 100 {
 		return f_data
 	}
-	detail = ul.ConvertToMarkdown(detail)
+	detail = ul.HttpConvertToMarkdown(detail)
 	//获取外围字段数据
 	f_info := prompt.AcquireExtractFieldInfo(detail)
 	//获取分包信息

+ 1 - 1
extract/full.go

@@ -64,7 +64,7 @@ func MovingFullInfo(sid string, eid string) {
 				if len(data) > 0 {
 					tmp["ai_zhipu"] = data
 					update_check := make(map[string]interface{}, 0)
-					is_unset := ul.ChooseCheckDataAI(tmp, update_check)
+					is_unset := ul.ChooseCheckDataAI(tmp, &update_check)
 					for k, v := range update_check {
 						tmp[k] = v //覆盖值
 					}

+ 1 - 1
extract/test.go

@@ -41,7 +41,7 @@ func TestSinglePackageInfo(name string, tmpid string) {
 	if utf8.RuneCountInString(detail) < 100 {
 		detail = filetext
 	}
-	detail = ul.ConvertToMarkdown(detail)
+	detail = ul.HttpConvertToMarkdown(detail)
 	pkg := prompt.AcquireNewMultiplePackageInfo(detail)
 	//最终结果...
 	for k, v := range pkg {

+ 11 - 4
go.mod

@@ -10,8 +10,10 @@ require (
 )
 
 require (
+	github.com/JohannesKaufmann/html-to-markdown v1.6.0 // indirect
 	github.com/PuerkitoBio/goquery v1.9.2 // indirect
 	github.com/andybalholm/cascadia v1.3.2 // indirect
+	github.com/aymerick/douceur v0.2.0 // indirect
 	github.com/baidubce/bce-sdk-go v0.9.164 // indirect
 	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
 	github.com/dchest/captcha v1.0.0 // indirect
@@ -19,18 +21,23 @@ require (
 	github.com/frankban/quicktest v1.14.6 // indirect
 	github.com/fsnotify/fsnotify v1.7.0 // indirect
 	github.com/golang/snappy v0.0.4 // indirect
+	github.com/gomarkdown/markdown v0.0.0-20230922112808-5421fefb8386 // indirect
+	github.com/gomarkdown/mdtohtml v0.0.0-20240124153210-d773061d1585 // indirect
 	github.com/google/btree v1.0.0 // indirect
 	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/uuid v1.6.0 // indirect
+	github.com/gorilla/css v1.0.1 // indirect
 	github.com/hashicorp/hcl v1.0.0 // indirect
 	github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
+	github.com/k3a/html2text v1.2.1 // indirect
 	github.com/klauspost/compress v1.17.0 // indirect
 	github.com/kr/pretty v0.3.1 // indirect
 	github.com/kr/text v0.2.0 // indirect
 	github.com/magiconair/properties v1.8.7 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
 	github.com/mattn/go-runewidth v0.0.9 // indirect
+	github.com/microcosm-cc/bluemonday v1.0.27 // indirect
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
 	github.com/montanaflynn/stats v0.7.1 // indirect
 	github.com/olekukonko/tablewriter v0.0.5 // indirect
@@ -61,12 +68,12 @@ require (
 	github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect
 	go.uber.org/atomic v1.9.0 // indirect
 	go.uber.org/multierr v1.9.0 // indirect
-	golang.org/x/crypto v0.22.0 // indirect
+	golang.org/x/crypto v0.24.0 // indirect
 	golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3 // indirect
-	golang.org/x/net v0.24.0 // indirect
+	golang.org/x/net v0.26.0 // indirect
 	golang.org/x/sync v0.7.0 // indirect
-	golang.org/x/sys v0.19.0 // indirect
-	golang.org/x/text v0.14.0 // indirect
+	golang.org/x/sys v0.21.0 // indirect
+	golang.org/x/text v0.16.0 // indirect
 	gopkg.in/ini.v1 v1.67.0 // indirect
 	gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect

+ 44 - 0
go.sum

@@ -1,6 +1,8 @@
 cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/BurntSushi/toml v1.2.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
+github.com/JohannesKaufmann/html-to-markdown v1.6.0 h1:04VXMiE50YYfCfLboJCLcgqF5x+rHJnb1ssNmqpLH/k=
+github.com/JohannesKaufmann/html-to-markdown v1.6.0/go.mod h1:NUI78lGg/a7vpEJTz/0uOcYMaibytE4BUOQS8k78yPQ=
 github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
 github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
 github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE=
@@ -10,6 +12,8 @@ github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEq
 github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
 github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
 github.com/aws/aws-sdk-go v1.43.21/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=
+github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
+github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
 github.com/baidubce/bce-qianfan-sdk/go/qianfan v0.0.9 h1:24e498hi7J0aHsVLe3NMMhMQcDHXku9xStKL0ytQ+EM=
 github.com/baidubce/bce-qianfan-sdk/go/qianfan v0.0.9/go.mod h1:f/kIWWvAHAcU7bzgkfN30SkpN0I4lLvsJkljVK6v5YY=
 github.com/baidubce/bce-sdk-go v0.9.164 h1:7gswLMsdQyarovMKuv3i6wxFQ3BQgvc5CmyGXb/D/xA=
@@ -57,6 +61,10 @@ github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw
 github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
 github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/gomarkdown/markdown v0.0.0-20230922112808-5421fefb8386 h1:EcQR3gusLHN46TAD+G+EbaaqJArt5vHhNpXAa12PQf4=
+github.com/gomarkdown/markdown v0.0.0-20230922112808-5421fefb8386/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
+github.com/gomarkdown/mdtohtml v0.0.0-20240124153210-d773061d1585 h1:gfB9CukKWjBI83xByTJwpOkSwFD+Ev+2m3U332KZCLo=
+github.com/gomarkdown/mdtohtml v0.0.0-20240124153210-d773061d1585/go.mod h1:6grYm5/uY15CwgBBqwA3+o/cAzaxssckznJ0B35ouBY=
 github.com/gomodule/redigo v1.8.9/go.mod h1:7ArFNvsTjH8GMMzB4uy1snslv2BwmginuMs06a1uzZE=
 github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
@@ -75,6 +83,9 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
 github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
+github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
+github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
 github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
 github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
 github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 h1:iCHtR9CQyktQ5+f3dMVZfwD2KWJUgm7M0gdL9NGr8KA=
@@ -83,6 +94,9 @@ github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHW
 github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
 github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
 github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
+github.com/k3a/html2text v1.2.1 h1:nvnKgBvBR/myqrwfLuiqecUtaK1lB9hGziIJKatNFVY=
+github.com/k3a/html2text v1.2.1/go.mod h1:ieEXykM67iT8lTvEWBh6fhpH4B23kB9OMKPdIBmgUqA=
 github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
 github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM=
 github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
@@ -99,6 +113,8 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0
 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
 github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
 github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
+github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
+github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
 github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
 github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
 github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
@@ -136,12 +152,17 @@ github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6ke
 github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4=
 github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE=
 github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
+github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI=
+github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
+github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I=
 github.com/shabbyrobe/xmlwriter v0.0.0-20200208144257-9fca06d00ffa h1:2cO3RojjYl3hVTbEvJVqrMaFmORhL6O06qdW42toftk=
 github.com/shabbyrobe/xmlwriter v0.0.0-20200208144257-9fca06d00ffa/go.mod h1:Yjr3bdWaVWyME1kha7X0jsz3k2DgXNa1Pj3XGyUAbx8=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
+github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
 github.com/smartystreets/assertions v1.1.1/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
 github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM=
+github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
 github.com/smartystreets/gunit v1.4.2/go.mod h1:ZjM1ozSIMJlAz/ay4SG8PeKF00ckUp+zMHZXV9/bvak=
 github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
 github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0=
@@ -159,6 +180,7 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
@@ -182,6 +204,7 @@ github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM
 github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
 github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
 github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+github.com/yuin/goldmark v1.7.1/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E=
 go.mongodb.org/mongo-driver v1.10.1 h1:NujsPveKwHaWuKUer/ceo9DzEe7HIj1SlJ6uvXZG0S4=
 go.mongodb.org/mongo-driver v1.10.1/go.mod h1:z4XpeoU6w+9Vht+jAFyLgVrD+jGSQQe0+CBWFHNiHt8=
 go.mongodb.org/mongo-driver v1.16.0 h1:tpRsfBJMROVHKpdGyc1BBEzzjDUWjItxbVSZ8Ls4BQ4=
@@ -202,8 +225,12 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
+golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
 golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30=
 golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M=
+golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
+golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
+golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3 h1:/RIbNt/Zr7rVhIkQhooTxCxFcdWLGIKnZA4IXNFSrvo=
 golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08=
@@ -229,11 +256,15 @@ golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su
 golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
 golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
 golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
+golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
 golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
 golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
 golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
 golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w=
 golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
+golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
+golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -258,14 +289,22 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
 golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
 golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
+golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
 golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
+golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
+golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
+golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
+golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
@@ -276,10 +315,14 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
 golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
+golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
 golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
@@ -319,6 +362,7 @@ gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
 gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 h1:VpOs+IwYnYBaFnrNAeB8UUWtL3vEUnzSCL1nVjPhqrw=
 gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
 gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=

+ 46 - 2
main.go

@@ -1,15 +1,20 @@
 package main
 
 import (
+	"data_ai/extract"
 	"data_ai/tool"
 	"data_ai/udp"
 	"data_ai/ul"
 	log "github.com/donnie4w/go-logger/logger"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"sync"
+	"unicode/utf8"
 )
 
 func init() {
-	ul.IsTool = true
+	ul.IsTool = false
 	ul.IsFull = false
+	ul.IsLocal = true
 	if ul.IsTool {
 		log.Debug("工具版本···")
 		ul.InitToolVar()
@@ -30,9 +35,48 @@ func main() {
 		return
 	} else {
 		if !ul.IsFull {
-			//extract.TestSinglePackageInfo("bidding", "66e3874db25c3e1debf23dbb")
+			//extract.TestSinglePackageInfo("bidding", "66e38634b25c3e1debf23a00")
+			extract.TestSinglePackageInfo("bidding", "67050280b25c3e1deb667d38")
 		}
 	}
 	lock := make(chan bool)
 	<-lock
 }
+// test runs ul.HttpConvertToMarkdown over every document of the ul.Ext_Name collection, sorted by _id, using a channel of capacity ul.Reading to limit concurrent goroutines; the converted text is not stored anywhere.
+func test() {
+	log.Debug("开始大模型验证内存数据···")
+	q := map[string]interface{}{}
+	pool_mgo := make(chan bool, ul.Reading) // used as a semaphore: one slot per running goroutine
+	wg_mgo := &sync.WaitGroup{}
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess)
+	total, isok := 0, 0 // isok is incremented per document but never read in this function
+	it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Sort("_id").Iter() // NOTE(review): the iterator is never closed and no iteration error is checked in this function
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%100 == 0 {
+			log.Debug("cur index ", total)
+		}
+		isok++
+		pool_mgo <- true // take a slot; released by the goroutine's deferred receive
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			detail := qu.ObjToString(tmp["detail"])
+			filetext := qu.ObjToString(tmp["filetext"]) // attachment text
+			if utf8.RuneCountInString(detail) < 100 { // detail shorter than 100 runes: use the attachment text instead
+				detail = filetext
+			}
+			detail = ul.HttpConvertToMarkdown(detail)
+			// Final result: the branch below is empty, so the converted text is discarded.
+			if detail != "" {
+
+			}
+		}(tmp)
+		tmp = make(map[string]interface{}) // fresh map for the next document; the previous one was handed to the goroutine
+	}
+	wg_mgo.Wait()
+	log.Debug("ai is over ...")
+}

+ 0 - 33
mainT.go

@@ -1,33 +0,0 @@
-package main
-
-import (
-	"data_ai/prompt"
-	"data_ai/ul"
-	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
-	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
-)
-// mainT looks up one hard-coded _id in collection "ai_41411_100", classifies each returned document with prompt.AcquireClassInfo, corrects the result with prompt.CheckClassByOtherFileds, and writes it to the "top"/"sub" fields.
-func mainT() {
-	coll := "ai_41411_100"
-	q := map[string]interface{}{
-		"_id": mongodb.StringTOBsonId("669e907166cf0db42a653c52"),
-	}
-	sort := map[string]interface{}{
-		"num": 1,
-	}
-	list, _ := ul.SourceMgo.Find(coll, q, sort, nil) // the second return value is discarded
-	for _, l := range list {
-		detail := qu.ObjToString(l["detail"])
-		title := qu.ObjToString(l["title"])
-		s_toptype, s_subtype := prompt.AcquireClassInfo(detail, title)
-		qu.Debug("ai:", s_toptype, s_subtype)
-		s_toptype, s_subtype = prompt.CheckClassByOtherFileds(s_toptype, s_subtype, l) // correct the classification using the extracted fields
-		qu.Debug("check:", l["_id"], s_toptype, s_subtype)
-		ul.SourceMgo.UpdateById(coll, mongodb.BsonIdToSId(l["_id"]), map[string]interface{}{
-			"$set": map[string]interface{}{
-				"top": s_toptype,
-				"sub": s_subtype,
-			},
-		})
-	}
-}

+ 4 - 4
tool.json

@@ -1,11 +1,11 @@
 {
-  "reading": 500,
-  "ext_name": "zxl_20240926",
+  "reading": 50,
+  "ext_name": "zktest_package_1011",
   "s_mgo": {
     "local": true,
     "l_addr": "127.0.0.1:12005",
     "addr": "127.0.0.1:12005",
-    "dbname" : "qfw_ai",
+    "dbname" : "qfw",
     "username": "zhengkun",
     "password": "zk@123123"
   },
@@ -13,7 +13,7 @@
     "local": true,
     "l_addr": "127.0.0.1:12005",
     "addr": "127.0.0.1:12005",
-    "dbname" : "qfw_ai",
+    "dbname" : "qfw",
     "username": "zhengkun",
     "password": "zk@123123"
   },

+ 5 - 5
tool/tool.go

@@ -33,14 +33,14 @@ func StartToolInfo() {
 			data := extract.ResolveInfo(tmp)
 			if len(data) > 0 || u_id == "" {
 				tmp["ai_zhipu"] = data
-				update_check := make(map[string]interface{}, 0)
-				is_unset := ul.ChooseCheckDataAI(tmp, update_check)
-				update_check["ai_zhipu"] = data
+				update_info := make(map[string]interface{}, 0)
+				is_unset := ul.ChooseCheckDataAI(tmp, &update_info)
+				update_info["ai_zhipu"] = data
 				//清洗与记录
-				if len(update_check) > 0 {
+				if len(update_info) > 0 {
 					//$set
 					ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
-						"$set": update_check,
+						"$set": update_info,
 					})
 				}
 				if is_unset {

+ 15 - 15
ul/attr.go

@@ -3,21 +3,21 @@ package ul
 import "regexp"
 
 var (
-	SourceMgo, QyxyMgo *MongodbSim
-	BidMgo             *MongodbSim
-	SysConfig          map[string]interface{}
-	ToolConfig         map[string]interface{}
-	Bid_Name, Ext_Name string
-	Url                = "https://www.jianyu360.cn/article/content/%s.html"
-	CleanResultReg     = regexp.MustCompile("((\\s|\n| |\\[|\\]|\\`|json)+)")
-	SaveResultReg      = regexp.MustCompile("([{].*[}])")
-	MaxLen             = 3000
-	MaxByte            = 8000
-	MaxUdp             = 10000
-	RulesPname         = []*ExtReg{}
-	IsTool, IsFull     bool
-	Reading            int
-	FlashModel         string
+	SourceMgo, QyxyMgo      *MongodbSim
+	BidMgo                  *MongodbSim
+	SysConfig               map[string]interface{}
+	ToolConfig              map[string]interface{}
+	Bid_Name, Ext_Name      string
+	Url                     = "https://www.jianyu360.cn/article/content/%s.html"
+	CleanResultReg          = regexp.MustCompile("((\\s|\n| |\\[|\\]|\\`|json)+)")
+	SaveResultReg           = regexp.MustCompile("([{].*[}])")
+	MaxLen                  = 3000
+	MaxByte                 = 8000
+	MaxUdp                  = 10000
+	RulesPname              = []*ExtReg{}
+	IsTool, IsFull, IsLocal bool
+	Reading                 int
+	FlashModel              string
 )
 
 type ExtReg struct {

+ 121 - 119
ul/global.go

@@ -7,36 +7,98 @@ import (
 
 var Unset_Check = map[string]interface{}{"winner": 1, "s_winner": 1, "bidamount": 1, "winnerorder": 1}
 
+// ChooseCheckDataAI merges the model result in tmp["ai_zhipu"] with the extracted fields of tmp, writes the chosen values into *update_info (plus an "ext_ai_record" entry), and returns true when tmp still holds a field listed in Unset_Check that the caller should unset.
+func ChooseCheckDataAI(tmp map[string]interface{}, update_info *map[string]interface{}) bool {
+	if tmp["ai_zhipu"] == nil {
+		return false
+	}
+	// Records the original extracted values.
+	ext_ai_record := map[string]interface{}{}
+	ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"]) // NOTE(review): presumably ObjToMap returns a nil pointer for non-map values (its result is nil-checked in ChooseTheBestPackageField); if so this dereference would panic - confirm
+	// Choose the classification.
+	s_toptype, s_subtype := ChooseTheBestClassField(ai_zhipu, tmp, update_info, &ext_ai_record)
+	// Choose the core fields.
+	ChooseTheBestCoreField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, &ext_ai_record)
+	// Choose the package (sub-lot) fields.
+	ChooseTheBestPackageField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, &ext_ai_record)
+	// Final amount correction and selection.
+	ChooseTheBestAmountField(tmp, update_info)
+
+	// Attach the field record to the update.
+	(*update_info)["ext_ai_record"] = ext_ai_record
+
+	// Decide whether result-type fields must be removed because the document falls outside the result sub-types.
+	if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+		// Intentionally empty: for these four sub-types the function falls through to return false.
+	} else {
+		for k, _ := range Unset_Check {
+			if tmp[k] != nil {
+				return true
+			}
+		}
+	}
+
+	return false
+}
+
+// ChooseTheBestClassField chooses the toptype/subtype pair to use, either the model's classification from ai_zhipu after CheckClassByOtherFileds or the values already in tmp, records changes in *update_info and *ext_ai_record, and returns the chosen pair.
+func ChooseTheBestClassField(ai_zhipu map[string]interface{}, tmp map[string]interface{}, update_info *map[string]interface{}, ext_ai_record *map[string]interface{}) (string, string) {
+	// Classification fields from the model output.
+	s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
+	ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
+	if ns_toptype != s_toptype || ns_subtype != s_subtype { // the check changed the model's pair: record the corrected values
+		(*ext_ai_record)["s_toptype"] = ns_toptype
+		(*ext_ai_record)["s_subtype"] = ns_subtype
+	}
+	// Assignment: continue with the checked values.
+	s_toptype, s_subtype = ns_toptype, ns_subtype
+	if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" { // these two existing toptype values are kept as-is
+		s_toptype = qu.ObjToString(tmp["toptype"])
+		s_subtype = qu.ObjToString(tmp["subtype"])
+	} else {
+		if s_toptype != "" && s_subtype != "" { // both parts present: update and remember the previous values
+			(*update_info)["toptype"] = s_toptype
+			(*update_info)["subtype"] = s_subtype
+			(*ext_ai_record)["toptype"] = tmp["toptype"]
+			(*ext_ai_record)["subtype"] = tmp["subtype"]
+		} else { // incomplete pair: fall back to the values already in tmp
+			s_toptype = qu.ObjToString(tmp["toptype"])
+			s_subtype = qu.ObjToString(tmp["subtype"])
+		}
+	}
+	return s_toptype, s_subtype
+}
+
 // 选取字段
-func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) {
+func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info *map[string]interface{}, ext_ai_record *map[string]interface{}) {
 	//基础字段···
 	if s_buyer := qu.ObjToString(ai_zhipu["s_buyer"]); s_buyer != "" {
-		update_info["buyer"] = s_buyer
-		ext_ai_record["buyer"] = tmp["buyer"]
+		(*update_info)["buyer"] = s_buyer
+		(*ext_ai_record)["buyer"] = tmp["buyer"]
 		if agency := qu.ObjToString(tmp["agency"]); agency != "" && agency == s_buyer {
-			delete(update_info, "buyer")
-			delete(ext_ai_record, "buyer")
+			delete((*update_info), "buyer")
+			delete((*ext_ai_record), "buyer")
 		}
 	}
 	if s_projectname := qu.ObjToString(ai_zhipu["s_projectname"]); s_projectname != "" {
-		update_info["projectname"] = s_projectname
-		ext_ai_record["projectname"] = tmp["projectname"]
+		(*update_info)["projectname"] = s_projectname
+		(*ext_ai_record)["projectname"] = tmp["projectname"]
 	}
 	if s_projectcode := qu.ObjToString(ai_zhipu["s_projectcode"]); s_projectcode != "" {
-		update_info["projectcode"] = s_projectcode
-		ext_ai_record["projectcode"] = tmp["projectcode"]
+		(*update_info)["projectcode"] = s_projectcode
+		(*ext_ai_record)["projectcode"] = tmp["projectcode"]
 	}
 	if s_budget := qu.Float64All(ai_zhipu["s_budget"]); s_budget > 0.0 && s_budget < 1000000000.0 {
-		update_info["budget"] = s_budget
-		ext_ai_record["budget"] = tmp["budget"]
+		(*update_info)["budget"] = s_budget
+		(*ext_ai_record)["budget"] = tmp["budget"]
 	}
 	//地域字段···
 	o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"])
 	s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"])
 	if s_area != "" && s_area != "全国" {
-		update_info["area"] = s_area
+		(*update_info)["area"] = s_area
 		if s_city != "" {
-			update_info["city"] = s_city
+			(*update_info)["city"] = s_city
 			if o_district != "" {
 				//判断抽取的区县是否合理···
 				isT := false
@@ -49,35 +111,35 @@ func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s
 					}
 				}
 				if !isT {
-					update_info["district"] = ""
+					(*update_info)["district"] = ""
 				}
 			}
 		} else {
 			if o_area != s_area {
-				update_info["city"] = ""
-				update_info["district"] = ""
+				(*update_info)["city"] = ""
+				(*update_info)["district"] = ""
 			}
 		}
-		ext_ai_record["area"] = tmp["area"]
-		ext_ai_record["city"] = tmp["city"]
-		ext_ai_record["district"] = tmp["district"]
+		(*ext_ai_record)["area"] = tmp["area"]
+		(*ext_ai_record)["city"] = tmp["city"]
+		(*ext_ai_record)["district"] = tmp["district"]
 	}
 	//先用外围字段替换
 	if s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" || s_subtype == "单一" {
 		if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
-			update_info["bidamount"] = s_bidamount
-			ext_ai_record["bidamount"] = tmp["bidamount"]
+			(*update_info)["bidamount"] = s_bidamount
+			(*ext_ai_record)["bidamount"] = tmp["bidamount"]
 		}
 		if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
-			update_info["s_winner"] = s_winner
-			ext_ai_record["s_winner"] = tmp["s_winner"]
-			update_info["winner"] = s_winner
-			ext_ai_record["winner"] = tmp["winner"]
+			(*update_info)["s_winner"] = s_winner
+			(*ext_ai_record)["s_winner"] = tmp["s_winner"]
+			(*update_info)["winner"] = s_winner
+			(*ext_ai_record)["winner"] = tmp["winner"]
 			//对于winner来说...规则值有包含关系,采用规则值
 			if winner := qu.ObjToString(tmp["winner"]); winner != "" {
 				if strings.Contains(s_winner, winner) {
-					delete(update_info, "winner")
-					delete(ext_ai_record, "winner")
+					delete((*update_info), "winner")
+					delete((*ext_ai_record), "winner")
 				}
 			}
 		}
@@ -113,91 +175,64 @@ func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s
 	}
 }
 
-// 选取分类
-func ChooseTheBestClassField(ai_zhipu map[string]interface{}, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) (string, string) {
-	//分类字段···
-	s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
-	ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
-	if ns_toptype != s_toptype || ns_subtype != s_subtype {
-		ext_ai_record["s_toptype"] = ns_toptype
-		ext_ai_record["s_subtype"] = ns_subtype
-	}
-	//赋值···
-	s_toptype, s_subtype = ns_toptype, ns_subtype
-	if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" {
-		s_toptype = qu.ObjToString(tmp["toptype"])
-		s_subtype = qu.ObjToString(tmp["subtype"])
-	} else {
-		if s_toptype != "" && s_subtype != "" {
-			update_info["toptype"] = s_toptype
-			update_info["subtype"] = s_subtype
-			ext_ai_record["toptype"] = tmp["toptype"]
-			ext_ai_record["subtype"] = tmp["subtype"]
-		} else {
-			s_toptype = qu.ObjToString(tmp["toptype"])
-			s_subtype = qu.ObjToString(tmp["subtype"])
-		}
-	}
-	return s_toptype, s_subtype
-}
-
 // 选取分包
-func ChooseTheBestPackageField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) {
+func ChooseTheBestPackageField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info *map[string]interface{}, ext_ai_record *map[string]interface{}) {
 	//新分包判定···com_package - 默认大模型分包可信
 	if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
 		com_package := IsMarkInterfaceMap((*s_pkg)["com_package"])
 		//是否替换外围字段···质谱外围是否已提取
 		if len(com_package) > 1 { //多包字段覆盖
+			//校验核对分包结构
 			new_com_package := staffAiPackageInfo(com_package, s_toptype, s_subtype)
-			update_info["com_package"] = new_com_package
+			(*update_info)["com_package"] = new_com_package
 
 			if p_budget := qu.Float64All((*s_pkg)["s_budget"]); p_budget > 0.0 {
-				update_info["budget"] = p_budget
-				ext_ai_record["budget"] = tmp["budget"]
+				(*update_info)["budget"] = p_budget
+				(*ext_ai_record)["budget"] = tmp["budget"]
 			}
 			if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
 				if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
-					update_info["s_winner"] = p_winner
-					update_info["winner"] = p_winner
-					ext_ai_record["s_winner"] = tmp["s_winner"]
-					ext_ai_record["winner"] = tmp["winner"]
+					(*update_info)["s_winner"] = p_winner
+					(*update_info)["winner"] = p_winner
+					(*ext_ai_record)["s_winner"] = tmp["s_winner"]
+					(*ext_ai_record)["winner"] = tmp["winner"]
 
 					//对于winner来说...规则值有包含关系,采用规则值应用判重
 					if winner := qu.ObjToString(tmp["winner"]); winner != "" {
 						if strings.Contains(p_winner, winner) {
-							delete(update_info, "winner")
-							delete(ext_ai_record, "winner")
+							delete((*update_info), "winner")
+							delete((*ext_ai_record), "winner")
 						}
 					}
 				}
 				if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
-					update_info["bidamount"] = p_bidamount
-					ext_ai_record["bidamount"] = tmp["bidamount"]
+					(*update_info)["bidamount"] = p_bidamount
+					(*ext_ai_record)["bidamount"] = tmp["bidamount"]
 				}
 			}
 		} else if len(com_package) == 1 { //单包字段覆盖
-			if p_budget := qu.Float64All((*s_pkg)["s_budget"]); p_budget > 0.0 && update_info["budget"] == nil {
-				update_info["budget"] = p_budget
-				ext_ai_record["budget"] = tmp["budget"]
+			if p_budget := qu.Float64All((*s_pkg)["s_budget"]); p_budget > 0.0 && (*update_info)["budget"] == nil {
+				(*update_info)["budget"] = p_budget
+				(*ext_ai_record)["budget"] = tmp["budget"]
 			}
 			if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
-				if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" && update_info["s_winner"] == nil {
-					update_info["s_winner"] = p_winner
-					update_info["winner"] = p_winner
-					ext_ai_record["s_winner"] = tmp["s_winner"]
-					ext_ai_record["winner"] = tmp["winner"]
+				if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" && (*update_info)["s_winner"] == nil {
+					(*update_info)["s_winner"] = p_winner
+					(*update_info)["winner"] = p_winner
+					(*ext_ai_record)["s_winner"] = tmp["s_winner"]
+					(*ext_ai_record)["winner"] = tmp["winner"]
 
 					//对于winner来说...规则值有包含关系,采用规则值应用判重
 					if winner := qu.ObjToString(tmp["winner"]); winner != "" {
 						if strings.Contains(p_winner, winner) {
-							delete(update_info, "winner")
-							delete(ext_ai_record, "winner")
+							delete((*update_info), "winner")
+							delete((*ext_ai_record), "winner")
 						}
 					}
 				}
-				if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 && update_info["bidamount"] == nil {
-					update_info["bidamount"] = p_bidamount
-					ext_ai_record["bidamount"] = tmp["bidamount"]
+				if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 && (*update_info)["bidamount"] == nil {
+					(*update_info)["bidamount"] = p_bidamount
+					(*ext_ai_record)["bidamount"] = tmp["bidamount"]
 				}
 			}
 		} else {
@@ -207,55 +242,21 @@ func ChooseTheBestPackageField(ai_zhipu map[string]interface{}, s_toptype string
 }
 
 // 选取金额
-func ChooseTheBestAmountField(tmp map[string]interface{}, update_info map[string]interface{}) {
-	if r_budget := qu.Float64All(update_info["budget"]); r_budget > 0.0 && r_budget < 1000000000.0 {
+func ChooseTheBestAmountField(tmp map[string]interface{}, update_info *map[string]interface{}) {
+	if r_budget := qu.Float64All((*update_info)["budget"]); r_budget > 0.0 && r_budget < 1000000000.0 {
 		if o_budget := qu.Float64All(tmp["budget"]); o_budget > 0.0 {
 			if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
-				update_info["budget"] = filterAmount(r_budget, o_budget)
+				(*update_info)["budget"] = filterAmount(r_budget, o_budget)
 			}
 		}
 	}
-	if r_bidamount := qu.Float64All(update_info["bidamount"]); r_bidamount > 0.0 && r_bidamount < 1000000000.0 {
+	if r_bidamount := qu.Float64All((*update_info)["bidamount"]); r_bidamount > 0.0 && r_bidamount < 1000000000.0 {
 		if o_bidamount := qu.Float64All(tmp["bidamount"]); o_bidamount > 0.0 {
 			if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
-				update_info["bidamount"] = filterAmount(r_bidamount, o_bidamount)
-			}
-		}
-	}
-}
-
-// 大模型与抽取数据合并计算
-func ChooseCheckDataAI(tmp map[string]interface{}, update_info map[string]interface{}) bool {
-	if tmp["ai_zhipu"] == nil {
-		return false
-	}
-	//记录抽取原值
-	ext_ai_record := map[string]interface{}{}
-	ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
-	//选取分类
-	s_toptype, s_subtype := ChooseTheBestClassField(ai_zhipu, tmp, update_info, ext_ai_record)
-	//选取字段
-	ChooseTheBestCoreField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, ext_ai_record)
-	//选取分包
-	ChooseTheBestPackageField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, ext_ai_record)
-	//最终金额修正与选取
-	ChooseTheBestAmountField(tmp, update_info)
-
-	//字段记录
-	update_info["ext_ai_record"] = ext_ai_record
-
-	//跨分类是否删除结果类字段
-	if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
-
-	} else {
-		for k, _ := range Unset_Check {
-			if tmp[k] != nil {
-				return true
+				(*update_info)["bidamount"] = filterAmount(r_bidamount, o_bidamount)
 			}
 		}
 	}
-
-	return false
 }
 
 // 筛选金额
@@ -322,6 +323,7 @@ func staffAiPackageInfo(com_package []map[string]interface{}, s_toptype string,
 	return new_com_package
 }
 
+// Verify the classification.
 func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]interface{}) (string, string) {
 	toptype_rule := qu.ObjToString(data["toptype"])
 	subtype_rule := qu.ObjToString(data["subtype"])

+ 34 - 0
ul/init.go

@@ -1,10 +1,14 @@
 package ul
 
 import (
+	"bytes"
+	"encoding/json"
 	"fmt"
 	log "github.com/donnie4w/go-logger/logger"
 	"go.mongodb.org/mongo-driver/bson/primitive"
+	"io/ioutil"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"net/http"
 )
 
 func InitGlobalVar() {
@@ -253,3 +257,33 @@ func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
 	}
 	return p_list
 }
+
+func PostMarkDownText(html string) string { // POSTs html as JSON to the /md service and returns the reply's "result" field; returns "" on any error.
+	url := "http://172.17.0.11:8888/md"
+	if IsLocal {
+		url = "http://172.17.0.11:8888/md" // NOTE(review): identical to the default above, so this branch currently changes nothing
+	}
+	// Build the request payload: {"html": <html>}.
+	data := map[string]interface{}{"html": html}
+	jsonData, err := json.Marshal(data)
+	if err != nil {
+		return ""
+	}
+	// Send the POST request. NOTE(review): http.Post uses http.DefaultClient, which sets no timeout.
+	resp, err := http.Post(url, "application/json", bytes.NewBuffer(jsonData))
+	if err != nil {
+		return ""
+	}
+	defer resp.Body.Close()
+	// Read the whole response body; the HTTP status code is not inspected.
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return ""
+	}
+	info := map[string]interface{}{} // decoded JSON reply
+	err = json.Unmarshal(body, &info)
+	if err != nil {
+		return ""
+	}
+	return qu.ObjToString(info["result"]) // presumably "" when "result" is missing or not a string — confirm against qu.ObjToString
+}

+ 9 - 1
ul/md.go

@@ -9,6 +9,15 @@ var md_reg1 = regexp.MustCompile("([ *]+)")
 var md_reg2 = regexp.MustCompile("([\\n]+)")
 var md_reg3 = regexp.MustCompile("([-]{3,})")
 
+// HttpConvertToMarkdown returns PostMarkDownText(html), or html itself when that result is empty. Known defect (per the original note): memory usage grows, cause unknown.
+func HttpConvertToMarkdown(html string) string {
+	if text := PostMarkDownText(html); text != "" { // a non-empty reply from PostMarkDownText wins
+		return text
+	}
+	return html // fall back to the unmodified input when PostMarkDownText returned ""
+}
+
+// Problematic: leaks memory.
 func ConvertToMarkdown(html string) string {
 	markdown, err := html2text.FromString(html, html2text.Options{PrettyTables: true})
 	if err != nil {
@@ -17,6 +26,5 @@ func ConvertToMarkdown(html string) string {
 	markdown = md_reg1.ReplaceAllString(markdown, " ")
 	markdown = md_reg2.ReplaceAllString(markdown, "\n")
 	markdown = md_reg3.ReplaceAllString(markdown, "---")
-
 	return markdown
 }