zhengkun hai 1 mes
pai
achega
a7980f0e50

+ 1 - 1
extcity/src/ext/extInit.go

@@ -110,7 +110,7 @@ func (e *ExtractTask) InitAreaCode() {
 // 加载所有
 func InitProvincesx() []map[string]interface{} {
 	defer qu.Catch()
-	provinces, _ := ul.ExtMgo.Find("address_new_2020", map[string]interface{}{
+	provinces, _ := ul.QyxyMgo.Find("address_new_2020", map[string]interface{}{
 		"Remarks": nil,
 	}, nil, nil)
 	return provinces

+ 20 - 20
extcity/src/ext/extRegion.go

@@ -426,26 +426,26 @@ func (e *ExtractTask) NewVerifySensitiveInfo(detail string, area *string, city *
 	}
 
 	//简称区县
-	//simDistrictArr := e.SensitiveSimDistrict.FindAll(detail)
-	//if len(simDistrictArr) == 1 {
-	//	for _, v := range simDistrictArr {
-	//		if citysArr := e.DistrictSimAndAll[v]; len(citysArr) > 0 {
-	//			for _, f_citys := range citysArr {
-	//				for d, c := range f_citys {
-	//					if c == nil || c.P == nil || c.Name == "" {
-	//						continue
-	//					}
-	//					if c.P.Brief != "" && c.Name != "" && d != "" {
-	//						*area = c.P.Brief
-	//						*city = c.Name
-	//						*district = d
-	//						return true
-	//					}
-	//				}
-	//			}
-	//		}
-	//	}
-	//}
+	simDistrictArr := e.SensitiveSimDistrict.FindAll(detail)
+	if len(simDistrictArr) == 1 {
+		for _, v := range simDistrictArr {
+			if citysArr := e.DistrictSimAndAll[v]; len(citysArr) > 0 {
+				for _, f_citys := range citysArr {
+					for d, c := range f_citys {
+						if c == nil || c.P == nil || c.Name == "" {
+							continue
+						}
+						if c.P.Brief != "" && c.Name != "" && d != "" {
+							*area = c.P.Brief
+							*city = c.Name
+							*district = d
+							return true
+						}
+					}
+				}
+			}
+		}
+	}
 
 	//省份提取--临时
 	fullAreaArr := e.SensitiveFullArea.FindAll(detail)

+ 97 - 0
extcity/src/ext/extTown.go

@@ -0,0 +1,97 @@
+package ext
+
+import qu "qfw/util"
+
+// keyArr lists the document fields that ExtractRegionTownInfo scans for town
+// names. The fields are tried in this order and scanning stops at the first
+// field that yields a confirmed match.
+var keyArr = []string{
+	"addressing",
+	"projectaddr",
+	"projectname",
+	"buyer",
+	"approvedepartment",
+	"buyeraddr",
+	"title",
+	"bidopenaddress",
+}
+
+// ExtractRegionTownInfo derives township-level region info for one document.
+// It starts from the "area", "city" and "district" values already present in
+// tmp, then walks the fields listed in keyArr; the first field whose town
+// candidates are accepted by ConfirmTownRegionInfo decides the result.
+// The returned map always has the keys "area", "city", "district" and "town";
+// "town" is empty when no candidate was confirmed.
+func (e *ExtractTask) ExtractRegionTownInfo(tmp map[string]interface{}) map[string]string {
+	area := qu.ObjToString(tmp["area"])
+	city := qu.ObjToString(tmp["city"])
+	district := qu.ObjToString(tmp["district"])
+	town := ""
+
+	// Field-based lookup: skip empty fields and fields without candidates.
+	for _, key := range keyArr {
+		fieldText := qu.ObjToString(tmp[key])
+		if fieldText == "" {
+			continue
+		}
+		candidates := e.GetRegionTownByFieldInfo(fieldText)
+		if len(candidates) == 0 {
+			continue
+		}
+		if e.ConfirmTownRegionInfo(candidates, &area, &city, &district, &town) {
+			break
+		}
+	}
+	// Scanning the body text is a placeholder and not implemented yet.
+
+	return map[string]string{"area": area, "city": city, "district": district, "town": town}
+}
+
+// GetRegionTownByFieldInfo works backwards from town names found in text.
+// The text is segmented into words; every word accepted by the entry with
+// key/index 3 of e.Trie_Fulls is looked up in e.StreetDistrictMap, and one
+// S_Town candidate (province brief, city name, district name, matched word)
+// is emitted per district registered for that word.
+// It returns an empty, non-nil slice when nothing matches.
+// NOTE(review): entry 3 is presumably the street/town-level trie, and d.C and
+// d.C.P are dereferenced without nil checks — confirm both against the loader.
+func (e *ExtractTask) GetRegionTownByFieldInfo(text string) []S_Town {
+	towns := []S_Town{}
+	for _, word := range e.Seg_SV.Cut(text, true) {
+		for idx, trie := range e.Trie_Fulls {
+			if idx != 3 || !trie.Get(word) {
+				continue
+			}
+			for _, d := range e.StreetDistrictMap[word] {
+				towns = append(towns, S_Town{d.C.P.Brief, d.C.Name, d.Name, word})
+			}
+		}
+	}
+	return towns
+}
+
+// ConfirmTownRegionInfo picks the first candidate in ST that is compatible
+// with the region already known through area/city/district and writes the
+// candidate's province, city, district and town back through the pointers.
+//
+// Each candidate is first passed through CompleteRegionInfo; candidates whose
+// province ends up empty or "全国" are ignored. When *area is empty or "全国"
+// any remaining candidate is accepted. Otherwise the candidate's province
+// must equal *area, and its city/district must equal *city/*district whenever
+// those are non-empty. It returns true when a candidate was applied and false
+// (leaving the pointers untouched) when none qualified.
+func (e *ExtractTask) ConfirmTownRegionInfo(ST []S_Town, area, city, district, town *string) bool {
+	for _, cand := range ST {
+		province, cityName, districtName, townName := cand.P_Name, cand.C_Name, cand.D_Name, cand.T_Name
+		CompleteRegionInfo(&province, &cityName, &districtName)
+		if province == "" || province == "全国" {
+			continue
+		}
+		// Only enforce consistency when a concrete province is already known.
+		if *area != "全国" && *area != "" {
+			if *area != province {
+				continue
+			}
+			if *city != "" && *city != cityName {
+				continue
+			}
+			if *district != "" && *district != districtName {
+				continue
+			}
+		}
+		*area = province
+		*city = cityName
+		*district = districtName
+		*town = townName
+		return true
+	}
+	return false
+}
+
+// GetRegionTownCleanInfo applies special-case promotion to an extracted
+// region result: when the city is "中山市" and a town was found, the town is
+// also written into the "district" slot.
+// NOTE(review): presumably because this city has no district level — confirm.
+func (e *ExtractTask) GetRegionTownCleanInfo(info *map[string]string) {
+	region := *info
+	cityName := qu.ObjToString(region["city"])
+	townName := qu.ObjToString(region["town"])
+	if cityName != "中山市" || townName == "" {
+		return
+	}
+	region["district"] = townName
+}

+ 6 - 0
extcity/src/ext/struct.go

@@ -86,3 +86,9 @@ type S_District struct {
 	C_Name string
 	D_Name string
 }
+// S_Town is one town-level match together with the region chain it belongs to.
+// GetRegionTownByFieldInfo fills it positionally as
+// {province brief, city name, district name, matched town word}.
+type S_Town struct {
+	P_Name string // province (brief name)
+	C_Name string // city name
+	D_Name string // district name
+	T_Name string // matched town/street word
+}

+ 11 - 35
extcity/src/main.go

@@ -5,7 +5,6 @@ import (
 	"encoding/json"
 	"ext"
 	log "github.com/donnie4w/go-logger/logger"
-	"gopkg.in/mgo.v2/bson"
 	"io"
 	"net/http"
 	qu "qfw/util"
@@ -27,50 +26,27 @@ func init() {
 	go http.ListenAndServe(ul.Port, nil)
 }
 func main() {
-	testRegionInfo()
+	//以下测试测试
+	//data := ul.TestMgo.FindById("bidding", "66292fab66cf0db42ac58000")
+	log.Debug("准备测试...等待3s...")
+	time.Sleep(3 * time.Second)
+	data := map[string]interface{}{
+		"detail": "请问金水在哪",
+	}
+	log.Debug(test(data))
 	lock := make(chan bool)
 	<-lock
 	/*
 		extcity.spdata.jianyu360.com
-		172.17.4.238:9996
+		172.17.4.238:9996  - 用在了实体识别程序了
 	*/
 }
 
-func testRegionInfo() {
-	return
-	dataArr, _ := ul.TestMgo.Find("site", bson.M{"area": "全国"}, nil, map[string]interface{}{"site": 1})
-	isok := 0
-	for k, v := range dataArr {
-		if k%100 == 0 {
-			log.Debug("cur index ", k, "~", isok)
-		}
-		tmpid := ul.BsonTOStringId(v["_id"])
-		site := qu.ObjToString(v["site"])
-		info := test(site)
-		area := qu.ObjToString(info["area"])
-		if area != "" && area != "全国" {
-			isok++
-			//ul.SiteMgo.Save("123123", map[string]interface{}{
-			//	"site": site,
-			//	"area": area,
-			//})
-			log.Debug(tmpid, "~", area)
-			//ul.TestMgo.UpdateById("site", tmpid, map[string]interface{}{
-			//	"$set": map[string]interface{}{
-			//		"area": area,
-			//	},
-			//})
-		}
-	}
-	log.Debug("is over ", isok)
-}
-
-func test(detail string) map[string]interface{} {
+func test(data map[string]interface{}) map[string]interface{} {
 	info := map[string]interface{}{}
 	client := &http.Client{Timeout: 2 * time.Second}
-	data := map[string]interface{}{"detail": detail}
 	jsonStr, _ := json.Marshal(data)
-	resp, err := client.Post("http://127.0.0.1:9996/service/region", "application/json", bytes.NewBuffer(jsonStr))
+	resp, err := client.Post("http://127.0.0.1:9997/service/region", "application/json", bytes.NewBuffer(jsonStr))
 	if err != nil {
 		return info
 	}

+ 11 - 12
extcity/src/mark

@@ -1,27 +1,26 @@
 {
    "port": ":9996",
    "ext_mgodb": {
-     "addr": "172.17.145.163:27083,172.17.4.187:27082",
-     "db": "mixdata",
-     "ext_coll": "address_new_2020",
-     "check_coll": "address_jy_2022",
-     "pool": 10,
+     "addr": "172.17.189.140:27080,172.17.189.141:27081",
+     "db": "extract_2021",
      "user": "zhengkun",
      "password": "zk@123123"
    },
-  "save_mgodb": {
-    "addr": "120.0.0.1:27017",
-    "db": "zhengkun",
-    "coll": "zktest_data_new_city",
-    "pool": 10,
+  "site_mgodb": {
+    "addr": "172.17.4.87:27080",
+    "db": "editor",
     "user": "",
     "password": ""
   },
+  "qyxy_mgodb": {
+    "addr": "172.17.189.140:27080,172.17.189.141:27081",
+    "db": "mixdata",
+    "user": "zhengkun",
+    "password": "zk@123123"
+  },
   "source_field": {
     "authority": 1,
     "company_address": 1
   }
 }
 
-
-

+ 1 - 1
extcity/src/res/config.json

@@ -1,5 +1,5 @@
 {
-   "port": ":9996",
+   "port": ":9997",
    "ext_mgodb": {
      "addr": "127.0.0.1:27017",
      "db": "extract_service",

+ 18 - 0
extcity/src/service/service.go

@@ -13,6 +13,7 @@ import (
 var datalock sync.Mutex
 
 func InitRegionsService() {
+	//提取地域信息
 	http.HandleFunc("/service/region", func(w http.ResponseWriter, r *http.Request) {
 		datalock.Lock()
 		data, _ := ioutil.ReadAll(r.Body)
@@ -35,6 +36,7 @@ func InitRegionsService() {
 		datalock.Unlock()
 	})
 
+	//地域信息清洗
 	http.HandleFunc("/service/region/clean", func(w http.ResponseWriter, r *http.Request) {
 		datalock.Lock()
 		data, _ := ioutil.ReadAll(r.Body)
@@ -55,4 +57,20 @@ func InitRegionsService() {
 		w.Write(res)
 		datalock.Unlock()
 	})
+
+	// Extract township/street-level region info from a JSON document.
+	http.HandleFunc("/service/region/town", func(w http.ResponseWriter, r *http.Request) {
+		// Unlock via defer: net/http recovers a panicking handler, so without
+		// the defer a panic during extraction would leave datalock held
+		// forever and block every other region endpoint.
+		datalock.Lock()
+		defer datalock.Unlock()
+
+		data, err := ioutil.ReadAll(r.Body)
+		if err != nil {
+			log.Debug(err)
+		}
+		tmp := map[string]interface{}{}
+		if err := json.Unmarshal(data, &tmp); err != nil {
+			// Best effort, as before: a malformed body is logged and the
+			// extraction runs on whatever was decoded.
+			log.Debug(err)
+		}
+		info := ext.Ext.ExtractRegionTownInfo(tmp)
+		ext.Ext.GetRegionTownCleanInfo(&info)
+		// Marshalling a map[string]string cannot fail, so the error is dropped.
+		res, _ := json.Marshal(info)
+		w.Write(res)
+	})
 }

+ 8 - 8
extcity/src/util/init.go

@@ -58,14 +58,14 @@ func initMgo() {
 	QyxyMgo.InitPool()
 
 	//临时Mgo
-	TestMgo = &MongodbSim{
-		MongodbAddr: "127.0.0.1:12003",
-		DbName:      "editor",
-		Size:        10,
-		UserName:    "",
-		PassWord:    "",
-	}
-	TestMgo.InitPoolDirect()
+	//TestMgo = &MongodbSim{
+	//	MongodbAddr: "127.0.0.1:12005",
+	//	DbName:      "qfw",
+	//	Size:        10,
+	//	UserName:    "zhengkun",
+	//	PassWord:    "zk@123123",
+	//}
+	//TestMgo.InitPoolDirect()
 }
 
 func convertInterface(t interface{}) []string {

+ 1 - 0
src/config.json

@@ -11,6 +11,7 @@
     "qyxy_password": "",
     "mergetable": "projectset",
     "mergetablealias": "projectset_v1",
+    "isoss" : true,
     "ffield": true,
     "saveresult": false,
     "fieldsfind": false,

+ 29 - 24
src/jy/extract/extract.go

@@ -267,6 +267,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 	}
 	toptype := qu.ObjToString(doc["toptype"])
 	subtype := qu.ObjToString(doc["subtype"])
+	toptype_old, subtype_old := toptype, subtype
 	if qu.ObjToString(doc["type"]) == "bid" {
 		toptype = "结果"
 	}
@@ -300,12 +301,14 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		}
 	}
 	j = &ju.Job{
-		SourceMid:      qu.BsonIdToSId(doc["_id"]),
-		Category:       toptype,
-		CategorySecond: subtype,
-		Content:        qu.ObjToString(doc["detail"]),
-		SpiderCode:     qu.ObjToString(doc["spidercode"]),
-		Site:           qu.ObjToString(doc["site"]),
+		SourceMid:          qu.BsonIdToSId(doc["_id"]),
+		Category:           toptype,
+		CategorySecond:     subtype,
+		Category_Old:       toptype_old,
+		CategorySecond_Old: subtype_old,
+		Content:            qu.ObjToString(doc["detail"]),
+		SpiderCode:         qu.ObjToString(doc["spidercode"]),
+		Site:               qu.ObjToString(doc["site"]),
 		//Domain:     qu.ObjToString(doc["domain"]),
 		//Href:       qu.ObjToString(doc["href"]),
 		Title:         qu.ObjToString(doc["title"]),
@@ -322,24 +325,26 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 	}
 	if isextFile {
 		jf = &ju.Job{
-			SourceMid:      qu.BsonIdToSId(doc["_id"]),
-			Category:       toptype,
-			CategorySecond: subtype,
-			Content:        qu.ObjToString(doc["detailfile"]),
-			SpiderCode:     qu.ObjToString(doc["spidercode"]),
-			Site:           qu.ObjToString(doc["site"]),
-			Title:          qu.ObjToString(doc["title"]),
-			Data:           &doc,
-			City:           qu.ObjToString(doc["city"]),
-			Province:       qu.ObjToString(doc["area"]),
-			Jsondata:       toMap,
-			Result:         map[string][]*ju.ExtField{},
-			BuyerAddr:      qu.ObjToString(doc["buyeraddr"]),
-			RuleBlock:      e.RuleBlock,
-			IsFile:         isextFile,
-			Dataging:       qu.IntAll(doc["dataging"]),
-			IsClearnMoney:  isClearnMoneystr,
-			IsUnRulesTab:   false,
+			SourceMid:          qu.BsonIdToSId(doc["_id"]),
+			Category:           toptype,
+			CategorySecond:     subtype,
+			Category_Old:       toptype_old,
+			CategorySecond_Old: subtype_old,
+			Content:            qu.ObjToString(doc["detailfile"]),
+			SpiderCode:         qu.ObjToString(doc["spidercode"]),
+			Site:               qu.ObjToString(doc["site"]),
+			Title:              qu.ObjToString(doc["title"]),
+			Data:               &doc,
+			City:               qu.ObjToString(doc["city"]),
+			Province:           qu.ObjToString(doc["area"]),
+			Jsondata:           toMap,
+			Result:             map[string][]*ju.ExtField{},
+			BuyerAddr:          qu.ObjToString(doc["buyeraddr"]),
+			RuleBlock:          e.RuleBlock,
+			IsFile:             isextFile,
+			Dataging:           qu.IntAll(doc["dataging"]),
+			IsClearnMoney:      isClearnMoneystr,
+			IsUnRulesTab:       false,
 		}
 	}
 	codeSite := j.SpiderCode

+ 274 - 0
src/jy/extract/extract_oss.go

@@ -0,0 +1,274 @@
+package extract
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"io"
+	"log"
+	"mime/multipart"
+	"net/http"
+	"net/rpc"
+	"strconv"
+	"time"
+)
+
+/*
+	HTTP endpoint: 172.17.162.27:18011
+	RPC endpoint: 172.17.162.27:18012
+*/
+
+// Oss_Address is the base URL of the OSS HTTP service.
+var Oss_Address = "http://172.17.162.27:18011"
+// Detail_BucketId is the bucket from which a bid's "detail" text is fetched.
+var Detail_BucketId = "detail"
+// Html_BucketId is the bucket from which a bid's "contenthtml" source is fetched.
+var Html_BucketId = "contenthtml"
+
+// UploadArgs is the RPC argument for uploading an object.
+type UploadArgs struct {
+	Stream     []byte // file content sent by the client
+	Gzip       bool   // whether to compress
+	BucketID   string // bucket id
+	ObjectName string // object name
+}
+
+// Args is the RPC argument identifying one object for the download, delete
+// and bid-detail calls.
+type Args struct {
+	BucketID   string // bucket id
+	ObjectName string // object name
+}
+
+// Result is the uniform return value of every OSS call in this file.
+type Result struct {
+	Error_code int         `json:"error_code"`
+	Error_msg  string      `json:"error_msg"`
+	Data       interface{} `json:"data"`
+}
+
+// REST endpoint paths (appended to the service base URL) and the success /
+// failure message texts used in Result.Error_msg. The *Fail values are format
+// strings taking one argument.
+const (
+	UploadUrl           = "/ossservice/upload"
+	DownloadUrl         = "/ossservice/download"
+	DeleteUrl           = "/ossservice/delete"
+	GetBidDetailUrl     = "/ossservice/biddetail"
+	UploadSuccess       = "上传成功"
+	DownloadSuccess     = "下载成功"
+	DeleteSuccess       = "删除成功"
+	GetBidDetailSuccess = "获取正文成功"
+	UploadFail          = "上传失败:%v"
+	DownloadFail        = "下载失败:%v"
+	DeleteFail          = "删除失败:%v"
+	BidDetailFail       = "获取正文失败:%v"
+)
+
+/* UpLoadByRestful uploads an object through the REST interface.
+ * @param domain base URL of the service, e.g. https://ossservice.jianyu360.cn
+ * @param bucketId bucket id
+ * @param objectName object name
+ * @param stream file content
+ * @param gzip whether to compress
+ * @return the decoded service reply, e.g. {"error_code":0,"error_msg":"上传成功"};
+ *         never nil. On any local, transport or decode failure Error_code is -1
+ *         and Error_msg carries the error text.
+ */
+func UpLoadByRestful(domain, bucketId, objectName string, stream []byte, gzip bool) (reply *Result) {
+	reply = &Result{Error_code: -1}
+
+	// Build the multipart form in memory.
+	body := &bytes.Buffer{}
+	writer := multipart.NewWriter(body)
+	fields := [][2]string{
+		{"bucket_id", bucketId},
+		{"object_name", objectName},
+		{"gzip", strconv.FormatBool(gzip)},
+	}
+	for _, f := range fields {
+		if err := writer.WriteField(f[0], f[1]); err != nil {
+			reply.Error_msg = err.Error()
+			return
+		}
+	}
+
+	// Attach the file content as the "file" part.
+	part, err := writer.CreateFormFile("file", objectName)
+	if err != nil {
+		reply.Error_msg = err.Error()
+		return
+	}
+	if _, err = part.Write(stream); err != nil {
+		reply.Error_msg = err.Error()
+		return
+	}
+
+	respBody, err := post(domain+UploadUrl, writer, body)
+	if err != nil {
+		reply.Error_msg = err.Error()
+		return
+	}
+	// Decode into the existing struct rather than &reply: decoding into the
+	// pointer-to-pointer would turn a JSON "null" body into a nil return value.
+	if err := json.Unmarshal(respBody, reply); err != nil {
+		reply.Error_code = -1
+		reply.Error_msg = err.Error()
+	}
+	return
+}
+
+/* DownloadByRestful downloads an object through the REST interface.
+ * @param domain base URL of the service, e.g. https://ossservice.jianyu360.cn
+ * @param bucketId bucket id
+ * @param objectName object name
+ * @return on success Error_code 0, Error_msg DownloadSuccess and Data holding
+ *         the raw response bytes ([]byte); on failure Error_code -1 and
+ *         Error_msg holding the error text
+ */
+func DownloadByRestful(domain, bucketId, objectName string) (reply *Result) {
+	reply = &Result{}
+	// Build the multipart form in memory.
+	body := &bytes.Buffer{}
+	writer := multipart.NewWriter(body)
+	writer.WriteField("bucket_id", bucketId)
+	writer.WriteField("object_name", objectName)
+	respBody, err := post(domain+DownloadUrl, writer, body)
+	if err != nil {
+		// Mark the failure in the code too; 0 is the success code, and
+		// UpLoadByRestful already reports failures as -1.
+		reply.Error_code = -1
+		reply.Error_msg = err.Error()
+		return
+	}
+	reply.Error_msg = DownloadSuccess
+	reply.Data = respBody
+	return
+}
+
+/* DeleteByRestful deletes an object through the REST interface.
+ * @param domain base URL of the service, e.g. https://ossservice.jianyu360.cn
+ * @param bucketId bucket id
+ * @param objectName object name
+ * @return the decoded service reply, e.g. {"error_code":0,"error_msg":"删除成功"};
+ *         never nil. On a transport or decode failure Error_code is -1 and
+ *         Error_msg holds the error text.
+ */
+func DeleteByRestful(domain, bucketId, objectName string) (reply *Result) {
+	reply = &Result{}
+	// Build the multipart form in memory.
+	body := &bytes.Buffer{}
+	writer := multipart.NewWriter(body)
+	writer.WriteField("bucket_id", bucketId)
+	writer.WriteField("object_name", objectName)
+	respBody, err := post(domain+DeleteUrl, writer, body)
+	if err != nil {
+		// 0 is the success code, so failures must not leave it at zero.
+		reply.Error_code = -1
+		reply.Error_msg = err.Error()
+		return
+	}
+	// Decode into the existing struct rather than &reply: decoding into the
+	// pointer-to-pointer would turn a JSON "null" body into a nil return value.
+	if err := json.Unmarshal(respBody, reply); err != nil {
+		reply.Error_code = -1
+		reply.Error_msg = err.Error()
+	}
+	return
+}
+
+/* GetBidDetailByRestful fetches a bid's text content through the REST interface.
+ * @param domain base URL of the service, e.g. https://ossservice.jianyu360.cn
+ * @param bucketId bucket id
+ * @param objectName object name
+ * @return on success Error_code 0, Error_msg GetBidDetailSuccess and Data
+ *         holding the response body as a string; on failure Error_code -1 and
+ *         Error_msg holding the error text
+ */
+func GetBidDetailByRestful(domain, bucketId, objectName string) (reply *Result) {
+	reply = &Result{}
+	// Build the multipart form in memory.
+	body := &bytes.Buffer{}
+	writer := multipart.NewWriter(body)
+	writer.WriteField("bucket_id", bucketId)
+	writer.WriteField("object_name", objectName)
+	respBody, err := post(domain+GetBidDetailUrl, writer, body)
+	if err != nil {
+		// 0 is the success code, so failures must not leave it at zero.
+		reply.Error_code = -1
+		reply.Error_msg = err.Error()
+		return
+	}
+	reply.Error_msg = GetBidDetailSuccess
+	reply.Data = string(respBody)
+	return
+}
+
+func post(url string, writer *multipart.Writer, body *bytes.Buffer) ([]byte, error) {
+	// 关闭表单写入器
+	if err := writer.Close(); err != nil {
+		return nil, err
+	}
+	// 创建 HTTP 请求
+	req, err := http.NewRequest("POST", url, body)
+	if err != nil {
+		log.Println("Error creating request:", err)
+		return nil, err
+	}
+
+	// 设置请求头
+	req.Header.Set("Content-Type", writer.FormDataContentType())
+
+	// 发送请求
+	client := &http.Client{}
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	// 读取响应
+	respBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+	if resp.StatusCode != http.StatusOK {
+		return nil, errors.New(string(respBody))
+	}
+	return respBody, nil
+}
+
+/* rpc方式上传
+ * @param address 域名,例如:192.168.3.206:8110
+ * @param args 参数
+ * @param args.BucketID 文件名
+ * @param args.objectName 对象名称
+ * @param args.Stream 文件流
+ * @param args.Gzip 是否压缩
+ * @return {"error_code":0,"error_msg":"上传成功"}
+ * @return error 错误信息
+ */
+func UpLoadByRpc(address string, args *UploadArgs) (Result, error) {
+	var reply Result
+	err := rpcCall(address, "OSSService.Upload", args, &reply)
+	return reply, err
+}
+
+/*
+ *rpc方式下载
+ * @param address 域名,例如:192.168.3.206:8110
+ * @param args 参数
+ * @param args.BucketID 文件名
+ * @param args.objectName 对象名称
+ * @return {"error_code":0,"error_msg":"下载成功","data":"文件流"}
+ * @return error 错误信息
+ */
+func DownloadByRpc(address string, args *Args) (Result, error) {
+	var reply Result
+	err := rpcCall(address, "OSSService.Download", args, &reply)
+	return reply, err
+}
+
+/* rpc方式删除
+ * @param address 域名,例如:192.168.3.206:8110
+ * @param args 参数
+ * @param args.BucketID 文件名
+ * @param args.objectName 对象名称
+ * @return {"error_code":0,"error_msg":"删除成功"}
+ * @return error 错误信息
+ */
+func DeleteByRpc(address string, args *Args) (Result, error) {
+	var reply Result
+	err := rpcCall(address, "OSSService.Delete", args, &reply)
+	return reply, err
+}
+
+/*
+ *rpc方式获取标讯正文
+ * @param address 域名,例如:192.168.3.206:8110
+ * @param args 参数
+ * @param args.BucketID 文件名
+ * @param args.objectName 对象名称
+ * @return {"error_code":0,"error_msg":"下载成功","data":"正文内容"}
+ * @return error 错误信息
+ */
+func GetBidDetailByRpc(address string, args *Args) (Result, error) {
+	var reply Result
+	err := rpcCall(address, "OSSService.GetBidDetail", args, &reply)
+	return reply, err
+}
+// rpcCall dials the RPC-over-HTTP service at address, invokes serviceMethod
+// with args, stores the answer in reply and closes the connection again.
+// Dial and call errors are logged and returned; nil means success.
+func rpcCall(address, serviceMethod string, args any, reply any) error {
+	conn, dialErr := rpc.DialHTTP("tcp", address)
+	if dialErr != nil {
+		log.Println(dialErr)
+		return dialErr
+	}
+	defer conn.Close()
+
+	callErr := conn.Call(serviceMethod, args, reply)
+	if callErr != nil {
+		log.Println(callErr)
+	}
+	return callErr
+}

+ 1 - 1
src/jy/extract/extractcheck.go

@@ -346,6 +346,6 @@ func checkFields(tmp map[string]interface{}, j_data map[string]interface{}) map[
 	if expiredate > 0 && signaturedate > 0 && expiredate < signaturedate {
 		delete(tmp, "expiredate")
 	}
-
 	return tmp
+
 }

+ 18 - 18
src/jy/extract/extractcity_new.go

@@ -22,24 +22,24 @@ func (e *ExtractTask) ExtractRegionClean(tmp *map[string]interface{}) {
 	}
 
 	//对于补充的地域信息进行标准化校验...
-	s_area := qu.ObjToString((*tmp)["s_area"])
-	s_city := qu.ObjToString((*tmp)["s_city"])
-	s_district := qu.ObjToString((*tmp)["s_district"])
-	if s_area != "" && s_area != "全国" {
-		s_rdata := e.StandardCheckCity(s_area, s_city, s_district)
-		delete((*tmp), "s_area")
-		delete((*tmp), "s_city")
-		delete((*tmp), "s_district")
-		if s_rdata["s_area"] != "" && s_rdata["s_area"] != "全国" {
-			(*tmp)["s_area"] = s_rdata["s_area"]
-		}
-		if s_rdata["s_city"] != "" {
-			(*tmp)["s_city"] = s_rdata["s_city"]
-		}
-		if s_rdata["s_district"] != "" {
-			(*tmp)["s_district"] = s_rdata["s_district"]
-		}
-	}
+	//s_area := qu.ObjToString((*tmp)["s_area"])
+	//s_city := qu.ObjToString((*tmp)["s_city"])
+	//s_district := qu.ObjToString((*tmp)["s_district"])
+	//if s_area != "" && s_area != "全国" {
+	//	s_rdata := e.StandardCheckCity(s_area, s_city, s_district)
+	//	delete((*tmp), "s_area")
+	//	delete((*tmp), "s_city")
+	//	delete((*tmp), "s_district")
+	//	if s_rdata["s_area"] != "" && s_rdata["s_area"] != "全国" {
+	//		(*tmp)["s_area"] = s_rdata["s_area"]
+	//	}
+	//	if s_rdata["s_city"] != "" {
+	//		(*tmp)["s_city"] = s_rdata["s_city"]
+	//	}
+	//	if s_rdata["s_district"] != "" {
+	//		(*tmp)["s_district"] = s_rdata["s_district"]
+	//	}
+	//}
 }
 
 // 抽取地域信息

+ 5 - 1
src/jy/extract/extractflow.go

@@ -49,9 +49,10 @@ func InitExtractFlowTask() {
 		ExtFlow.IsRun = true
 		ExtFlow.BidTotal = 0
 	}
+	log.Debug("流式任务加载完毕···")
 }
 
-func ExtractByExtFlow(v map[string]interface{}) map[string]interface{} {
+func ExtractUpdateRule() {
 	defer qu.Catch()
 	if ju.IsUpdateRule {
 		ju.IsUpdateRule = false
@@ -78,6 +79,9 @@ func ExtractByExtFlow(v map[string]interface{}) map[string]interface{} {
 		}
 		ExtFlow.Unlock()
 	}
+}
+
+func ExtractByExtFlow(v map[string]interface{}) map[string]interface{} {
 	if spidercode[qu.ObjToString(v["spidercode"])] { //临时开标记录
 		log.Debug(qu.BsonIdToSId(v["_id"]), "//开标记录")
 		return v

+ 5 - 11
src/jy/extract/extractsave.go

@@ -411,10 +411,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		if e.IsExtractCity {
 			e.ExtractRegionInfo(j, jf, &tmp, true)
 			e.ExtractRegionClean(&tmp) //正常标准清洗
-			if qu.ObjToString(tmp["area"]) == "" || qu.ObjToString(tmp["全国"]) == "" {
-				//需要调试...
-				e.ExtractRegionOtherInfo(j, &tmp)
-			}
 		}
 		//品牌抽取
 		if ju.IsBrandGoods {
@@ -1131,7 +1127,7 @@ func AnalysisSaveFlowResult(j, jf *ju.Job, e *ExtractTask) map[string]interface{
 		if e.IsExtractCity {
 			e.ExtractRegionInfo(j, jf, &tmp, true)
 			e.ExtractRegionClean(&tmp) //正常标准清洗
-			if qu.ObjToString(tmp["area"]) == "" || qu.ObjToString(tmp["全国"]) == "" {
+			if qu.ObjToString(tmp["area"]) == "" || qu.ObjToString(tmp["area"]) == "全国" {
 				//需要调试...
 				e.ExtractRegionOtherInfo(j, &tmp)
 			}
@@ -1189,13 +1185,11 @@ func AnalysisSaveFlowResult(j, jf *ju.Job, e *ExtractTask) map[string]interface{
 				tmp["ffield"] = ffield
 			}
 		}
-		//临时保存指定字段数据
-		//new_tmp := map[string]interface{}{}
-		//new_tmp["area"] = qu.ObjToString(tmp["area"])
-		//new_tmp["city"] = qu.ObjToString(tmp["city"])
-		//new_tmp["district"] = qu.ObjToString(tmp["district"])
-		//tmp = new_tmp
 
+		//流式处理···直接返回···
+		if ju.IsFlow {
+			return
+		}
 		if e.TaskInfo.TestColl == "" {
 			if len(tmp) > 0 { //保存抽取结果
 				delete(tmp, "_id")

+ 12 - 0
src/jy/extract/extractudp.go

@@ -296,6 +296,18 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				}
 
 				_id := qu.BsonIdToSId(v["_id"])
+				if ju.IsOss { //根据id获取正文以及源码···
+					if rep := GetBidDetailByRestful(Oss_Address, Detail_BucketId, _id); rep.Error_msg == GetBidDetailSuccess {
+						if detail := qu.ObjToString(rep.Data); detail != "" {
+							v["detail"] = detail
+						}
+					}
+					if rep := GetBidDetailByRestful(Oss_Address, Html_BucketId, _id); rep.Error_msg == GetBidDetailSuccess {
+						if contenthtml := qu.ObjToString(rep.Data); contenthtml != "" {
+							v["contenthtml"] = contenthtml
+						}
+					}
+				}
 				//......
 				var j, jf *ju.Job
 				var isSite bool

+ 11 - 6
src/jy/extract/extraxtmethod.go

@@ -111,11 +111,11 @@ var unPackageWinnerReg = regexp.MustCompile("(重新招标)")
 var letter_entity = regexp.MustCompile("^[\u4E00-\u9FA5]{1,10}[A-Za-z]{1,5}[\u4E00-\u9FA5]{1,10}(公司|集团|单位|委员会|办公室|车务段|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会体]|政府)$")
 
 // 落款单位抽取
-var inscribe_entity_1 = regexp.MustCompile("\n([\\s]+)?([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|办公室|车务段|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会体]|政府|段))[\\s  ]*[\n]+([\\s ]+|发布时间[::\\s ]+)?([0-9]+[\\s ]*年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)")
-var inscribe_entity_2 = regexp.MustCompile("[\n。]([\\s]+)?([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|办公室|车务段|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会体]|政府|段))[\\s  ]*([\\s ]+|发布时间[::\\s ]+)?([0-9]+[\\s ]*年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)\n([\u4E00-\u9FA5].{4,20}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体]))")
+var inscribe_entity_1 = regexp.MustCompile("\n([\\s]+)?([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|办公室|车务段|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|厅|联合[会体]|政府|段))[\\s  ]*[\n]+([\\s ]+|发布时间[::\\s ]+)?([0-9]+[\\s ]*年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)")
+var inscribe_entity_2 = regexp.MustCompile("[\n。]([\\s]+)?([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|办公室|车务段|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|厅|联合[会体]|政府|段))[\\s  ]*([\\s ]+|发布时间[::\\s ]+)?([0-9]+[\\s ]*年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)\n([\u4E00-\u9FA5].{4,20}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体]))")
 
 // 特殊实体
-var inscribe_entity_3 = regexp.MustCompile("(招标组织部门|招标机构)[::]([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|办公室|车务段|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会体]|政府))")
+var inscribe_entity_3 = regexp.MustCompile("(招标组织部门|招标机构)[::]([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|办公室|车务段|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会体]|政府|厅))")
 
 // 有效企业
 var effectivefirm = regexp.MustCompile("^[\u4E00-\u9FA5]{4,15}(公司|集团|委员会|办公室|车务段|机构|企业|设计|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会体]|政府)$")
@@ -185,8 +185,7 @@ func file2text(doc *map[string]interface{}) {
 							continue
 						}
 						/*
-							{0: '承诺书', 1: '委托书', 2: '文件格式', 3: '中标公告', 4: '招标公告', 5: '招标文件', 6: '更正公告', 7: '合同', 8: '评审',
-							 9: '工程量清单', 10: '验收', 11: '报价', 12: '中小企业声明', 13: '采购内容', 14: '审批', 15: '其他', 16: '其他公告'}
+							{0: '承诺书', 1: '委托书', 2: '文件格式', 3: '中标公告', 4: '招标公告', 5: '招标文件', 6: '更正公告', 7: '合同', 8: '评审',9: '工程量清单', 10: '验收', 11: '报价', 12: '中小企业声明', 13: '采购内容', 14: '审批', 15: '其他', 16: '其他公告'}
 						*/
 						if cls := qu.ObjToString(ff["cls"]); cls != "" {
 							if cls == "文件格式" || cls == "工程量清单" || cls == "中小企业声明" {
@@ -441,6 +440,12 @@ func EmployEntDfaText(text string, winner string, agency string) string {
 		for _, v := range res {
 			if cl := utf8.RuneCountInString(v); cl > l && cl > 3 && !exclude_entity.MatchString(v) && entdfa_entity.MatchString(v) {
 				if !(v == winner || v == agency) {
+					if winner != "" && strings.Contains(winner, v) {
+						continue
+					}
+					if agency != "" && strings.Contains(agency, v) {
+						continue
+					}
 					l = cl
 					new_str = v
 				}
@@ -520,7 +525,7 @@ func EmployPostEntDfa(data map[string]interface{}) map[string]interface{} {
 	client := &http.Client{Timeout: 2 * time.Second}
 	jsonStr, _ := json.Marshal(data)
 	//172.17.4.238:9996,extcity.spdata.jianyu360.com
-	resp, err := client.Post("http://172.17.4.238:9996/service/entity/", "application/json", bytes.NewBuffer(jsonStr))
+	resp, err := client.Post("http://extcity.spdata.jianyu360.com/service/entity/", "application/json", bytes.NewBuffer(jsonStr))
 	if err != nil {
 		return info
 	}

+ 99 - 42
src/jy/mongodbutil/mgo.go

@@ -3,7 +3,6 @@ package mongodbutil
 import (
 	"context"
 	"log"
-	"runtime"
 	"time"
 
 	"go.mongodb.org/mongo-driver/bson"
@@ -143,17 +142,48 @@ func (m *MongodbSim) DestoryMongoConn(ms *MgoSess) {
 	ms = nil
 }
 
+// InitPoolDirect connects to m.MongodbAddr with the driver's direct-connection
+// option enabled, sizes the driver pool and the m.pool channel to m.Size, and
+// stores the client in m.C. Credentials are applied only when both UserName
+// and Password are set, authenticating against the "admin" database.
+// On a connect error the error is logged and m.C is left unset.
+func (m *MongodbSim) InitPoolDirect() {
+	opts := options.Client()
+	opts.SetConnectTimeout(3 * time.Second)
+	opts.ApplyURI("mongodb://" + m.MongodbAddr)
+	opts.SetMaxPoolSize(uint64(m.Size))
+	opts.SetDirect(true)
+	m.pool = make(chan bool, m.Size)
+
+	if m.UserName != "" && m.Password != "" {
+		cre := options.Credential{
+			Username:   m.UserName,
+			Password:   m.Password,
+			AuthSource: "admin",
+		}
+		opts.SetAuth(cre)
+	}
+
+	opts.SetMaxConnIdleTime(2 * time.Hour)
+	// NOTE(review): both cancel functions are discarded (go vet "lostcancel"),
+	// and ShortCtx expires one minute after this call — confirm no later
+	// operation relies on it.
+	m.Ctx, _ = context.WithTimeout(context.Background(), 99999*time.Hour)
+	m.ShortCtx, _ = context.WithTimeout(context.Background(), 1*time.Minute)
+	client, err := mongo.Connect(m.ShortCtx, opts)
+	if err != nil {
+		log.Println("mgo init error:", err.Error())
+	} else {
+		m.C = client
+		log.Println("init success")
+	}
+}
+
 func (m *MongodbSim) InitPool() {
 	opts := options.Client()
 	opts.SetConnectTimeout(3 * time.Second)
 	opts.ApplyURI("mongodb://" + m.MongodbAddr)
 	opts.SetMaxPoolSize(uint64(m.Size))
+	//opts.SetDirect(true)
 	m.pool = make(chan bool, m.Size)
 
 	if m.UserName != "" && m.Password != "" {
 		cre := options.Credential{
-			Username: m.UserName,
-			Password: m.Password,
+			Username:   m.UserName,
+			Password:   m.Password,
+			AuthSource: "admin",
 		}
 		opts.SetAuth(cre)
 	}
@@ -177,7 +207,29 @@ func (m *MongodbSim) Close() {
 	<-m.pool
 }
 
-//批量插入
+// CreateIndex creates the given index models on collection c. It returns true
+// only when the call succeeded and reported at least one index name;
+// otherwise it logs the error value and returns false.
+func (m *MongodbSim) CreateIndex(c string, models []mongo.IndexModel) bool {
+	m.Open()
+	defer m.Close()
+	created, err := m.C.Database(m.DbName).Collection(c).Indexes().CreateMany(m.Ctx, models)
+	if err != nil || len(created) == 0 {
+		log.Println("CreateIndex Error:", err)
+		return false
+	}
+	return true
+}
+
+// Count returns the number of documents in collection coll that match query,
+// together with any error from the driver.
+func (m *MongodbSim) Count(coll string, query map[string]interface{}) (int64, error) {
+	m.Open()
+	defer m.Close()
+	collection := m.C.Database(m.DbName).Collection(coll)
+	return collection.CountDocuments(m.Ctx, query)
+}
+
+// 批量插入
 func (m *MongodbSim) UpSertBulk(c string, doc ...[]map[string]interface{}) (map[int64]interface{}, bool) {
 	m.Open()
 	defer m.Close()
@@ -204,7 +256,7 @@ func (m *MongodbSim) UpSertBulk(c string, doc ...[]map[string]interface{}) (map[
 	return r.UpsertedIDs, true
 }
 
-//批量插入
+// 批量插入
 func (m *MongodbSim) SaveBulk(c string, doc ...map[string]interface{}) bool {
 	m.Open()
 	defer m.Close()
@@ -223,7 +275,7 @@ func (m *MongodbSim) SaveBulk(c string, doc ...map[string]interface{}) bool {
 	return true
 }
 
-//保存
+// 保存
 func (m *MongodbSim) Save(c string, doc map[string]interface{}) interface{} {
 	m.Open()
 	defer m.Close()
@@ -235,43 +287,41 @@ func (m *MongodbSim) Save(c string, doc map[string]interface{}) interface{} {
 	return r.InsertedID
 }
 
-//按条件更新
-func (m *MongodbSim) Update(c string, q, u interface{}, upsert bool, multi bool) bool {
-	defer catch()
+// 更新by Id
+func (m *MongodbSim) UpdateById(c, id string, doc map[string]interface{}) bool {
 	m.Open()
 	defer m.Close()
-	ct := options.Update()
-	if upsert {
-		ct.SetUpsert(true)
-	}
 	coll := m.C.Database(m.DbName).Collection(c)
-	var err error
-	if multi {
-		_, err = coll.UpdateMany(m.Ctx, ObjToM(q), ObjToM(u), ct)
-	} else {
-		_, err = coll.UpdateOne(m.Ctx, ObjToM(q), ObjToM(u), ct)
+	_, err := coll.UpdateOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)}, doc)
+	if err != nil {
+		return false
 	}
+	return true
+}
+
+func (m *MongodbSim) UpdateStrId(c, id string, doc map[string]interface{}) bool {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	_, err := coll.UpdateOne(m.Ctx, map[string]interface{}{"_id": id}, doc)
 	if err != nil {
-		log.Println("删除错误", err.Error())
 		return false
 	}
 	return true
 }
 
-//更新by Id
-func (m *MongodbSim) UpdateById(c, id string, doc map[string]interface{}) bool {
+func (m *MongodbSim) UpdateQueryData(c string, query map[string]interface{}, doc map[string]interface{}) bool {
 	m.Open()
 	defer m.Close()
 	coll := m.C.Database(m.DbName).Collection(c)
-	_, err := coll.UpdateOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)}, doc)
+	_, err := coll.UpdateOne(m.Ctx, query, doc)
 	if err != nil {
-		log.Println(err)
 		return false
 	}
 	return true
 }
 
-//删除by id
+// 删除by id
 func (m *MongodbSim) DeleteById(c, id string) int64 {
 	m.Open()
 	defer m.Close()
@@ -283,7 +333,7 @@ func (m *MongodbSim) DeleteById(c, id string) int64 {
 	return r.DeletedCount
 }
 
-//通过条件删除
+// 通过条件删除
 func (m *MongodbSim) Delete(c string, query map[string]interface{}) int64 {
 	m.Open()
 	defer m.Close()
@@ -295,7 +345,7 @@ func (m *MongodbSim) Delete(c string, query map[string]interface{}) int64 {
 	return r.DeletedCount
 }
 
-//findbyid
+// findbyid
 func (m *MongodbSim) FindById(c, id string) map[string]interface{} {
 	m.Open()
 	defer m.Close()
@@ -306,7 +356,7 @@ func (m *MongodbSim) FindById(c, id string) map[string]interface{} {
 	return v
 }
 
-//findone
+// findone
 func (m *MongodbSim) FindOne(c string, query map[string]interface{}) map[string]interface{} {
 	m.Open()
 	defer m.Close()
@@ -317,7 +367,7 @@ func (m *MongodbSim) FindOne(c string, query map[string]interface{}) map[string]
 	return v
 }
 
-//find
+// find
 func (m *MongodbSim) Find(c string, query map[string]interface{}, sort, fields interface{}) ([]map[string]interface{}, error) {
 	m.Open()
 	defer m.Close()
@@ -328,6 +378,27 @@ func (m *MongodbSim) Find(c string, query map[string]interface{}, sort, fields i
 		log.Fatal(err)
 		return nil, err
 	}
+
+	var results []map[string]interface{}
+	if err = r.All(m.Ctx, &results); err != nil {
+		log.Fatal(err)
+		return nil, err
+	}
+	return results, nil
+}
+
+// find
+func (m *MongodbSim) FindLimit(c string, query map[string]interface{}, sort, fields interface{}, limit int64) ([]map[string]interface{}, error) {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	op := options.Find()
+	r, err := coll.Find(m.Ctx, query, op.SetSort(sort), op.SetProjection(fields), op.SetLimit(limit))
+	if err != nil {
+		log.Fatal(err)
+		return nil, err
+	}
+
 	var results []map[string]interface{}
 	if err = r.All(m.Ctx, &results); err != nil {
 		log.Fatal(err)
@@ -344,17 +415,3 @@ func StringTOBsonId(id string) primitive.ObjectID {
 func BsonTOStringId(id interface{}) string {
 	return id.(primitive.ObjectID).Hex()
 }
-
-//出错拦截
-func catch() {
-	if r := recover(); r != nil {
-		log.Println(r)
-		for skip := 0; ; skip++ {
-			_, file, line, ok := runtime.Caller(skip)
-			if !ok {
-				break
-			}
-			go log.Printf("%v,%v\n", file, line)
-		}
-	}
-}

+ 2 - 2
src/jy/pretreated/analycore.go

@@ -655,7 +655,7 @@ func (table *Table) MergerToTableresult() {
 返回:汇总表格对象
 *
 */
-func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock, isSite bool, codeSite string) (tabres *TableResult) {
+func AnalyTableV2(tabs *goquery.Selection, toptype, toptype_old, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock, isSite bool, codeSite string) (tabres *TableResult) {
 	defer qutil.Catch()
 	//u.Debug(con)
 	if itype == 1 {
@@ -663,7 +663,7 @@ func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype
 		con = RepairCon(con)
 	}
 	//生成tableresult对象
-	tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock)
+	tabres = NewTableResult(_id, toptype, toptype_old, blockTag, con, itype, ruleBlock)
 	if fblbReg.MatchString(blockTag) && utf8.RuneCountInString(blockTag) < 200 {
 		//log.Debug(fblbReg.FindString(blockTag))
 		return

+ 26 - 1
src/jy/pretreated/analymethod.go

@@ -281,6 +281,27 @@ func dealWithMultiSuppliersText(con string) (bool, string) {
 	return false, ""
 }
 
+// dealWithSpecResultInfoText strips two sections from result-type announcements: the text between the
+// ResultStartReg heading and the ResultMiddleReg heading, and the text between the ResultMiddleReg heading
+// and the ResultEndReg heading. Each section is replaced by a single "\n"; the headings themselves stay.
+// It returns (true, rewritten text) when all three headings are found in that order with non-empty text
+// between them, otherwise (false, ""). The sections are cut by byte offset, so identical text elsewhere in
+// con is left untouched and removing the first section cannot stop the second one from being removed.
+func dealWithSpecResultInfoText(con string) (bool, string) {
+	start := ResultStartReg.FindStringIndex(con)
+	middle := ResultMiddleReg.FindStringIndex(con)
+	end := ResultEndReg.FindStringIndex(con)
+	if start == nil || middle == nil || end == nil {
+		return false, ""
+	}
+	s1, e1 := start[1], middle[0] // first section: end of start heading .. start of middle heading
+	s2, e2 := middle[1], end[0]   // second section: end of middle heading .. start of end heading
+	if e1 <= s1 || e2 <= s2 {
+		return false, "" // headings out of order or nothing between them
+	}
+	return true, con[:s1] + "\n" + con[e1:s2] + "\n" + con[e2:]
+}
+
 // 特殊-重构
 func supplyInfoMethod(arr [][]string, w_index int, b_index int) string {
 	new_text := ""
@@ -329,6 +350,10 @@ func AnalyStart(job *u.Job, isSite bool, codeSite string) {
 	if m_b, m_c := dealWithMultiSuppliersText(con); m_b {
 		con = m_c
 	}
+	//结果类调整-特殊结构~重构
+	if m_b, m_c := dealWithSpecResultInfoText(con); m_b {
+		con = m_c
+	}
 
 	//工程业绩描述影响抽取
 	con = formattext20.ReplaceAllString(con, "\n")
@@ -427,7 +452,7 @@ func AnalyStart(job *u.Job, isSite bool, codeSite string) {
 				//添加标识:文本中有table
 				//blockTag - 块标签
 				//处理表格
-				tabres := AnalyTableV2(tabs[i], job.Category, blockTag, con, 1, job.SourceMid, job.RuleBlock, isSite, codeSite) //解析表格入口 返回:汇总表格对象
+				tabres := AnalyTableV2(tabs[i], job.Category, job.Category_Old, blockTag, con, 1, job.SourceMid, job.RuleBlock, isSite, codeSite) //解析表格入口 返回:汇总表格对象
 				job.IsUnRulesTab = tabres.isUnRulesTab
 				processTableResult(tabres, bl, job, isSite, codeSite)
 			}

+ 5 - 1
src/jy/pretreated/analystep.go

@@ -70,6 +70,10 @@ var MultiEndReg = regexp.MustCompile("([三四五][、]主要标的信息)")
 var SupplyInfoReg1 = regexp.MustCompile("([1-9])[::]供应商名称[\\s  ]+(.{4,25}(公司|研究院|研究所)).*中标[((]成交[))]金额[\\s  ]+([0-9\\.万元]+)[;;]")
 var SupplyInfoReg2 = regexp.MustCompile("()供应商名称[::](.{4,25}(公司|研究院|研究所))\n供应商地址.*\n中标[((]成交[))]金额[::]([0-9\\.]+[((]?[万元]+[))]?)")
 
+var ResultStartReg = regexp.MustCompile("([二三四][、]投标供应商名称及报价[::])") // numbered heading "投标供应商名称及报价" (bidding suppliers' names and quotes); start marker used by dealWithSpecResultInfoText
+var ResultMiddleReg = regexp.MustCompile("([三四五][、]候选中标([((]成交[))])?供应商名单(.{2,10})?[::])") // numbered heading "候选中标(成交)供应商名单" (list of candidate winning suppliers); middle marker used by dealWithSpecResultInfoText
+var ResultEndReg = regexp.MustCompile("([四五六][、]中标[((]成交[))]信息)") // numbered heading "中标(成交)信息" (winning-bid / deal information); end marker used by dealWithSpecResultInfoText
+
 func processTableInBlock(bl *util.Block, job *util.Job, isSite bool, codeSite string) bool {
 	//块中再查找表格(块,处理完把值赋到块)
 	//bl.Text = formatText(bl.Text, "biangeng")
@@ -84,7 +88,7 @@ func processTableInBlock(bl *util.Block, job *util.Job, isSite bool, codeSite st
 			tmptag = strings.TrimSpace(tab.Nodes[0].PrevSibling.Data)
 		}
 		//添加标识:文本中有table
-		tabres := AnalyTableV2(tab, job.Category, tmptag, tab.Text(), 2, job.SourceMid, job.RuleBlock, isSite, codeSite) //解析表格入口 返回:汇总表格对象
+		tabres := AnalyTableV2(tab, job.Category, job.Category_Old, tmptag, tab.Text(), 2, job.SourceMid, job.RuleBlock, isSite, codeSite) //解析表格入口 返回:汇总表格对象
 		//if packageFlag {
 		//	tabres.PackageMap = nil
 		//	tabres.IsMultiPackage = false

+ 4 - 0
src/jy/pretreated/analytable.go

@@ -228,6 +228,10 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 					key = tmp_near.Val
 				}
 			}
+		} else if key == "联系人" || key == "联系电话" {
+			if table.Toptype_Old == "采购意向" {
+				key = "采购" + key
+			}
 		}
 		if near.Val == "" {
 			key = fmtkey("k", near.TR.RowPos, near.ColPos)

+ 11 - 5
src/jy/pretreated/tablev2.go

@@ -20,8 +20,9 @@ import (
 type TableResult struct {
 	Id              interface{} //信息id
 	Toptype         string      //信息类型
-	Itype           int         //1全文 2是块
-	BlockTag        string      //块标签
+	Toptype_Old     string
+	Itype           int    //1全文 2是块
+	BlockTag        string //块标签
 	Html            string
 	Tabs            []*Table            //子表集合,子表中包含标准化kv或原始kv
 	GoqueryTabs     *goquery.Selection  //goquery对象
@@ -40,10 +41,11 @@ type TableResult struct {
 }
 
 // 快速创建TableResult对象
-func NewTableResult(Id interface{}, Toptype, BlockTag, con string, Itype int, ruleBlock *u.RuleBlock) *TableResult {
+func NewTableResult(Id interface{}, Toptype, Toptype_Old, BlockTag, con string, Itype int, ruleBlock *u.RuleBlock) *TableResult {
 	return &TableResult{
 		Id:           Id,
 		Toptype:      Toptype,
+		Toptype_Old:  Toptype_Old,
 		Html:         con,
 		Itype:        Itype,
 		BlockTag:     BlockTag,
@@ -217,14 +219,14 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR, isSite bool, codeSite, tag str
 			if IsHide(tv) {
 				continue
 			}
-			sonts := NewTableResult(ts.Id, ts.Toptype, stag, td.Html, 2, td.TR.Table.TableResult.RuleBlock)
+			sonts := NewTableResult(ts.Id, ts.Toptype, ts.Toptype_Old, stag, td.Html, 2, td.TR.Table.TableResult.RuleBlock)
 			sonts.GoqueryTabs = tv
 			sonts.Analy(isSite, codeSite)
 
 			//sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id, table.TableResult.RuleBlock) //又一次调用解析表格入口
 			td.BH = false
 			if td.TR.Table.TableResult == nil {
-				td.TR.Table.TableResult = NewTableResult(sonts.Id, sonts.Toptype, sonts.BlockTag, sonts.Html, sonts.Itype, sonts.RuleBlock)
+				td.TR.Table.TableResult = NewTableResult(sonts.Id, sonts.Toptype, sonts.Toptype_Old, sonts.BlockTag, sonts.Html, sonts.Itype, sonts.RuleBlock)
 			}
 			MergeKvTags(td.TR.Table.TableResult.KvTags, sonts.KvTags)
 			td.SonTableResult = sonts
@@ -590,6 +592,8 @@ type Table struct {
 	HasBrand               int                        //有品牌
 	HasGoods               int                        //有商品
 	PriceNumberData        [][]map[string]interface{} //单价和个数抽取结果
+	Toptype                string
+	Toptype_Old            string
 }
 
 func NewTable(Html string, TableResult *TableResult, tab *goquery.Selection) *Table {
@@ -606,6 +610,8 @@ func NewTable(Html string, TableResult *TableResult, tab *goquery.Selection) *Ta
 		StartAndEndRation:      map[string]*TDRationScope{},
 		StartAndEndRationKSort: NewSortMap(),
 		BlockPackage:           NewSortMap(),
+		Toptype:                TableResult.Toptype,
+		Toptype_Old:            TableResult.Toptype_Old,
 	}
 }
 

+ 40 - 38
src/jy/util/article.go

@@ -5,44 +5,46 @@ import (
 )
 
 type Job struct {
-	SourceMid         string                            //数据源的MongoId
-	Category          string                            //类别
-	CategorySecond    string                            //二级分类
-	Content           string                            //正文
-	ContentClean      string                            //清洗后的正文
-	Title             string                            //标题
-	SpiderCode        string                            //爬虫代码
-	Site              string                            //站点
-	Domain            string                            //网站域名
-	Href              string                            //原文链接
-	City              string                            //城市
-	Province          string                            //省份
-	Jsondata          *map[string]interface{}           //
-	Data              *map[string]interface{}           //数据库源数据
-	Block             []*Block                          //分块
-	Result            map[string][]*ExtField            //结果
-	BuyerAddr         string                            //采购单位地址
-	BlockPackage      map[string]*BlockPackage          //块中的分包
-	Winnerorder       []map[string]interface{}          //中标候选人排序
-	PackageInfo       map[string]map[string]interface{} //分包信息
-	RuleBlock         *RuleBlock                        //分块规则
-	BlockClassify     *BlockClassify                    //块分类
-	BrandData         [][]map[string]string             //品牌抽取
-	PriceNumberData   [][]map[string]interface{}        //单价和个数抽取
-	HasTable          int                               //有table
-	HasKey            int                               //是否匹配到table中的标题
-	HasBrand          int                               //有品牌
-	HasGoods          int                               //有商品
-	IsFile            bool                              //有附件
-	FullAreaScore     map[string]float64                //全称province得分
-	FullCityScore     map[string]float64                //全称city得分
-	FullDistrictScore map[string]float64                //全称district得分
-	SimAreaScore      map[string]float64                //简称province得分
-	SimCityScore      map[string]float64                //简称city得分
-	SimDistrictScore  map[string]float64                //简称district得分
-	Dataging          int
-	IsClearnMoney     string //站点清理金额
-	IsUnRulesTab      bool   //是否为不规则表格
+	SourceMid          string                            // MongoId of the source record
+	Category           string                            // category
+	CategorySecond     string                            // second-level category
+	Category_Old       string                            // category ("_Old" presumably means the earlier classification — TODO confirm)
+	CategorySecond_Old string                            // second-level category ("_Old" presumably means the earlier classification — TODO confirm)
+	Content            string                            // body text
+	ContentClean       string                            // cleaned body text
+	Title              string                            // title
+	SpiderCode         string                            // spider (crawler) code
+	Site               string                            // site
+	Domain             string                            // website domain
+	Href               string                            // link to the original page
+	City               string                            // city
+	Province           string                            // province
+	Jsondata           *map[string]interface{}           //
+	Data               *map[string]interface{}           // source data from the database
+	Block              []*Block                          // blocks
+	Result             map[string][]*ExtField            // results
+	BuyerAddr          string                            // buyer (purchasing unit) address
+	BlockPackage       map[string]*BlockPackage          // packages found in blocks
+	Winnerorder        []map[string]interface{}          // ranking of winning-bid candidates
+	PackageInfo        map[string]map[string]interface{} // package information
+	RuleBlock          *RuleBlock                        // block-splitting rules
+	BlockClassify      *BlockClassify                    // block classification
+	BrandData          [][]map[string]string             // brand extraction
+	PriceNumberData    [][]map[string]interface{}        // unit price and quantity extraction
+	HasTable           int                               // has a table
+	HasKey             int                               // whether a table header was matched
+	HasBrand           int                               // has a brand
+	HasGoods           int                               // has goods
+	IsFile             bool                              // has an attachment
+	FullAreaScore      map[string]float64                // score by full province name
+	FullCityScore      map[string]float64                // score by full city name
+	FullDistrictScore  map[string]float64                // score by full district name
+	SimAreaScore       map[string]float64                // score by abbreviated province name
+	SimCityScore       map[string]float64                // score by abbreviated city name
+	SimDistrictScore   map[string]float64                // score by abbreviated district name
+	Dataging           int
+	IsClearnMoney      string // site-level money cleanup
+	IsUnRulesTab       bool   // whether the table is irregular
 }
 
 type ExtField struct {

+ 13 - 2
src/jy/util/util.go

@@ -45,8 +45,9 @@ var BrandGet *DFA     //品牌
 var IsBrandGoods bool //是否开启品牌抽取
 
 var SaveResult, FieldsFind, IsSaveTag, SaveBlock, QualityAudit, Ffield, Inscribe bool
-var Site_Mgo, Qyxy_Mgo *MongodbSim
-
+var Site_Mgo, Qyxy_Mgo, Bid_Mgo *MongodbSim // package-level MongoDB handles; Bid_Mgo is built in UtilInit with DbName "qfw"
+var IsFlow = false // NOTE(review): main.go's RunFlowFile sets u.IsFlow = true, presumably this variable — confirm the import alias
+var IsOss = false // overwritten in UtilInit from Config["isoss"]
 var IsUpdateRule bool
 var DefaultRegions, AdjustmentRegions = []string{}, []string{}
 
@@ -90,6 +91,16 @@ func UtilInit() {
 	}
 	Qyxy_Mgo.InitPool()
 
+	Bid_Mgo = &MongodbSim{
+		MongodbAddr: qu.ObjToString(Config["qyxy_addr"]),
+		DbName:      "qfw",
+		Size:        5,
+		UserName:    qu.ObjToString(Config["qyxy_username"]),
+		Password:    qu.ObjToString(Config["qyxy_password"]),
+	}
+	Bid_Mgo.InitPool()
+
+	IsOss, _ = Config["isoss"].(bool)
 	SaveResult, _ = Config["saveresult"].(bool)
 	FieldsFind, _ = Config["fieldsfind"].(bool)
 	IsSaveTag, _ = Config["iscltlog"].(bool)

+ 55 - 1
src/main.go

@@ -17,6 +17,7 @@ import (
 	_ "net/http/pprof"
 	qu "qfw/util"
 	"sync"
+	"time"
 )
 
 func init() {
@@ -33,7 +34,8 @@ func init() {
 
 // 流式...
 func mainT() {
-	go RunFlowSystem()
+	//go RunFlowSystem()
+	go RunFlowFile() // streaming flow: pre-processing of data (RunFlowFile loops over the bidding_file collection)
 	lock := make(chan bool)
 	<-lock
 }
@@ -50,6 +52,58 @@ func main() {
 	<-lock
 }
 
+// RunFlowFile polls the bidding_file collection forever: every record with processing_flow == 1 is run through
+// extract.ExtractByExtFlow and written back with its ext_dict and processing_flow = 2; rounds are 30 seconds apart.
+func RunFlowFile() {
+	log.Debug("开始启动流程前置处理···bidding_file···")
+	u.IsFlow = true
+	extract.InitExtractFlowTask()
+	q := map[string]interface{}{"processing_flow": 1}
+	bid_coll := "bidding_file"
+	for {
+		extract.ExtractUpdateRule()
+		count, _ := u.Bid_Mgo.Count(bid_coll, q) // NOTE(review): the second return value is ignored — confirm a failed count should just fall through
+		if count == 0 {
+			log.Debug("未查询到待处理数据···睡眠30秒···")
+			time.Sleep(time.Second * 30)
+			continue
+		}
+		log.Debug("待处理数据量:", count)
+		pool_mgo := make(chan bool, 10) // caps the number of concurrent workers at 10
+		wg_mgo := &sync.WaitGroup{}
+		sess := u.Bid_Mgo.GetMgoConn()
+		total, isok := 0, 0
+		it := sess.DB(u.Bid_Mgo.DbName).C(bid_coll).Find(&q).Sort("_id").Iter()
+		for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+			if total%200 == 0 {
+				log.Debug("cur ai index ", total)
+			}
+			isok++
+			pool_mgo <- true
+			wg_mgo.Add(1)
+			go func(tmp map[string]interface{}) {
+				defer func() {
+					<-pool_mgo
+					wg_mgo.Done()
+				}()
+				tmpid := qu.BsonIdToSId(tmp["_id"])
+				ext_dict := extract.ExtractByExtFlow(tmp)
+				delete(ext_dict, "kvtext")
+				delete(ext_dict, "regions_log")
+				delete(ext_dict, "field_source")
+				// Store the trimmed result and mark the record as processed (processing_flow = 2).
+				update := map[string]interface{}{"ext_dict": ext_dict, "processing_flow": 2}
+				u.Bid_Mgo.UpdateById(bid_coll, tmpid, map[string]interface{}{"$set": update})
+			}(tmp)
+			tmp = make(map[string]interface{}) // fresh map per record so workers never share one
+		}
+		wg_mgo.Wait()
+		u.Bid_Mgo.DestoryMongoConn(sess) // released every round; a defer inside this endless loop would never run
+		log.Debug("流程前置处理···bidding_file···完毕", isok)
+		time.Sleep(time.Second * 30)
+	}
+}
+
 func RunFlowSystem() {
 	addr := qu.ObjToString(u.Config["flowaddr"])
 	jn := jnats.NewJnats(addr)

+ 9 - 1
src/res/pcd.txt

@@ -1,3 +1,11 @@
+横州市 4 n
+雄安新区 4 n
+白杨市 4 n
+米林市 4 n
+周口临港开发区 4 n
+德州天衢新区 4 n
+错那市 4 n
+乌拉盖管理区管委会 4 n
 乐东黎族自治县 4 n
 合浦县 4 n
 双流区 4 n
@@ -4905,7 +4913,6 @@
 临翔 3 n
 高密 3 n
 永寿 3 n
-阿里 3 n
 尚义 3 n
 遂昌 3 n
 盐田 3 n
@@ -6293,3 +6300,4 @@
 渭城 3 n
 江干 3 n
 嘉荫 3 n
+

+ 9 - 2
src/res/sv.txt

@@ -4909,7 +4909,6 @@
 临翔 3 n
 高密 3 n
 永寿 3 n
-阿里 3 n
 尚义 3 n
 遂昌 3 n
 盐田 3 n
@@ -409397,4 +409396,12 @@ IT家园社区居委会 4 n
 任泽区 4 n
 西洞庭管理区 4 n
 羊山新区 4 n
-西咸新区 4 n
+西咸新区 4 n
+雄安新区 4 n
+白杨市 4 n
+米林市 4 n
+周口临港开发区 4 n
+德州天衢新区 4 n
+错那市 4 n
+乌拉盖管理区管委会 4 n
+横州市 4 n

+ 1 - 1
src/res/tablev1.json

@@ -6,7 +6,7 @@
 		"^(包号|联系|评标|单位|公告|采购|商品|附件|质保|用途|公示|机构|评审|品名|规格|参数|指标|型号|数量|证书).{0,10}$__",
 		"(专家|评委|[打得]分|附件材料)[a-zA-Z0-9]*$__M",
 		"(基本需求.{0,15}|.*联系方式|总计|包组^[一二三123]|总监|经理|负责人|证书名称|证书编号|合同包|排序|二级建造师|项目负责人及资格证书编号)__M",
-		"(品牌|份额|姓名|起讫桩号|服务期|建设期限|限价|邮编|面积|组织形式|发布单位|招标方式|修建宽度|类别|备注|合计|电话|评审|原因|行业|价格|注册资金|印刷服务|业绩奖项)__",
+		"(品牌|媒介|份额|姓名|起讫桩号|服务期|建设期限|限价|邮编|面积|组织形式|发布单位|招标方式|修建宽度|类别|备注|合计|电话|评审|原因|行业|价格|注册资金|印刷服务|业绩奖项)__",
 		"(\\W{2,10}(名称|参数[及]?要求)|[\\d]+标段)$__M"
 	],
 	"jghead":[

+ 7 - 13
udpcontrol/src/config.json

@@ -1,32 +1,26 @@
 {
     "udpport": ":1784",
     "jkmail": {
-        "to": "zhengkun@topnet.net.cn,zhangjinkun@topnet.net.cn,wangjianghan@topnet.net.cn",
-        "api": "http://172.17.145.179:19281/_send/_mail"
+        "to": "zhengkun@topnet.net.cn,zhangjinkun@topnet.net.cn,wangjianghan@topnet.net.cn,wangchengcheng@topnet.net.cn,xuzhiheng@topnet.net.cn",
+        "api": "http://172.17.162.36:19281/_send/_mail"
     },
     "data_mgodb": {
-        "addr": "172.17.145.163:27083,172.17.4.187:27082",
+        "addr": "172.31.31.202:27081,172.20.45.128:27080",
         "db": "extract_2021",
         "coll": "extract_control_center"
     },
     "using_machine": 3,
     "source_mgodb": {
-        "addr": "172.17.145.163:27083,172.17.4.187:27082",
+        "addr": "172.31.31.202:27081,172.20.45.128:27080",
         "db": "qfw",
         "coll": "bidding"
     },
     "nextNode": [
         {
-            "addr": "172.17.4.196",
-            "port": 1799,
+            "addr": "172.17.162.35",
+            "port": 1791,
             "stype": "",
-            "memo": "清洗数据"
-        },
-        {
-            "addr": "172.17.4.196",
-            "port": 1762,
-            "stype": "",
-            "memo": "敏感词清理"
+            "memo": "大模型识别"
         }
     ]
 }

+ 1 - 1
udpcontrol/src/main.go

@@ -16,9 +16,9 @@ func init() {
 func main() {
 	//各种监控等
 	go extractRunningMonitoring()
+	go getRepeatTask()
 	go lastUdpMonitoring()
 	go nextUdpMonitoring()
-	go getRepeatTask()
 
 	lock := make(chan bool)
 	<-lock

+ 1 - 1
udpcontrol/src/method.go

@@ -155,7 +155,7 @@ func nextUdpMonitoring() {
 			node, _ := v.(*udpNode)
 			if now-node.timestamp > 120 {
 				udptaskmap.Delete(k)
-				sendErrMailApi("抽取控制中心~下节点未响应~警告", fmt.Sprintf("下节点~数据清洗~未及时响应...请检查..."))
+				sendErrMailApi("抽取控制中心~下节点未响应~警告", fmt.Sprintf("下节点~大模型识别~未及时响应...请检查..."))
 			}
 			return true
 		})

+ 1 - 1
udpcontrol/src/updprocess.go

@@ -53,7 +53,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 					"sid": sid,
 					"eid": eid,
 				})
-				log.Debug("udp收到任务...数量:", len(taskList), "具体任务:", taskList)
+				log.Debug("udp收到任务...数量:", len(taskList), "具体任务:", key)
 				udplock.Unlock()
 			}
 		}

+ 12 - 2
udps/main.go

@@ -13,18 +13,22 @@ import (
 )
 
 var startDate, endDate string
+var coll string
 
 func main() {
 	ip, p, tmptime, tmpkey, id1, id2, stype, q, bkey, param, ids := "", 0, 0, "", "", "", "", "", "", "", ""
+	pici := ""
+	flag.StringVar(&pici, "pici", "", "pici")
 	flag.StringVar(&startDate, "start", "", "开始日期2006-01-02")
 	flag.StringVar(&endDate, "end", "", "结束日期2006-01-02")
 	flag.StringVar(&ip, "ip", "127.0.0.1", "ip")
 	flag.IntVar(&p, "p", 6601, "端口")
 	flag.IntVar(&tmptime, "tmptime", 0, "时间查询")
 	flag.StringVar(&tmpkey, "tmpkey", "", "时间字段")
+	flag.StringVar(&coll, "coll", "", "表名")
 
-	flag.StringVar(&id1, "gtid", "124ed30f4f7bde5444f1eb84", "gtid")
-	flag.StringVar(&id2, "lteid", "92446f91923488e1724735de", "lteid")
+	flag.StringVar(&id1, "gtid", "", "gtid")
+	flag.StringVar(&id2, "lteid", "", "lteid")
 
 	flag.StringVar(&ids, "ids", "", "id1,id2")
 	flag.StringVar(&stype, "stype", "", "stype,传递类型")
@@ -59,6 +63,9 @@ func main() {
 			//"lteid": id2,
 			"stype": stype,
 		}
+		if pici != "" {
+			m1["pici"] = pici
+		}
 		if id1 != "" {
 			m1["gtid"] = id1
 		}
@@ -71,6 +78,9 @@ func main() {
 		if ids != "" {
 			m1["ids"] = ids
 		}
+		if coll != "" {
+			m1["coll"] = coll
+		}
 		if q != "" {
 			m1["query"] = mongodb.ObjToMQ(q, true) //qutil.ObjToMap(q)
 		}