123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261 |
- package main
- import (
- "fmt"
- "jy/extract"
- "jy/mongodbutil"
- "jy/pretreated"
- ju "jy/util"
- "log"
- "os"
- qu "qfw/util"
- "regexp"
- "time"
- )
var (
	// f is the file that the block-title dump writes to. Nothing in this
	// file opens it, so it is nil unless a caller assigns it first.
	f *os.File

	// m remembers which block titles have already been written to f.
	m = map[string]bool{}
)
- func main12() {
- //winnerorder()
- //return
- //log.Println(pretreated.ProcTitle("以上公告内容如有变动将在相关网络媒体上另行通知凡购买本招标文件的单位必须就此采购项目的相关事宜详细咨询否则参与投标即被视为已经充分了解了招标方的需求中标后承担该文件范围内的所有要求投标前如对招标文件存有疑问请在投标截止日期前三个工作日以实名制书面文件向我公司询问否则视为接受已报名购买招标文件的投标商未递交投标文件或虽递交投标文件但未参加开标大会的投标商不得再参加该项目的采购活动"))
- //return
- //f, _ = os.OpenFile("./title.txt", os.O_RDWR|os.O_CREATE, 777)
- //all()
- one()
- }
- func all() {
- m := mongodbutil.MgoFactory(3, 3, 120, "192.168.3.207:27082", "extract_kf")
- sess := m.Get()
- defer m.Close(sess)
- it := sess.DB("extract_kf").C("bidding201901").Find(nil).Iter()
- pool := make(chan bool, 5)
- count := 0
- for temp := make(map[string]interface{}); it.Next(&temp); {
- pool <- true
- count++
- go func(d map[string]interface{}) {
- defer func() {
- <-pool
- }()
- com(d)
- }(temp)
- temp = make(map[string]interface{})
- if count%200 == 0 {
- log.Println(count)
- }
- }
- log.Println("over...")
- time.Sleep(time.Hour)
- }
- func one() {
- m := mongodbutil.MgoFactory(3, 3, 120, "192.168.3.207:27081", "qfw")
- d, _ := m.FindById("bidding", "5d424df7a5cb26b9b7b61fde", extract.Fields)
- com(*d)
- }
- func com(doc map[string]interface{}) {
- detail := GetDetail(doc)
- doc["detail"] = detail
- toptype := qu.ObjToString(doc["toptype"])
- subtype := qu.ObjToString(doc["subtype"])
- if qu.ObjToString(doc["type"]) == "bid" {
- toptype = "结果"
- }
- if toptype == "" {
- toptype = "*"
- }
- e := &extract.ExtractTask{
- TaskInfo: &extract.TaskInfo{
- Version: "V3.1.2",
- VersionId: "5cdd1c70e138234848c1d703",
- ProcessPool: make(chan bool, 1),
- },
- }
- e.Id = qu.ObjToString(ju.Config["udptaskid"])
- e.InitTaskInfo()
- //d.TaskInfo.FDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
- //d.TaskInfo.TDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.ToDbAddr, ext.TaskInfo.ToDB)
- e.InitSite()
- e.InitRulePres()
- e.InitRuleBacks(false)
- e.InitRuleBacks(true)
- e.InitRuleCore(false)
- e.InitRuleCore(true)
- e.InitBlockRule()
- e.InitPkgCore()
- e.InitTag(false)
- e.InitTag(true)
- e.InitClearFn(false)
- e.InitClearFn(true)
- if e.IsExtractCity { //版本上控制是否开始城市抽取
- //初始化城市DFA信息
- e.InitCityDFA()
- e.InitAreaCode()
- e.InitPostCode()
- }
- //质量审核
- e.InitAuditFields()
- e.InitAuditRule()
- e.InitAuditClass()
- e.InitAuditRecogField()
- //品牌抽取是否开启
- ju.IsBrandGoods, _ = ju.Config["brandgoods"].(bool)
- j := &ju.Job{
- SourceMid: qu.BsonIdToSId(doc["_id"]),
- Category: toptype,
- CategorySecond: subtype,
- Content: qu.ObjToString(doc["detail"]),
- SpiderCode: qu.ObjToString(doc["spidercode"]),
- //Domain: qu.ObjToString(doc["domain"]),
- //Href: qu.ObjToString(doc["href"]),
- Title: qu.ObjToString(doc["title"]),
- Data: &doc,
- City: qu.ObjToString(doc["city"]),
- Province: qu.ObjToString(doc["area"]),
- Result: map[string][]*ju.ExtField{},
- BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
- RuleBlock: e.RuleBlock,
- }
- e.TaskInfo.ProcessPool <- true
- pretreated.AnalyStart(j,false,"")
- e.ExtractProcess(j, nil,false)
- log.Println("=============块信息================")
- for _, v := range j.Block {
- log.Println("----", v.Title, v.Titles, "----")
- if v.ColonKV != nil {
- for kk, vv := range v.ColonKV.KvTags {
- for _, vvv := range vv {
- log.Println("ColonKV", kk, "---", vvv.Key, "---", vvv.Value, "---", vvv.Weight)
- }
- }
- }
- if v.SpaceKV != nil {
- for kk, vv := range v.SpaceKV.KvTags {
- for _, vvv := range vv {
- log.Println("SpaceKV", kk, "---", vvv.Key, "---", vvv.Value, "---", vvv.Weight)
- }
- }
- }
- if v.TableKV != nil {
- for kk, vv := range v.TableKV.KvTags {
- for _, vvv := range vv {
- log.Println("TableKV", kk, "---", vvv.Key, "---", vvv.Value, "---", vvv.Weight)
- }
- }
- }
- //log.Println("Classify", v.Classify)
- //log.Println("Tag", v.Tag)
- }
- log.Println("=============抽取结果================")
- set := (e.ResultArr[0][1]["$set"]).(map[string]interface{})
- for k, v := range set {
- if k == "budget" || k == "bidamount" || k == "winner" || k == "amount" || k == "projectname" || k == "projectcode" || k == "buyer" || k == "buyerperson" || k == "buyertel" || k == "agency" {
- log.Println(k, "---", v)
- }
- }
- log.Println("=============抽取结果 result================")
- for k, v := range set["result"].(map[string][]*ju.ExtField) {
- if k != "winner" {
- continue
- }
- for _, vv := range v {
- log.Println(k, fmt.Sprintf("%+v", vv))
- for kkk, vvv := range vv.ScoreItem {
- log.Println("--", kkk, k, fmt.Sprintf("%+v", vvv))
- }
- log.Println("\n")
- }
- }
- log.Println("=============中标候选人================")
- for _, v := range j.Winnerorder {
- log.Println(v)
- }
- log.Println("=============分包================")
- for k, v := range j.BlockPackage {
- log.Println(k, v)
- }
- log.Println("=============正文================")
- //log.Println(j.Content)
- return
- for _, v := range j.Block {
- if v.ColonKV != nil && v.ColonKV.KvTags != nil {
- for kk, vv := range v.ColonKV.KvTags {
- for _, vvv := range vv {
- log.Println(kk, vvv.Weight, vvv.Value)
- }
- }
- }
- if v.TableKV != nil && v.TableKV.KvTags != nil {
- for kk, vv := range v.TableKV.KvTags {
- for _, vvv := range vv {
- log.Println(kk, vvv.Weight, vvv.Value)
- }
- }
- }
- if v.SpaceKV != nil && v.SpaceKV.KvTags != nil {
- for kk, vv := range v.SpaceKV.KvTags {
- for _, vvv := range vv {
- log.Println(kk, vvv.Weight, vvv.Value)
- }
- }
- }
- }
- log.Println(len(j.Block))
- return
- for _, v := range j.Block {
- if m[v.Title] || v.Title == "" {
- continue
- }
- if !regexp.MustCompile("或|和|以?及|与|、|或").MatchString(v.Title) {
- //continue
- }
- m[v.Title] = true
- f.WriteString(j.SourceMid + "-----" + v.Title + "---" + fmt.Sprint(v.Titles) + "\n")
- continue
- for _, kv := range v.ColonKV.Kvs {
- log.Println("\n")
- log.Println(kv.Key, "---", kv.Value)
- log.Println(kv.Line)
- log.Println("=======================")
- }
- }
- }
- func GetDetail(doc map[string]interface{}) (detail string) {
- detail = ""
- d1, _ := doc["detail"].(string)
- d2, _ := doc["contenthtml"].(string)
- if len(d1) >= len(d2) || d2 == "" {
- detail = d1
- } else {
- detail = d2
- }
- detail = ju.CutLableStr(detail)
- detail = ju.NewCut().ClearHtml(detail)
- tabs, ration := pretreated.ComputeConRatio(detail, 1)
- if len(tabs) > 0 {
- newcon, newtabs, newration := pretreated.FindBigText(detail, ration, tabs)
- //log.Println(newcon, newtabs, newration)
- if newcon != "" && newration == 0 {
- detail = newcon
- tabs = newtabs
- ration = newration
- }
- }
- return detail
- }
- func winnerorder() {
- text := `评审专家名单:
- 吴殿波、韩屹、孙胜进、郑丹、李海波
-
- 中标标的名称、规格型号、数量、单价、服务要求:
- 2019年沈阳惠涌供热有限责任公司、沈阳圣达热力供暖有限责任公司、沈阳惠盛供热有限责任公司PE管保温
- 第一入围供货商:沈阳曲暖鼎盛保温安装有限公司 、总单价:11.833300
- 第二入围供货商:沈阳国盛防腐保温有限公司、总单价:11.102100
- 第三入围供货商:沈阳泰豪管材有限公司、总单价:13.258100`
- log.Println((&pretreated.WinnerOrderEntity{}).Find(text, true, 1,false,""))
- }
|