zhangjinkun 6 gadi atpakaļ
vecāks
revīzija
685b608435
7 mainītis faili ar 187 papildinājumiem un 84 dzēšanām
  1. 1 1
      src/jy/cluster/aliecs.go
  2. 25 1
      src/jy/extract/extract.go
  3. 0 23
      src/jy/extract/score.go
  4. 6 0
      src/main_test.go
  5. 90 59
      src/specialsymbols.json
  6. 65 0
      udps/main.go
  7. BIN
      udps/udps

+ 1 - 1
src/jy/cluster/aliecs.go

@@ -41,7 +41,7 @@ func RunInstances(TaskName string, num, hours int) {
 				//[]string{"SecurityGroupId", "sg-bp16x3td2evrejhkshp7"},
 				[]string{"VSwitchId", qu.ObjToString(esconfig["VSwitchId"])},
 				[]string{"InternetMaxBandwidthIn", "50"},
-				[]string{"InternetMaxBandwidthOut", "0"},
+				[]string{"InternetMaxBandwidthOut", "25"},
 				[]string{"InstanceChargeType", "PostPaid"},
 				[]string{"SpotStrategy", "SpotWithPriceLimit"},
 				[]string{"SpotPriceLimit", "1.99"},

+ 25 - 1
src/jy/extract/extract.go

@@ -27,7 +27,7 @@ var (
 	TaskList  map[string]*ExtractTask                //任务列表
 	saveLimit = 200                                  //抽取日志批量保存
 	PageSize  = 5000                                 //查询分页
-	Fields    = `{"title":1,"detail":1,"contenthtml":1,"href":1,"site":1,"spidercode":1,"toptype":1,"area":1,"city":1}`
+	Fields    = `{"title":1,"detail":1,"contenthtml":1,"href":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1}`
 )
 
 //启动测试抽取
@@ -254,6 +254,30 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
 		for _, v := range e.RuleBacks {
 			ExtRegBack(j, v, e.TaskInfo)
 		}
+		//候选人加入
+		if len(j.Winnerorder) > 0 {
+			winner := &ju.ExtField{
+				Field:     "winner",
+				Code:      "",
+				RuleText:  "",
+				Type:      "winnerorder",
+				MatchType: "winnerorder",
+				ExtFrom:   "",
+				Value:     j.Winnerorder[0]["entname"],
+				Score:     0,
+			}
+			if len([]rune(qu.ObjToString(j.Winnerorder[0]["entname"]))) < 4 {
+				winner.Score = -5
+			}
+			winners := j.Result["winner"]
+			if winners != nil {
+				winners = append(winners, winner)
+			} else {
+				winners = []*ju.ExtField{}
+				winners = append(winners, winner)
+			}
+			j.Result["winner"] = winners
+		}
 		//函数清理
 		for key, val := range j.Result {
 			for _, v := range val {

+ 0 - 23
src/jy/extract/score.go

@@ -35,29 +35,6 @@ func init() {
 //结果打分
 func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 	result := j.Result
-	if len(j.Winnerorder) > 0 { //候选人加入打分集合
-		winner := &ju.ExtField{
-			Field:     "winner",
-			Code:      "",
-			RuleText:  "",
-			Type:      "winnerorder",
-			MatchType: "winnerorder",
-			ExtFrom:   "",
-			Value:     j.Winnerorder[0]["entname"],
-			Score:     0,
-		}
-		if len([]rune(qu.ObjToString(j.Winnerorder[0]["entname"]))) < 4 {
-			winner.Score = -5
-		}
-		winners := result["winner"]
-		if winners != nil {
-			winners = append(winners, winner)
-		} else {
-			winners = []*ju.ExtField{}
-			winners = append(winners, winner)
-		}
-		result["winner"] = winners
-	}
 	//打分
 	for field, tmps := range result {
 		scoreRule := SoreConfig[field]

+ 6 - 0
src/main_test.go

@@ -2,6 +2,7 @@ package main
 
 import (
 	"jy/admin/track"
+	"jy/clear"
 	"jy/extract"
 	. "jy/mongodbutil"
 	"log"
@@ -51,6 +52,11 @@ func Test_paths(t *testing.T) {
 	}
 }
 
+func Test_clear(t *testing.T) {
+	text := clear.OtherClean("winner", "宁夏泷泽医疗器械有限公司(地址:银川市兴庆区绿地214商城D区7号楼317房)")
+	log.Println(text)
+}
+
 func Test_reg3(t *testing.T) {
 	text := []rune("(法撒旦法士大夫发的发)生(的]发的法旦法士大夫三发的)")
 	for i := 1; i <= 2; i++ {

+ 90 - 59
src/specialsymbols.json

@@ -1,61 +1,92 @@
 {
-	"symmetric":{
-		"field":{
-		"projectname":true,
-		"projectcode":true,
-		"buyer":true,
-		"winner":true,
-		"agency":true
-		},
-		"symbol":[
-			["(",")"],
-			["\\[","\\]"],
-			["{","}"],
-			["{","}"],
-			["'","'"],			
-			["‘","’"],
-			["“","”"],			
-			["\"","\""],
-			["【","】"],
-			["(",")"],
-			["<",">"]
-		]
-	},
-	"asymmetric":{
-		"field":{
-		"projectname":true,
-		"projectcode":true,
-		"buyer":true,
-		"winner":true,
-		"agency":true,
-		"agency":true,
-		"buyertel":true,
-		"buyerperson":true
-		},
-		"symbol":[
-			":",
-			",",
-			".",
-			":",
-			",",
-			"。",
-			";",
-			";"
-		]
-	},
-	"messycode":{
-		"field":{
-		"projectname":true,
-		"projectcode":true,
-		"buyer":true,
-		"winner":true,
-		"agency":true,
-		"agency":true,
-		"buyertel":true,
-		"buyerperson":true
-		},
-		"symbol":[
-		
-		]
-	}
+    "symmetric": {
+        "field": {
+            "projectname": true,
+            "projectcode": true,
+            "buyer": true,
+            "winner": true,
+            "agency": true
+        },
+        "symbol": [
+            [
+                "(",
+                ")"
+            ],
+            [
+                "\\[",
+                "\\]"
+            ],
+            [
+                "{",
+                "}"
+            ],
+            [
+                "{",
+                "}"
+            ],
+            [
+                "'",
+                "'"
+            ],
+            [
+                "‘",
+                "’"
+            ],
+            [
+                "“",
+                "”"
+            ],
+            [
+                "\"",
+                "\""
+            ],
+            [
+                "【",
+                "】"
+            ],
+            [
+                "(",
+                ")"
+            ],
+            [
+                "<",
+                ">"
+            ]
+        ]
+    },
+    "asymmetric": {
+        "field": {
+            "projectname": true,
+            "projectcode": true,
+            "buyer": true,
+            "winner": true,
+            "agency": true,
+            "agency": true,
+            "buyertel": true,
+            "buyerperson": true
+        },
+        "symbol": [
+            ":",
+            ",",
+            ".",
+            ":",
+            ",",
+            "。",
+            ";",
+            ";"
+        ]
+    },
+    "messycode": {
+        "field": {
+            "projectname": true,
+            "projectcode": true,
+            "buyer": true,
+            "winner": true,
+            "agency": true,
+            "agency": true,
+            "buyertel": true,
+            "buyerperson": true
+        },
+        "symbol": []
+    }
 }

+ 65 - 0
udps/main.go

@@ -0,0 +1,65 @@
+package main
+
+import (
+	"encoding/json"
+	"flag"
+	"log"
+	mu "mfw/util"
+	"net"
+	qu "qfw/util"
+	"time"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+var udpclient mu.UdpClient // UDP client object
+var nextNodes []map[string]interface{} // NOTE(review): not referenced anywhere else in this file

+var startDate, endDate, ip, port, stype string // command-line flag values, bound in main
+
+// main reads the command-line flags, converts the -start and -end dates into
+// ObjectId bounds, hands processUdpMsg to the UDP client listening on :1470,
+// sends one JSON request ("gtid", "lteid", "stype") to ip:port, and then
+// blocks forever so that incoming replies keep being logged.
+//
+// The process exits with a fatal log message when either date cannot be
+// parsed or the request cannot be encoded, instead of sending ids derived
+// from the zero time.
+func main() {
+	// Sample start,end ranges kept from the original author's notes:
+	//   2015-11-03,2017-04-01
+	//   2017-04-01,2017-06-01
+	//   2017-06-01,2018-06-01
+	//   2018-06-01,2019-02-20
+	flag.StringVar(&startDate, "start", "", "开始日期2006-01-02")
+	flag.StringVar(&endDate, "end", "", "结束日期2006-01-02")
+	flag.StringVar(&ip, "ip", "127.0.0.1", "udp地址")
+	flag.StringVar(&port, "port", "", "udp端口")
+	flag.StringVar(&stype, "stype", "", "stype")
+	flag.Parse()
+	log.Println(startDate, endDate, ip, port)
+
+	// Both dates are required: an unparsed date would yield the zero time and
+	// therefore a meaningless ObjectId bound.
+	start, err := time.ParseInLocation(qu.Date_Short_Layout, startDate, time.Local)
+	if err != nil {
+		log.Fatalf("invalid -start %q: %v", startDate, err)
+	}
+	end, err := time.ParseInLocation(qu.Date_Short_Layout, endDate, time.Local)
+	if err != nil {
+		log.Fatalf("invalid -end %q: %v", endDate, err)
+	}
+	sid := bson.NewObjectIdWithTime(start)
+	eid := bson.NewObjectIdWithTime(end)
+	log.Println(sid, eid)
+
+	udpclient = mu.UdpClient{Local: ":1470", BufSize: 1024}
+	udpclient.Listen(processUdpMsg)
+
+	by, err := json.Marshal(map[string]interface{}{
+		"gtid":  sid,
+		"lteid": eid,
+		"stype": stype,
+	})
+	if err != nil {
+		log.Fatalf("encoding request: %v", err)
+	}
+	udpclient.WriteUdp(by, mu.OP_TYPE_DATA, &net.UDPAddr{
+		IP:   net.ParseIP(ip),
+		Port: qu.IntAll(port),
+	})
+
+	// Nothing ever wakes this up; it only keeps the process alive for replies.
+	select {}
+}
+
+// processUdpMsg is the callback passed to udpclient.Listen. It logs incoming
+// messages according to their action byte:
+//   - mu.OP_NOOP: logs "发送成功" followed by the raw payload as a string
+//     (reply from the next node).
+//   - mu.OP_TYPE_DATA: decodes the payload as a JSON object and logs either
+//     the decoded map or the decode error.
+//
+// Any other action byte is ignored. The sender address ra is not used.
+func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
+	if act == mu.OP_NOOP { // reply from the next node
+		log.Println("发送成功", string(data))
+		return
+	}
+	if act != mu.OP_TYPE_DATA {
+		return
+	}
+	var mapInfo map[string]interface{}
+	if err := json.Unmarshal(data, &mapInfo); err != nil {
+		log.Println(err)
+		return
+	}
+	log.Println(mapInfo)
+}

BIN
udps/udps