apple 4 年之前
父節點
當前提交
46437393df

+ 1 - 1
data_quality/src/config.json

@@ -3,7 +3,7 @@
   "mongodb": {
     "addrName": "172.17.4.85:27080",
     "dbName": "qfw",
-    "collName": "result_20200715",
+    "collName": "result_20200917",
     "pool": 10,
     "site": {
       "site_dbname": "qfw",

+ 1 - 1
src/config.json

@@ -32,7 +32,7 @@
     "iscltlog": false,
     "brandgoods": false,
     "pricenumber":true,
-    "udptaskid": "5eda01b0c566ca08409370bb",
+    "udptaskid": "5fceec1c92b4ee1025b7d091",
     "udpport": "1484",
     "nextNode": [
         {

+ 10 - 0
src/jy/extract/extract.go

@@ -61,6 +61,7 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 	ext.InitClearFn(false)
 	ext.InitClearFn(true)
 	ext.Lock()
+	//ext.IsExtractCity = false
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
 		ext.InitCityInfo()
@@ -276,6 +277,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 	detail = pretreated.RepairCon(detail)
 	detail = ju.CutLableStr(d3 + "\n" + detail)
 	detail = cut.ClearHtml(d3 + "\n" + detail)
+
 	doc["detail"] = detail
 	isClearnMoney := !clearMoneyReg.MatchString(detail)
 	if isClearnMoney {
@@ -645,6 +647,12 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 		//函数清理
 		for key, val := range j.Result {
 			for i, v := range val {
+				if v.Field == "project_duration" {
+					arr:=clear.ObjToMoney([]interface{}{v.Value, j.Content},j.SpiderCode, j.IsClearnMoney)
+					if len(arr)>0 {
+						v.Value = arr[0]
+					}
+				}
 				if v.Field == "projectname" && v.Type == "table" {
 					break
 				}
@@ -2076,6 +2084,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 	})
 }
 
+// checkFields strips fields such as "contenthtml" and "detail" from tmp and returns the same map.
 func checkFields(tmp map[string]interface{}) map[string]interface{} {
 	delete(tmp, "contenthtml")
 	delete(tmp, "detail")
@@ -2106,6 +2115,7 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 			delete(tmp, k)
 		}
 	}
+
 	return tmp
 }
 

+ 2 - 0
src/jy/extract/extractudp.go

@@ -112,6 +112,7 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 		ext.InitClearFn(false)
 		ext.InitClearFn(true)
 		ext.Lock()
+		//ext.IsExtractCity = false
 		if ext.IsExtractCity { //版本上控制是否开始城市抽取
 			//初始化城市DFA信息
 			//ext.InitCityDFA()
@@ -289,6 +290,7 @@ func ExtractByBidForecast(infoid string, ossid ...string) map[string]interface{}
 		exF.InitTag(true)
 		exF.InitClearFn(false)
 		exF.InitClearFn(true)
+
 		if exF.IsExtractCity { //版本上控制是否开始城市抽取
 			//初始化城市DFA信息
 			//exF.InitCityDFA()

+ 1 - 1
src/web/templates/admin/result_list.html

@@ -194,7 +194,7 @@ $(function () {
 							var taskid = $("#s_taskname").val();
 							var resulttrack = $("#s_resulttrack").val();
 							//taskid ="5eda01b0c566ca08409370bb"
-							num = 1
+							// num = 1
 							//resulttrack = "a"
 							var bcon = true;
 							if(id == "" || resulttrack == ""){

+ 16 - 4
udpfilterdup/src/main.go

@@ -58,7 +58,7 @@ var (
 
 
 func init() {
-
+	return
 	flag.StringVar(&lastid, "id", "", "增量加载的lastid") //增量
 	flag.StringVar(&gtid, "gtid", "", "历史增量的起始id")	//历史
 	flag.StringVar(&gtept, "gtept", "", "全量gte发布时间")//全量区间pt
@@ -130,7 +130,7 @@ func init() {
 }
 
 
-func main() {
+func mainT() {
 
 	go checkMapJob()
 	updport := Sysconfig["udpport"].(string)
@@ -165,7 +165,7 @@ func main() {
 }
 
 //测试组人员使用
-func mainT() {
+func main() {
 	//dealBuyerAlias() //生成别名
 	//dealWithBuyerNameAliasRecord()//处理数据-别名相关
 	//dealRepeatAliasBuyerName() //处理多余重复
@@ -174,12 +174,20 @@ func mainT() {
 	//exportAllBuyerAlias() //导出测试数据
 
 
-	dealWithAddressData()//城市
+	//dealWithAddressData()//城市
+
+
 
+	exportTestBiddingData()
 
+	//testmain()
 
 	return
 
+
+
+
+
 	if TimingTask {
 		go historyTaskDay()
 		time.Sleep(99999 * time.Hour)
@@ -196,7 +204,10 @@ func mainT() {
 		mapinfo["gtid"] = sid
 		mapinfo["lteid"] = eid
 		mapinfo["stop"] = "true"
+
+		log.Println("测试:全量判重-准备开始")
 		task([]byte{}, mapinfo)
+
 		time.Sleep(99999 * time.Hour)
 	}
 }
@@ -253,6 +264,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 		}
 	}
 
+
 	log.Println("查询条件:",mgo.DbName, extract, q)
 
 	sess := mgo.GetMgoConn()