
Add spider claim feature

mxs 1 year ago
parent
commit
06cbf3823d
3 changed files with 91 additions and 39 deletions
  1. src/front/front.go (+72 -25)
  2. src/front/spider.go (+12 -12)
  3. src/taskManager/sitetask.go (+7 -2)

src/front/front.go (+72 -25)

@@ -97,8 +97,9 @@ const Sp_state_0, Sp_state_1, Sp_state_2, Sp_state_3, Sp_state_4, Sp_state_5, Sp
 var spinfos sync.Map = sync.Map{}
 var SessMap map[string]*httpsession.Session
 var AutoTpl map[string]interface{}
-var Mails *util.Mail
-var Reg = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+`)
+
+// var Mails *util.Mail
+// var Reg = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+`)
 var ProjectHrefReg = regexp.MustCompile("projecthref")
 var Transfercode map[string]interface{}
 
@@ -350,16 +351,23 @@ func (f *Front) ImportLua() {
 						o["city"] = cells[12].Value
 						o["district"] = cells[13].Value
 						weigh, _ := cells[14].Int()
+						if weigh == -1 {
+							weigh = 1
+						}
 						o["weight"] = weigh
+						// storage collection
+						coll := cells[15].Value
+						if coll == "" {
+							coll = "bidding"
+						}
 						// spider type
-						infoformat, _ := cells[15].Int()
+						infoformat, _ := cells[16].Int()
 						if infoformat < 1 {
 							errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫" + cells[1].Value + ",缺少爬虫类型信息"
 							continue
 						}
 						o["infoformat"] = infoformat
-						// storage collection
-						coll := cells[16].Value
+
 						if strings.Contains(code, "bidding") {
 							errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫" + cells[1].Value + ",存储表错误"
 							continue
@@ -388,16 +396,55 @@ func (f *Front) ImportLua() {
 }
 
 func saveLua(o map[string]interface{}) bool {
+	param := map[string]interface{}{}
 	AutoTpl["Base.SpiderName"] = o["name"]
 	AutoTpl["Base.SpiderCode"] = o["code"]
 	AutoTpl["Base.SpiderChannel"] = o["channel"]
 	AutoTpl["Base.SpiderTargetChannelUrl"] = o["channeladdr"]
 	modifyuser := o["modifyuser"].(string)
-	one, _ := u.MgoEB.FindOne("user", map[string]interface{}{"s_name": modifyuser, "i_auth": 1})
-	if len(*one) == 0 {
-		return false
+	priority := qu.IntAll(o["priority"])
+	param["priority"] = o["priority"] // priority
+	platform := qu.ObjToString(o["platform"])
+	var userid, email string
+	claimLog := map[string]interface{}{}
+	if modifyuser != "" { // maintainer specified
+		one, _ := u.MgoEB.FindOne("user", map[string]interface{}{"s_name": modifyuser, "i_auth": 1, "i_delete": 0})
+		if len(*one) == 0 {
+			return false
+		}
+		email = qu.ObjToString((*one)["s_email"])
+		userid = mongodb.BsonIdToSId((*one)["_id"])
+		if platform != "python" {
+			now := time.Now().Unix()
+			recovertime := now + 365*86400
+			param["claimtime"] = now // claim time
+			param["claimtype"] = CLAIMTYPECLAIMED
+			param["recovertime"] = recovertime
+			claimLog = map[string]interface{}{
+				"site":             o["name"],
+				"code":             o["code"],
+				"channel":          o["channel"],
+				"modifyuser":       modifyuser,
+				"priority":         priority,
+				"stype":            "认领",
+				"comeintime":       now,
+				"claimtime":        now,
+				"recovertime":      recovertime,
+				"returntime":       int64(0),
+				"important":        false,
+				"returnreason":     "",
+				"claimrecovertype": 0,
+			}
+		} else {
+			param["recovertime"] = int64(0)       // recovery time
+			param["claimtime"] = int64(0)         // claim time
+			param["claimtype"] = CLAIMTYPEHISTORY // spider claim status
+		}
+	} else { // no maintainer specified
+		param["recovertime"] = int64(0)         // recovery time
+		param["claimtime"] = int64(0)           // claim time
+		param["claimtype"] = CLAIMTYPEUNCLAIMED // spider claim status (unclaimed)
 	}
-	id := mongodb.BsonIdToSId((*one)["_id"])
 	common := []interface{}{
 		AutoTpl["Base.SpiderCode"],
 		AutoTpl["Base.SpiderName"],
@@ -435,7 +482,6 @@ func saveLua(o map[string]interface{}) bool {
 		AutoTpl["Step3.ContentChooser"],
 		AutoTpl["Step3.ElementChooser"],
 	}
-	param := map[string]interface{}{}
 	param["param_common"] = common
 	// wizard mode
 	param["param_time"] = ptime
@@ -454,10 +500,10 @@ func saveLua(o map[string]interface{}) bool {
 	param["href"] = o["channeladdr"]
 	param["channel"] = o["channel"]
 	param["createuser"] = modifyuser
-	param["createuserid"] = id
-	param["createuseremail"] = (*one)["s_email"]
+	param["createuserid"] = userid
+	param["createuseremail"] = email
 	param["modifyuser"] = modifyuser
-	param["modifyuserid"] = id
+	param["modifyuserid"] = userid
 	param["modifytime"] = time.Now().Unix()
 	param["state"] = 0 // not completed
 	if qu.IntAll(o["event"]) > 0 {
@@ -470,11 +516,8 @@ func saveLua(o map[string]interface{}) bool {
 		model[k] = qu.ObjToString(o[k])
 	}
 	param["model"] = model
-	param["next"] = (*one)["s_email"]
+	param["next"] = email
 	param["urgency"] = o["urgency"]
-	param["priority"] = o["priority"] // priority
-	param["recovertime"] = int64(0)   // recovery time
-	param["claimtime"] = int64(0)     // claim time
 	param["isflow"] = o["isflow"]
 	param["spidertype"] = "history"
 	param["spiderremark"] = o["spiderremark"]
@@ -486,7 +529,7 @@ func saveLua(o map[string]interface{}) bool {
 	//	param["spidermovevent"] = "7700"
 	//}
 	param["incrementevent"] = qu.IntAll(o["incrementevent"])
-	param["platform"] = o["platform"]
+	param["platform"] = platform
 	param["weight"] = o["weight"]
 	param["infoformat"] = o["infoformat"]
 	infoformat := qu.IntAll(o["infoformat"])
@@ -499,12 +542,12 @@ func saveLua(o map[string]interface{}) bool {
 		infotype = "舆情"
 	}
 	// default fields
-	param["spidercompete"] = true           // spiders created after 2021-11-20 get this field (marks a new spider; the 剑鱼 site does not display the original text)
-	param["spiderhistorymaxpage"] = 1       // max history pages
-	param["pendstate"] = 0                  //
-	param["grade"] = 0                      // spider difficulty (mainly used by python spiders)
-	param["spiderimportant"] = false        // whether this spider targets a key site
-	param["claimtype"] = CLAIMTYPEUNCLAIMED // spider claim status (unclaimed)
+	param["spidercompete"] = true     // spiders created after 2021-11-20 get this field (marks a new spider; the 剑鱼 site does not display the original text)
+	param["spiderhistorymaxpage"] = 1 // max history pages
+	param["pendstate"] = 0            //
+	param["grade"] = 0                // spider difficulty (mainly used by python spiders)
+	param["spiderimportant"] = false  // whether this spider targets a key site
+	param["urgency"] = 0
 	//qu.Debug("param---", param)
 
 	ok := spider.SaveSpider(o["code"].(string), param)
@@ -562,10 +605,14 @@ func saveLua(o map[string]interface{}) bool {
 				"channel":    o["channel"],
 				"spidercode": o["code"],
 				"platform":   o["platform"],
-				"modifyuser": (*one)["s_name"],
+				"modifyuser": "",
 				"state":      0,
 			}}, true, false)
 		}
+		// write the claim log
+		if len(claimLog) > 0 {
+			u.MgoEB.Save("lua_logs_claim", claimLog)
+		}
 	}
 	return ok
 }

src/front/spider.go (+12 -12)

@@ -115,18 +115,18 @@ func (f *Front) LoadSpider(codeTaskIdReState string) error {
 		}
 		if qu.ObjToString((*lua)["createuserid"]) == f.GetSession("userid").(string) || auth >= 1 {
 			if len(*lua) > 0 {
-				if qu.IntAll((*lua)["event"]) == 7000 && qu.IntAll((*lua)["urgency"]) == 0 && qu.IntAll((*lua)["state"]) == 0 {
-					q := map[string]interface{}{
-						"event":        7000,
-						"state":        0,
-						"urgency":      1,
-						"modifyuserid": f.GetSession("userid"),
-					}
-					if u.MgoEB.Count("luaconfig", q) > 0 {
-						f.Write("名下还有7000节点待完成的紧急爬虫,暂无法处理该爬虫!")
-						return nil
-					}
-				}
+				//if qu.IntAll((*lua)["event"]) == 7000 && qu.IntAll((*lua)["urgency"]) == 0 && qu.IntAll((*lua)["state"]) == 0 {
+				//	q := map[string]interface{}{
+				//		"event":        7000,
+				//		"state":        0,
+				//		"urgency":      1,
+				//		"modifyuserid": f.GetSession("userid"),
+				//	}
+				//	if u.MgoEB.Count("luaconfig", q) > 0 {
+				//		f.Write("名下还有7000节点待完成的紧急爬虫,暂无法处理该爬虫!")
+				//		return nil
+				//	}
+				//}
 				if copy != "" {
 					//luacopy, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": copy})
 					luacopy, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": copy})

src/taskManager/sitetask.go (+7 -2)

@@ -16,6 +16,7 @@ func (t *TaskM) SiteTask() {
 	repair := t.GetString("repair")
 	overdue := t.GetString("overdue")
 	latestdata := t.GetString("latestdata")
+	codestate, _ := t.GetInteger("codestate")
 	searchStr := t.GetString("search[value]")
 	search := strings.TrimSpace(searchStr)
 	draw, _ := t.GetInteger("draw")
@@ -46,6 +47,9 @@ func (t *TaskM) SiteTask() {
 		if repair != "-1" {
 			query["b_repair"] = repair == "1"
 		}
+		if codestate > -1 {
+			query["i_state"] = codestate
+		}
 		if overdue != "-1" {
 			if overdue == "1" { // query overdue records
 				query["l_max_repair_time"] = map[string]interface{}{
@@ -68,14 +72,15 @@ func (t *TaskM) SiteTask() {
 			orderType = -1
 		}
 		sort = fmt.Sprintf(sort, orderName, orderType)
+
 		qu.Debug("query:", query, sort)
 		task, _ := u.MgoEB.Find("spider_important_warning", query, sort, nil, false, start, limit)
 		count := u.MgoEB.Count("spider_important_warning", query)
 		for _, t := range *task {
 			l_max_repair_time := qu.Int64All(t["l_max_repair_time"])
 			t["b_overdue"] = l_max_repair_time < time.Now().Unix()
-			lua, _ := u.MgoEB.FindOneByField("luaconfig", map[string]interface{}{"code": t["s_spidercode"]}, map[string]interface{}{"state": 1})
-			t["i_state"] = (*lua)["state"]
+			//lua, _ := u.MgoEB.FindOneByField("luaconfig", map[string]interface{}{"code": t["s_spidercode"]}, map[string]interface{}{"state": 1})
+			//t["i_state"] = (*lua)["state"]
 			t["encode"] = util.Se.Encode2Hex(fmt.Sprint(t["s_spidercode"]))
 		}
 		t.ServeJson(map[string]interface{}{"draw": draw, "data": task, "recordsFiltered": count, "recordsTotal": count})