Răsfoiți Sursa

爬虫新增infoformat字段相关修改

maxiaoshan 2 ani în urmă
părinte
comite
5523ffc7ed

+ 2 - 2
src/config.json

@@ -1,10 +1,10 @@
 {
     "webport": "8002",
-    "dbaddr": "192.168.3.207:27092",
+    "dbaddr": "192.168.3.207:29099",
     "dbname": "editor",
     "dbname2": "spider",
     "bideditor": {
-        "addr": "192.168.3.207:27092",
+        "addr": "192.168.3.207:29099",
         "db": "editor",
         "size": 5,
         "username": "",

+ 17 - 9
src/front/front.go

@@ -55,8 +55,8 @@ type Front struct {
 	saveChannels   xweb.Mapper `xweb:"/center/save/channels"`   //保存子栏目
 	saveJs         xweb.Mapper `xweb:"/center/save/js"`         //保存js
 	loadModel      xweb.Mapper `xweb:"/center/gmodel/(.*)"`     //加载模型
-	importdata     xweb.Mapper `xweb:"/center/importdata"`      //导入脚本
-	importfile     xweb.Mapper `xweb:"/center/importfile"`      //批量导入爬虫
+	importdata     xweb.Mapper `xweb:"/center/importdata"`      //导入爬虫列表页面
+	importLua      xweb.Mapper `xweb:"/center/importlua"`       //导入爬虫
 	oldedit        xweb.Mapper `xweb:"/center/oldedit"`         //老文件编辑
 	findName       xweb.Mapper `xweb:"/center/findname"`        //即时查询名称
 	checkrepeat    xweb.Mapper `xweb:"/center/spider/isrepeat"` //脚本代码判重
@@ -522,7 +522,7 @@ func (f *Front) Reg() {
 
 }
 
-func (f *Front) Importfile() {
+func (f *Front) ImportLua() {
 	auth := qu.IntAll(f.GetSession("auth"))
 	if auth != role_admin {
 		f.ServeJson("没有权限")
@@ -576,8 +576,11 @@ func (f *Front) Importfile() {
 						o["district"] = cells[13].Value
 						weigh, _ := cells[14].Int()
 						o["weight"] = weigh
+						//爬虫类型
+						infoformat, _ := cells[15].Int()
+						o["infoformat"] = infoformat
 						//存储表
-						o["coll"] = cells[15].Value
+						o["coll"] = cells[16].Value
 						//table := cells[6].Value
 						//o["table"] = table
 						//o["transfercode"] = qu.IntAll(Transfercode[table])
@@ -586,7 +589,7 @@ func (f *Front) Importfile() {
 						if len(*rs) > 0 {
 							errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行重复,已经过滤"
 						} else {
-							ok, name := savelua(o) //保存爬虫
+							ok, name := saveLua(o) //保存爬虫
 							if ok == false {
 								errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行找不到作者,已经过滤"
 							} else {
@@ -605,7 +608,7 @@ func (f *Front) Importfile() {
 	}
 }
 
-func savelua(o map[string]interface{}) (bool, string) {
+func saveLua(o map[string]interface{}) (bool, string) {
 	AutoTpl["Base.SpiderName"] = o["name"]
 	AutoTpl["Base.SpiderCode"] = o["code"]
 	AutoTpl["Base.SpiderChannel"] = o["channel"]
@@ -696,12 +699,14 @@ func savelua(o map[string]interface{}) (bool, string) {
 	incrementevent := qu.ObjToString(o["incrementevent"])
 	if movevent, ok := util.Config.Uploadevents[incrementevent].(string); ok && movevent != "" {
 		param["spidermovevent"] = movevent
-	} else {
-		param["spidermovevent"] = "7700"
 	}
+	//} else {
+	//	param["spidermovevent"] = "7700"
+	//}
 	param["incrementevent"] = qu.IntAll(o["incrementevent"])
 	param["platform"] = o["platform"]
 	param["weight"] = o["weight"]
+	param["infoformat"] = o["infoformat"]
 	//默认字段
 	param["spidercompete"] = true     //2021-11-20后爬虫加此字段(表示新爬虫,剑鱼网站不展示原文)
 	param["spiderhistorymaxpage"] = 1 //历史最大页
@@ -958,7 +963,10 @@ func (f *Front) UpdateESP() {
 		f.Write("n")
 		return
 	}
-	if w == "urgency" { //修改紧急度
+	if w == "infoformat" {
+		infoformat, _ := f.GetInteger("val")
+		set["infoformat"] = infoformat
+	} else if w == "urgency" { //修改紧急度
 		urgency, _ := f.GetInteger("val")
 		set["urgency"] = urgency
 	} else if w == "state" { //无效爬虫改为待完成

+ 14 - 9
src/front/spider.go

@@ -77,7 +77,7 @@ type OtherBase struct {
 	IsFlow               int    //爬虫所采集数据是否参与数据流程标识
 	SpiderType           string //爬虫类型:increment增量;history历史
 	SpiderHistoryMaxPage int    //采集历史数据时的采集最大页
-	SpiderMoveEvent      string //爬虫采集完历史后要转移到的节点 comm:队列模式、bid:高性能模式、7700
+	SpiderMoveEvent      string //爬虫采集完历史后要转移到的节点 comm:队列模式、bid:高性能模式
 }
 
 //加载某个爬虫
@@ -391,7 +391,7 @@ func (f *Front) SaveStep() {
 			param["spidertype"] = f.OtherBase.SpiderType
 			param["spiderhistorymaxpage"] = f.OtherBase.SpiderHistoryMaxPage
 			qu.Debug(f.OtherBase.SpiderMoveEvent)
-			tmpEvent, err := strconv.Atoi(f.OtherBase.SpiderMoveEvent) //f.OtherBase.SpiderMoveEvent此处SpiderMoveEvent已不表示comm、bid、7700,表示增量的节点
+			tmpEvent, err := strconv.Atoi(f.OtherBase.SpiderMoveEvent) //f.OtherBase.SpiderMoveEvent此处SpiderMoveEvent已不表示comm、bid,表示增量的节点
 			if f.OtherBase.SpiderType == "history" {                   //爬虫类型是history的放到7000节点,并记录历史节点
 				param["event"] = 7000
 				if err == nil {
@@ -402,9 +402,7 @@ func (f *Front) SaveStep() {
 			} else if f.OtherBase.SpiderType == "increment" && err == nil { //增量
 				param["event"] = tmpEvent //开发人员切换增量节点
 			}
-			if tmpEvent == 7700 {
-				param["spidermovevent"] = "7700"
-			} else if movevent, ok := util.Config.Uploadevents[f.OtherBase.SpiderMoveEvent].(string); ok && movevent != "" {
+			if movevent, ok := util.Config.Uploadevents[f.OtherBase.SpiderMoveEvent].(string); ok && movevent != "" {
 				param["spidermovevent"] = movevent
 			}
 			//开发人员修改爬虫节点后,在审核人员上架时,要在原来的节点下架,临时记录要下架的节点downevent
@@ -1280,6 +1278,7 @@ func UpStateAndUpSpider(code, id, reason, username string, state int) (bool, err
 					if state == Sp_state_3 {
 						types = "审核"
 					}
+					event := qu.IntAll((*one)["event"])
 					obj := map[string]interface{}{
 						"code":       code,
 						"auditor":    username,
@@ -1288,14 +1287,14 @@ func UpStateAndUpSpider(code, id, reason, username string, state int) (bool, err
 						"reason":     reason,
 						"spideruser": (*one)["createuser"],
 						"modifytime": (*one)["modifytime"],
-						"event":      (*one)["event"],
+						"event":      event,
 						"site":       (*one)["site"],
 						"channel":    (*one)["channel"],
 					}
 					if !strings.HasSuffix(code, u.Bu) { //凡是以_bu结尾的爬虫一律不计入审核记录
 						//新爬虫审核记录表
-						if state == Sp_state_3 || state == Sp_state_2 {
-							count := u.MgoEB.Count("lua_logs_auditor", map[string]interface{}{"code": code, "types": types})
+						if event == 7000 && (state == Sp_state_3 || state == Sp_state_2) {
+							count := u.MgoEB.Count("lua_logs_auditor", map[string]interface{}{"code": code, "types": "审核"})
 							if count == 0 { //新爬虫审核记录
 								u.MgoEB.Save("lua_logs_auditor_new", obj)
 							}
@@ -1620,7 +1619,7 @@ func (f *Front) Heart() {
 			code := qu.ObjToString(l["code"])
 			qu.Debug(code)
 			//d, _ := u.MgoE.FindOneByField("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"state": 1, "param_common": 1, "str_list": 1, "type_list": 1})
-			d, _ := u.MgoEB.FindOneByField("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"state": 1, "param_common": 1, "str_list": 1, "type_list": 1})
+			d, _ := u.MgoEB.FindOneByField("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"state": 1, "param_common": 1, "str_list": 1, "type_list": 1, "pendtime": 1})
 			l["state"] = (*d)["state"]
 			l["param_common"] = (*d)["param_common"]
 			if lt := qu.Int64All(l["list"]); lt != 0 {
@@ -1645,6 +1644,12 @@ func (f *Front) Heart() {
 			}
 			ut := qu.Int64All(l["updatetime"])
 			l["updatetime"] = qu.FormatDateByInt64(&ut, qu.Date_Full_Layout)
+			pendtime := qu.Int64All((*d)["pendtime"])
+			if pendtime != 0 {
+				l["pendtime"] = qu.FormatDateByInt64(&pendtime, qu.Date_Full_Layout)
+			} else {
+				l["pendtime"] = "0"
+			}
 			//l["isfindlist"] = "否"
 			//typeList := qu.IntAll((*d)["type_list"])
 			//strList := qu.ObjToString((*d)["str_list"])

+ 2 - 3
src/spider/script.go

@@ -337,10 +337,9 @@ func (s *Script) LoadScript(downloadnode, script string, isfile ...string) {
 				qu.Debug("下载文件出错!")
 			} else {
 				ftype = qu.GetFileType(ret)
+				url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
 				if (ftype == "docx" || ftype == "doc") && len(GarbledCodeReg.FindAllString(string(ret), -1)) > 10 {
-					url, name, size, ftype, fid = "附件中含有乱码", "附件中含有乱码", "", "", ""
-				} else {
-					url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
+					name = "附件中含有乱码"
 				}
 			}
 			if strings.TrimSpace(ftype) == "" {

+ 9 - 8
src/timetask/timetask.go

@@ -165,6 +165,9 @@ func SpiderMoveEvent() {
 	lock := &sync.Mutex{}
 	query := map[string]interface{}{
 		"ok": false,
+		"state": map[string]interface{}{
+			"$ne": 1,
+		},
 	}
 	count := util.MgoEB.Count("luamovelog", query)
 	if count == 0 {
@@ -216,14 +219,12 @@ func SpiderMoveEvent() {
 				util.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"event": event, "state": 6}}, false, false)
 				qu.Debug("Code:", code, "历史迁移到增量节点失败")
 			}
-			if ok {
-				update := []map[string]interface{}{}
-				update = append(update, map[string]interface{}{"_id": tmp["_id"]})
-				update = append(update, map[string]interface{}{"$set": map[string]interface{}{"ok": ok, "updatetime": time.Now().Unix()}})
-				lock.Lock()
-				arr = append(arr, update)
-				lock.Unlock()
-			}
+			update := []map[string]interface{}{}
+			update = append(update, map[string]interface{}{"_id": tmp["_id"]})
+			update = append(update, map[string]interface{}{"$set": map[string]interface{}{"ok": ok, "state": 1, "updatetime": time.Now().Unix()}})
+			lock.Lock()
+			arr = append(arr, update)
+			lock.Unlock()
 		}(tmp)
 		tmp = map[string]interface{}{}
 	}

+ 2 - 0
src/web/templates/heart.html

@@ -28,6 +28,7 @@
                   <th>详情页执行心跳</th>
                   <th>列表页采集成功心跳</th>
                   <th>详情页采集成功心跳</th>
+                  <th>挂起时间</th>
                   <th>更新时间</th>
 <!--                  <th class="hidden-xs">操作</th>-->
                 </tr>
@@ -88,6 +89,7 @@
         { "data": "detail"},
         { "data": "findlist"},
         { "data": "detailexecute",width:"72px"},
+        { "data": "pendtime"},
         { "data": "updatetime"}
     ],
     "fnServerParams": function (e) {  

+ 2 - 3
src/web/templates/import.html

@@ -7,8 +7,7 @@
 				<button class="btn btn-primary" onclick='imports()'>
 					批量导入并分配
 				</button>
-				<iframe srcdoc="<form id='uploadform' method='post' enctype='multipart/form-data' action='/center/importf
-				ile'><input type='file' name='xlsx' /></form>" height=0 scrolling=no class="hide"  id="fileframe">
+				<iframe srcdoc="<form id='uploadform' method='post' enctype='multipart/form-data' action='/center/importlua'><input type='file' name='xlsx' /></form>" height=0 scrolling=no class="hide"  id="fileframe">
 				</iframe>
 			 </small>
 		   </h1>
@@ -91,7 +90,7 @@
 				if(f.length==0){
 					common.maskHide();
 					var b=$(window.frames[0].document).find("body").html();
-					$(window.frames[0].document).find("body").append("<form id='uploadform' method='post' enctype='multipart/form-data' action='/center/importfile'><input type='file' name='xlsx' /></form>");
+					$(window.frames[0].document).find("body").append("<form id='uploadform' method='post' enctype='multipart/form-data' action='/center/importlua'><input type='file' name='xlsx' /></form>");
 					var r=window.confirm("导入完毕,是否查看错误信息");
 					if(r){
 						$("#errmsg").removeClass("hide").append(b);

+ 60 - 19
src/web/templates/index.html

@@ -164,22 +164,23 @@
               <table id="spider" class="table table-bordered table-striped">
                 <thead>
                 <tr>
-          				<th><input type="checkbox" id="selrow" onclick="selectrow(this)"/></th>
-          				<th>编号{{.T.ischeck}}</th>
-          				<th>网站名称</th>
-          				<th>栏目名称</th>
-          				<th>爬虫代码</th>
-                  		<th>紧急度</th>
-          				<th>节点</th>
-          				<th>作者</th>
-          				<th>最后修改时间</th>
-          				<th>状态</th>
-						<th>平台</th>
-						<th>挂起</th>
-						{{if eq (session "platform") "python"}}
-						<th>难易度</th>
-						{{end}}
-                  		<th class="hidden-xs">操作</th>
+					<th><input type="checkbox" id="selrow" onclick="selectrow(this)"/></th>
+<!--					<th>编号</th>-->
+					<th>网站名称</th>
+					<th>栏目名称</th>
+					<th>爬虫代码</th>
+					<th>类型</th>
+					<th>紧急度</th>
+					<th>节点</th>
+					<th>作者</th>
+					<th>最后修改时间</th>
+					<th>状态</th>
+					<th>平台</th>
+					<th>挂起</th>
+					{{if eq (session "platform") "python"}}
+					<th>难易度</th>
+					{{end}}
+					<th class="hidden-xs">操作</th>
                 </tr>
                 </thead>
               </table>
@@ -198,6 +199,35 @@ $(function(){
           },
 		"columnDefs": [
 			{ "orderable": false, "targets": [0,10,12{{if eq (session "platform") "python"}},13{{end}}] },
+			//爬虫类型
+			{"targets":[4], createdCell: function (cell, cellData, rowData, rowIndex, colIndex) {
+				{{if gt (session "auth") 2}}
+				var aInput;
+				$(cell).click(function () {
+					$(this).html(updateInfoformat(rowData._id,rowData.state,rowData.code));
+					var aInput = $(this).find(":input");
+					aInput.focus().val(cellData);
+				});
+				$(cell).on("click", ":input", function (e) {
+					e.stopPropagation();
+				});
+				$(cell).on("change", ":input", function () {
+					$(this).blur();
+				});
+				$(cell).on("blur", ":input", function () {
+					var text = $(this).find("option:selected").text();
+					if (text == "招标"){
+						text = 1
+					}else if (text == "拟建"){
+						text = 2
+					}else if (text == "审批"){
+						text = 3
+					}
+					ttable.cell(cell).data(text);
+					cellData = text;
+				});
+				{{end}}
+			}},
 			//更新紧急度
 			{"targets":[5], createdCell: function (cell, cellData, rowData, rowIndex, colIndex) {
 					{{if gt (session "auth") 2}}
@@ -345,9 +375,6 @@ $(function(){
 			{ "data": "_id",render:function(val,a,row){
 				return "<input type='checkbox' platform='"+row.platform+"' scope='"+row.i_scope+"' value='"+val+"' name='"+row.param_common[1]+"_"+row.param_common[2]+"' code='"+row.param_common[0]+"'/>"
 			}},
-      		{ "data": "_id",render:function(val,a,row){
-				return row.num
-			}},
 			{ "data": "param_common","width":"200px",render:function(val,a,row){
 				vals=val[1];
 				if(vals.length>15){
@@ -368,6 +395,15 @@ $(function(){
 				}
 				return "<a href='"+href+"' target='_blank'>"+row["code"]+"</a>"
 			}},
+		    { "data": "infoformat",render:function(val,a,row){
+				if(val == 1){
+					return "招标"
+				}else if (val == 2){
+					return "拟建"
+				}else if (val == 3){
+					return "审批"
+				}
+			}},
 			{ "data": "urgency",render:function(val){
 				if(val==1){
 				  return "紧急"
@@ -860,6 +896,11 @@ $(function(){
 			return;
 		}	
 	};
+	// Change spider type: returns <select> HTML (options 1/2/3) whose onchange calls updateesp(value, "infoformat", state, code, id).
+	function updateInfoformat(id,state,code){
+		var spiderInforformat="<option value=1>招标</option><option value=2>拟建</option><option value=3>审批</option>";
+		return "<select onchange='updateesp(this.value,\"infoformat\",\""+state+"\",\""+code+"\",\""+id+"\")' class='form-control input-sm'>"+spiderInforformat+"</select>"
+	};
 	//修改紧急度
 	function updateUrgency(id,state,code){
 		var spiderUrgency="<option value=0>普通</option><option value=1>紧急</option>";

+ 1 - 1
src/web/templates/spiderbase.html

@@ -142,7 +142,7 @@
 						
 					</script>
 				</div>
-        <div class="form-group">
+        		<div class="form-group">
 					<label for="isflow" class="col-sm-6 control-label text-right">
 						是否是流程爬虫 
 					</label>