package main import ( "encoding/json" "fmt" "log" util "spider_chromedp/chromedp/mfw" "time" ) type ChromedpParam struct { RunRedis bool `json:"runredis"` //是否redis判重 TimeOut int64 `json:"timeout"` //超时时间 OtherTimeOut int64 `json:"othertimeout"` //其他超时时间 Actions []Actions `json:"actions"` //动作集 OtherActions []Actions `json:"otheractions"` //其他动作集 } type Actions struct { Action string `json:"action"` //执行动作 Param interface{} `json:"param"` //选择器语句 Selector string `json:"selector"` //选择器Selectors } type DynamicIPMap struct { Code string InvalidTime int64 } var TimeChan = make(chan bool, 1) var Alldownloader map[string]DynamicIPMap = make(map[string]DynamicIPMap) var Msclient *util.Client func main() { InitMsgClient("127.0.0.1:801", "123") go Download6() //go Download5() //go Download1() //go Download1() //go Download2() //go Download3() //go Download4() //顺序采集 ch := make(chan bool) <-ch } func Download6() { msgid := util.UUID(8) param := ChromedpParam{ TimeOut: 60, Actions: []Actions{ Actions{ Action: "navigate", Param: "http://www.tnmg.com.cn/information/info_zxzb.aspx?classid=826&classname=%e8%af%a2%e4%bb%b7%e5%87%bd", Selector: "", }, Actions{ Action: "waitready", Param: "#GridView1_KXPortal_Pager1_btnNext", Selector: "ByID", }, Actions{ Action: "click", Param: `#GridView1_KXPortal_Pager1_btnNext`, Selector: "ByID", }, Actions{ Action: "wait", Param: 2, }, Actions{ Action: "waitready", Param: `#GridView1`, Selector: "ByID", }, Actions{ Action: "outerhtml", Param: `#GridView1`, Selector: "ByID", }, }, } ret, err := Msclient.Call("", msgid, util.SERVICE_DOWNLOAD, util.SENDTO_TYPE_RAND_RECIVER, param, 300) tmp := []string{} json.Unmarshal(ret, &tmp) fmt.Println(err, tmp) } func Download5() { msgid := util.UUID(8) param := ChromedpParam{ TimeOut: 60, Actions: []Actions{ Actions{ Action: "navigate", Param: "https://zbgl.ahmu.edu.cn/sfw_cms/e?page=cms.detail&cid=49832&nextcid=4181&aid=9652", Selector: "", }, Actions{ Action: "waitvisible", Param: "#main > div > div > div.contant > div > div.msbox > div > iframe", Selector: "ByQuery", }, Actions{ Action: "evaluate", //Param: "document.querySelector('iframe').contentDocument.body.children/[0/].contentWindow.document.body.outerHTML", Param: `document.querySelector("#main > div > div > div.contant > div > div.msbox > div > iframe").contentDocument.body.outerHTML;`, Selector: "", }, }, } ret, err := Msclient.Call("", msgid, util.SERVICE_DOWNLOAD, util.SENDTO_TYPE_RAND_RECIVER, param, 300) tmp := []string{} json.Unmarshal(ret, &tmp) fmt.Println(err, tmp) } func Download() { msgid := util.UUID(8) param := ChromedpParam{ TimeOut: 60, OtherTimeOut: 30, Actions: []Actions{ Actions{ Action: "navigate", Param: "https://neep.shop/html/portal/notice.html?type=rfqAnno&nodeurl=callback_list_enquiry_anno¬iceMoreUrl=https://gd-prod.oss-cn-beijing.aliyuncs.com/upload/cms/column/inquireListFive/index.html&pageTag=undefined&menu_code=&parent_menu_code=&root_menu_code=&tdsourcetag=s_pcqq_aiomsg", Selector: "", }, Actions{ Action: "waitready", Param: "#table > tbody > tr", Selector: "ByQuery", }, Actions{ Action: "listhref", Param: "#table > tbody > tr > td:nth-child(3) > a", Selector: "ByQuery", }, Actions{ Action: "listhtml", Param: 0, Selector: "", }, }, OtherActions: []Actions{ Actions{ Action: "waitready", Param: "#root > div.container.details-page > div.details-content > div", Selector: "ByQuery", }, //Actions{ // Action: "wait", // Param: 5, // Selector: "ByQuery", //}, Actions{ Action: "outerhtml", Param: "#root > div.container.details-page > div.details-content", Selector: "ByQuery", }, }, } ret, _ := Msclient.Call("", msgid, util.SERVICE_DOWNLOAD, util.SENDTO_TYPE_RAND_RECIVER, param, 300) result := []string{} json.Unmarshal(ret, &result) for _, r := range result { log.Println(r) log.Println("==================================================================================================") } } func Download4() { msgid := util.UUID(8) param := ChromedpParam{ TimeOut: 60, OtherTimeOut: 30, Actions: []Actions{ Actions{ Action: "navigate", Param: "http://www.ltcost.com/news/zaojiaxiehui/list_27_1.html", Selector: "", }, Actions{ Action: "waitready", Param: "#root > div.introduction > div.container.clearfix > div.right-content.fl > div > ul > li", Selector: "ByQuery", }, Actions{ Action: "listhref", Param: "#root > div.introduction > div.container.clearfix > div.right-content.fl > div > ul > li > h4 > a", Selector: "ByQuery", }, Actions{ Action: "listhtml", Param: 0, Selector: "", }, }, OtherActions: []Actions{ Actions{ Action: "waitready", Param: "#root > div.container.details-page > div.details-content > div", Selector: "ByQuery", }, //Actions{ // Action: "wait", // Param: 5, // Selector: "ByQuery", //}, Actions{ Action: "outerhtml", Param: "#root > div.container.details-page > div.details-content", Selector: "ByQuery", }, }, } ret, _ := Msclient.Call("", msgid, util.SERVICE_DOWNLOAD, util.SENDTO_TYPE_RAND_RECIVER, param, 300) result := []string{} json.Unmarshal(ret, &result) for _, r := range result { log.Println(r) log.Println("==================================================================================================") } } func Download1() { msgid := util.UUID(8) param := ChromedpParam{ TimeOut: 60, Actions: []Actions{ //Actions{ // Action: "changeip", // Param: "", // Selector: "", //}, Actions{ Action: "navigate", Param: "https://www.sprtc.com/index/qrtwo.htm?id=c1d01625213f11ee95a2d7772ab577a8", Selector: "", }, Actions{ Action: "waitready", Param: "#iframe", Selector: "ByQuery", }, Actions{ Action: "wait", Param: 5, Selector: "", }, Actions{ Action: "evaluate", //Param: "document.querySelector('iframe').contentDocument.body.children/[0/].contentWindow.document.body.outerHTML", Param: `document.querySelector('iframe').contentDocument.body.children[0].contentWindow.document.body.outerHTML`, Selector: "", }, }, } ret, err := Msclient.Call("", msgid, util.SERVICE_DOWNLOAD, util.SENDTO_TYPE_RAND_RECIVER, param, 300) tmp := []string{} json.Unmarshal(ret, &tmp) fmt.Println(err, tmp) } func Download2() { msgid := util.UUID(8) param := ChromedpParam{ TimeOut: 30, Actions: []Actions{ //Actions{ // Action: "navigate", // Param: "https://web.uutool.cn", // //Param: "http://www.baidu.com", // Selector: "", //}, //Actions{ // Action: "wait", // Param: 5, // Selector: "", //}, //Actions{ // Action: "changeip", // Param: "", // Selector: "", //}, //Actions{ // Action: "navigate", // Param: "https://web.uutool.cn", // //Param: "http://www.baidu.com", // Selector: "", //}, //Actions{ // Action: "wait", // Param: 5, // Selector: "", //}, }, } for i := 1; i <= 50; i++ { param.Actions = append(param.Actions, Actions{ Action: "changeip", Param: "", Selector: "", }) param.Actions = append(param.Actions, Actions{ Action: "navigate", Param: "https://web.uutool.cn", //Param: "http://www.baidu.com", Selector: "", }) param.Actions = append(param.Actions, Actions{ Action: "wait", Param: 5, Selector: "", }) } ret, err := Msclient.Call("", msgid, util.SERVICE_DOWNLOAD, util.SENDTO_TYPE_RAND_RECIVER, param, 300) tmp := map[string]interface{}{} json.Unmarshal(ret, &tmp) fmt.Println(err, tmp) } func Download3() { time.Sleep(5 * time.Second) msgid := util.UUID(8) param := ChromedpParam{ TimeOut: 600, Actions: []Actions{ Actions{ Action: "navigate", Param: "http://www.baidu.com", Selector: "", }, }, } ret, err := Msclient.Call("", msgid, util.SERVICE_DOWNLOAD, util.SENDTO_TYPE_RAND_RECIVER, param, 300) tmp := map[string]interface{}{} json.Unmarshal(ret, &tmp) fmt.Println(err, tmp) } //初始化,启动消息客户端 func InitMsgClient(serveraddr, name string) { Msclient, _ = util.NewClient(&util.ClientConfig{ClientName: name, MsgServerAddr: serveraddr, EventHandler: processevent, OnRequestConnect: func() { log.Println("重连", serveraddr, name) }, OnConnectSuccess: func() { log.Println("重连成功") }, CanHandleEvents: []int{util.SERVICE_DOWNLOAD_APPEND_NODE, util.SERVICE_DOWNLOAD_DELETE_NODE}, ReadBufferSize: 500, WriteBufferSize: 500, }) go gc4Alldownloader() } func processevent(p *util.Packet) { var data []byte switch p.Event { case util.SERVICE_DOWNLOAD_APPEND_NODE: data = p.GetBusinessData() //log.Println("获取动态地址:", len(data), string(data)) for i := 0; i < len(data)/8; i++ { code := string(data[i*8 : (i+1)*8]) Alldownloader[code] = DynamicIPMap{ Code: code, InvalidTime: time.Now().Unix() + 60*10, } } case util.SERVICE_DOWNLOAD_DELETE_NODE: data = p.GetBusinessData() //log.Println("删除动态地址:", len(data), string(data)) for i := 0; i < len(data)/8; i++ { code := string(data[i*8 : (i+1)*8]) delete(Alldownloader, code) } } } func gc4Alldownloader() { n := time.Now().Unix() for _, v := range Alldownloader { if v.InvalidTime < n { delete(Alldownloader, v.Code) } } TimeAfterFunc(1*time.Minute, gc4Alldownloader, TimeChan) } func TimeAfterFunc(td time.Duration, f func(), ch chan bool) { ch <- true time.Sleep(10 * time.Millisecond) <-ch time.AfterFunc(td, func() { f() }) }