3
0
lizongze 2 жил өмнө
parent
commit
8bd1249784

+ 2 - 2
FworkSpider/feapder/templates/spider_template.tmpl

@@ -55,10 +55,10 @@ class ${spider_name}(feapder.BiddingListSpider):
             district = ""  # 区县
 
             try:
-                next_page = driver.find_elements_by_xpath(f'//a[contains(text(),"{title}")]')[index]  # index防止标题相同
+                next_page = driver.find_elements_by_xpath(f'//a[contains(text(),"{title}")]')[0]  # index防止标题相同
             except:
                 try:
-                    next_page = driver.find_elements_by_xpath(f'//a[contains(text(),"{title[:10]}")]')[index] # 标题过长
+                    next_page = driver.find_elements_by_xpath(f'//a[contains(text(),"{title[:10]}")]')[0] # 标题过长
                 except:
                     continue
 

+ 15 - 4
FworkSpider/untils/tools.py

@@ -290,13 +290,24 @@ def njpc_fields_extract_special(html, data_item):
     return data_item
 
 
-def get_proxy():
+def get_proxy(scheme=None,default=None,socks5h=False):
     headers = {
         "Authorization": "Basic amlhbnl1MDAxOjEyM3F3ZSFB"
     }
-    proxy = requests.get("http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch", headers=headers).json()
-    print(f"切换代理:{proxy.get('data')}")
-    return proxy.get("data").get("http")
+    proxy_res = requests.get("http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch", headers=headers).json()
+
+    proxies = proxy_res.get('data')
+    if proxy_res and proxies:
+        if socks5h:
+            proxyh = {}
+            proxyh["http"] = proxies.get("http").replace("socks5", "socks5h")
+            proxyh["https"] = proxies.get("http").replace("socks5", "socks5h")
+            proxies = proxyh
+        print(f"切换代理:{proxies}")
+        if not scheme:
+            return proxies
+        else:
+            return proxies.get(scheme,default)
 
 
 def search(pattern, string):