Browse Source

ybw列表页维护

lizongze 2 years ago
parent
commit
ee9de19958
1 changed file with 3 additions and 3 deletions
  1. 3 3
      ybw/list_spider.py

+ 3 - 3
ybw/list_spider.py

@@ -109,11 +109,11 @@ class ListSpider:
     def crawl_response(self, response, menu: CrawlMenu, **kwargs):
         element: HtmlElement = fromstring(response.text)
         results = []
-        feature = '//div[@id="pages"]/following-sibling::table//tr'
+        feature = '//tr[@id="resultPrompt"]/parent::*/tr[not(@id)]'
         for node in element.xpath(feature):
-            publish_time = "".join(node.xpath('./td[6]/text()')).strip()
+            publish_time = "".join(node.xpath('./td[last()]/text()')).strip()
             if '-' not in publish_time:
-                publish_time = "".join(node.xpath('./td[7]/text()')).strip()
+                publish_time = "".join(node.xpath('./td[6]/text()')).strip()
             area = "".join("".join(node.xpath('./td[5]/text()')).split())
             title = "".join("".join(node.xpath('./td[2]/a/text()')).split())
             competehref = 'https://www.chinabidding.cn{}'.format("".join(node.xpath('./td[2]/a/@href')))