Python源码分享

本站提供的Python代码仅供学习

请勿违法使用

58同城

默认爬取北京二手房房源，结果保存到D盘（D:/58.TXT）
from bs4 import BeautifulSoup
import requests
import time


# Request headers sent with every page fetch: a desktop Chrome (Windows 10)
# User-Agent string.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}

def get_info(url):
    """Scrape one listing page and append its entries to ``D:/58.TXT``.

    The page at ``url`` is fetched with the module-level ``headers`` and
    parsed with BeautifulSoup (``lxml`` parser). Three CSS selectors pull
    out the title, the detail section and the price of every listing. The
    three result lists are combined with ``zip``, so the output is
    truncated to the shortest list if they differ in length.

    Each listing becomes a dict with the keys ``'标题'`` (title),
    ``'信息'`` (info) and ``'价格'`` (price). Its ``str()`` form is
    appended as one line to ``D:/58.TXT`` and also printed. If the page
    yields no listings, the file is not opened at all.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        None.

    Raises:
        Any exception raised by ``requests.get`` (connection errors,
        timeouts) or by opening/writing the output file propagates to
        the caller; nothing is caught here.
    """
    # A timeout keeps a stalled connection from hanging the whole crawl.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')

    titles = soup.select('#__layout > div > section > section.list-main > section.list-left > section> div> a > div.property-content > div.property-content-detail > div.property-content-title > h3')
    infos = soup.select('#__layout > div > section > section.list-main > section.list-left > section > div > a > div.property-content > div.property-content-detail > section')
    prices = soup.select('#__layout > div > section > section.list-main > section.list-left > section > div > a > div.property-content > div.property-price')

    # Distinct loop names avoid shadowing the lists being zipped.
    records = [
        {
            '标题': title.get_text().split(',', 1),
            # Drop spaces, then turn line breaks into single spaces.
            '信息': info.get_text().replace(" ", "").replace("\n", " "),
            '价格': price.get_text().split(',', 1),
        }
        for title, info, price in zip(titles, infos, prices)
    ]
    if not records:
        return

    # Open the file once per page and close it deterministically. The
    # explicit UTF-8 encoding makes writing Chinese text independent of
    # the platform's default locale encoding.
    with open("D:/58.TXT", "a", encoding="utf-8") as f:
        for record in records:
            f.write(str(record) + '\n')
            print(record)




if __name__ == '__main__':
    # Listing pages 1 through 49 (range(1, 50) excludes 50) of the
    # bj.58.com second-hand housing section.
    urls = ['https://bj.58.com/ershoufang/p{}/?PGTID=0d30000c-0253-9e2a-70d5-494ade5aa8e5&ClickID=1'.format(i) for i in range(1, 50)]
    for url in urls:
        get_info(url)
        # Pause after every page so requests are spaced out. The sleep
        # used to sit outside the loop and ran only once, after all
        # pages had already been fetched back-to-back.
        time.sleep(5)

 

THE END