python源码分享
本站提供的Python代码仅供学习
请勿违法使用
58同城
默认爬取北京的二手房源,结果保存到 D 盘(D:/58.TXT)
from bs4 import BeautifulSoup
import requests
import time
# Browser-like User-Agent header; without it 58.com is likely to reject the
# request as coming from an automated client.
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
}
def get_info(url):
    """Scrape one 58.com second-hand-housing listing page and append results.

    Fetches *url*, extracts every listing's title, detail line and price,
    then appends one dict per listing (its ``str()`` repr, one per line)
    to ``D:/58.TXT`` and echoes it to stdout.

    Args:
        url: A 58.com ershoufang listing-page URL.
    """
    # timeout keeps the crawler from hanging forever on a stalled connection.
    tc_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(tc_data.text, 'lxml')
    titles = soup.select('#__layout > div > section > section.list-main > section.list-left > section> div> a > div.property-content > div.property-content-detail > div.property-content-title > h3')
    infos = soup.select('#__layout > div > section > section.list-main > section.list-left > section > div > a > div.property-content > div.property-content-detail > section')
    prices = soup.select('#__layout > div > section > section.list-main > section.list-left > section > div > a > div.property-content > div.property-price')
    # Open the output file once per page (the original re-opened it for every
    # listing and never closed it — a file-handle leak). Explicit utf-8 keeps
    # the Chinese text intact regardless of the Windows locale encoding.
    with open("D:/58.TXT", "a", encoding="utf-8") as f:
        for title, info, money in zip(titles, infos, prices):
            xx = {
                '标题': title.get_text().split(',', 1),
                '信息': info.get_text().replace(" ", "").replace("\n", " "),
                '价格': money.get_text().split(',', 1),
            }
            f.write(str(xx) + '\n')
            print(xx)
if __name__ == '__main__':
    # Crawl listing pages 1 through 49 of the Beijing second-hand-housing
    # section, pausing between requests to avoid hammering the server.
    for page in range(1, 50):
        page_url = f'https://bj.58.com/ershoufang/p{page}/?PGTID=0d30000c-0253-9e2a-70d5-494ade5aa8e5&ClickID=1'
        get_info(page_url)
        time.sleep(5)
本文链接:https://www.xhily.com/590.html
版权声明:本博客所有文章除特别声明外,均采用 CC BY 4.0 CN协议 许可协议。转载请注明出处!
版权声明:本博客所有文章除特别声明外,均采用 CC BY 4.0 CN协议 许可协议。转载请注明出处!
THE END
0
二维码
打赏
海报


python源码分享
本站提供的Python代码仅供学习
请勿违法使用
58同城:默认爬取北京的二手房源,结果保存到 D 盘
from bs4 import BeautifulSoup
import requests
import time
headers = {
……

文章目录
关闭
共有 0 条评论