欧美free性护士vide0shd,老熟女,一区二区三区,久久久久夜夜夜精品国产,久久久久久综合网天天,欧美成人护士h版

目錄

柚子快報邀請碼778899分享:爬蟲,車輛數據可視化

柚子快報邀請碼778899分享:爬蟲,車輛數據可視化

http://yzkb.51969.com/

在懂車帝官網爬取車輛排行榜,由于你們沒有連接數據庫,可以把spiderObj.save_to_sql()注釋掉

import requests

from lxml import etree

import csv

import os

import time

import json

import pandas as pd

import re

import django

# Tell Django which settings module to load. setdefault() leaves an already
# exported DJANGO_SETTINGS_MODULE untouched, so the environment can override it.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', '可視化.settings')

# Configure Django for this standalone script; this has to run before the
# project model import that follows.
django.setup()

from myApp.models import CarInformation

class spider(object):
    """Crawl the Dongchedi sales ranking into ``temp.csv`` and optionally into Django.

    Progress is tracked in ``./spiderPage.txt``: every finished page appends the
    next ``offset`` on its own line, so an interrupted crawl resumes where it
    stopped. Each CSV row has eleven columns, in the order written by ``init``.
    """

    # ``data-row-anchor`` values on the parameter page, in CSV column order:
    # carModel, energyType, marketTime, insure.
    _DETAIL_ANCHORS = ('jb', 'fuel_form', 'market_time', 'period')

    # Number of ranking entries requested per call (``count=10`` in spiderUrl).
    _PAGE_SIZE = 10

    # Seconds to wait on a single HTTP request before giving up.
    _TIMEOUT = 30

    def __init__(self):
        """Store the ranking endpoint URL and the headers sent with ranking requests."""
        self.spiderUrl = ('https://www.dongchedi.com/motor/pc/car/rank_data?aid=1839&app_name=auto_web_pc&city_name'
                          '=%E8%9A%8C%E5%9F%A0&count=10&month=&new_energy_type=&rank_data_type=11&brand_id'
                          '=&price=&manufacturer=&outter_detail_type=&nation=0')
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, '
                          'like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36 Edg/120.0.0.0'
        }

    def init(self):
        """Create ``./temp.csv`` with its header row unless the file already exists."""
        if not os.path.exists('./temp.csv'):
            with open('./temp.csv', 'a', newline='', encoding='utf-8') as wf:
                write = csv.writer(wf)
                write.writerow(["brand", "carName", "carImg", "saleVolume", "price", "manufacturer", "rank", "carModel",
                                "energyType", "marketTime", "insure"])

    def get_page(self):
        """Return the most recently recorded offset as a string.

        Returns ``'0'`` when ``./spiderPage.txt`` is missing or holds no
        non-blank line, so a first run starts from the top of the ranking.
        """
        try:
            with open('./spiderPage.txt', 'r') as r_f:
                recorded = [line.strip() for line in r_f if line.strip()]
        except FileNotFoundError:
            return '0'
        return recorded[-1] if recorded else '0'

    def set_page(self, newPage):
        """Append ``newPage`` as a new line of ``./spiderPage.txt``."""
        with open('./spiderPage.txt', 'a') as a_f:
            a_f.write('\n' + str(newPage))

    def main(self):
        """Crawl ranking pages from the saved offset until an empty page is returned.

        Cars whose data cannot be collected (missing JSON key, missing node on
        the parameter page, failed detail request) are reported and skipped;
        the remaining cars of the page are still written to the CSV file.

        Raises:
            requests.RequestException: if a ranking request itself fails.
            ValueError: if the saved offset or the ranking response is malformed.
        """
        # A loop rather than self-recursion: a long crawl would otherwise exhaust
        # the interpreter's recursion limit.
        while True:
            offset = int(self.get_page())
            print("數(shù)據(jù)從{}開始爬取".format(offset + 1))
            response = requests.get(self.spiderUrl, headers=self.headers,
                                    params={'offset': offset}, timeout=self._TIMEOUT)
            payload = response.json()
            cars = (payload.get("data") or {}).get("list") or []
            if not cars:
                # Nothing left at this offset: the ranking is exhausted.
                break

            for index, car in enumerate(cars):
                print("正在爬取第%d" % (index + 1) + "數(shù)據(jù)")
                try:
                    carData = self._build_row(car)
                except (KeyError, IndexError, ValueError, requests.RequestException) as err:
                    print("skipped entry %d: %r" % (index + 1, err))
                    continue
                print(carData)
                self.save_to_csv(carData)

            self.set_page(offset + self._PAGE_SIZE)

    def _build_row(self, car):
        """Assemble one CSV row from a ranking entry plus its parameter page."""
        row = [
            car["brand_name"],                      # brand
            car["series_name"],                     # car name
            car["image"],                           # image link
            car["count"],                           # sales volume
            [car["min_price"], car["max_price"]],   # price range
            car["sub_brand_name"],                  # manufacturer
            car["rank"],                            # rank
        ]
        row.extend(self._fetch_detail_fields(car["series_id"]))
        return row

    def _fetch_detail_fields(self, series_id):
        """Return [carModel, energyType, marketTime, insure] read from the parameter page."""
        page = requests.get("https://www.dongchedi.com/auto/params-carIds-x-%s" % series_id,
                            timeout=self._TIMEOUT)
        tree = etree.HTML(page.text)
        if tree is None:
            raise ValueError("empty parameter page for series %s" % series_id)
        # The expression has to start with '//' to search the whole document.
        return [
            tree.xpath("//div[@data-row-anchor='%s']/div[2]/div/text()" % anchor)[0]
            for anchor in self._DETAIL_ANCHORS
        ]

    def save_to_csv(self, resultData):
        """Append ``resultData`` as one row of ``temp.csv``."""
        with open('temp.csv', 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(resultData)

    def clear_csv(self):
        """Load ``temp.csv``, drop incomplete and duplicate rows, and return the values.

        Returns:
            A NumPy array with one row per remaining CSV record.
        """
        df = pd.read_csv('temp.csv')
        df.dropna(inplace=True)
        df.drop_duplicates(inplace=True)
        print("總數(shù)量為%d" % df.shape[0])
        return df.values

    def save_to_sql(self):
        """Insert every cleaned CSV row as a ``CarInformation`` record."""
        data = self.clear_csv()
        for car in data:
            CarInformation.objects.create(
                brand=car[0],
                carName=car[1],
                carImg=car[2],
                saleVolume=car[3],
                price=car[4],
                manufacturer=car[5],
                rank=car[6],
                carModel=car[7],
                energyType=car[8],
                marketTime=car[9],
                insure=car[10]
            )

if __name__ == '__main__':
    # Script entry point: make sure the CSV file exists, then start crawling.
    crawler = spider()
    crawler.init()
    crawler.main()
    # Loading the cleaned CSV into the database is left disabled:
    # crawler.save_to_sql()

柚子快報邀請碼778899分享:爬蟲,車輛數據可視化

http://yzkb.51969.com/

相關(guān)鏈接

評論可見,查看隱藏內容

本文內容根據網絡資料整理,出于傳遞更多信息之目的,不代表金鑰匙跨境贊同其觀點和立場。

轉載請注明,如有侵權,聯系刪除。

本文鏈接:http://gantiao.com.cn/post/19541579.html

發(fā)布評(píng)論

您暫未設(shè)置收款碼

請(qǐng)?jiān)谥黝}配置——文章設(shè)置里上傳

掃描二維碼手機(jī)訪問(wèn)

文章目錄