"""Imports and module-level settings for a kongfz.com scraping script.

Everything here is a plain module global (thresholds, counters, URLs,
account data).  How each value is consumed is not visible in this section
of the file; the comments below are translations of the original notes.
"""

import base64
import csv
import datetime
import hashlib
import hmac
import json
import os
import random
import socket
import sys
import time
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# HTTP headers (a desktop Chrome User-Agent).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.81 Safari/537.36",
}

# Search URL prefix; ends in "key=" so a search key can be appended.
price_url = "https://search.kongfz.com/item_result/?status=0&key="

# NOTE(review): "mix" in the names below appears to stand for "min"
# (the original comments all say "lowest/minimum") -- names kept as-is
# because other code may reference them.

# Shop minimum selling price.
mix_price = 5
# Shipping (courier) fee.
price_kuaidi = 5
# Book minimum selling price.
book_mix_price = 20
# Book minimum selling price (2).
book_mix_price2 = 40
# Collect data for the most recent N months.
month_num = 3
# Minimum sales volume within those months.
mix_number = 8  # minimum quantity
# Book minimum sales volume within three months (2).
mix_number2 = 5
# Minimum time interval between data collections; drawn once at import time.
time_flag = random.randint(30, 120)
# Collection-limit flag.
xianzhi_flag = 0
# Collection time interval.
time1 = 0
time2 = 0
# ISBN search count.
isbn_num = 0
# Maximum ISBN search count.
max_isbn_num = 150
# Flag for the first time the web page is opened.
first_flag = 0
# Default courier fee.
kuaidi_price = 8
# Maximum book list.
max_num = 950
# Minimum book list.
# NOTE(review): min_num (951) is greater than max_num (950) -- confirm
# this is intentional.
min_num = 951
# Server address.
send_data = '121.37.184.250'
# Listening flag data.
msg_data = '11111111'
# Peer (competitor) shop data table.
shop_path = 'shop_id.csv'

# NOTE(review): credentials are hard-coded in source (here and in name_msg
# below); consider moving them to environment variables or a config file
# kept out of version control.
name = '13047094458'
passwd = 'hyk@123+-'

# NOTE(review): this assignment appeared twice in the original with the
# same value; it is kept once.
sousuo_end_time = "07:58"

# Search-result URL prefixes; each ends in "page=".
# NOTE(review): the source was whitespace-collapsed; every entry after the
# first is read as commented out -- confirm against the original file.
urls = [
    "https://search.kongfz.com/product/?dataType=1&press=建筑工业出版社&price=10.00~&actionPath=dataType,press,sortType,price&sortType=10&page=",
    # "https://search.kongfz.com/product/?dataType=1&press=人民邮电出版社&price=10.00~&actionPath=dataType,press,sortType,price&sortType=10&page=",
    # "https://search.kongfz.com/product/?dataType=1&press=外文出版社&price=10.00~&actionPath=dataType,press,sortType,price&sortType=10&page=",
    # "https://search.kongfz.com/product/?dataType=1&press=团结出版社&price=10.00~&actionPath=dataType,press,sortType,price&sortType=10&page=",
    # "https://search.kongfz.com/product/?dataType=1&press=天天出版社&price=10.00~&actionPath=dataType,press,sortType,price&sortType=10&page=",
    # "https://search.kongfz.com/product/?dataType=1&press=台海出版社&price=10.00~&actionPath=dataType,press,sortType,price&sortType=10&page=",
]

# Initial collection link; must be the link of a Kongfz category.
url_zong = "https://item.kongfz.com/Czhexue/v1w1/"
# First page to collect.
start_page = 1
# Last page to collect: 2 means pages 1 and 2 are collected; to collect
# only page 1, set both the start and end page to 1.
end_page = 100

shop_book_mix_num = 100
shop_book_max_num = 2000
shop_sale_num = 0.5

#name_msg = [['19039408895','cpp4655025'],['16541154686','www9909283'],['17136641868','sssk2019'],['16563084507','aaaa2201'],['16584360824','aaaapppp102'],]
name_msg = [['19039408895', 'cpp4655025'], ]
name_index = 0

# After this many searches, restart the browser and switch IP and account.
sousuo_num = 80