程序地带

根据业务摸索出的一个selenium代码模版(python)


前言

总算入行上班几个月了,不得不说业务是真的不消停啊。。
本人工作中经常遇到一种场景:为甲方做自动化接口处理工具,而登录需要短信验证码。
嘛算是摸索出了一套selenium代码模板,主要解决如下痛点


会话超时/断开时,又要找甲方问短信等验证码登录
调试途中增减修改功能,算是调试中热更新

分享一下


模板代码
app.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import importlib
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import backend
# Shared logger object, taken from the backend module.
logger = backend.logger
# Absolute directory containing this script.
basepath = os.path.dirname(os.path.abspath(__file__))
# chromedriver.exe is expected to sit next to this script.
driver_path = os.path.join(basepath, "chromedriver.exe")
def init_browser(driver_path=None):
    """Start a maximized Chrome session with automation hints suppressed.

    Args:
        driver_path: Path to the chromedriver executable. When ``None`` the
            ``executable_path`` argument is omitted entirely, so Selenium uses
            its own default lookup instead of receiving ``None`` as a path.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    options = webdriver.ChromeOptions()
    for flag in ("--no-sandbox", "--disable-gpu"):
        options.add_argument(flag)
    # NOTE(review): 2 is presumably Chrome's "block" value for the
    # notifications content setting -- confirm against Chrome prefs docs.
    prefs = {
        "profile.default_content_setting_values": {
            "notifications": 2
        }}
    options.add_experimental_option("prefs", prefs)
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)

    chrome_kwargs = {"options": options}
    if driver_path is not None:
        # Only forward an explicit driver location; passing None as a path
        # would make the driver service try to launch a non-existent binary.
        chrome_kwargs["executable_path"] = driver_path
    browser = webdriver.Chrome(**chrome_kwargs)
    browser.maximize_window()
    # Registered through CDP so the script is evaluated on every new document:
    # it redefines navigator.webdriver to read as undefined.
    browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
Object.defineProperty(navigator, "webdriver", {
get: () => undefined
})
"""
    })
    return browser
def jump_security(wait, mouse):
    """Click the "details-button" element, then the "proceed-link" element.

    NOTE(review): these ids presumably belong to the browser's certificate
    warning page -- confirm against the target site.

    Args:
        wait: Wait helper whose ``until`` returns the located element.
        mouse: Action chain used to move to and click the proceed link.
    """
    details_locator = (By.ID, "details-button")
    proceed_locator = (By.ID, "proceed-link")

    details_button = wait.until(EC.presence_of_element_located(details_locator))
    details_button.click()

    proceed_link = wait.until(EC.presence_of_element_located(proceed_locator))
    pending_actions = mouse.move_to_element(proceed_link).click()
    pending_actions.perform()
def init_login(driver, wait, mouse):
    """Pre-fill the username and password fields of the login form.

    Args:
        driver: Selenium driver currently showing the login page.
        wait: Wait helper used to block until the username field is present.
        mouse: Unused here; kept so the signature stays unchanged for callers.
    """
    username_inp = wait.until(EC.presence_of_element_located((By.ID, "username")))
    username_inp.send_keys("user")
    # find_element(By.ID, ...) replaces the deprecated find_element_by_id
    # helper and is available in both Selenium 3 and Selenium 4.
    password_inp = driver.find_element(By.ID, "password")
    password_inp.send_keys("password")
class App(object):
    """Singleton that owns the Selenium browser session.

    The first ``App()`` call starts Chrome, opens the login page and pre-fills
    the login form. The driver, wait helper, action chain and an error counter
    are stored as class attributes. Every later call returns the same instance
    without touching the browser again.
    """

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating the browser session on first use.

        Raises:
            Exception: Whatever browser start-up, navigation or login
                pre-fill raised; the half-initialised browser is closed first.
        """
        if not hasattr(cls, "_instance"):
            driver = init_browser(driver_path)
            try:
                wait = WebDriverWait(driver, 20)
                mouse = ActionChains(driver)
                driver.get("https://www.target.com/login")
                # jump_security(wait, mouse)
                init_login(driver, wait, mouse)
            except Exception:
                # Setup failed before the singleton was published. Close this
                # browser so the next attempt does not leave an orphaned
                # Chrome window behind.
                driver.quit()
                raise
            # Publish the session only after setup fully succeeded.
            cls.error_num = 0
            cls.driver = driver
            cls.wait = wait
            cls.mouse = mouse
            cls._instance = object.__new__(cls)
        return cls._instance
# Mode 1: endless interactive client loop
def run_unlimited():
    """Run the backend workflow repeatedly against the shared browser session.

    Each round obtains the ``App`` singleton, waits for the operator to finish
    logging in, runs ``backend.Backend(...).main()``, then blocks on a prompt
    so ``backend.py`` can be edited before it is reloaded for the next round.
    Failures are logged instead of being silently discarded, and a reload
    failure (for example a half-edited file) does not end the loop.
    """
    while True:
        try:
            obj = App()
            input("等待登录并进入目标页面后,回此处按回车 >>> ")
            back = backend.Backend(obj)
            back.main()
        except Exception:
            # Keep the loop (and the logged-in browser) alive, but record the
            # traceback so the failure can be diagnosed.
            logger.exception("backend run failed")
        finally:
            input("供backend修改的阻塞暂停")
            try:
                importlib.reload(backend)
            except Exception:
                # The previously loaded backend module stays in use.
                logger.exception("reloading backend failed")
# Mode 2: expose the workflow as a local HTTP API
from flask import Flask
app = Flask(__name__)


@app.route("/", methods=["GET"])
def main():
    """Reload the backend module, run its workflow and return the outcome.

    Returns:
        A dict with the value returned by ``Backend.main()`` under the
        ``"results"`` key; Flask serialises a dict return value as JSON.
    """
    importlib.reload(backend)
    back = backend.Backend(App())
    results = back.main()
    # A Flask view must not return None, so always wrap the outcome.
    return {"results": results}
if __name__ == "__main__":
    # Windows only: force-kill leftover driver and browser processes.
    for image_name in ("chromedriver.exe", "chrome.exe"):
        os.system(f"taskkill /im {image_name} /F")
    run_unlimited()
    # app.run()

前端有两部分,一是单例的selenium,二是此自动化处理工具的形式:client循环形式 / api服务形式


单例的 __new__ 里初始化一些属性,处理登录的那部分也可以放到后台(backend)


两种形式其实就是看形式是要主动触发还是被动触发,至于具体做什么就放后台


backend.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import json
import os
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
import simplejson
from loguru import logger
from retry import retry
from tqdm import tqdm, trange
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
basepath = os.path.abspath("./")
# importlib.reload() re-executes this module but keeps its global namespace,
# so a sink id stored by a previous load is still visible here. Remove that
# sink first; otherwise every reload adds one more file sink and each message
# is written multiple times.
_stale_sink_id = globals().get("_log_sink_id")
if _stale_sink_id is not None:
    try:
        logger.remove(_stale_sink_id)
    except ValueError:
        # The sink was already removed elsewhere; nothing left to clean up.
        pass
_log_sink_id = logger.add(
    f"{basepath}/logs/{os.path.basename(__file__)[:-3]}.log",
    format="{level} | {time:YYYY-MM-DD HH:mm:ss} | {function}:{line} - {message}",
    level="INFO",
    retention="5 days",
)
class Backend(object):
    """Work performed against the logged-in browser session.

    Combines Selenium commands on the shared driver with direct HTTP calls
    made through a ``requests`` session whose headers are completed from the
    browser's cookies and session storage.
    """

    def __init__(self, obj):
        """Create the HTTP session and attach the shared Selenium driver.

        Args:
            obj: Object exposing the live Selenium driver as ``obj.driver``.
        """
        self.sess = requests.session()
        self.driver = obj.driver
        # NOTE(review): Content-Length and the form-urlencoded Content-Type
        # are fixed here while do_api() posts a JSON body -- confirm the
        # server accepts this combination.
        self.sess.headers = {"Accept": "application/json, text/javascript, */*; q=0.01",
                             "Accept-Encoding": "gzip, deflate",
                             "Accept-Language": "zh-Hans-CN, zh-Hans; q=0.5",
                             "Cache-Control": "no-cache",
                             "Connection": "Keep-Alive",
                             "Content-Length": "561",
                             "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
                             "Cookie": "SESSION=abcdefg",
                             "Host": "www.target.com",
                             "Referer": "https://www.target.com/path",
                             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
                             "X-Requested-With": "XMLHttpRequest"
                             }

    def get_cookie(self):
        """Click the trigger input, then return browser cookies as name -> value."""
        # "xpath" is the locator strategy string; find_element(by, value)
        # replaces the deprecated find_element_by_xpath helper.
        self.driver.find_element("xpath", '//input[@class="e.g:trigger btn"]').click()
        return {item["name"]: item["value"] for item in self.driver.get_cookies()}

    def get_headers(self):
        """Refresh the session's Authorization and Cookie headers from the browser.

        Raises:
            KeyError: If the ``SESSION`` or ``acw_tc`` cookie is missing.
        """
        cookies = self.get_cookie()
        token = self.driver.execute_script('return window.sessionStorage.getItem("token")')
        self.sess.headers.update({
            "Authorization": token,
            "Cookie": f'SESSION={cookies["SESSION"]}; acw_tc={cookies["acw_tc"]}',
        })

    @retry((json.decoder.JSONDecodeError, simplejson.errors.JSONDecodeError, KeyError, ValueError), tries=3, delay=1)
    def do_api(self):
        """POST to the API with refreshed headers and hand the JSON to the hook.

        Retried up to three times, one second apart, on JSON decoding errors,
        ``KeyError`` and ``ValueError``.

        Raises:
            ValueError: If the response status code is not 200.
        """
        url = "https://www.target.com/api/path"
        payload = {
            "params": "31b1xu0",
        }
        self.get_headers()
        resp = self.sess.post(url, json=payload, verify=False, timeout=10)
        if resp.status_code != 200:
            raise ValueError(f"do_api failed:: {resp.text}")
        self.pre_api_task(resp.json())  # do what you need todo

    def do_selenium_command(self):
        """Run the in-page clicks, then call the page hook once per result page.

        Raises:
            ValueError: If the page count cannot be found in the page source.
        """
        self.driver.execute_script('$("p[class=imgShow]").click()')
        self.driver.execute_script(
            'document.getElementsByClassName("supportRadioOptional1 checked")[0].click();'
        )
        # The page count appears in the page source as "共 <number> 页".
        found = re.search(r"共 (\d+) 页", self.driver.page_source)
        if found is None:
            raise ValueError("page count not found in page source")
        pagenum = int(found.group(1))
        for page_index in trange(pagenum, ncols=40):
            self.pre_page_task()  # do what you need todo
            # Only move on while another page exists; after the last page
            # there is nothing to navigate to.
            if page_index + 1 < pagenum:
                self.driver.execute_script(
                    f'PaginationpageTable.gotoPage("next", "{page_index + 2}", "50");'
                )

    def pre_api_task(self, data):
        """Template hook: process the decoded JSON body of the API response.

        Does nothing by default; fill in or override with the real handling.
        """

    def pre_page_task(self):
        """Template hook: process the page currently shown in the browser.

        Does nothing by default; fill in or override with the real handling.
        """

    def main(self):
        """Run the Selenium step followed by the API step."""
        self.do_selenium_command()
        self.do_api()
if __name__ == "__main__":
    # requests rejects a URL without a scheme, so spell out http:// explicitly.
    requests.get("http://127.0.0.1:5000")

基于前面说的短信验证码,让甲方登录后selenium一顿操作就把api的headers补完了,可以愉快地请求接口了


需要用js取参数的话可以这样写:token = self.driver.execute_script('return window.sessionStorage.getItem("token")')


目前遇到的一些注意点:


渲染的页面带frame,需要switch_to再xpath等处理,可把driver.page_source写进文件判断是否该目标页顺带测定位
有时driver.find_element_by_*无法定位,试试用js;有些JS/Jquery功能在老版IE上用不了,回用mouse处理(套娃呢喂);连续使用js时要注意响应等待时间
basepath处用"./"取巧了一下(与pyinstaller打包有关),可以基于此变量做一些本地文件处理
Last

毕竟最终是为甲方做的,程序要以甲方设备为准:即使它是win7,用pywin32定位句柄会出现兼容问题;即使业务网站只兼容IE内核,js部分功能无法使用。头发都掉光了啊。


毕竟是个人摸索出的,可能有更优解,如大佬路过还请不要吝啬交(p)流(y)一下心得


版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://www.cnblogs.com/jsoneri/p/14287930.html

随机推荐

spring ioc

目录springioc1控制反转,创建对象1.1 无参构造函数1.2set方式1.3工厂模式 2依赖注入2.1手动注入2.1.1有参构造函数注入2.1.2set方法注入2.2自动注入2....

yang_zzu 阅读(881)

动态分辨率是什么意思_数字化仪的8大动态参数

关注我们更多精彩等你发现!在选择用于获取电子信号的数字化仪时,考虑的主要规格往往是采样率,带宽和分辨率。前两个参数有助于定义数字化仪能够捕获的最大频率范围。作为精确和可重复...

weixin_39937447 阅读(664)

关于字符串(压缩与解压)

这次是第二篇了最近发现自己字符串学的好差然后写到了两道好玩的题关于字符串的下面先挂第一道简单的题目如果字符串中出现了连续的N(N>1)个相同字符串S,那么它们将会被压缩为[N|S]...

郑大偷懒第一名 阅读(448)

切比雪夫距离 ( Chebyshev Distance )

切比雪夫距离ChebyshevDistance前言一、简介二、自己的理解总结前言听说进互联网大厂都要刷题,我虽然是做Androidgraphics,不是做互联网的...

程序员小W和他的小伙伴们 阅读(913)

移动/web端性能指标

移动端性能指标1、内存80%2、CPU3、流量4、电量5、启动速度6、滑动速度、界面切换速度7、与服务器交互的网络速度web端的性能测试1.cpu2.内存3.网络4.IO指标5.磁盘6.兼容...

A停车做枫林晚 阅读(929)