import time
from lxml import etree
from appium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Values sent to the Appium server as desired capabilities.
PLATFORM = "Android"
DEVICE_NAME = "0815F8612D924005"
APP_PACKAGE = "com.tencent.mm"
APP_ACTIVITY = ".ui.LauncherUI"

# Address of the Appium server the remote driver connects to.
DRIVER_SERVER = "http://localhost:4723/wd/hub"

# Upper bound, in seconds, for every explicit wait.
TIMEOUT = 15

# The original identifiers were misspelled; they stay available as aliases so
# that code referring to the old names keeps working.
PLATFROM = PLATFORM
DEVIE_NAME = DEVICE_NAME
DEIVER_SERVER = DRIVER_SERVER
TIMROUT = TIMEOUT

# Swipe gesture geometry: a swipe starts at
# (FLICK_START_X, FLICK_START_Y + FLICK_DISTANCE) and ends at
# (FLICK_START_X, FLICK_START_Y), i.e. the finger moves towards smaller y.
FLICK_START_X = 300
FLICK_START_Y = 300
FLICK_DISTANCE = 700
class WX(object):
    """Automate the WeChat Android app through Appium.

    ``enter`` navigates the native UI to an official account's history page
    and ``crawl`` reads the links rendered in the H5 (WebView) page.

    The ``resource-id`` values used below are hard-coded; presumably they are
    tied to one specific WeChat build -- verify them against the installed
    version.
    """

    # Context names passed to ``driver.switch_to.context``.
    WEBVIEW_CONTEXT = "WEBVIEW_com.tencent.mm:tools"
    NATIVE_CONTEXT = "NATIVE_APP"

    # How many screens ``enter`` scrolls through before giving up.
    MAX_SWIPES = 50

    def __init__(self):
        """Build the desired capabilities and create the remote driver.

        Sets ``self.desired_caps``, ``self.driver`` and ``self.wait`` (a
        ``WebDriverWait`` bound to the driver with a ``TIMROUT`` second limit).
        """
        # Driver configuration.
        self.desired_caps = {
            "platformName": PLATFROM,
            "deviceName": DEVIE_NAME,
            "appPackage": APP_PACKAGE,
            "appActivity": APP_ACTIVITY,
            "noReset": True,
            "chromeOptions": {
                "androidProcess": "com.tencent.mm:tools"
            },
            "chromedriverExecutable": "D:\\Program Files (x86)\\Appium\\node_modules\\appium\\node_modules\\appium-chromedriver\\chromedriver\\win\\chromedriver.exe",
            # Important whenever the script has to switch to an H5 page.
            "recreateChromeDriverSessions": True,
        }
        self.driver = webdriver.Remote(DEIVER_SERVER, self.desired_caps)
        self.wait = WebDriverWait(self.driver, TIMROUT)

    def _tap(self, resource_id):
        """Wait until an element with ``resource_id`` is present, then click it."""
        locator = (By.XPATH, '//*[@resource-id="%s"]' % resource_id)
        self.wait.until(EC.presence_of_element_located(locator)).click()

    def _swipe_up(self):
        """Perform one swipe using the module-level FLICK_* geometry (1000 ms)."""
        self.driver.swipe(
            FLICK_START_X, FLICK_START_Y + FLICK_DISTANCE,
            FLICK_START_X, FLICK_START_Y,
            1000,
        )

    def _wait_for_context(self, context_name):
        """Poll ``driver.contexts`` for ``context_name`` for up to TIMROUT seconds.

        Returns silently on timeout; the subsequent ``switch_to.context`` call
        then reports the missing context exactly as it would without the poll.
        """
        deadline = time.time() + TIMROUT
        while time.time() < deadline:
            if context_name in self.driver.contexts:
                return
            time.sleep(0.5)

    def enter(self, name):
        """Open the history page of the official account called ``name``.

        Args:
            name: Display text of the account in the official-accounts list.

        Returns:
            True once the account was found and its page opened, False when
            the account did not show up within ``MAX_SWIPES`` screens.
        """
        # Switch to the contacts tab.
        self._tap("com.tencent.mm:id/po")
        # Open the official-accounts list.
        self._tap("com.tencent.mm:id/a2n")

        # Only elements on the current screen can be clicked, so look at the
        # visible rows, and swipe to the next screen when none of them matches.
        rows_locator = (By.XPATH, """//*[@resource-id="com.tencent.mm:id/a6e"]""")
        for _ in range(self.MAX_SWIPES):
            rows = self.wait.until(EC.presence_of_all_elements_located(rows_locator))
            for row in rows:
                if row.get_attribute("text") != name:
                    continue
                row.click()
                # Top-right button of the account's chat screen.
                self._tap("com.tencent.mm:id/j1")
                self._swipe_up()
                time.sleep(1)
                self._swipe_up()
                # Presumably the "all messages" history entry -- verify.
                self._tap("com.tencent.mm:id/avt")
                return True
            self._swipe_up()
            time.sleep(1)
        return False

    def crawl(self, name):
        """Print every ``href`` found in the H5 page and return the first one.

        The driver is switched to the WebView context for the duration of the
        call and is always switched back to the native context afterwards,
        including when an exception is raised or a value is returned.

        Args:
            name: Account name; unused here, kept for the existing call sites.

        Returns:
            The ``href`` of the first matching element, or None if the page
            exposes no element with an ``href`` attribute.
        """
        # Give the phone some time to settle after the preceding swipes.
        time.sleep(3)

        # The WebView context may not be listed yet right after navigation.
        self._wait_for_context(self.WEBVIEW_CONTEXT)
        self.driver.switch_to.context(self.WEBVIEW_CONTEXT)  # switch to the H5 page
        try:
            print(self.driver.contexts)
            # find_elements(By.XPATH, ...) exists in both old and new Selenium
            # releases, unlike the removed find_elements_by_xpath shortcut.
            hrefs = [
                link.get_attribute('href')
                for link in self.driver.find_elements(By.XPATH, "//*[@href]")
            ]
            for href in hrefs:
                print(href)
            return hrefs[0] if hrefs else None
        finally:
            # Switch back so that later native lookups keep working.
            self.driver.switch_to.context(self.NATIVE_CONTEXT)
if __name__ == '__main__':
    # Script entry point: open each listed official account and crawl its
    # H5 history page. The accounts must already be followed by the WeChat
    # user on the device.
    wx = WX()
    name_list = ["浙能俪都钱塘", "尖叫设计"]
    try:
        for name in name_list:
            # NOTE(review): after the first account the app is left on that
            # account's history page; confirm that enter() can still find the
            # contacts tab from there, or navigate back before the next round.
            wx.enter(name)
            wx.crawl(name)
    finally:
        # Always end the Appium session, even when a step fails, so the
        # server and the device are not left with a dangling session.
        wx.driver.quit()
问题描述:先关注公众号“浙能俪都钱塘”和“尖叫设计”,然后运行代码。程序能够进入公众号的全部历史消息页面并滑动,但之后停止运行并显示 timeout 错误,无法通过 print(link.get_attribute('href')) 获取 H5 页面中的链接。