百度搜索自动化

下面就通过例子:百度搜索自动化,来说明不同平台的自动化具体是什么样子:

PC端

selenium

代码:

  • 文件:seleniumDemoBaiduSearch.py
  • 贴出来是

    # Function: demo selenium do baidu search and extract result
    # Author: Crifan Li
    # Update: 20210327
    
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    
    from bs4 import BeautifulSoup
    import re
    
    chromeDriver = webdriver.Chrome()
    
    ################################################################################
    # Open url
    ################################################################################
    baiduUrl = "https://www.baidu.com"
    chromeDriver.get(baiduUrl)
    print("title=%s" % chromeDriver.title)
    
    assert chromeDriver.title == "百度一下,你就知道"
    # assert '百度' in chromeDriver.title
    
    ################################################################################
    # Find/Locate search button
    ################################################################################
    SearchInputId = "kw"
    searchInputElem = chromeDriver.find_element_by_id(SearchInputId)
    print("searchInputElem=%s" % searchInputElem)
    # searchInputElem=<selenium.webdriver.remote.webelement.WebElement (session="e0d8b72f2fc31e27220f66fcbdb22bfc", element="21aa0e9d-04fa-4385-b16b-d82998068887")>
    
    ################################################################################
    # Input text
    ################################################################################
    searchInputElem.clear()
    print("Clear existed content")
    
    searchStr = "crifan"
    searchInputElem.send_keys(searchStr)
    print("Entered %s to search box" % searchStr)
    
    ################################################################################
    # Click button
    ################################################################################
    
    # Method 1: emulate press Enter key
    # searchButtonElem.send_keys(Keys.RETURN)
    # print("Pressed Enter/Return key")
    
    # Method 2: find button and click
    BaiduSearchId = "su"
    baiduSearchButtonElem = chromeDriver.find_element_by_id(BaiduSearchId)
    print("baiduSearchButtonElem=%s" % baiduSearchButtonElem)
    baiduSearchButtonElem.click()
    print("Clicked button %s" % baiduSearchButtonElem)
    
    ################################################################################
    # Wait page change/loading completed
    #   -> following element makesure show = visible
    #   -> otherwise possibly can NOT find elements
    ################################################################################
    MaxWaitSeconds = 10
    numTextElem = WebDriverWait(chromeDriver, MaxWaitSeconds).until(
        EC.presence_of_element_located((By.XPATH, "//span[@class='nums_text']"))
    )
    print("Search complete, showing: %s" % numTextElem)
    
    ################################################################################
    # Extract result
    ################################################################################
    
    # Method 1: use Selenium to extract title list
    searchResultAList = chromeDriver.find_elements_by_xpath("//h3[contains(@class, 't')]/a")
    print("searchResultAList=%s" % searchResultAList)
    searchResultANum = len(searchResultAList)
    print("searchResultANum=%s" % searchResultANum)
    for curIdx, curSearchResultAElem in enumerate(searchResultAList):
      curNum = curIdx + 1
      print("%s [%d] %s" % ("-"*20, curNum, "-"*20))
      baiduLinkUrl = curSearchResultAElem.get_attribute("href")
      print("baiduLinkUrl=%s" % baiduLinkUrl)
      title = curSearchResultAElem.text
      print("title=%s" % title)
    
    # # Method 2: use BeautifulSoup to extract title list
    # curHtml = chromeDriver.page_source
    # curSoup = BeautifulSoup(curHtml, 'html.parser')
    # beginTP = re.compile("^t.*")
    # searchResultH3List = curSoup.find_all("h3", {"class": beginTP})
    # print("searchResultH3List=%s" % searchResultH3List)
    # searchResultH3Num = len(searchResultH3List)
    # print("searchResultH3Num=%s" % searchResultH3Num)
    # for curIdx, searchResultH3Item in enumerate(searchResultH3List):
    #   curNum = curIdx + 1
    #   print("%s [%d] %s" % ("-"*20, curNum, "-"*20))
    #   aElem = searchResultH3Item.find("a")
    #   # print("aElem=%s" % aElem)
    #   baiduLinkUrl = aElem.attrs["href"]
    #   print("baiduLinkUrl=%s" % baiduLinkUrl)
    #   title = aElem.text
    #   print("title=%s" % title)
    
    ################################################################################
    # End close
    ################################################################################
    chromeDriver.close()
    

效果:

puppeteer

代码:

  • 文件:puppeteerDemoBaiduSearch.py
  • 贴出来是

    # Function: pyppeteer (python version puppeteer) do baidu search
    # Author: Crifan Li
    # Update: 20210330
    
    import asyncio
    from pyppeteer import launch
    
    async def main():
        browser = await launch(headless=False)
        page = await browser.newPage()
    
        await page.setJavaScriptEnabled(enabled=True)
    
        baiduUrl = "https://www.baidu.com"
        await page.goto(baiduUrl)
        # await page.screenshot({'path': 'baidu.png'})
    
        ################################################################################
        # Input text
        ################################################################################
        searchStr = "crifan"
    
        # SearchInputSelector = "input[id=kw]"
        SearchInputSelector = "input[id='kw']"
    
        # SearchInputXpath = "//input[@id='kw']"
        # searchInputElem = page.xpath(SearchInputXpath)
    
        # # Input method 1: selector + click + keyboard type
        # searchInputElem = await page.querySelector(SearchInputSelector)
        # print("searchInputElem=%s" % searchInputElem)
        # await searchInputElem.click()
        # await page.keyboard.type(searchStr)
    
        # Input method 2: focus then type
        # await page.focus(SearchInputSelector)
        # await page.keyboard.type(searchStr)
    
        # Input method 3: selector and input once using type
        await page.type(SearchInputSelector, searchStr, delay=20)
    
        ################################################################################
        # Trigger search
        ################################################################################
    
        # Method 1: press ENTER key
        await page.keyboard.press('Enter')
    
        # # Method 2: locator search button then click
        # SearchButtonSelector = "input[id='su']"
        # searchButtonElem = await page.querySelector(SearchButtonSelector)
        # print("searchButtonElem=%s" % searchButtonElem)
        # await searchButtonElem.click()
        # # await searchButtonElem.press("Enter")
    
        ################################################################################
        # Wait page reload complete
        ################################################################################
        SearchFoundWordsSelector = 'span.nums_text'
        SearchFoundWordsXpath = "//span[@class='nums_text']"
    
        # await page.waitForSelector(SearchFoundWordsSelector)
        # await page.waitFor(SearchFoundWordsSelector)
        # await page.waitForXPath(SearchFoundWordsXpath)
        # Note: all above exception: 发生异常: ElementHandleError Evaluation failed: TypeError: MutationObserver is not a constructor
        #   so change to following
    
        # # Method 1: just wait
        # await page.waitFor(2000) # millisecond
    
        # Method 2: wait element showing
        SingleWaitSeconds = 1
        while not await page.querySelector(SearchFoundWordsSelector):
          print("Still not found %s, wait %s seconds" % (SearchFoundWordsSelector, SingleWaitSeconds))
          await asyncio.sleep(SingleWaitSeconds)
          # pass
    
        ################################################################################
        # Extract result
        ################################################################################
    
        resultASelector = "h3[class^='t'] a"
        searchResultAList = await page.querySelectorAll(resultASelector)
        # print("searchResultAList=%s" % searchResultAList)
        searchResultANum = len(searchResultAList)
        print("Found %s search result:" % searchResultANum)
        for curIdx, aElem in enumerate(searchResultAList):
          curNum = curIdx + 1
          print("%s [%d] %s" % ("-"*20, curNum, "-"*20))
          aTextJSHandle = await aElem.getProperty('textContent')
          # print("type(aTextJSHandle)=%s" % type(aTextJSHandle))
          # type(aTextJSHandle)=<class 'pyppeteer.execution_context.JSHandle'>
          # print("aTextJSHandle=%s" % aTextJSHandle)
          # aTextJSHandle=<pyppeteer.execution_context.JSHandle object at 0x10309c9b0>
          title = await aTextJSHandle.jsonValue()
          # print("type(title)=%s" % type(title))
          # type(title)=<class 'str'>
          print("title=%s" % title)
    
          baiduLinkUrl = await (await aElem.getProperty("href")).jsonValue()
          print("baiduLinkUrl=%s" % baiduLinkUrl)
    
        await browser.close()
    
    asyncio.get_event_loop().run_until_complete(main())
    

效果:

playwright

代码:

  • 文件:playwrithDemoBaiduSearch.py
  • 贴出来是

    # Function: Playwright demo baidu search
    # Author: Crifan Li
    # Update: 20210331
    
    from playwright.sync_api import sync_playwright
    
    # here use sync mode
    with sync_playwright() as p:
        chromiumBrowserType = p.chromium
        print("chromiumBrowserType=%s" % chromiumBrowserType)
        browser = chromiumBrowserType.launch(headless=False)
        # chromiumBrowserType=<BrowserType name=chromium executable_path=/Users/limao/Library/Caches/ms-playwright/chromium-857950/chrome-mac/Chromium.app/Contents/MacOS/Chromium>
        print("browser=%s" % browser)
        # browser=<Browser type=<BrowserType name=chromium executable_path=/Users/limao/Library/Caches/ms-playwright/chromium-857950/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=90.0.4430.0>
        page = browser.new_page()
        print("page=%s" % page)
        # page=<Page url='about:blank'>
    
        ################################################################################
        # Open url
        ################################################################################
        page.goto('http://www.baidu.com')
        print("page=%s" % page)
        # page=<Page url='https://www.baidu.com/'>
    
        ################################################################################
        # Input text
        ################################################################################
        searchStr = "crifan"
        SearchInputSelector = "input#kw.s_ipt"
    
        # page.click(SearchInputSelector)
        page.fill(SearchInputSelector, searchStr)
    
        ################################################################################
        # Trigger search
        ################################################################################
        EnterKey = "Enter"
    
        # Method 1: press Enter key
        # page.keyboard.press(EnterKey)
    
        # Method 2: locate element then click
        SearchButtonSelector = "input#su"
        page.press(SearchButtonSelector, EnterKey)
    
        # wait -> makesure element visible
        SearchFoundWordsSelector = 'span.nums_text'
        # SearchFoundWordsXpath = "//span[@class='nums_text']"
        page.wait_for_selector(SearchFoundWordsSelector, state="visible")
    
        ################################################################################
        # Extract content
        ################################################################################
        resultASelector = "h3[class^='t'] a"
        searchResultAList = page.query_selector_all(resultASelector)
        print("searchResultAList=%s" % searchResultAList)
        # searchResultAList=[<JSHandle [email protected]<a target="_blank" href="http://www.baidu.com/link?…>在路上on the way - 走别人没走过的路,让别人有路可走</a>>, <JSHandle [email protected]>, 。。。, <JSHandle [email protected]>]
        searchResultANum = len(searchResultAList)
        print("Found %s search result:" % searchResultANum)
        for curIdx, aElem in enumerate(searchResultAList):
            curNum = curIdx + 1
            print("%s [%d] %s" % ("-"*20, curNum, "-"*20))
            title = aElem.text_content()
            print("title=%s" % title)
            # title=在路上on the way - 走别人没走过的路,让别人有路可走
            baiduLinkUrl = aElem.get_attribute("href")
            print("baiduLinkUrl=%s" % baiduLinkUrl)
            # baiduLinkUrl=http://www.baidu.com/link?url=fB3F0xZmwig9r2M_1pK4BJG00xFPLjJ85X39GheP_fzEA_zJIjX-IleEH_ZL8pfo
    
        # do sceeenshot
        screenshotFilename = 'baidu_search_%s_result.png' % searchStr
        page.screenshot(path=screenshotFilename)
    
        browser.close()
    

效果:

移动端

Android端

uiautomator2

代码:

# Function: uiautomator2 demo baidu search
# Author: Crifan Li
# Update: 20210417

# import time
import uiautomator2 as u2

d = u2.connect() # connect to device
print("d.info=%s" % d.info)
# d.info={'currentPackageName': 'com.android.browser', 'displayHeight': 2201, 'displayRotation': 0, 'displaySizeDpX': 393, 'displaySizeDpY': 873, 'displayWidth': 1080, 'productName': 'atom', 'screenOn': True, 'sdkInt': 29, 'naturalOrientation': True}

# for debug: get current app info
# curApp = d.app_current()
# print("curApp=%s" % curApp)

# for debug: get running app list
# activeAppList = d.app_list_running()
# print("activeAppList=%s" % activeAppList)

################################################################################
# Launch browser
################################################################################
Browser_XiaomiBuiltin = "com.android.browser"
browserPackage = Browser_XiaomiBuiltin
# d.app_start(browserPackage)
d.app_start(browserPackage, stop=True)

# wait util browser launch complete -> appear 我的 tab
# MustShowTabName = "主页"
MustShowTabName = "我的"
# d(text=MustShowTabName).exists(timeout=10)
d(text=MustShowTabName, packageName=browserPackage).exists(timeout=10)
print("Browser homepage loaded")

################################################################################
# Open baidu homepage
################################################################################
SearchInputId = "com.android.browser:id/b4w"

# # open new window
# windowUiObj = d(resourceId="com.android.browser:id/dm")
# windowUiObj.click()

# # click add to new window
# addNewWindowUiObj = d(resourceId="com.android.browser:id/akr")
# addNewWindowUiObj.click()


# for debug
# curPageXml = d.dump_hierarchy(compressed=False, pretty=False)
# print("curPageXml=%s" % curPageXml)

# find input box inside address bar

# # Method 1: use driver pass in parameter
# inputUiObj = d(resourceId=SearchInputId, className="android.widget.TextView")
# # inputUiObj = d(resourceId=SearchInputId)
# print("type(inputUiObj)=%s" % type(inputUiObj)) # type(inputUiObj)=<class 'uiautomator2.session.UiObject'>
# print("inputUiObj=%s" % inputUiObj) # inputUiObj=<uiautomator2.session.UiObject object at 0x10a0bea00>
# inputUiObjectInfo = inputUiObj.info
# print("type(inputUiObjectInfo)=%s" % type(inputUiObjectInfo)) # type(inputUiObjectInfo)=<class 'dict'>
# print("inputUiObjectInfo=%s" % inputUiObjectInfo) # inputUiObjectInfo={'bounds': {'bottom': 172, 'left': 160, 'right': 797, 'top': 107}, 'childCount': 0, 'className': 'android.widget.TextView', 'contentDescription': '搜索框', 'packageName': 'com.android.browser', 'resourceName': 'com.android.browser:id/b4h', 'text': '', 'visibleBounds': {'bottom': 172, 'left': 160, 'right': 797, 'top': 107}, 'checkable': False, 'checked': False, 'clickable': True, 'enabled': True, 'focusable': False, 'focused': False, 'longClickable': False, 'scrollable': False, 'selected': False}
# isFoundInput = inputUiObj.exists # True

# # Method 2: use xpath
# inputXpathSelector = d.xpath("//android.widget.TextView[@resource-id=SearchInputId]")
# # inputXpathSelector = d.xpath("//*[@resource-id=SearchInputId]")
# print("type(inputXpathSelector)=%s" % type(inputXpathSelector)) # type(inputXpathSelector)=<class 'uiautomator2.xpath.XPathSelector'>
# inputXpathElem = inputXpathSelector.get()
# print("type(inputXpathElem)=%s" % type(inputXpathElem)) # type(inputXpathElem)=<class 'uiautomator2.xpath.XMLElement'>
# print("inputXpathElem=%s" % inputXpathElem) # inputXpathElem=<uiautomator2.xpath.XMLElement object at 0x108585d30>
# print("type(inputXpathElem.attrib)=%s" % type(inputXpathElem.attrib)) # type(inputXpathElem.attrib)=<class 'lxml.etree._Attrib'>
# print("inputXpathElem.attrib=%s" % inputXpathElem.attrib) # inputXpathElem.attrib={'index': '1', 'text': '', 'resource-id': 'com.android.browser:id/b4h', 'package': 'com.android.browser', 'content-desc': '搜索框', 'checkable': 'false', 'checked': 'false', 'clickable': 'true', 'enabled': 'true', 'focusable': 'false', 'focused': 'false', 'scrollable': 'false', 'long-clickable': 'false', 'password': 'false', 'selected': 'false', 'visible-to-user': 'true', 'bounds': '[160,107][797,172]'}
# isFoundInput = inputXpathSelector.exists # True


# trigger into input page

# Method 1
inputUiObj = d(resourceId=SearchInputId, className="android.widget.TextView")
inputUiObj.click()
print("Clicked search box")

# # Method 2
# inputXpathSelector = d.xpath("//android.widget.TextView[@resource-id=%s]" % SearchInputId)
# inputXpathSelector.click()

# input baidu homr url
BaiduHomeUrl = "https://www.baidu.com/"
AddressInputId = "com.android.browser:id/bqi"
searchUiObj = d(resourceId=AddressInputId, className="android.widget.EditText")
searchUiObj.set_text(BaiduHomeUrl)
print("Inputed baidu homepage url: %s" % BaiduHomeUrl)

# trigger jump to baidu home
EnterKey = "enter"
d.press(EnterKey)
print("Emulated press key %s" % EnterKey)

# wait util baidu home loaded
# d(text="百度一下", resourceId="com.android.browser:id/bq3").exists(timeout=10)
d(text="百度一下,你就知道", className="android.view.View").exists(timeout=10)
print("Baidu home loaded")

################################################################################
# Input text
################################################################################
searchStr = "crifan"

baiduSearchKeywordUiObj = d(resourceId="index-kw", className="android.widget.EditText")
baiduSearchKeywordUiObj.set_text(searchStr)
print("Inputed baidu search text %s" % searchStr)

################################################################################
# Trigger baidu search
################################################################################

# # Method 1: press key
# TriggerSearchKey = "enter" # work
# # TriggerSearchKey = "search" # not work
# # TriggerSearchKey = "go" # not work
# # TriggerSearchKey = "done" # not work
# d.press(TriggerSearchKey)
# print("Emulated press key %s" % TriggerSearchKey)

# Method 2: find 百度一下 button then click
baiduSearchButtonUiObj = d(resourceId="index-bn", className="android.widget.Button")
baiduSearchButtonUiObj.click()
print("Clicked baidu search button")

################################################################################
# Extract search result content
################################################################################

# Special: for fixbug of get page xml is not latest, so using following code to refresh to get latest page source xml
d.service("uiautomator").stop()
d.service("uiautomator").start()
# time.sleep(1)

# for debug
# get page source xml
# curPageXml = d.dump_hierarchy(compressed=False, pretty=False)
# print("curPageXml=%s" % curPageXml)
# with open("baidu_search_%s_result_pageSource_reloaded.xml" % searchStr, "w") as fp:
#     fp.write(curPageXml)

d(resourceId="results").exists(timeout=10)

# Note: following syntax can NOT find elements
# resultsSelector = d.xpath("//*[@resource-id='results']")
# titleButtonSelectorList = resultsSelector.xpath("//android.widget.Button[@clickable='true']").all()
# titleButtonSelectorList = resultsSelector.xpath(".//android.widget.Button[@clickable='true']").all()

# Xpath chain search can find elements
titleButtonElementList = d.xpath("//*[@resource-id='results']//android.widget.Button[@clickable='true']").all()
titleButtonNum = len(titleButtonElementList)
print("Found %s search result title" % titleButtonNum)

# descriptionElementList = d.xpath("//*[@resource-id='results']/android.view.View[1]/android.view.View[1]/android.view.View[2]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]").all()
descriptionElementList = d.xpath("//*[@resource-id='results']/android.view.View/android.view.View[1]/android.view.View[2]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]").all()
descriptionNum = len(descriptionElementList)
print("Found %s description" % descriptionNum)

# # sourceWebsiteElementList = d.xpath('//*[@resource-id="results"]/android.view.View/android.view.View[1]/android.view.View[2]/android.view.View[1]').all()
# sourceWebsiteElementList = d.xpath('//*[@resource-id="results"]/android.view.View/android.view.View[1]/android.view.View[2]/android.view.View[2]').all()
# sourceWebsiteNum = len(sourceWebsiteElementList)
# print("Found %s source website" % sourceWebsiteNum)

for curIdx, eachTitleButtonElement in enumerate(titleButtonElementList):
    curNum = curIdx + 1
    print("%s  [%d/%d] %s" % ("-"*20, curNum, titleButtonNum, "-"*20))
    # eachTitleButtonElemAttrib = eachTitleButtonElement.attrib
    # print("title attrib: %s" % eachTitleButtonElemAttrib)
    # curTitle = eachTitleButtonElemAttrib["text"]
    curTitle = eachTitleButtonElement.text
    print("title=%s" % curTitle)

    curDescriptionElem = descriptionElementList[curIdx]
    curDescription = curDescriptionElem.text
    print("description=%s" % curDescription)

    # curSourceWebsiteElem = sourceWebsiteElementList[curIdx]
    # curSourceWebsite = curSourceWebsiteElem.text
    # print("curSourceWebsite=%s" % curSourceWebsite)

print("Demo baidu search complete")

效果:

iOS端

facebook-wda

代码:

  • 文件:facebookWdaDemoBaiduSearch.py
  • 贴出来是

    # Function: facebook-wda demo baidu search
    # Author: Crifan Li
    # Update: 20210410
    
    import wda
    
    # for debug
    # Enable debug will see http Request and Response
    # wda.DEBUG = True
    
    c = wda.Client('http://localhost:8100')
    
    curStatus = c.status()
    print("curStatus=%s" % curStatus)
    

注:由于苹果开发者过期,导致:未完待续

详见:

【未解决】Mac中用facebook-wda自动操作安卓手机浏览器实现百度搜索

results matching ""

    No results matching ""