API 概覽 && 編碼Tips
文檔地址
github Chrome DevTools Protocol 協議本身的倉庫 有問題可以在這里提issue
github debugger-protocol-viewer 協議API文檔的倉庫
API 文檔地址 API展示的地方,這個經常用
常用API
Network 網絡請求、Cookie、緩存、證書等相關內容
Page 頁面的加載、資源內容、彈層、截圖、打印等相關內容
DOM 文檔DOM的獲取、修改、刪除、查詢等相關內容
Runtime JavaScript代碼的執行,這里面我們可以搞事情~~
編碼Tips
我們這里不會直接調用Websocket相關的內容來調用chrome的調試命令,而是用chrome-remote-interface 這個封裝的庫來做,它是基于Promise風格的
每一個功能塊成為一個單獨的 domain,像Network,Page,DOM等都是不同的domain
幾乎每一個個頭大的 domain 都有 enable 方法,需要先調用這個方法啟用之后再使用
各個 domain 的接口方法參數都是第一個對象或者說一個Map,不用考慮參數的位置了
各個 domain 的接口返回值也是一個對象,取對應的key就行
參數值和返回值經常是meta信息,經常是各種對象的id信息,而不是具體的對象內容(這里可能需要切一下風格)
編碼實例
首先做一個簡單的封裝,準備API的執行環境,具體可參考前一篇關于工具庫的文章。
const chromeLauncher = require('chrome-launcher');
const chromeRemoteInterface = require('chrome-remote-interface');

/**
 * Launches Chrome and attaches a DevTools-protocol client to it.
 *
 * @param {Object} [config] Options; the same object is also handed to
 *     chrome-remote-interface unchanged.
 * @param {string} [config.host='localhost'] Host passed to the launcher.
 * @param {number} [config.port=9222] Remote-debugging port passed to the launcher.
 * @param {boolean} [config.autoSelectChrome=true] Forwarded to the launcher.
 * @param {boolean} [config.headless=true] When true, adds `--headless`.
 * @returns {Promise<Array>} Resolves to `[chromeInstance, remoteInterface]`;
 *     rejects if launching Chrome or attaching the client fails.
 */
const prepareAPI = (config = {}) => {
    const {host = 'localhost', port = 9222, autoSelectChrome = true, headless = true} = config;

    // Build the flag list without placeholders: the previous version passed an
    // empty-string argument to Chrome whenever headless mode was switched off.
    const additionalFlags = ['--disable-gpu'];
    if (headless) {
        additionalFlags.push('--headless');
    }

    // NOTE(review): `additionalFlags` is kept as written; newer chrome-launcher
    // releases may expect this option under a different name - confirm against
    // the installed version.
    return chromeLauncher.launch({
        host,
        port,
        autoSelectChrome,
        additionalFlags
    }).then(chromeInstance =>
        chromeRemoteInterface(config).then(
            chromeAPI => [chromeInstance, chromeAPI],
            err => {
                // Attaching failed: do not leave the freshly launched browser
                // running. Always surface the original connection error, even
                // if shutting Chrome down fails as well.
                const rethrow = () => {
                    throw err;
                };
                return Promise.resolve(chromeInstance.kill()).then(rethrow, rethrow);
            }
        )
    );
};

// The example scripts load this file as a module and call `wrapper.prepareAPI()`.
module.exports = {prepareAPI};
打開百度,獲取頁面性能數據,參考 Navigation Timing W3C規範
const wrapper = require('the-wrapper-module');

/**
 * Turns a Navigation Timing snapshot into per-phase durations.
 *
 * Each value is the difference between two timestamps of the snapshot, so a
 * missing field yields NaN for the phases that depend on it.
 *
 * @param {Object} [perforceTiming] Result of `window.performance.timing.toJSON()`.
 * @returns {Object} Durations keyed by `redirect`, `dns`, `tcp`, `request`,
 *     `response`, `domReady` and `load`.
 */
const performanceParser = (perforceTiming) => {
    const timing = perforceTiming || {};
    return {
        // Fixed: this used to be `redirectEnd - redirectEnd - redirectStart`,
        // which always evaluated to `-redirectStart`.
        redirect: timing.redirectEnd - timing.redirectStart,
        dns: timing.domainLookupEnd - timing.domainLookupStart,
        tcp: timing.connectEnd - timing.connectStart,
        request: timing.responseStart - timing.requestStart,
        response: timing.responseEnd - timing.responseStart,
        domReady: timing.domContentLoadedEventStart - timing.navigationStart,
        load: timing.loadEventStart - timing.navigationStart
    };
};

/**
 * Prints one line per phase of a parsed timing report.
 *
 * @param {Object} [performanceInfo] Object produced by `performanceParser`.
 */
const showPerformanceInfo = (performanceInfo) => {
    const info = performanceInfo || {};
    console.log(`頁面重定向耗時:${info.redirect}`);
    console.log(`DNS查找耗時:${info.dns}`);
    console.log(`TCP連接耗時:${info.tcp}`);
    console.log(`請求發送耗時:${info.request}`);
    console.log(`響應接收耗時:${info.response}`);
    console.log(`DOMReady耗時:${info.domReady}`);
    console.log(`頁面加載耗時:${info.load}`);
};

// NOTE(review): chromeInstance is never used, so the launched browser keeps
// running after the report is printed - confirm whether it should be shut
// down here.
wrapper.prepareAPI().then(([chromeInstance, remoteInterface]) => {
    const {Runtime, Page} = remoteInterface;

    // Register the handler before navigating so the load event cannot be missed.
    Page.loadEventFired(() => {
        Runtime.evaluate({
            expression: 'window.performance.timing.toJSON()',
            // Without returnByValue only a handle to the remote object comes
            // back and its fields would need a separate getProperties call.
            returnByValue: true
        }).then(({result, exceptionDetails}) => {
            if (exceptionDetails) {
                // Raise a real Error (with a stack) instead of the raw protocol object.
                throw new Error(`Reading performance.timing failed: ${exceptionDetails.text}`);
            }
            showPerformanceInfo(performanceParser(result.value));
        }).catch((err) => {
            console.error(err);
        });
    });

    return Page.enable().then(() => Page.navigate({
        url: 'http://www.baidu.com'
    }));
}).catch((err) => {
    console.error(err);
});
打開百度,搜索 Web自動化 headless chrome,并爬取首屏結果鏈接
const wrapper = require('the-wrapper-module');

/**
 * Clicks the element this function is invoked on.
 *
 * Kept as a `function` expression on purpose: its source is sent through
 * Runtime.callFunctionOn for the resolved button object and it relies on
 * `this`; writing it as an arrow function causes problems here.
 */
const buttonClick = function () {
    this.click();
};

/** Runs inside the page: writes the search phrase into the `#kw` input. */
const setInputValue = () => {
    const input = document.getElementById('kw');
    input.value = 'Web自動化 headless chrome';
};

/**
 * Runs inside the page: collects the title and href of each result block.
 * Blocks that lack an `h3` containing a link are skipped instead of throwing.
 *
 * @returns {Array<{title: string, link: string}>}
 */
const parseSearchResult = () => {
    const resultList = [];
    const linkBlocks = document.querySelectorAll('div.result.c-container');
    for (const block of Array.from(linkBlocks)) {
        const heading = block.querySelector('h3');
        const anchor = heading && heading.querySelector('a');
        if (anchor) {
            resultList.push({
                title: heading.textContent,
                link: anchor.getAttribute('href')
            });
        }
    }
    return resultList;
};

wrapper.prepareAPI({
    // headless: false // uncomment to watch the browser while the script runs
}).then(([chromeInstance, remoteInterface]) => {
    const {Runtime, DOM, Page, Network} = remoteInterface;

    const reportError = (err) => {
        console.error(err);
    };

    // Logs the event name, then the page's current URL as seen by the runtime.
    const logCurrentUrl = (eventName) => {
        console.log(eventName);
        return Runtime.evaluate({
            expression: `window.location.href`,
            returnByValue: true
        }).then((result) => {
            console.log(result);
        }).catch(reportError);
    };

    // Fills the search box, clicks submit, waits, then prints the parsed results.
    const runSearch = () => DOM.getDocument()
        // The search form on the Baidu home page.
        .then(({root}) => DOM.querySelector({
            nodeId: root.nodeId,
            selector: '#form'
        }))
        .then(({nodeId}) => Promise.all([
            // Locate the search box and fill it in. The evaluate promise is
            // returned so the click below cannot run before the value is set.
            // An equivalent expression would be
            // 'document.getElementById("kw").value = "..."'.
            // The original author found that DOM.setNodeValue on this node has
            // no effect; DOM.setAttributeValue with name 'value' is the
            // DOM-domain way to do it.
            DOM.querySelector({
                nodeId,
                selector: '#kw'
            }).then(() => Runtime.evaluate({
                expression: `(${setInputValue})()`
            })),
            // Locate the submit button.
            DOM.querySelector({
                nodeId,
                selector: '#su'
            })
        ]))
        .then(([, buttonNode]) => Runtime.evaluate({
            expression: 'document.getElementById("kw").value'
        }).then(({result}) => {
            // Echo what actually ended up in the search box before submitting.
            console.log(result);
            return DOM.resolveNode({
                nodeId: buttonNode.nodeId
            });
        }))
        .then(({object}) => Runtime.callFunctionOn({
            objectId: object.objectId,
            functionDeclaration: `${buttonClick}`
        }))
        // Give the result list three seconds to render before scraping it.
        .then(() => new Promise((resolve) => {
            setTimeout(resolve, 3e3);
        }))
        .then(() => Runtime.evaluate({
            expression: `(${parseSearchResult})()`,
            returnByValue: true
        }))
        .then(({result}) => {
            // Baidu's result links are obfuscated; one more request per link
            // is needed to obtain the real URL.
            console.log(result.value);
        });

    // NOTE(review): Page.setAutoAttachToCreatedPages is kept as written;
    // confirm it exists in the protocol version of the targeted Chrome.
    return Promise.all([
        Page.enable(),
        Network.enable(),
        DOM.enable(),
        Page.setAutoAttachToCreatedPages({autoAttach: true})
    ]).then(() => {
        Page.domContentEventFired(() => {
            logCurrentUrl('Page.domContentEventFired');
        });
        Page.frameNavigated(() => {
            logCurrentUrl('Page.frameNavigated');
        });
        Page.loadEventFired(() => {
            logCurrentUrl('Page.loadEventFired');
            runSearch().catch(reportError);
        });
        return Page.navigate({
            url: 'http://www.baidu.com'
        });
    });
}).catch((err) => {
    console.error(err);
});
http://www.cnblogs.com/rawbin/p/7135417.html