diff options
author | Petter Reinholdtsen <pere@hungry.com> | 2015-02-12 22:40:10 +0100 |
---|---|---|
committer | Petter Reinholdtsen <pere@hungry.com> | 2015-02-12 22:40:10 +0100 |
commit | 4010c1ece08cd3297830fdf8cd7dcb8eb57ca559 (patch) | |
tree | a5a19cf938886fd1849a62178d6dc4b54eb5fbd3 |
Start on framework.
-rw-r--r-- | README | 6 | ||||
-rw-r--r-- | netsniff.js | 143 | ||||
-rwxr-xr-x | runcheck | 7 | ||||
-rw-r--r-- | testurls.txt | 5 |
4 files changed, 161 insertions, 0 deletions
@@ -0,0 +1,6 @@
+Extract HAR formatted information for Norwegian web sites
+=========================================================
+
+Using PhantomJS and the
+https://github.com/ariya/phantomjs/blob/master/examples/netsniff.js
+script.
diff --git a/netsniff.js b/netsniff.js
new file mode 100644
index 0000000..b702543
--- /dev/null
+++ b/netsniff.js
@@ -0,0 +1,143 @@
+if (!Date.prototype.toISOString) {
+    Date.prototype.toISOString = function () {
+        function pad(n) { return n < 10 ? '0' + n : n; }
+        function ms(n) { return n < 10 ? '00'+ n : n < 100 ? '0' + n : n }
+        return this.getFullYear() + '-' +
+            pad(this.getMonth() + 1) + '-' +
+            pad(this.getDate()) + 'T' +
+            pad(this.getHours()) + ':' +
+            pad(this.getMinutes()) + ':' +
+            pad(this.getSeconds()) + '.' +
+            ms(this.getMilliseconds()) + 'Z';
+    }
+}
+
+function createHAR(address, title, startTime, resources)
+{
+    var entries = [];
+
+    resources.forEach(function (resource) {
+        var request = resource.request,
+            startReply = resource.startReply,
+            endReply = resource.endReply;
+
+        if (!request || !startReply || !endReply) {
+            return;
+        }
+
+        // Exclude Data URI from HAR file because
+        // they aren't included in specification
+        if (request.url.match(/(^data:image\/.*)/i)) {
+            return;
+        }
+
+        entries.push({
+            startedDateTime: request.time.toISOString(),
+            time: endReply.time - request.time,
+            request: {
+                method: request.method,
+                url: request.url,
+                httpVersion: "HTTP/1.1",
+                cookies: [],
+                headers: request.headers,
+                queryString: [],
+                headersSize: -1,
+                bodySize: -1
+            },
+            response: {
+                status: endReply.status,
+                statusText: endReply.statusText,
+                httpVersion: "HTTP/1.1",
+                cookies: [],
+                headers: endReply.headers,
+                redirectURL: "",
+                headersSize: -1,
+                bodySize: startReply.bodySize,
+                content: {
+                    size: startReply.bodySize,
+                    mimeType: endReply.contentType
+                }
+            },
+            cache: {},
+            timings: {
+                blocked: 0,
+                dns: -1,
+                connect: -1,
+                send: 0,
+                wait: startReply.time - request.time,
+                receive: endReply.time - startReply.time,
+                ssl: -1
+            },
+            pageref: address
+        });
+    });
+
+    return {
+        log: {
+            version: '1.2',
+            creator: {
+                name: "PhantomJS",
+                version: phantom.version.major + '.' + phantom.version.minor +
+                    '.' + phantom.version.patch
+            },
+            pages: [{
+                startedDateTime: startTime.toISOString(),
+                id: address,
+                title: title,
+                pageTimings: {
+                    onLoad: page.endTime - page.startTime
+                }
+            }],
+            entries: entries
+        }
+    };
+}
+
+var page = require('webpage').create(),
+    system = require('system');
+
+if (system.args.length === 1) {
+    console.log('Usage: netsniff.js <some URL>');
+    phantom.exit(1);
+} else {
+
+    page.address = system.args[1];
+    page.resources = [];
+
+    page.onLoadStarted = function () {
+        page.startTime = new Date();
+    };
+
+    page.onResourceRequested = function (req) {
+        page.resources[req.id] = {
+            request: req,
+            startReply: null,
+            endReply: null
+        };
+    };
+
+    page.onResourceReceived = function (res) {
+        if (res.stage === 'start') {
+            page.resources[res.id].startReply = res;
+        }
+        if (res.stage === 'end') {
+            page.resources[res.id].endReply = res;
+        }
+    };
+
+    page.open(page.address, function (status) {
+        var har;
+        if (status !== 'success') {
+            console.log('FAIL to load the address');
+            phantom.exit(1);
+        } else {
+            page.endTime = new Date();
+            page.title = page.evaluate(function () {
+                return document.title;
+            });
+            har = createHAR(page.address, page.title, page.startTime, page.resources);
+            console.log(JSON.stringify(har, undefined, 4));
+            phantom.exit();
+        }
+    });
+}
diff --git a/runcheck b/runcheck
new file mode 100755
index 0000000..b9f54fc
--- /dev/null
+++ b/runcheck
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+cat testurls.txt | while read url ; do
+    filename=$(echo "$url" | cut -d/ -f3-| sed 's%/$%%' |tr / %)
+    echo "$url"
+    phantomjs netsniff.js "$url" > "har-data/$filename.har"
+done
diff --git a/testurls.txt b/testurls.txt
new file mode 100644
index 0000000..05e6e88
--- /dev/null
+++ b/testurls.txt
@@ -0,0 +1,5 @@
+http://www.nrk.no/
+http://www.stortinget.no/
+http://www.aftenposten.no/
+http://www.vg.no/
+http://www.usit.uio.no/