initial working version

This commit is contained in:
2020-05-24 21:06:46 +03:00
commit c38cf8c8c9
14 changed files with 5618 additions and 0 deletions

8
.babelrc.js Normal file
View File

@@ -0,0 +1,8 @@
// Babel configuration picked up by babel-node (see the "start" script).
// preset-env selects the syntax transforms; the class-properties plugin
// enables the field declarations used by the classes under src/.
const presets = ['@babel/preset-env'];
const plugins = ['@babel/plugin-proposal-class-properties'];

module.exports = { presets, plugins };

10
.editorconfig Normal file
View File

@@ -0,0 +1,10 @@
# Top-most EditorConfig file for this repository.
root = true
# Settings applied to every file.
[*]
end_of_line = lf
insert_final_newline = true
# Settings applied to JavaScript sources only.
[*.js]
charset = utf-8
indent_style = space
indent_size = 2

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
# Installed dependencies
node_modules/
# Editor-specific settings
.vscode/
# Scratch directory; Constants.js points the cache and screenshots here
tmp/

5260
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

26
package.json Normal file
View File

@@ -0,0 +1,26 @@
{
"name": "maanteeamet-fetch",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"start": "nodemon --watch src --exec babel-node src/index.js"
},
"author": "",
"license": "ISC",
"dependencies": {
"cacheman": "2.2.1",
"cacheman-file": "0.2.1",
"jsdom": "16.2.2",
"node-fetch": "2.6.0",
"puppeteer": "^3.1.0"
},
"devDependencies": {
"@babel/cli": "7.8.4",
"@babel/core": "7.9.6",
"@babel/node": "7.8.7",
"@babel/plugin-proposal-class-properties": "7.8.3",
"@babel/preset-env": "7.9.6",
"nodemon": "2.0.4"
}
}

36
src/api/Api.js Normal file
View File

@@ -0,0 +1,36 @@
import fetch from 'node-fetch';
import { BASE_URL, BASE_HEADERS, BASE_PARAMS } from '../util/Constants';
/**
 * Hard-coded cookie name/value pairs.
 *
 * Not referenced by the rest of this file. Note that the JSESSIONID here
 * differs from the one embedded in `formatCookie` below.
 * NOTE(review): these look like captured session identifiers; they should
 * probably not live in source control - confirm.
 */
const cookie = {
  JSESSIONID: 'ffu-WeUdcd3ICqoCimutvnpdERv3XONZ-VMxwg4-.eteenindusw-n1',
  mntClientId: '15QY8LHHIST1C',
  ROUTEID: '.et1',
  plumbr_user_tracker: 'a71e97ae-1d54-5fce-a758-cbf37e01c093',
  plumbr_session_tracker_bkae1x: '004aec88-b58e-036b-2c05-ee309bfaa88b|1590327356024',
};

/**
 * Ready-made value for the `Cookie` request header sent by Api#fetch.
 * Written as one entry per cookie and joined with "; ".
 */
const formatCookie = [
  'JSESSIONID=OWbYYl4zGt6GOBW8nHFf9jX836540u0Bi-y5T6hf.eteenindusw-n1',
  'mntClientId=15QY8LHHIST1C',
  'ROUTEID=.et1',
  'eteenindus_lang=en',
].join('; ');
/**
 * Minimal HTTP client for the page at BASE_URL.
 */
class Api {
  /**
   * Requests BASE_URL with BASE_HEADERS, the hard-coded `Cookie` header and
   * BASE_PARAMS, and resolves with the response body as text.
   *
   * @param {string} plate Number plate. Currently only used in the log line
   *   (@TODO implement usage in the request itself).
   * @returns {Promise<string>} The response body.
   * @throws {Error} "Failed to fetch data, reason: ..." when the request or
   *   reading the body fails; the underlying error is kept on `cause`.
   */
  async fetch(plate = "") {
    try {
      console.log(`Fetching data for ${plate}`);
      const response = await fetch(BASE_URL, {
        headers: {
          ...BASE_HEADERS,
          Cookie: formatCookie,
        },
        ...BASE_PARAMS,
      });
      // Await the body inside the try block: returning the un-awaited promise
      // would let body-read failures bypass the catch below.
      return await response.text();
    } catch (e) {
      const error = new Error(`Failed to fetch data, reason: ${e.message}`);
      // Keep the original error (and its stack) reachable for debugging.
      error.cause = e;
      throw error;
    }
  }
}
export default Api;

36
src/api/Cache.js Normal file
View File

@@ -0,0 +1,36 @@
import Cacheman from 'cacheman';
import { CACHE } from '../util/Constants';
/**
 * Cache for fetched page content, keyed by number plate and backed by a
 * Cacheman instance configured from the CACHE constants.
 */
class Cache {
  manager;

  constructor() {
    this.manager = new Cacheman({
      ttl: CACHE.ttl,
      engine: CACHE.engine,
      tmpDir: CACHE.directory,
    });
  }

  /**
   * Builds the cache key for a plate by prepending CACHE.PREFIX.plate.
   *
   * @param {string} name Number plate.
   * @returns {string} The prefixed cache key.
   * @throws {Error} When `name` is empty or missing.
   */
  formatKey(name) {
    if (!name) {
      throw new Error('No number plate specified');
    }
    return `${CACHE.PREFIX.plate}${name}`;
  }

  /**
   * Looks up the cached entry for a plate.
   *
   * @param {string} name Number plate.
   * @returns {Promise<*>} Whatever the manager's `get` resolves with. Rejects
   *   when `name` is empty, because this method is async.
   */
  async get(name) {
    return this.manager.get(this.formatKey(name));
  }

  /**
   * Stores data for a plate.
   *
   * Validation errors are thrown synchronously. The result of the manager's
   * `set` call is returned so callers can await the write and handle its
   * failure instead of leaving a floating promise.
   *
   * @param {string} name Number plate.
   * @param {*} data Content to cache; must be truthy.
   * @returns {*} The value returned by `manager.set` (a promise when the
   *   manager uses its promise API).
   * @throws {Error} When `name` or `data` is empty or missing.
   */
  save(name, data) {
    const key = this.formatKey(name);
    if (!data) {
      throw new Error(`No data for caching car ${name}`);
    }
    return this.manager.set(key, data);
  }
}
export default Cache;

64
src/components/Scraper.js Normal file
View File

@@ -0,0 +1,64 @@
import jsdom from "jsdom";
import Car from "../model/Car";
import Selectors from '../util/Selectors';
const {
JSDOM
} = jsdom;
/**
 * Reads the value out of a table cell.
 *
 * Cells without child elements are returned as-is. Cells with child elements
 * are read through the "irregular text" selector; when nothing matches it,
 * the cell's plain text is used instead of crashing on a null match.
 */
function readCellValue(cell, irregularTextSelector) {
  if (cell.childElementCount === 0) {
    return cell.innerHTML;
  }
  const inner = cell.querySelector(irregularTextSelector);
  return inner !== null ? inner.innerHTML : cell.textContent;
}

/**
 * Extracts vehicle data from the HTML of a vehicle page.
 *
 * Expected call order: setContent(), scrapeBasicProperties(), then
 * scrapeMainProperties().
 */
class Scraper {
  document;
  car;

  /**
   * Parses the HTML with JSDOM and keeps the resulting document.
   *
   * @param {string} text HTML to parse.
   * @throws {Error} When the parsed document has no main properties
   *   container, which the message attributes to an expired cookie.
   */
  setContent(text) {
    const parsedContent = new JSDOM(text).window.document;
    if (parsedContent.querySelector(Selectors.properties.main.container) === null) {
      throw new Error('No data was received. Cookie is probably expired.');
    }
    this.document = parsedContent;
  }

  /**
   * Returns the innerHTML of the first element matching `selector`.
   *
   * @param {string} selector CSS selector.
   * @returns {string} The element's innerHTML.
   * @throws {Error} When no document is loaded or nothing matches.
   */
  getTextBySelector(selector) {
    if (!this.document) {
      throw new Error('No data to scrape.');
    }
    const element = this.document.querySelector(selector);
    if (element === null) {
      throw new Error(`No element matches selector "${selector}".`);
    }
    return element.innerHTML;
  }

  /**
   * Copies every label/value row of the main properties table onto
   * `this.car`, using the label cell's innerHTML as the property name.
   *
   * @throws {Error} When no document is loaded, when the car has not been
   *   created yet, or when the container is missing.
   */
  scrapeMainProperties() {
    if (!this.document) {
      throw new Error('No data to scrape.');
    }
    if (!this.car) {
      throw new Error('Basic properties must be scraped before main properties.');
    }
    const { main: selector } = Selectors.properties;
    const container = this.document.querySelector(selector.container);
    if (container === null) {
      throw new Error(`No element matches selector "${selector.container}".`);
    }
    for (const row of container.querySelectorAll(selector.rows)) {
      const [labelCell, valueCell] = row.querySelectorAll(selector.cell);
      // A row with fewer than two cells cannot hold a label/value pair;
      // skip it instead of failing on the missing cell.
      if (!labelCell || !valueCell) {
        continue;
      }
      this.car[labelCell.innerHTML] = readCellValue(valueCell, selector.irregularText);
    }
  }

  /**
   * Reads plate, name and VIN from the page header and creates `this.car`.
   *
   * @returns {Car} The newly created car.
   * @throws {Error} When no document is loaded or a header element is missing.
   */
  scrapeBasicProperties() {
    if (!this.document) {
      throw new Error('No data to scrape.');
    }
    const { properties: selector } = Selectors;
    const plate = this.getTextBySelector(selector.plate);
    const carName = this.getTextBySelector(selector.name);
    const vin = this.getTextBySelector(selector.vin);
    console.log(plate, carName, vin);
    // substring(5) drops the first five characters - presumably a "VIN: "
    // style label in the markup; TODO confirm against the live page.
    this.car = new Car(plate, carName, vin.substring(5));
    return this.car;
  }
}
export default Scraper;

View File

@@ -0,0 +1,58 @@
import puppeteer from 'puppeteer';
import {
SEARCH_URL,
NAVIGATION_TIMEOUT,
TEMP_DIR
} from '../util/Constants';
import Selectors from '../util/Selectors';
/**
 * Drives a Puppeteer browser through the search form at SEARCH_URL and
 * returns the HTML of the resulting page's main container.
 */
class CookieMonster {
  cache;
  browser;
  page;

  /**
   * @param {*} cache Cache instance; stored on the instance but not used by
   *   the methods of this class.
   */
  constructor(cache) {
    this.cache = cache;
  }

  /**
   * Types the plate into the search field, triggers the PrimeFaces AJAX
   * submit and waits for the page to navigate.
   *
   * @param {string} plate Number plate to search for.
   * @returns {Promise<void>}
   */
  async submitForm(plate) {
    await this.page.focus(Selectors.form.plate);
    await this.page.keyboard.type(plate);
    // Register the navigation wait before triggering the submit so that a
    // fast navigation cannot complete before anyone is listening for it.
    await Promise.all([
      this.page.waitForNavigation({
        timeout: NAVIGATION_TIMEOUT,
        waitUntil: 'domcontentloaded',
      }),
      this.page.evaluate(() => {
        // Runs inside the page; PrimeFaces is a global provided by the site.
        PrimeFaces.ab({
          s: 'j_idt104:j_idt131',
          u: 'j_idt104',
        });
      }),
    ]);
  }

  /**
   * Launches a browser, opens a new page and loads SEARCH_URL.
   *
   * @returns {Promise<void>}
   */
  async launchPage() {
    this.browser = await puppeteer.launch();
    this.page = await this.browser.newPage();
    await this.page.goto(SEARCH_URL);
  }

  /**
   * Saves a screenshot named after the plate and closes the browser.
   * The browser is closed even when the screenshot cannot be taken.
   *
   * @param {string} plate Number plate; used as the screenshot file name.
   * @returns {Promise<void>}
   */
  async cleanup(plate) {
    try {
      await this.page.screenshot({
        path: `${TEMP_DIR.screenshots}/${plate}.png`,
      });
    } finally {
      await this.closeBrowser();
    }
  }

  /**
   * Internal helper: closes the browser if one is open. Safe to call more
   * than once because the references are cleared before closing.
   *
   * @returns {Promise<void>}
   */
  async closeBrowser() {
    const { browser } = this;
    if (!browser) {
      return;
    }
    this.browser = undefined;
    this.page = undefined;
    await browser.close();
  }

  /**
   * Runs the whole flow for one plate: launch, submit, read the main
   * container's HTML, screenshot, close.
   *
   * @param {string} plate Number plate to look up.
   * @returns {Promise<string>} innerHTML of the main container element.
   */
  async init(plate) {
    console.log(`Fetching data for ${plate}`);
    try {
      await this.launchPage();
      await this.submitForm(plate);
      const pageContent = await this.page
        .$eval(Selectors.container.main, (element) => element.innerHTML);
      await this.cleanup(plate);
      console.log(`Successfully fetched fresh data for ${plate}`);
      return pageContent;
    } finally {
      // Any failure above (e.g. a navigation timeout) would otherwise leave
      // the browser process running.
      await this.closeBrowser();
    }
  }
}
export default CookieMonster;

47
src/index.js Normal file
View File

@@ -0,0 +1,47 @@
import { BASE_URL, BASE_HEADERS, BASE_PARAMS } from './util/Constants';
import Car from './model/Car';
import Api from './api/Api';
import Cache from './api/Cache';
import Scraper from './components/Scraper';
import CookieMonster from './cookies/CookieMonster';
/**
 * Application wiring: fetches (or reuses cached) page content for a plate and
 * scrapes it into a car object.
 */
class Hack {
  // Constructed here but not used by the methods of this class.
  api;
  scraper;
  cache;
  cookieMonster;

  constructor() {
    this.api = new Api();
    this.scraper = new Scraper();
    this.cache = new Cache();
    this.cookieMonster = new CookieMonster(this.cache);
  }

  /**
   * Returns the page content for a plate, preferring the cache and falling
   * back to a fresh fetch whose result is then cached.
   *
   * @param {string} plate Number plate.
   * @returns {Promise<*>} Cached or freshly fetched page content.
   */
  async getData(plate) {
    const cached = await this.cache.get(plate);
    if (cached) {
      console.log(`Using cached data for ${plate}`);
      return cached;
    }

    const data = await this.cookieMonster.init(plate);
    // Awaiting is harmless when save() returns nothing and, when it returns
    // a promise, routes a failed write to the caller instead of leaving an
    // unhandled rejection.
    await this.cache.save(plate, data);
    return data;
  }

  /**
   * Loads and scrapes the data for a plate, logging the resulting car.
   * Failures at any step are logged and do not reject.
   *
   * @param {string} plate Number plate.
   * @returns {Promise<*>} The scraped car, or undefined when a step failed.
   */
  async init(plate) {
    try {
      const data = await this.getData(plate);
      this.scraper.setContent(data);
      // Scraping stays inside the try block so that markup surprises are
      // logged like fetch errors rather than escaping as rejections.
      this.scraper.scrapeBasicProperties();
      this.scraper.scrapeMainProperties();
    } catch (e) {
      console.error(e);
      return undefined;
    }
    console.log(this.scraper.car);
    return this.scraper.car;
  }
}
// Entry point. The plate may be given as the first command-line argument;
// without one the original sample plate is used.
const plate = process.argv[2] || '540BLG';
const hack = new Hack();
// init() is async: attach a handler so a rejection is reported and reflected
// in the exit code instead of surfacing as an unhandled rejection.
hack.init(plate).catch((e) => {
  console.error(e);
  process.exitCode = 1;
});

13
src/model/Car.js Normal file
View File

@@ -0,0 +1,13 @@
/**
 * Record describing a single vehicle. It starts with the three header fields;
 * Scraper#scrapeMainProperties later adds further properties to the instance.
 */
class Car {
  plate;
  name;
  vin;

  /**
   * @param {string} plate Registration plate text.
   * @param {string} name Vehicle name text.
   * @param {string} vin Vehicle identification number text.
   */
  constructor(plate, name, vin) {
    Object.assign(this, { plate, name, vin });
  }
}
export default Car;

34
src/util/Constants.js Normal file
View File

@@ -0,0 +1,34 @@
/** URL requested by Api#fetch. */
export const BASE_URL = 'https://eteenindus.mnt.ee/public/soidukDetailvaadeAvalik.jsf';
/** URL of the search form page opened by CookieMonster. */
export const SEARCH_URL = 'https://eteenindus.mnt.ee/public/soidukTaustakontroll.jsf';

/** Request headers sent by Api#fetch (a `Cookie` header is added there). */
export const BASE_HEADERS = {
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
  'Accept-Encoding': 'gzip, deflate, br',
  'Accept-Language': 'en-US,en;q=0.5',
  'Connection': 'keep-alive',
  'Host': 'eteenindus.mnt.ee',
  // The HTTP header name is spelled "Referer"; "Referrer" is not a header
  // that servers interpret.
  'Referer': BASE_URL,
  'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0',
  'Upgrade-Insecure-Requests': '1',
};

/**
 * Extra options spread into the fetch call by Api#fetch.
 * NOTE(review): `mode` and `credentials` are browser Fetch options; confirm
 * whether node-fetch does anything with them.
 */
export const BASE_PARAMS = {
  'method': 'GET',
  'mode': 'cors',
  'credentials': 'include',
};

/** Timeout passed to Puppeteer's waitForNavigation (milliseconds). */
export const NAVIGATION_TIMEOUT = 2500;

/** Settings handed to Cacheman by the Cache class. */
export const CACHE = {
  ttl: 600, // presumably seconds, per Cacheman's ttl option - TODO confirm
  engine: 'file',
  directory: 'tmp/cache',
  PREFIX: {
    plate: 'car-', // prepended to the plate to form the cache key
  },
};

/** Output directories for temporary artefacts. */
export const TEMP_DIR = {
  screenshots: 'tmp/screenshots',
};

3
src/util/Formatter.js Normal file
View File

@@ -0,0 +1,3 @@
/**
 * Serialises cookie name/value pairs into a `Cookie` header value, e.g.
 * `{ a: '1', b: '2' }` becomes `a=1; b=2`.
 *
 * This is a function taking the cookie map because this module has no
 * `cookie` binding of its own; reading one at load time made the module
 * throw a ReferenceError as soon as it was imported.
 *
 * @param {Object<string, string>} [cookie={}] Cookie names mapped to values.
 * @returns {string} The pairs joined with "; " (empty string for no cookies).
 */
export const formatCookie = (cookie = {}) => Object.entries(cookie)
  .map(([key, value]) => `${key}=${value}`)
  .join('; ');

20
src/util/Selectors.js Normal file
View File

@@ -0,0 +1,20 @@
// CSS selectors used by CookieMonster (search form page) and Scraper
// (result page), grouped by purpose.

// Search form inputs. The colon in the element id is escaped for CSS.
const form = {
  plate: '#j_idt104\\:regMark',
};

// Top-level page containers.
const container = {
  main: '#content',
  form: '#j_idt104',
};

// Label/value table read by Scraper#scrapeMainProperties.
const mainProperties = {
  container: '.asset',
  rows: '.asset-details table tbody tr',
  cell: 'td',
  irregularText: 'span:first-child', // to get past spans, superscripts and such
};

// Header fields read by Scraper#scrapeBasicProperties, plus the table above.
const properties = {
  plate: '.content-title h1',
  name: '.content-title p:first-of-type',
  vin: '.content-title p:nth-of-type(2)',
  main: mainProperties,
};

export default { form, container, properties };