initial working version
This commit is contained in:
8
.babelrc.js
Normal file
8
.babelrc.js
Normal file
@@ -0,0 +1,8 @@
|
||||
// Babel configuration for this project.
// The class-properties plugin is listed because the classes under src/
// declare instance fields directly in the class body.
const presets = ['@babel/preset-env'];
const plugins = ['@babel/plugin-proposal-class-properties'];

module.exports = { presets, plugins };
|
||||
10
.editorconfig
Normal file
10
.editorconfig
Normal file
@@ -0,0 +1,10 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
|
||||
[*.js]
|
||||
charset = utf-8
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
node_modules/
|
||||
.vscode/
|
||||
tmp/
|
||||
5260
package-lock.json
generated
Normal file
5260
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
26
package.json
Normal file
26
package.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"name": "maanteeamet-fetch",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"start": "nodemon --watch src --exec babel-node src/index.js"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"cacheman": "2.2.1",
|
||||
"cacheman-file": "0.2.1",
|
||||
"jsdom": "16.2.2",
|
||||
"node-fetch": "2.6.0",
|
||||
"puppeteer": "^3.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/cli": "7.8.4",
|
||||
"@babel/core": "7.9.6",
|
||||
"@babel/node": "7.8.7",
|
||||
"@babel/plugin-proposal-class-properties": "7.8.3",
|
||||
"@babel/preset-env": "7.9.6",
|
||||
"nodemon": "2.0.4"
|
||||
}
|
||||
}
|
||||
36
src/api/Api.js
Normal file
36
src/api/Api.js
Normal file
@@ -0,0 +1,36 @@
|
||||
import fetch from 'node-fetch';
|
||||
import { BASE_URL, BASE_HEADERS, BASE_PARAMS } from '../util/Constants';
|
||||
|
||||
// Individual cookie values, keyed by cookie name.
// NOTE(review): nothing in the visible code reads this object — confirm it is
// still needed before relying on it.
const cookie = {
  JSESSIONID: 'ffu-WeUdcd3ICqoCimutvnpdERv3XONZ-VMxwg4-.eteenindusw-n1',
  mntClientId: '15QY8LHHIST1C',
  ROUTEID: '.et1',
  plumbr_user_tracker: 'a71e97ae-1d54-5fce-a758-cbf37e01c093',
  plumbr_session_tracker_bkae1x: '004aec88-b58e-036b-2c05-ee309bfaa88b|1590327356024',
};

// Ready-made `Cookie` header value that Api#fetch attaches to its request.
// NOTE(review): this looks like a captured session id committed to the
// repository; it presumably expires — consider loading it from configuration.
const formatCookie = 'JSESSIONID=OWbYYl4zGt6GOBW8nHFf9jX836540u0Bi-y5T6hf.eteenindusw-n1; mntClientId=15QY8LHHIST1C; ROUTEID=.et1; eteenindus_lang=en';
|
||||
|
||||
/**
 * Minimal HTTP client for the vehicle detail page.
 */
class Api {
  /**
   * Requests BASE_URL with the shared headers plus the module-level session
   * cookie and resolves with the response body as text.
   *
   * @param {string} plate @TODO implement usage — currently only logged
   * @returns {Promise<string>} the response body
   * @throws {Error} when the request or reading the body fails; the
   *   underlying error is available as `cause` on the thrown error
   */
  async fetch(plate = "") {
    try {
      console.log(`Fetching data for ${plate}`);
      const response = await fetch(BASE_URL, {
        headers: {
          ...BASE_HEADERS,
          Cookie: formatCookie,
        },
        ...BASE_PARAMS,
      });
      // Awaited inside the try so a failure while reading the body is
      // wrapped the same way as a failed request.
      return await response.text();
    } catch (e) {
      const wrapped = new Error(`Failed to fetch data, reason: ${e.message}`);
      // Keep the original error (and its stack) reachable for debugging.
      wrapped.cause = e;
      throw wrapped;
    }
  }
}
|
||||
|
||||
export default Api;
|
||||
36
src/api/Cache.js
Normal file
36
src/api/Cache.js
Normal file
@@ -0,0 +1,36 @@
|
||||
import Cacheman from 'cacheman';
|
||||
import { CACHE } from '../util/Constants';
|
||||
|
||||
/**
 * Wrapper around a Cacheman instance that stores scraped page content keyed
 * by number plate.
 */
class Cache {
  // Underlying Cacheman instance, configured from the CACHE constants.
  manager;

  constructor() {
    this.manager = new Cacheman({
      ttl: CACHE.ttl,
      engine: CACHE.engine,
      tmpDir: CACHE.directory,
    });
  }

  /**
   * Builds the cache key for a number plate.
   *
   * @param {string} name number plate
   * @returns {string} the plate prefixed with CACHE.PREFIX.plate
   * @throws {Error} when no plate is given
   */
  formatKey(name) {
    if (!name) {
      throw new Error('No number plate specified');
    }
    return `${CACHE.PREFIX.plate}${name}`;
  }

  /**
   * Looks up the cached entry for a number plate.
   *
   * @param {string} name number plate
   * @returns {Promise<*>} whatever the manager resolves for the key
   * @throws {Error} (as a rejection) when no plate is given
   */
  async get(name) {
    const key = this.formatKey(name);
    return this.manager.get(key);
  }

  /**
   * Stores data for a number plate.
   *
   * Validation errors are still thrown synchronously. The manager's result is
   * returned so callers can await the write and observe a failure instead of
   * leaving the write unobserved.
   *
   * @param {string} name number plate
   * @param {*} data content to cache; must be truthy
   * @returns {*} the result of the manager's `set` call
   * @throws {Error} when the plate or the data is missing
   */
  save(name, data) {
    const key = this.formatKey(name);
    if (!data) {
      throw new Error(`No data for caching car ${name}`);
    }
    return this.manager.set(key, data);
  }
}
|
||||
|
||||
export default Cache;
|
||||
64
src/components/Scraper.js
Normal file
64
src/components/Scraper.js
Normal file
@@ -0,0 +1,64 @@
|
||||
import jsdom from "jsdom";
|
||||
import Car from "../model/Car";
|
||||
import Selectors from '../util/Selectors';
|
||||
|
||||
const {
|
||||
JSDOM
|
||||
} = jsdom;
|
||||
|
||||
/**
 * Extracts vehicle data from the HTML of a vehicle detail page.
 *
 * Usage order: setContent() -> scrapeBasicProperties() -> scrapeMainProperties().
 */
class Scraper {
  // Parsed DOM document of the most recently accepted content.
  document;
  // Car created by scrapeBasicProperties() and extended by scrapeMainProperties().
  car;

  /**
   * Parses the HTML and keeps the resulting document for later scraping.
   *
   * @param {string} text HTML of the detail page
   * @throws {Error} when the main properties container is missing
   */
  setContent(text) {
    const parsedContent = new JSDOM(text).window.document;
    if (parsedContent.querySelector(Selectors.properties.main.container) === null) {
      throw new Error('No data was received. Cookie is probably expired.');
    }
    this.document = parsedContent;
  }

  /**
   * Returns the inner HTML of the first element matching the selector.
   *
   * @param {string} selector CSS selector
   * @returns {string} inner HTML of the matched element
   * @throws {Error} when nothing matches the selector
   */
  getTextBySelector(selector) {
    const element = this.document.querySelector(selector);
    if (element === null) {
      throw new Error(`No element matches selector "${selector}"`);
    }
    return element.innerHTML;
  }

  /**
   * Copies every label/value row of the details table onto `this.car`, using
   * the label cell's inner HTML as the property name.
   *
   * @throws {Error} when no content was set or no car was created yet
   */
  scrapeMainProperties() {
    if (!this.document) {
      throw new Error('No data to scrape.');
    }
    if (!this.car) {
      throw new Error('No car to populate; call scrapeBasicProperties() first.');
    }
    const { main: selector } = Selectors.properties;
    const rows = this.document
      .querySelector(selector.container)
      .querySelectorAll(selector.rows);

    rows.forEach((row) => {
      const cells = row.querySelectorAll(selector.cell);
      // Rows without a label/value pair carry no property.
      if (cells.length < 2) {
        return;
      }
      const label = cells[0];
      const value = cells[1];
      // Values wrapped in markup keep their text in a nested element; fall
      // back to the whole cell when that nested element is absent.
      const nested = value.childElementCount > 0
        ? value.querySelector(selector.irregularText)
        : null;
      this.car[label.innerHTML] = (nested || value).innerHTML;
    });
  }

  /**
   * Reads plate, name and VIN from the page title area and creates the car.
   *
   * @returns {Car} the newly created car (also stored on `this.car`)
   * @throws {Error} when no content was set or a title element is missing
   */
  scrapeBasicProperties() {
    if (!this.document) {
      throw new Error('No data to scrape.');
    }
    const { properties: selector } = Selectors;
    const plate = this.getTextBySelector(selector.plate);
    const carName = this.getTextBySelector(selector.name);
    const vin = this.getTextBySelector(selector.vin);
    console.log(plate, carName, vin);
    // NOTE(review): the first five characters are dropped — presumably a
    // label prefix in front of the VIN; confirm against the live page.
    this.car = new Car(plate, carName, vin.substring(5));
    return this.car;
  }
}
|
||||
|
||||
export default Scraper;
|
||||
58
src/cookies/CookieMonster.js
Normal file
58
src/cookies/CookieMonster.js
Normal file
@@ -0,0 +1,58 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
import {
|
||||
SEARCH_URL,
|
||||
NAVIGATION_TIMEOUT,
|
||||
TEMP_DIR
|
||||
} from '../util/Constants';
|
||||
import Selectors from '../util/Selectors';
|
||||
|
||||
/**
 * Drives a headless browser through the public search form and returns the
 * HTML of the resulting page.
 */
class CookieMonster {
  // Cache instance handed in by the caller (not read by this class).
  cache;
  // Puppeteer browser for the lookup in progress, if any.
  browser;
  // Page opened in `browser`.
  page;

  /**
   * @param {*} cache cache instance to keep on the object
   */
  constructor(cache) {
    this.cache = cache;
  }

  /**
   * Types the plate into the search field, triggers the form's AJAX submit
   * and waits for the resulting navigation.
   *
   * @param {string} plate number plate to search for
   * @returns {Promise<void>}
   */
  async submitForm(plate) {
    await this.page.focus(Selectors.form.plate);
    await this.page.keyboard.type(plate);
    // Start waiting BEFORE triggering the submit; otherwise a fast navigation
    // can finish before the wait is registered and the wait then times out.
    await Promise.all([
      this.page.waitForNavigation({
        timeout: NAVIGATION_TIMEOUT,
        waitUntil: 'domcontentloaded',
      }),
      this.page.evaluate(() => {
        PrimeFaces.ab({
          s: "j_idt104:j_idt131",
          u: "j_idt104"
        });
      }),
    ]);
  }

  /**
   * Launches a browser, opens a page and loads the search form.
   *
   * @returns {Promise<void>}
   */
  async launchPage() {
    this.browser = await puppeteer.launch();
    this.page = await this.browser.newPage();
    await this.page.goto(SEARCH_URL);
  }

  /**
   * Closes the current browser, if any, and forgets it together with its page.
   *
   * @returns {Promise<void>}
   */
  async closeBrowser() {
    const browser = this.browser;
    this.browser = undefined;
    this.page = undefined;
    if (browser) {
      await browser.close();
    }
  }

  /**
   * Saves a screenshot of the result page and closes the browser. The
   * browser is closed even when taking the screenshot fails.
   *
   * @param {string} plate number plate, used as the screenshot file name
   * @returns {Promise<void>}
   */
  async cleanup(plate) {
    try {
      await this.page.screenshot({
        path: `${TEMP_DIR.screenshots}/${plate}.png`
      });
    } finally {
      await this.closeBrowser();
    }
  }

  /**
   * Runs a complete lookup for one plate.
   *
   * @param {string} plate number plate to search for
   * @returns {Promise<string>} inner HTML of the main content container
   * @throws whatever launching, submitting or reading the page throws; the
   *   browser is closed before the error propagates
   */
  async init(plate) {
    console.log(`Fetching data for ${plate}`);
    let pageContent;
    try {
      await this.launchPage();
      await this.submitForm(plate);
      pageContent = await this.page
        .$eval(Selectors.container.main, (element) => element.innerHTML);
    } catch (e) {
      // Do not leave a browser process behind. A failure while closing is
      // only logged so that it cannot mask the original error.
      await this.closeBrowser().catch((closeError) => console.error(closeError));
      throw e;
    }
    await this.cleanup(plate);
    console.log(`Successfully fetched fresh data for ${plate}`);
    return pageContent;
  }
}
|
||||
|
||||
export default CookieMonster;
|
||||
47
src/index.js
Normal file
47
src/index.js
Normal file
@@ -0,0 +1,47 @@
|
||||
import { BASE_URL, BASE_HEADERS, BASE_PARAMS } from './util/Constants';
|
||||
import Car from './model/Car';
|
||||
import Api from './api/Api';
|
||||
import Cache from './api/Cache';
|
||||
import Scraper from './components/Scraper';
|
||||
import CookieMonster from './cookies/CookieMonster';
|
||||
|
||||
/**
 * Application facade: obtains the page content for a plate (from the cache or
 * freshly via the browser) and scrapes it into a car.
 */
class Hack {
  api;
  scraper;
  cache;
  cookieMonster;

  constructor() {
    this.api = new Api();
    this.scraper = new Scraper();
    this.cache = new Cache();
    this.cookieMonster = new CookieMonster(this.cache);
  }

  /**
   * Returns the page content for a plate, preferring the cached copy.
   *
   * @param {string} plate number plate
   * @returns {Promise<*>} cached content, or freshly fetched content
   */
  async getData(plate) {
    const cached = await this.cache.get(plate);
    if (cached) {
      console.log(`Using cached data for ${plate}`);
      return cached;
    }
    const data = await this.cookieMonster.init(plate);
    // Caching is best effort: a failed write must not discard data that was
    // just fetched, and must not surface as an unobserved rejection.
    try {
      await this.cache.save(plate, data);
    } catch (e) {
      console.warn(`Could not cache data for ${plate}: ${e.message}`);
    }
    return data;
  }

  /**
   * Fetches and scrapes the data for a plate and logs the resulting car.
   * Errors from fetching, parsing or scraping are logged rather than thrown.
   *
   * @param {string} plate number plate
   * @returns {Promise<*>} the scraped car, or undefined when anything failed
   */
  async init(plate) {
    try {
      const data = await this.getData(plate);
      this.scraper.setContent(data);
      this.scraper.scrapeBasicProperties();
      this.scraper.scrapeMainProperties();
    } catch (e) {
      console.error(e);
      return undefined;
    }
    console.log(this.scraper.car);
    return this.scraper.car;
  }
}
|
||||
|
||||
// Entry point: look up the plate passed as the first command-line argument,
// falling back to the sample plate when none is given.
const hack = new Hack();

hack.init(process.argv[2] || '540BLG').catch((e) => {
  // Never leave the promise floating: report the failure and signal it
  // through the exit code.
  console.error(e);
  process.exitCode = 1;
});
|
||||
13
src/model/Car.js
Normal file
13
src/model/Car.js
Normal file
@@ -0,0 +1,13 @@
|
||||
/**
 * Record for a single vehicle. Only the three identifying fields are declared
 * here; the scraper attaches further properties to instances at runtime.
 */
class Car {
  plate;
  name;
  vin;

  /**
   * @param {*} plate stored as `plate`
   * @param {*} name stored as `name`
   * @param {*} vin stored as `vin`
   */
  constructor(plate, name, vin) {
    Object.assign(this, { plate, name, vin });
  }
}
|
||||
|
||||
export default Car;
|
||||
34
src/util/Constants.js
Normal file
34
src/util/Constants.js
Normal file
@@ -0,0 +1,34 @@
|
||||
// Vehicle detail page requested by Api#fetch.
export const BASE_URL = 'https://eteenindus.mnt.ee/public/soidukDetailvaadeAvalik.jsf';
// Search form opened in the browser by CookieMonster.
export const SEARCH_URL = 'https://eteenindus.mnt.ee/public/soidukTaustakontroll.jsf';

// Request headers sent with every Api#fetch call.
export const BASE_HEADERS = {
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
  'Accept-Encoding': 'gzip, deflate, br',
  'Accept-Language': 'en-US,en;q=0.5',
  'Connection': 'keep-alive',
  'Host': 'eteenindus.mnt.ee',
  // The HTTP header name is spelled "Referer" (single "r"); "Referrer" is
  // not a header servers recognise.
  'Referer': BASE_URL,
  'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0',
  'Upgrade-Insecure-Requests': '1',
};
|
||||
|
||||
// Extra request options spread into the fetch call made by Api#fetch.
export const BASE_PARAMS = {
  method: 'GET',
  mode: 'cors',
  credentials: 'include',
};

// Passed as `timeout` to page.waitForNavigation() in CookieMonster.
export const NAVIGATION_TIMEOUT = 2500;

// Settings for the Cacheman instance created in Cache, plus key prefixes.
export const CACHE = {
  ttl: 600,
  engine: 'file',
  directory: 'tmp/cache',
  PREFIX: { plate: 'car-' },
};

// Output directories for temporary artefacts.
export const TEMP_DIR = { screenshots: 'tmp/screenshots' };
|
||||
3
src/util/Formatter.js
Normal file
3
src/util/Formatter.js
Normal file
@@ -0,0 +1,3 @@
|
||||
/**
 * Serialises a map of cookie names to values into a `Cookie` header value
 * (`name=value` pairs joined with "; ").
 *
 * The cookie map is taken as an argument: this module has no `cookie` binding
 * of its own, so reading one at module level threw a ReferenceError as soon
 * as the module was imported.
 *
 * @param {Object<string, string>} [cookie={}] cookie names mapped to values
 * @returns {string} the header value; empty string for an empty map
 */
export const formatCookie = (cookie = {}) => Object.entries(cookie)
  .map(([key, value]) => `${key}=${value}`)
  .join('; ');
|
||||
20
src/util/Selectors.js
Normal file
20
src/util/Selectors.js
Normal file
@@ -0,0 +1,20 @@
|
||||
// CSS selectors used against the search form and the vehicle detail page.
// NOTE(review): the `j_idt104` ids look auto-generated by the site and may
// change between deployments — verify against the live page.

// Inputs of the search form.
const form = {
  plate: '#j_idt104\\:regMark',
};

// Top-level page containers.
const container = {
  main: '#content',
  form: '#j_idt104',
};

// Elements holding the vehicle's data on the detail page.
const properties = {
  plate: '.content-title h1',
  name: '.content-title p:first-of-type',
  vin: '.content-title p:nth-of-type(2)',
  main: {
    container: '.asset',
    rows: '.asset-details table tbody tr',
    cell: 'td',
    irregularText: 'span:first-child', // to get past spans, superscripts and such
  },
};

export default { form, container, properties };
|
||||
Reference in New Issue
Block a user