Server scraping 4 sites

Oshgnacknak 2020-08-26 19:56:29 +02:00
commit b4d53c3875
4 changed files with 1715 additions and 0 deletions

28
server/index.js Normal file

@@ -0,0 +1,28 @@
const express = require('express');
const morgan = require('morgan');
const cors = require('cors');
const scrape = require('./scrape.js');
const shuffle = require('shuffle-array');

const app = express();
app.use(cors());

const prod = process.env.NODE_ENV === 'production';
app.use(morgan(prod ? 'short' : 'dev'));
app.use(express.static(prod ? 'public' : '../client/build'));

// Cache the scrape result and refresh it at most once every 30 seconds.
let lastScrape;
let tags;

app.get('/spin', async (req, res) => {
  if (!lastScrape || lastScrape < new Date() - 30 * 1000) {
    tags = await scrape();
    lastScrape = new Date();
  }
  // Answer with 30 tags picked at random from the cached set.
  res.json(shuffle.pick(tags, { picks: 30 }));
});

const port = process.env.PORT || 5000;
app.listen(port, () => {
  console.log('Listening at port', port);
});
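For reference, a minimal sketch of a client hitting the /spin endpoint (assuming the server is running locally on the default port 5000; node-fetch is already among the dependencies):

const fetch = require('node-fetch');

// /spin responds with up to 30 randomly picked entries of the form
// { name, sites }, as produced by scrape.js further down in this diff.
fetch('http://localhost:5000/spin')
  .then(res => res.json())
  .then(tags => {
    for (const { name, sites } of tags) {
      console.log(name, '->', sites.map(site => site.hostname).join(', '));
    }
  });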

1570
server/package-lock.json generated Normal file

File diff suppressed because it is too large

23
server/package.json Normal file

@@ -0,0 +1,23 @@
{
  "name": "wheelofporn",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "dev": "nodemon index.js"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "dependencies": {
    "cheerio": "^1.0.0-rc.3",
    "cors": "^2.8.5",
    "express": "^4.17.1",
    "morgan": "^1.10.0",
    "node-fetch": "^2.6.0",
    "shuffle-array": "^1.0.1"
  },
  "devDependencies": {
    "nodemon": "^2.0.4"
  }
}
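With these dependencies installed, npm run dev starts the server under nodemon, which restarts it on every file change; there is no start script yet, so a production deployment would presumably launch node index.js directly.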

94
server/scrape.js Normal file

@@ -0,0 +1,94 @@
const cheerio = require('cheerio');
const fetch = require('node-fetch');

// Builds a scraper for one site: fetch its tag page, let `callback` extract
// the tags from the parsed document, then normalise each tag with the site's
// icon, base URL, and hostname.
const makeScrape = ({ baseUrl, tagPage, icon }, callback) => {
  const url = new URL(tagPage || '/', baseUrl);
  return async () => {
    const res = await fetch(url);
    const $ = cheerio.load(await res.text());
    const tags = callback($, { tagPage, baseUrl, url, res }).get();
    return tags.map(tag => ({
      icon: icon || baseUrl + '/favicon.ico',
      baseUrl,
      hostname: url.hostname,
      ...tag,
    }));
  };
};

const scrapeNhentai = makeScrape(
  {
    baseUrl: 'https://nhentai.net',
    tagPage: '/tags/popular',
  },
  ($, { baseUrl }) =>
    $('a.tag').map((_, el) => {
      const a = $(el);
      const url = baseUrl + a.attr('href');
      const name = a.children('.name').text();
      return { url, name };
    })
);

const scrapeTubeBDSM = makeScrape(
  {
    baseUrl: 'https://www.tubebdsm.com',
    icon: 'https://www.tubebdsm.com/templates/tubebdsm/images/favicon.ico?c4b5704b',
  },
  ($, { baseUrl }) =>
    $('div.card-body-main a.item-link').map((_, el) => {
      const a = $(el);
      const url = baseUrl + a.attr('href');
      const name = a.attr('title').trim();
      return { url, name };
    })
);

const scrapeXVideos = makeScrape(
  {
    baseUrl: 'https://www.xvideos.com',
    tagPage: '/tags',
  },
  ($, { baseUrl }) =>
    $('ul#tags > li > a').map((_, el) => {
      const a = $(el);
      const url = baseUrl + a.attr('href');
      const name = a.children('b').text().trim();
      return { url, name };
    })
);

const scrapePornhub = makeScrape(
  {
    baseUrl: 'https://www.pornhub.com',
    tagPage: '/categories',
  },
  ($, { baseUrl }) =>
    $('.category-wrapper > a').map((_, el) => {
      const a = $(el);
      const url = baseUrl + a.attr('href');
      const name = a.attr('alt');
      return { url, name };
    })
);

// Scrape all four sites in parallel, then group the tags by lowercased name
// so the same tag appearing on several sites becomes a single entry.
const scrape = async () => {
  const tags = Array.prototype.concat(
    ...await Promise.all([
      scrapeNhentai(),
      scrapeTubeBDSM(),
      scrapeXVideos(),
      scrapePornhub(),
    ])
  );
  const byName = {};
  for (const tag of tags) {
    const name = tag.name.toLowerCase();
    byName[name] = byName[name] || [];
    byName[name].push(tag);
  }
  return Object.entries(byName).map(([name, sites]) => ({
    name, sites,
  }));
};

module.exports = scrape;
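A quick way to inspect the grouped result (a sketch; the file name check.js is hypothetical, run from the server/ directory next to scrape.js):

// check.js
const scrape = require('./scrape.js');

scrape().then(tags => {
  // tags holds one entry per lowercased tag name; sites lists every scraped
  // tag sharing that name, each with url, name, icon, baseUrl, and hostname.
  console.log(tags.length, 'distinct tag names');
  const [first] = tags;
  console.log(first.name, first.sites.map(site => site.hostname));
});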