Server scraping 4 sites

Oshgnacknak 2020-08-26 19:56:29 +02:00
commit b4d53c3875
4 changed files with 1715 additions and 0 deletions

28
server/index.js Normal file

@@ -0,0 +1,28 @@
const express = require('express');
const morgan = require('morgan');
const cors = require('cors');
const scrape = require('./scrape.js');
const shuffle = require('shuffle-array');

const app = express();
app.use(cors());

const prod = process.env.NODE_ENV === 'production';
app.use(morgan(prod ? 'short' : 'dev'));
app.use(express.static(prod ? 'public' : '../client/build'));

// Cache the scrape result and refresh it at most once every 30 seconds.
let lastScrape;
let tags;

app.get('/spin', async (req, res) => {
  if (!lastScrape || lastScrape < new Date() - 30 * 1000) {
    tags = await scrape();
    lastScrape = new Date();
  }
  // Answer with 30 tags picked at random from the cached set.
  res.json(shuffle.pick(tags, { picks: 30 }));
});

const port = process.env.PORT || 5000;
app.listen(port, () => {
  console.log('Listening at port', port);
});
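For reference, a minimal sketch of a client hitting the /spin endpoint (assuming the server is running locally on the default port 5000; node-fetch is already among the dependencies):

const fetch = require('node-fetch');

// /spin responds with up to 30 randomly picked entries of the form
// { name, sites }, as produced by scrape.js further down in this diff.
fetch('http://localhost:5000/spin')
  .then(res => res.json())
  .then(tags => {
    for (const { name, sites } of tags) {
      console.log(name, '->', sites.map(site => site.hostname).join(', '));
    }
  });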

1570
server/package-lock.json generated Normal file

File diff suppressed because it is too large

23
server/package.json Normal file

@@ -0,0 +1,23 @@
{
  "name": "wheelofporn",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "dev": "nodemon index.js"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "dependencies": {
    "cheerio": "^1.0.0-rc.3",
    "cors": "^2.8.5",
    "express": "^4.17.1",
    "morgan": "^1.10.0",
    "node-fetch": "^2.6.0",
    "shuffle-array": "^1.0.1"
  },
  "devDependencies": {
    "nodemon": "^2.0.4"
  }
}
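With these dependencies installed, npm run dev starts the server under nodemon, which restarts it on every file change; there is no start script yet, so a production deployment would presumably launch node index.js directly.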

94
server/scrape.js Normal file

@@ -0,0 +1,94 @@
const cheerio = require('cheerio');
const fetch = require('node-fetch');

// Builds a scraper for one site: fetch its tag page, let `callback` extract
// the tags from the parsed document, then normalise each tag with the site's
// icon, base URL, and hostname.
const makeScrape = ({ baseUrl, tagPage, icon }, callback) => {
  const url = new URL(tagPage || '/', baseUrl);
  return async () => {
    const res = await fetch(url);
    const $ = cheerio.load(await res.text());
    const tags = callback($, { tagPage, baseUrl, url, res }).get();
    return tags.map(tag => ({
      icon: icon || baseUrl + '/favicon.ico',
      baseUrl,
      hostname: url.hostname,
      ...tag,
    }));
  };
};

const scrapeNhentai = makeScrape(
  {
    baseUrl: 'https://nhentai.net',
    tagPage: '/tags/popular',
  },
  ($, { baseUrl }) =>
    $('a.tag').map((_, el) => {
      const a = $(el);
      const url = baseUrl + a.attr('href');
      const name = a.children('.name').text();
      return { url, name };
    })
);

const scrapeTubeBDSM = makeScrape(
  {
    baseUrl: 'https://www.tubebdsm.com',
    icon: 'https://www.tubebdsm.com/templates/tubebdsm/images/favicon.ico?c4b5704b',
  },
  ($, { baseUrl }) =>
    $('div.card-body-main a.item-link').map((_, el) => {
      const a = $(el);
      const url = baseUrl + a.attr('href');
      const name = a.attr('title').trim();
      return { url, name };
    })
);

const scrapeXVideos = makeScrape(
  {
    baseUrl: 'https://www.xvideos.com',
    tagPage: '/tags',
  },
  ($, { baseUrl }) =>
    $('ul#tags > li > a').map((_, el) => {
      const a = $(el);
      const url = baseUrl + a.attr('href');
      const name = a.children('b').text().trim();
      return { url, name };
    })
);

const scrapePornhub = makeScrape(
  {
    baseUrl: 'https://www.pornhub.com',
    tagPage: '/categories',
  },
  ($, { baseUrl }) =>
    $('.category-wrapper > a').map((_, el) => {
      const a = $(el);
      const url = baseUrl + a.attr('href');
      const name = a.attr('alt');
      return { url, name };
    })
);

// Scrape all four sites in parallel, then group the tags by lowercased name
// so the same tag appearing on several sites becomes a single entry.
const scrape = async () => {
  const tags = Array.prototype.concat(
    ...await Promise.all([
      scrapeNhentai(),
      scrapeTubeBDSM(),
      scrapeXVideos(),
      scrapePornhub(),
    ])
  );
  const byName = {};
  for (const tag of tags) {
    const name = tag.name.toLowerCase();
    byName[name] = byName[name] || [];
    byName[name].push(tag);
  }
  return Object.entries(byName).map(([name, sites]) => ({
    name, sites,
  }));
};

module.exports = scrape;
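A quick way to inspect the grouped result (a sketch; the file name check.js is hypothetical, run from the server/ directory next to scrape.js):

// check.js
const scrape = require('./scrape.js');

scrape().then(tags => {
  // tags holds one entry per lowercased tag name; sites lists every scraped
  // tag sharing that name, each with url, name, icon, baseUrl, and hostname.
  console.log(tags.length, 'distinct tag names');
  const [first] = tags;
  console.log(first.name, first.sites.map(site => site.hostname));
});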