WheelOfPorn

The Wheel of Porn from Family Guy (Season 13, Episode 9), implemented in React.
git clone git://git.oshgnacknak.de/WheelOfPorn.git
Log | Files | Refs | README

scrape.js (2626B)


      1 const cheerio = require('cheerio');
      2 const fetch = require('node-fetch');
      3 
      4 const makeScrape = ({ baseUrl, tagPage, icon }, callback) => {
      5     const url = new URL(tagPage || '/', baseUrl);
      6     return async () => {
      7         const res = await fetch(url);
      8         const $ = cheerio.load(await res.text());
      9 
     10         const tags = callback($, { tagPage, baseUrl, url, res }).get();
     11         return tags.map(tag => ({
     12             icon: icon || baseUrl + '/favicon.ico',
     13             baseUrl,
     14             hostname: url.hostname,
     15             ...tag,
     16         }));
     17     }
     18 }
     19 
     20 const scrapeNhentai = makeScrape({
     21         baseUrl: 'https://nhentai.net',
     22         tagPage: '/tags/popular',
     23     }, 
     24     ($, { baseUrl }) => 
     25         $('a.tag').map((_, el) => {
     26             const a = $(el);
     27             const url = baseUrl + a.attr('href');
     28             const name = a.children('.name').text();
     29             return { url, name };
     30         })
     31 );
     32 
     33 const scrapeTubeBDSM = makeScrape({
     34         baseUrl: 'https://www.tubebdsm.com',
     35         icon: 'https://www.tubebdsm.com/templates/tubebdsm/images/favicon.ico?c4b5704b',
     36     }, 
     37     ($, { baseUrl }) => 
     38         $('div.card-body-main a.item-link').map((_, el) => {
     39             const a = $(el);
     40             const url = baseUrl + a.attr('href');
     41             const name = a.attr('title').trim();
     42             return { url, name };
     43         })
     44 );
     45 
     46 const scrapeXVideos = makeScrape({
     47         baseUrl: 'https://www.xvideos.com',
     48         tagPage: '/tags',
     49     }, 
     50     ($, { baseUrl }) => 
     51         $('ul#tags > li > a').map((_, el) => {
     52             const a = $(el);
     53             const url = baseUrl + a.attr('href');
     54             const name = a.children('b').text().trim();
     55             return { url, name };
     56         })
     57 );
     58 
     59 const scrapePornhub = makeScrape({
     60         baseUrl: 'https://www.pornhub.com',
     61         tagPage: '/categories',
     62     }, 
     63     ($, { baseUrl }) => 
     64         $('.category-wrapper > a').map((_, el) => {
     65             const a = $(el);
     66             const url = baseUrl + a.attr('href');
     67             const name = a.attr('alt');
     68             return { url, name };
     69         })
     70 );
     71 
     72 const scrape = async () => {
     73     const tags = Array.prototype.concat(
     74         ...await Promise.all([
     75             scrapeNhentai(), 
     76             scrapeTubeBDSM(),
     77             scrapeXVideos(),
     78             scrapePornhub(),
     79         ])
     80     );
     81 
     82     const byName = {};
     83     for (const tag of tags) {
     84         const name = tag.name.toLowerCase();
     85         byName[name] = byName[name] || [];
     86         byName[name].push(tag);
     87     }
     88 
     89     return Object.entries(byName).map(([name, sites]) => ({
     90         name, sites
     91     }));
     92 }
     93 
     94 module.exports = scrape;