scrape.js (2626B)
const cheerio = require('cheerio');
const fetch = require('node-fetch');

/**
 * Builds one tag record from a scraped link.
 *
 * Returns `null` for links that cannot produce a usable record (missing
 * `href`, missing/blank name, unparsable URL). cheerio's `.map()` drops
 * `null`/`undefined` results, so such links are skipped instead of crashing
 * the scrape or emitting URLs like `https://example.comundefined`.
 *
 * @param {string|undefined} href - Raw `href` attribute of the link.
 * @param {string|undefined} name - Raw display name of the tag.
 * @param {URL|string} pageUrl - URL of the page the link was found on; used
 *   as the base so relative and absolute hrefs both resolve correctly.
 * @returns {{url: string, name: string}|null} The tag record, or `null`.
 */
const toTag = (href, name, pageUrl) => {
  if (typeof href !== 'string' || typeof name !== 'string') {
    return null;
  }

  const label = name.trim();
  if (label === '') {
    return null;
  }

  try {
    return { url: new URL(href, pageUrl).href, name: label };
  } catch (err) {
    // `new URL` throws a TypeError for hrefs it cannot parse; one malformed
    // link should not abort the whole scrape. Anything else is unexpected.
    if (!(err instanceof TypeError)) {
      throw err;
    }
    return null;
  }
};

/**
 * Creates a scraper for one site.
 *
 * @param {object} site
 * @param {string} site.baseUrl - Origin of the site, e.g. `https://example.com`.
 * @param {string} [site.tagPage] - Path of the page listing tags; defaults to `/`.
 * @param {string} [site.icon] - Icon URL; defaults to `<baseUrl>/favicon.ico`.
 * @param {Function} callback - Called as `callback($, { tagPage, baseUrl, url, res })`
 *   with the loaded cheerio document; must return a cheerio collection of
 *   `{ url, name }` records (its `.get()` is used to obtain the array).
 * @returns {() => Promise<object[]>} Async function that fetches the page and
 *   resolves to the records, each extended with `icon`, `baseUrl` and `hostname`.
 */
const makeScrape = ({ baseUrl, tagPage, icon }, callback) => {
  const url = new URL(tagPage || '/', baseUrl);
  return async () => {
    const res = await fetch(url);
    const $ = cheerio.load(await res.text());

    const tags = callback($, { tagPage, baseUrl, url, res }).get();
    return tags.map((tag) => ({
      icon: icon || `${baseUrl}/favicon.ico`,
      baseUrl,
      hostname: url.hostname,
      // Spread last so a record may override the site-level defaults above.
      ...tag,
    }));
  };
};

const scrapeNhentai = makeScrape(
  {
    baseUrl: 'https://nhentai.net',
    tagPage: '/tags/popular',
  },
  ($, { url }) =>
    $('a.tag').map((_, el) => {
      const a = $(el);
      return toTag(a.attr('href'), a.children('.name').text(), url);
    })
);

const scrapeTubeBDSM = makeScrape(
  {
    baseUrl: 'https://www.tubebdsm.com',
    icon: 'https://www.tubebdsm.com/templates/tubebdsm/images/favicon.ico?c4b5704b',
  },
  ($, { url }) =>
    $('div.card-body-main a.item-link').map((_, el) => {
      const a = $(el);
      return toTag(a.attr('href'), a.attr('title'), url);
    })
);

const scrapeXVideos = makeScrape(
  {
    baseUrl: 'https://www.xvideos.com',
    tagPage: '/tags',
  },
  ($, { url }) =>
    $('ul#tags > li > a').map((_, el) => {
      const a = $(el);
      return toTag(a.attr('href'), a.children('b').text(), url);
    })
);

const scrapePornhub = makeScrape(
  {
    baseUrl: 'https://www.pornhub.com',
    tagPage: '/categories',
  },
  ($, { url }) =>
    $('.category-wrapper > a').map((_, el) => {
      const a = $(el);
      return toTag(a.attr('href'), a.attr('alt'), url);
    })
);

/**
 * Scrapes every configured site in parallel and groups the tags by
 * lower-cased name.
 *
 * @returns {Promise<Array<{name: string, sites: object[]}>>} One entry per
 *   distinct lower-cased tag name, in first-seen order; `sites` holds every
 *   record sharing that name.
 */
const scrape = async () => {
  const perSite = await Promise.all([
    scrapeNhentai(),
    scrapeTubeBDSM(),
    scrapeXVideos(),
    scrapePornhub(),
  ]);
  const tags = [].concat(...perSite);

  // A Map rather than a plain object: names come from scraped pages, and a
  // tag called "constructor" or "__proto__" would otherwise hit an inherited
  // Object.prototype member and crash on `.push`.
  const byName = new Map();
  for (const tag of tags) {
    const name = tag.name.toLowerCase();
    const sites = byName.get(name);
    if (sites === undefined) {
      byName.set(name, [tag]);
    } else {
      sites.push(tag);
    }
  }

  return Array.from(byName, ([name, sites]) => ({ name, sites }));
};

module.exports = scrape;