const fetch = require('node-fetch');
const cheerio = require('cheerio');

// Wraps a scrape function so it fetches the page, loads it into cheerio,
// and merges the scraped fields with the URL that was fetched.
function makeUrlScrape(scrape, defaultUrl) {
  return async (url = defaultUrl) => {
    const res = await fetch(url);
    const $ = cheerio.load(await res.text());
    return { url, ...scrape($, { url }) };
  };
}

// Scrapes an Animevibe download page: each `.alert > p` names the host of
// the download link that immediately follows it.
const scrapeAnimevibeDownloadPage = makeUrlScrape($ => {
  const entries = $('.alert > p').map((_, p) => {
    const key = $(p).text().split(' ')[0].toLowerCase();
    const href = $(p).next('a').attr('href');
    // Wrap the pair in an extra array: cheerio's .map() flattens returned
    // arrays by one level, so .get() yields [key, href] entries.
    return [[key, href]];
  }).get();
  return Object.fromEntries(entries);
});

// Scrapes a VidCDN download page for file info and per-quality download links.
const scrapeVidCDNDownloadPage = makeUrlScrape($ => {
  const downloads = scrapeDownloads($);
  const info = scrapeInfo($);
  return { ...info, downloads };
});

function scrapeDownloads($) {
  return $('div.mirror_link').first()
    .find('.dowload > a').map((_, a) => {
      const info = parseDownload($(a).text());
      const url = $(a).attr('href');
      return { ...info, url };
    }).get();
}

function scrapeInfo($) {
  const keys = ['filename', 'filesize', 'duration', 'resolution'];
  const entries = $('.sumer_l > ul > li > span').map((i, span) => {
    const key = keys[i];
    const value = $(span).text();
    return [[key, value]];
  }).get();
  return Object.fromEntries(entries);
}

// Parses download link text, e.g.
// "Download (360P - mp4)" -> { resolution: '360', format: 'mp4' }.
function parseDownload(text) {
  const regex = /Download\s+\((?<resolution>[\w\d]+)P\s+-\s+(?<format>[\w\d]+)\)/;
  const { groups } = regex.exec(text);
  return groups;
}

// Scrapes an Animevibe series page for episode, title, and metadata fields.
const scrapeAnimevibeSeriesPage = makeUrlScrape(($, { url }) => {
  const seriesId = url.split('/')[4];
  const downloadId = scrapeDownloadId($);
  const currentEpisodeNumber = scrapeEpisodeNumber($);

  const infoDiv = $('#blogShort');
  const englishTitle = infoDiv.find('h5.title-av-search-res').text();
  const thumbnailUrl = infoDiv.find('#thumb-rsz').attr('data-bg');

  const info = scrapeInfoDiv(infoDiv, $);
  // parseInt stops at the first non-digit, so trailing words like
  // "12 Episodes" are ignored.
  const episodeCount = parseInt(info['Number of Episodes'], 10);
  const views = parseInt(info['Views'], 10);
  const summary = info['Summary'];
  const genres = info['Genre'].split(', ');
  const otherTitles = parseTitles(info['Alternate Titles']);
  const myAnimeListScore = parseFloat(info['[MyAnimeList] Score']);
  const type = info['Type'];
  const status = info['Status'];
  const dates = parseDates(info['Date']);

  return {
    seriesId,
    downloadId,
    currentEpisodeNumber,
    episodeCount,
    titles: { english: englishTitle, ...otherTitles },
    thumbnailUrl,
    type,
    ...dates,
    status,
    genres,
    summary,
    myAnimeListScore,
    views,
  };
});

// Returns the currently selected episode number, or null when no episode
// button is present.
function scrapeEpisodeNumber($) {
  const button = $('.current-episode-button');
  return button.length ? parseInt(button.text(), 10) : null;
}

// Extracts the download page id from the first download link's query string.
function scrapeDownloadId($) {
  const href = $('.download-av > a:nth-child(1)').attr('href');
  return new URL(href).searchParams.get('id');
}

// Parses the "Alternate Titles" field: a native-Japanese title, a
// romanized title, and a single-quoted list of abbreviations.
function parseTitles(text) {
  const regex = /(?<nativeJapanese>[^,]*), (?<japanese>[^,]*), (?<jsonParsableAbbreviations>\[.*\])/;
  const { groups } = regex.exec(text);
  // The abbreviation list uses single quotes, which JSON.parse rejects.
  const json = groups.jsonParsableAbbreviations.replace(/'/g, '"');
  return {
    nativeJapanese: groups.nativeJapanese,
    japanese: groups.japanese,
    abbreviations: JSON.parse(json),
  };
}

// Parses the "Date" field, e.g. "Apr 6, 2019 to Sep 28, 2019", or
// "Apr 6, 2019 to ?" for a series that is still airing.
function parseDates(text) {
  const regex = /(?<releaseDate>\w{3} \d{1,2}, \d{4})( to (\?|(?<finishedDate>\w{3} \d{1,2}, \d{4})))?/;
  const { groups } = regex.exec(text);
  const releaseDate = groups.releaseDate;
  const finishedDate = groups.finishedDate || null;
  return { releaseDate, finishedDate };
}

// Turns the "Key: value" <h6> lines of the info div into an object.
function scrapeInfoDiv(infoDiv, $) {
  const entries = infoDiv.find('h6.excerpt-anime-info').map((_, h6) => {
    // Splitting on a capturing group keeps the key in the result:
    // "Key: value" -> ['', 'Key', 'value'].
    const [key, value] = $(h6).text().split(/(^[^:]+): /).slice(1);
    return [[key, value]];
  }).get();
  return Object.fromEntries(entries);
}

// Resolves an mp4upload page to the direct video file URL by replaying the
// download form POST and reading the Location header of the redirect.
async function scrapeMp4UploadVideoFileUrl(url) {
  const id = url.split('/').pop();
  const res = await fetch(url, {
    method: 'POST',
    body: new URLSearchParams({ id, op: 'download2' }),
    redirect: 'manual',
  });
  return res.headers.get('location');
}

module.exports = {
  scrapeVidCDNDownloadPage,
  scrapeAnimevibeSeriesPage,
  scrapeAnimevibeDownloadPage,
  scrapeMp4UploadVideoFileUrl,
};
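
// Example usage (a sketch): the scrapers are meant to chain, going from a
// series page to a per-host download page to a direct file URL. The URLs
// below are illustrative placeholders, not known-live pages, and the
// download-page URL shape is an assumption.
if (require.main === module) {
  (async () => {
    const series = await scrapeAnimevibeSeriesPage(
      'https://animevibe.tv/a/some-series/1/' // hypothetical series page
    );
    console.log(series.titles.english, `episode ${series.currentEpisodeNumber}`);

    // The series page exposes a download id; the download page lists
    // one link per host, keyed by host name.
    const hosts = await scrapeAnimevibeDownloadPage(
      `https://animevibe.tv/download?id=${series.downloadId}` // assumed URL shape
    );

    // If an mp4upload mirror is listed, resolve it to a direct file URL.
    if (hosts.mp4upload) {
      console.log(await scrapeMp4UploadVideoFileUrl(hosts.mp4upload));
    }
  })().catch(console.error);
}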