144 lines
4.4 KiB
JavaScript
Executable file
const fetch = require('node-fetch');
|
|
const cheerio = require('cheerio');
|
|
|
|
/**
 * Builds an async page scraper: fetches `url` (falling back to `defaultUrl`),
 * loads the response HTML into cheerio, and delegates to `scrape($, { url })`.
 *
 * @param {($: object, ctx: { url: string }) => object} scrape - Extractor run
 *   against the loaded document.
 * @param {string} [defaultUrl] - Url used when the returned function is
 *   called without one.
 * @returns {(url?: string) => Promise<object>} The scraped object, merged
 *   with the url it was scraped from.
 * @throws {Error} When the HTTP response is not ok — previously an error
 *   page (404/500) would be scraped silently, yielding empty fields.
 */
function makeUrlScrape(scrape, defaultUrl) {
    return async (url = defaultUrl) => {
        const res = await fetch(url);
        // Fail loudly instead of parsing an error page as if it were content.
        if (!res.ok) {
            throw new Error(`Request to ${url} failed: ${res.status} ${res.statusText}`);
        }
        const $ = cheerio.load(await res.text());
        return { url, ...scrape($, { url }) };
    };
}
|
|
|
|
// Scrapes an Animevibe download page. Each `.alert > p` names a mirror
// (its first word, lowercased) and is followed by an <a> with the link;
// the result maps mirror name -> href.
const scrapeAnimevibeDownloadPage = makeUrlScrape(($) => {
    const pairs = $('.alert > p').map((_, paragraph) => {
        const [firstWord] = $(paragraph).text().split(' ');
        const link = $(paragraph).next('a').attr('href');
        // cheerio's .map flattens one array level, so wrap the pair once more
        return [[firstWord.toLowerCase(), link]];
    }).get();
    return Object.fromEntries(pairs);
});
|
|
|
|
// Scrapes a VidCDN download page into the file's summary info plus the
// list of available download links.
const scrapeVidCDNDownloadPage = makeUrlScrape(($) => ({
    ...scrapeInfo($),
    downloads: scrapeDownloads($),
}));
|
|
|
|
// Collects download links from the first mirror section. Each anchor's
// text carries quality/format (parsed by parseDownload) and its href the
// download url. ('.dowload' is the site's own misspelled class name.)
function scrapeDownloads($) {
    const anchors = $('div.mirror_link').first().find('.dowload > a');
    return anchors.map((_, anchor) => {
        const link = $(anchor);
        return { ...parseDownload(link.text()), url: link.attr('href') };
    }).get();
}
|
|
|
|
function scrapeInfo($) {
|
|
const keys = ['filename', 'filesize', 'duration', 'resolution'];
|
|
const entries = $('.sumer_l > ul > li > span').map((i, span) => {
|
|
const key = keys[i];
|
|
const value = $(span).text();
|
|
return [[key, value]];
|
|
}).get();
|
|
return Object.fromEntries(entries);
|
|
}
|
|
|
|
/**
 * Parses a download link label like "Download (720P - mp4)".
 *
 * @param {string} text - Anchor text of a download link.
 * @returns {{ quality: string, format: string } | null} The parsed quality
 *   and format, or null when the text does not match the expected label
 *   shape (previously this crashed with a TypeError on `null.groups`; the
 *   caller spreads the result, and spreading null is a safe no-op).
 */
function parseDownload(text) {
    const regex = /Download\s+\((?<quality>[\w\d]+)P\s+-\s+(?<format>[\w\d]+)\)/;
    const match = regex.exec(text);
    return match ? match.groups : null;
}
|
|
|
|
// Scrapes an Animevibe series page into a structured record. The series id
// comes from the url path; everything else comes from the #blogShort info
// panel, whose "Key: value" rows are unpacked by scrapeInfoDiv.
const scrapeAnimevibeSeriesPage = makeUrlScrape(($, { url }) => {
    const infoDiv = $('#blogShort');
    const info = scrapeInfoDiv(infoDiv, $);

    // NOTE: the 'gernes' (sic) key is kept as-is — it is part of the
    // returned shape that callers already depend on.
    return {
        seriesId: url.split('/')[4],
        downloadId: scrapeDownloadId($),
        currentEpisodeNumber: scrapeEpisodeNumber($),
        // e.g. "12 Episodes" -> 12 (split(' ', 1) keeps only the number)
        episodeCount: parseInt(info['Number of Episodes'].split(' ', 1)),
        titles: {
            english: infoDiv.find('h5.title-av-search-res').text(),
            ...parseTitles(info['Alternate Titles']),
        },
        thumbnailUrl: infoDiv.find('#thumb-rsz').attr('data-bg'),
        type: info['Type'],
        ...parseDates(info['Date']),
        status: info['Status'],
        gernes: info['Genre'].split(', '),
        summary: info['Summary'],
        myAnimeListScore: parseFloat(info['[MyAnimeList] Score']),
        views: parseInt(info['Views'].split(' ', 1)),
    };
});
|
|
|
|
// Returns the episode number shown on the current-episode button, or null
// when the page has no such button (e.g. a series overview page).
function scrapeEpisodeNumber($) {
    const episodeButton = $('.current-episode-button');
    if (!episodeButton.length) {
        return null;
    }
    return parseInt(episodeButton.text());
}
|
|
|
|
/**
 * Extracts the download id from the first download link's `id` query param.
 *
 * @param {object} $ - Loaded series page (cheerio).
 * @returns {string | null} The id, or null when the download link is absent
 *   or the id param is missing (previously `new URL(undefined)` threw a
 *   TypeError on pages without the link).
 */
function scrapeDownloadId($) {
    const href = $('.download-av > a:nth-child(1)').attr('href');
    if (!href) {
        return null;
    }
    return new URL(href).searchParams.get('id');
}
|
|
|
|
/**
 * Parses the "Alternate Titles" field, e.g.
 *   "ワンパンマン, One Punch Man, ['OPM', 'One-Punch Man']"
 * into { nativeJapanese, japanese, abbreviations }.
 *
 * @param {string} text - Raw field text.
 * @returns {{ nativeJapanese: string, japanese: string, abbreviations: string[] } | {}}
 *   Parsed titles, or an empty object when the field doesn't match the
 *   expected shape (previously this crashed with a TypeError; the caller
 *   spreads the result, so `{}` degrades gracefully).
 */
function parseTitles(text) {
    const regex = /(?<nativeJapanese>[^,]*), (?<japanese>[^,]*), (?<jsonParsableAbbreviations>\[.*\])/;
    const match = regex.exec(text);
    if (!match) {
        return {};
    }
    const { nativeJapanese, japanese, jsonParsableAbbreviations } = match.groups;
    // The site writes the list with single quotes; swap them so JSON.parse
    // accepts it. NOTE: an apostrophe inside an abbreviation still breaks this.
    const json = jsonParsableAbbreviations.replace(/'/g, '"');
    return {
        nativeJapanese,
        japanese,
        abbreviations: JSON.parse(json),
    };
}
|
|
|
|
/**
 * Parses the "Date" field, e.g. "Oct 5, 2015 to Dec 21, 2015",
 * "Oct 5, 2015 to ?" (still airing), or "Oct 5, 2015" (single date).
 *
 * @param {string} text - Raw date field.
 * @returns {{ releaseDate: string | null, finishedDate: string | null }}
 *   finishedDate is null when unknown ("?" or absent); both are null when
 *   the text doesn't match at all (previously a TypeError crash).
 */
function parseDates(text) {
    const regex = /(?<releaseDate>\w{3} \d{1,2}, \d{4})( to (\?|(?<finishedDate>\w{3} \d{1,2}, \d{4})))?/;
    const match = regex.exec(text);
    if (!match) {
        return { releaseDate: null, finishedDate: null };
    }
    const { releaseDate, finishedDate } = match.groups;
    return { releaseDate, finishedDate: finishedDate || null };
}
|
|
|
|
// Unpacks the h6 "Key: value" rows of the series info panel into an object.
// The anchored split regex captures the key before the first ": " and keeps
// the remainder of the line (which may itself contain colons) as the value.
function scrapeInfoDiv(infoDiv, $) {
    const rows = infoDiv.find('h6.excerpt-anime-info');
    const entries = rows.map((_, heading) => {
        // split yields ['', key, value]; drop the leading empty string
        const parts = $(heading).text().split(/(^[^:]+): /);
        return [parts.slice(1)];
    }).get();
    return Object.fromEntries(entries);
}
|
|
|
|
// Resolves the direct video file url for an mp4upload page. Posting the
// "download2" form makes the server answer with a redirect to the file;
// `redirect: 'manual'` keeps the 3xx response so its Location header can
// be read instead of being followed.
async function scrapeMp4UploadVideoFileUrl(url) {
    const id = url.split('/').pop();
    const form = new URLSearchParams({ id, op: 'download2' });
    const res = await fetch(url, {
        method: 'POST',
        body: form,
        redirect: 'manual',
    });
    return res.headers.get('location');
}
|
|
|
|
// Public scraping API: each scraper takes a url (or uses its default) and
// resolves to a plain object of extracted data.
module.exports = {
    scrapeVidCDNDownloadPage,
    scrapeAnimevibeSeriesPage,
    scrapeAnimevibeDownloadPage,
    scrapeMp4UploadVideoFileUrl,
};
|