-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.js
58 lines (50 loc) · 1.99 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
const fetch = require('node-fetch');
const config = require('./config.json');
/**
 * Creates a promise that is fulfilled (with no value) once a timer of `ms`
 * milliseconds has fired.
 *
 * @param {number} ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Promise fulfilled by the timer callback.
 */
const sleep = (ms) => new Promise((done) => {
  setTimeout(done, ms);
});
/**
 * Posts every URL to the webhook as a JSON body of the form `{ content: url }`.
 *
 * Requests are staggered rather than sent all at once: the request for the
 * URL at index `i` is issued only after waiting `interval * i` milliseconds.
 * A failing request (network error or non-2xx response) is reported on stderr
 * and does not prevent the remaining URLs from being posted.
 *
 * @param {string[]} urls - URLs to post, in order.
 * @param {string} webhook - Endpoint that receives the POST requests.
 * @param {number} interval - Delay step between consecutive requests, in ms.
 * @returns {Promise<void>} Fulfilled once every request has either succeeded
 *   or been logged as failed; it never rejects because of a single request.
 */
const postUrlsToWebhook = async (urls, webhook, interval) => {
  await Promise.all(urls.map(async (url, i) => {
    await sleep(interval * i);
    try {
      const response = await fetch(webhook, {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
        },
        body: JSON.stringify({ content: url }),
      });
      if (!response.ok) {
        // Passing the Response object itself to Error() would only yield
        // "[object Response]"; spell out the status so the log is actionable.
        throw new Error(`Webhook responded with ${response.status} ${response.statusText}`);
      }
    } catch (err) {
      // Best-effort delivery: log the failure and carry on with the other URLs.
      console.error(`Failed to post ${url}:`, err);
    }
  }));
};
/**
 * Collects the `url` field of every post on up to `amountOfPages` pages of a
 * subreddit listing fetched from reddit.com's `.json` endpoint.
 *
 * Pages are requested one after another because each request needs the
 * `after` cursor returned by the previous one. Fetching stops early when a
 * response carries no `after` cursor; continuing with an empty cursor would
 * just request the first page again.
 *
 * @param {string} subreddit - Subreddit name, inserted into the URL path as-is.
 * @param {string} sort - Listing name used as the next path segment.
 * @param {string} time - Value of the `t` query parameter.
 * @param {number} itemsPerPage - Value of the `limit` query parameter.
 * @param {number} amountOfPages - Maximum number of pages to request; zero,
 *   negative or non-numeric values request nothing.
 * @param {?string} [after=null] - Cursor to start from; falsy starts at the top.
 * @param {string[]} [urls=[]] - URLs collected so far; this array is not mutated.
 * @returns {Promise<string[]>} `urls` followed by the newly collected URLs.
 * @throws {Error} When a request does not return a successful HTTP status.
 */
const getUrlsFromSubreddit = async (subreddit, sort, time, itemsPerPage, amountOfPages, after = null, urls = []) => {
  let collected = urls;
  let cursor = after;
  // Sequential on purpose: every page depends on the previous page's cursor.
  for (let page = 0; page < amountOfPages; page += 1) {
    const query = [
      `t=${encodeURIComponent(time)}`,
      `limit=${encodeURIComponent(itemsPerPage)}`,
      `after=${encodeURIComponent(cursor || '')}`,
    ].join('&');
    const url = `https://www.reddit.com/r/${subreddit}/${sort}/.json?${query}`;
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Request for ${url} failed with ${response.status} ${response.statusText}`);
    }
    const { data: { children, after: nextCursor } } = await response.json();
    // concat() builds a new array, leaving the caller's `urls` untouched.
    collected = collected.concat(children.map(child => child.data.url));
    if (!nextCursor) break;
    cursor = nextCursor;
  }
  return collected;
};
/**
 * Gathers post URLs for several subreddits at once and returns them without
 * duplicates.
 *
 * The listing options (`sort`, `time`, `itemsPerPage`, `amountOfPages`) are
 * read from the module-level `config`. All subreddits are requested
 * concurrently; the result keeps the first occurrence of each URL, in
 * subreddit order.
 *
 * @param {string[]} subreddits - Subreddit names to query.
 * @returns {Promise<string[]>} De-duplicated URLs from all subreddits.
 */
const getUrlsFromSubreddits = async (subreddits) => {
  const { sort } = config;
  const { time } = config;
  const { itemsPerPage } = config;
  const { amountOfPages } = config;

  const pendingLists = subreddits.map(
    name => getUrlsFromSubreddit(name, sort, time, itemsPerPage, amountOfPages, null, []),
  );
  const urlLists = await Promise.all(pendingLists);

  // Flatten one level, then let a Set drop repeated URLs.
  const flattened = urlLists.reduce((flat, list) => flat.concat(list), []);
  return Array.from(new Set(flattened));
};
/**
 * Reports whether `value` is `null` or contains `null` anywhere inside it
 * (array elements and object property values are searched recursively).
 *
 * @param {*} value - Parsed JSON value to inspect.
 * @returns {boolean} True when at least one `null` is found.
 */
const containsNull = (value) => {
  if (value === null) return true;
  if (typeof value !== 'object') return false;
  return Object.values(value).some(containsNull);
};

// Entry point: validate config.json, then scrape the configured subreddits
// and forward the collected URLs to the webhook.
if (containsNull(config)) {
  // Inspect the actual values: a substring search for "null" in the serialized
  // config would also reject harmless strings that merely contain that text.
  console.error('config.json contains null values');
  process.exitCode = 1;
} else if (!Array.isArray(config.subreddits)) {
  // Array.isArray also covers a missing `subreddits` key without throwing.
  console.error('config.subreddits has to be an array');
  process.exitCode = 1;
} else {
  (async () => {
    const { webhook, interval, subreddits } = config;
    const urls = await getUrlsFromSubreddits(subreddits);
    await postUrlsToWebhook(urls, webhook, interval);
  })().catch((err) => {
    // Surface scraping failures instead of leaving an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
  });
}