sclass WebScraper {
  S baseURL;                          // only URLs starting with this prefix are followed
  new Set<S> urlsSeen;                // URLs already loaded
  new LinkedHashSet<S> linksToFollow; // FIFO queue of links still to visit (dedupes automatically)
  Int maxPages;                       // max pages to load, includes cached pages; null = unlimited

  *(S baseAndStartURL) { this(baseAndStartURL, baseAndStartURL); }
  *(S *baseURL, S startURL) { addLink(startURL); }

  void addLinks(Iterable<S> urls) { for (S url : urls) addLink(url); }

  // Queue a link unless it was already visited or leaves the base URL.
  void addLink(S url) {
    if (!urlsSeen.contains(url) && startsWith(url, baseURL))
      linksToFollow.add(url);
  }

  // Load one page from the queue. Returns false when the crawl is finished.
  bool step() {
    ping(); // cooperative cancellation point
    if (maxPages != null && l(urlsSeen) >= maxPages)
      ret false with print("Maximum number of pages reached: " + maxPages + ". Queue size: " + l(linksToFollow));
    if (empty(linksToFollow)) ret false;
    _loadURL(popFirst(linksToFollow));
    ret true;
  }

  void _loadURL(S url) {
    urlsSeen.add(url);
    // pairsA keeps the URL component of each (URL, anchor text) pair
    addLinks(pairsA(webScraper_getLinks(url)));
  }

  run {
    while (step())
      print("URLs checked: " + l(urlsSeen) + ", queue size: " + l(linksToFollow));
    print("Scraping done. " + n2(l(urlsSeen), "URL") + " checked.");
  }
}
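
For readers without the JavaX runtime, here is a minimal plain-Java sketch of the same breadth-first crawl. It substitutes jsoup (assumed on the classpath) for the webScraper_getLinks helper, which is not shown above; the SimpleScraper class, its method names, and the example.com URLs are illustrative assumptions, not part of the original.

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.util.*;

// Illustrative plain-Java equivalent of the crawl loop above (not the JavaX original).
public class SimpleScraper {
  final String baseURL;                                      // only URLs under this prefix are followed
  final Set<String> urlsSeen = new HashSet<>();              // pages already loaded
  final LinkedHashSet<String> queue = new LinkedHashSet<>(); // FIFO queue, dedupes automatically
  final Integer maxPages;                                    // null = unlimited

  SimpleScraper(String baseURL, String startURL, Integer maxPages) {
    this.baseURL = baseURL;
    this.maxPages = maxPages;
    addLink(startURL);
  }

  void addLink(String url) {
    if (!urlsSeen.contains(url) && url.startsWith(baseURL))
      queue.add(url);
  }

  boolean step() throws Exception {
    if (maxPages != null && urlsSeen.size() >= maxPages) return false;
    if (queue.isEmpty()) return false;
    String url = queue.iterator().next();
    queue.remove(url);                 // pop the oldest queued link
    urlsSeen.add(url);
    Document doc = Jsoup.connect(url).get();
    for (Element a : doc.select("a[href]"))
      addLink(a.absUrl("href"));       // absUrl resolves relative hrefs against the page URL
    return true;
  }

  public static void main(String[] args) throws Exception {
    // Hypothetical target site; replace with a real base/start URL.
    SimpleScraper s = new SimpleScraper("https://example.com/", "https://example.com/", 50);
    while (s.step())
      System.out.println("URLs checked: " + s.urlsSeen.size() + ", queue size: " + s.queue.size());
    System.out.println("Scraping done. " + s.urlsSeen.size() + " URL(s) checked.");
  }
}

The LinkedHashSet doubling as both queue and duplicate filter mirrors the design choice above; what this sketch omits is the JavaX ping() cancellation check and any page caching.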