Skip to content

Commit

Permalink
ArchiveIt data source optimization
Browse files (browse the repository at this point in the history)
  • Loading branch information
shelld3v committed Jun 2, 2022
1 parent 8a7cd5d commit 1f9e3b0
Showing 1 changed file with 2 additions and 39 deletions.
41 changes: 2 additions & 39 deletions resources/scripts/archive/archiveit.ads
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,10 @@ function start()
end

-- Entry point invoked with a context and a domain name; every lookup it
-- performs is delegated to scrape() with a URL built by a helper.
--
-- NOTE(review): this listing comes from a rendered commit diff (2 additions,
-- 39 deletions) without +/- markers, so removed and added lines may appear
-- side by side in this body -- confirm against the committed file.
--
-- ctx:    opaque context handed through to scrape() and pages().
-- domain: domain name used to build every request URL.
function vertical(ctx, domain)
-- First lookup: the URL produced by first_url() for this domain.
scrape(ctx, {['url']=first_url(domain)})

-- Stop here when pages() reports false for this domain.
local found = pages(ctx, domain)
if not found then
return
end

-- Walk result pages 1 through 50, stopping at the first scrape() call that
-- returns a falsy value.
for i=1,50,1 do
local ok = scrape(ctx, {['url']=second_url(domain, i)})
if not ok then
break
end
end
-- Lookup against the URL produced by build_url() for this domain.
scrape(ctx, {['url']=build_url(domain)})
end

function first_url(domain)
function build_url(domain)
local params = {
['url']=domain,
['matchType']="domain",
Expand All @@ -35,28 +23,3 @@ function first_url(domain)
}
return "https://wayback.archive-it.org/all/timemap/cdx?" .. url.build_query_string(params)
end

-- Builds the archive-it.org "explore" URL for one page of the "Sites"
-- listing that matches a domain.
--
-- domain:  value sent as the `q` query parameter.
-- pagenum: value sent as the `page` query parameter.
-- Returns the base URL followed by the query string produced by
-- url.build_query_string().
function second_url(domain, pagenum)
    local base = "https://archive-it.org/explore?"

    local query = {}
    query.show = "Sites"
    query.q = domain
    query.page = pagenum

    return base .. url.build_query_string(query)
end

-- Reports whether the archive-it.org "explore" page lists any sites for the
-- given domain.
--
-- ctx:    opaque context handed through to request() and log().
-- domain: domain name appended to the `q` query parameter.
-- Returns true when the request succeeds and the response does not carry the
-- "No metadata results" marker; false on a request error, an empty response,
-- or when the marker is present.
function pages(ctx, domain)
    local u = "https://archive-it.org/explore?show=Sites&q=" .. domain
    local resp, err = request(ctx, {['url']=u})
    if (err ~= nil and err ~= "") then
        log(ctx, "pages request to service failed: " .. err)
        return false
    end

    -- Without a response body there is nothing to inspect, so report that no
    -- pages are available.
    if (resp == nil or resp == "") then
        return false
    end

    -- The marker text signals an empty result set. The previous check
    -- returned true only when the marker WAS present, which made the caller
    -- paginate exactly when there was nothing to fetch.
    -- find() is treated as returning a list of matches (nil or empty when
    -- nothing matched), as the earlier nil/length check implied.
    local match = find(resp, "No metadata results")
    if (match ~= nil and #match > 0) then
        return false
    end

    return true
end

0 comments on commit 1f9e3b0

Please sign in to comment.