forked from humanprotocol/fortune
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Web scraper from Fiverr * Fix typo and rating string to number
- Loading branch information
1 parent
ca1fc8f
commit 2874988
Showing
3 changed files
with
560 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
import puppeteer from "puppeteer"; | ||
import randUserAgent from "rand-user-agent"; | ||
|
||
export const scrapeFiverrProfile = async (url) => { | ||
const browser = await puppeteer.launch(); | ||
|
||
const page = await browser.newPage(); | ||
const agent = randUserAgent("desktop"); | ||
|
||
await page.setUserAgent(agent); | ||
await page.goto(url); | ||
|
||
const profileData = await page.evaluate(() => { | ||
const name = document.querySelector(".username-line > a")?.textContent; | ||
|
||
const location = document.querySelector( | ||
".user-stats .location b" | ||
)?.textContent; | ||
|
||
const description = document.querySelector( | ||
".seller-profile .description p" | ||
)?.textContent; | ||
|
||
let overallRating = document.querySelector( | ||
".rating-score.rating-num" | ||
)?.textContent; | ||
overallRating = overallRating ? Number(overallRating) : overallRating; | ||
|
||
const languageElements = document.querySelectorAll(".languages ul li"); | ||
const languages = Array.from(languageElements).map((lang) => { | ||
const langAndProficiency = lang?.textContent.split("-"); | ||
const [language, proficiency] = langAndProficiency; | ||
return { lang: language.trim(), proficiency: proficiency.trim() }; | ||
}); | ||
|
||
const skillsElements = document.querySelectorAll(".skills ul li a"); | ||
const skills = Array.from(skillsElements).map((skill) => { | ||
return skill?.textContent.trim(); | ||
}); | ||
|
||
const educationLists = document.querySelectorAll(".education-list ul li"); | ||
const education = Array.from(educationLists).map((educationElement) => { | ||
const [degreeElement, institutionElement] = educationElement.children; | ||
return { | ||
degree: degreeElement?.textContent, | ||
institution: institutionElement?.textContent, | ||
}; | ||
}); | ||
|
||
const notableClientsElements = document.querySelectorAll(".client-name"); | ||
let notableClients = Array.from(notableClientsElements).map( | ||
(clientElement) => { | ||
return clientElement?.textContent; | ||
} | ||
); | ||
notableClients = Array.from(new Set(notableClients)); | ||
|
||
let numOfReviews = document.querySelector( | ||
".reviews-header h2 span span" | ||
)?.textContent; | ||
numOfReviews = Number(numOfReviews.split(",").join("")); | ||
|
||
const starCountersElements = | ||
document.querySelectorAll(".stars-counters tr"); | ||
|
||
function removeParentheses(stringifiedNum) { | ||
const openingParenIndex = stringifiedNum.indexOf("("); | ||
const closingParenIndex = stringifiedNum.indexOf(")"); | ||
return stringifiedNum.slice(openingParenIndex + 1, closingParenIndex); | ||
} | ||
|
||
const starCounters = Array.from(starCountersElements).map( | ||
(starCounterElement) => { | ||
let [typeElement, _, countElement] = starCounterElement.children; | ||
type = typeElement.querySelector("button")?.textContent; | ||
count = removeParentheses(countElement?.textContent); | ||
count = Number(count.split(",").join("")); | ||
|
||
return { type, count }; | ||
} | ||
); | ||
|
||
const ratingBreakdownElements = document.querySelectorAll(".ranking ul li"); | ||
|
||
const ratingBreakdown = Array.from(ratingBreakdownElements).map( | ||
(ratingBreakdownElement) => { | ||
let [textElement, ratingElement] = ratingBreakdownElement?.childNodes; | ||
|
||
return { | ||
type: textElement?.textContent, | ||
rating: Number(ratingElement?.textContent), | ||
}; | ||
} | ||
); | ||
|
||
const skillTestsElements = document.querySelectorAll(".skill-tests ul li"); | ||
const skillTests = Array.from(skillTestsElements).map( | ||
(skillTestElement) => { | ||
const [skillElement, scoreElement, statusElement] = | ||
skillTestElement?.childNodes; | ||
const skill = skillElement?.textContent; | ||
const scorePercentage = eval(scoreElement?.textContent); | ||
const status = statusElement?.textContent; | ||
|
||
return { skill, scorePercentage, status }; | ||
} | ||
); | ||
|
||
return { | ||
name, | ||
location, | ||
education, | ||
description, | ||
overallRating, | ||
languages, | ||
skills, | ||
notableClients, | ||
numOfReviews, | ||
ratingBreakdown, | ||
starCounters, | ||
skillTests, | ||
}; | ||
}); | ||
|
||
await browser.close(); | ||
|
||
return profileData; | ||
}; |
Oops, something went wrong.