Commit b9b926b

chore: run linter
iZooGooD committed Feb 26, 2024
1 parent bacb4d3 commit b9b926b
Showing 15 changed files with 188 additions and 175 deletions.
4 changes: 2 additions & 2 deletions manage.py
@@ -6,7 +6,7 @@
 
 def main():
     """Run administrative tasks."""
-    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'webapps.settings')
+    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "webapps.settings")
     try:
         from django.core.management import execute_from_command_line
     except ImportError as exc:
@@ -18,5 +18,5 @@ def main():
     execute_from_command_line(sys.argv)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
2 changes: 1 addition & 1 deletion torrscrapper/apps.py
@@ -2,4 +2,4 @@
 
 
 class TorrscrapperConfig(AppConfig):
-    name = 'torrscrapper'
+    name = "torrscrapper"
34 changes: 19 additions & 15 deletions torrscrapper/migrations/0001_initial.py
@@ -7,25 +7,29 @@ class Migration(migrations.Migration):
 
     initial = True
 
-    dependencies = [
-    ]
+    dependencies = []
 
     operations = [
         migrations.CreateModel(
-            name='Movies',
+            name="Movies",
             fields=[
-                ('id', models.AutoField(auto_created=True,
-                 primary_key=True, serialize=False, verbose_name='ID')),
-                ('title', models.TextField()),
-                ('image_url', models.TextField()),
-                ('release_date', models.CharField(max_length=20)),
-                ('synopsis', models.TextField()),
-                ('quality_720p', models.TextField(default='NULL')),
-                ('quality_720p_size', models.CharField(
-                    default='NULL', max_length=20)),
-                ('quality_1080p', models.TextField(default='NULL')),
-                ('quality_1080p_size', models.CharField(
-                    default='NULL', max_length=20)),
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("title", models.TextField()),
+                ("image_url", models.TextField()),
+                ("release_date", models.CharField(max_length=20)),
+                ("synopsis", models.TextField()),
+                ("quality_720p", models.TextField(default="NULL")),
+                ("quality_720p_size", models.CharField(default="NULL", max_length=20)),
+                ("quality_1080p", models.TextField(default="NULL")),
+                ("quality_1080p_size", models.CharField(default="NULL", max_length=20)),
             ],
         ),
     ]
Binary file modified torrscrapper/migrations/__pycache__/0001_initial.cpython-39.pyc
8 changes: 4 additions & 4 deletions torrscrapper/models.py
@@ -8,10 +8,10 @@ class Movies(models.Model):
     image_url = models.TextField()
     release_date = models.CharField(max_length=20)
     synopsis = models.TextField()
-    quality_720p = models.TextField(default='NULL')
-    quality_720p_size = models.CharField(max_length=20, default='NULL')
-    quality_1080p = models.TextField(default='NULL')
-    quality_1080p_size = models.CharField(max_length=20, default='NULL')
+    quality_720p = models.TextField(default="NULL")
+    quality_720p_size = models.CharField(max_length=20, default="NULL")
+    quality_1080p = models.TextField(default="NULL")
+    quality_1080p_size = models.CharField(max_length=20, default="NULL")
 
 
 class Games(models.Model):
134 changes: 73 additions & 61 deletions torrscrapper/scraping_utils.py
@@ -17,11 +17,11 @@
 
 # Formatter for the log messages
 formatter = logging.Formatter(
-    '%(asctime)s - %(levelname)s - %(message)s',
-    datefmt='%Y-%m-%d %H:%M:%S')
+    "%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
+)
 
 # File handler for outputting log messages to a file
-file_handler = logging.FileHandler('scraping_logs.log', encoding='utf-8')
+file_handler = logging.FileHandler("scraping_logs.log", encoding="utf-8")
 file_handler.setFormatter(formatter)
 
 # Stream handler for outputting log messages to the console
@@ -33,13 +33,13 @@
 logger.addHandler(stream_handler)
 
 ## global variables
-scraper = cloudscraper.create_scraper(browser='chrome')
+scraper = cloudscraper.create_scraper(browser="chrome")
 
 
 def scrape_data(keywords, selected_sites):
     site_scrapers = {
-        'pirate_bay': get_pirate_bay_torrents,
-        '1337x': get_1337x_torrents,
+        "pirate_bay": get_pirate_bay_torrents,
+        "1337x": get_1337x_torrents,
     }
     combined_results = []
 
@@ -56,45 +56,48 @@ def scrape_data(keywords, selected_sites):
         if site_key in site_scrapers:
             scraper_function = site_scrapers[site_key]
             start_time = time.time()
-            logging.info(
-                f"🌐 Site #{index} - Starting scraping for site: {site_key}")
+            logging.info(f"🌐 Site #{index} - Starting scraping for site: {site_key}")
 
             # Append results from each site to the combined_results list
             combined_results.extend(scraper_function(keywords, index))
 
             end_time = time.time()
             time_taken = end_time - start_time
             logging.info(
-                f"Site #{index} - Completed scraping. Time taken: {time_taken:.2f} seconds")
+                f"Site #{index} - Completed scraping. Time taken: {time_taken:.2f} seconds"
+            )
             logging.info(f"Site #{index} - --------------------------------")
 
     overall_end_time = time.time()
     overall_time_taken = overall_end_time - overall_start_time
     logging.info(
-        f"🎉 Ending the scraping session. Total time taken: {overall_time_taken:.2f} seconds")
+        f"🎉 Ending the scraping session. Total time taken: {overall_time_taken:.2f} seconds"
+    )
     logging.info(f"Overall collected {len(combined_results)} torrents")
     logging.info("---------------------------------------------------")
 
     return sort_torrents_by_seeds(combined_results)
 
 
 # Function to sort torrents by the number of seeds
 
 
 def sort_torrents_by_seeds(torrents):
-    return sorted(torrents, key=lambda x: int(x['seeds']), reverse=True)
+    return sorted(torrents, key=lambda x: int(x["seeds"]), reverse=True)
 
 
 async def fetch_magnet(session, magnet_url, torrent):
     async with session.get(magnet_url) as response:
         if response.status == 200:
             magnet_content = await response.read()
-            magnet_soup = BeautifulSoup(magnet_content, 'html.parser')
+            magnet_soup = BeautifulSoup(magnet_content, "html.parser")
             magnet_link = magnet_soup.find(
-                'a', href=lambda href: href and 'magnet:?' in href)
+                "a", href=lambda href: href and "magnet:?" in href
+            )
             if magnet_link:
-                torrent['magnet'] = magnet_link.get('href')
+                torrent["magnet"] = magnet_link.get("href")
             else:
-                torrent['magnet'] = ""
+                torrent["magnet"] = ""
         else:
             print(f"Error fetching magnet link for {torrent['title']}")
 
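Note on the pattern above: scrape_data selects each scraper through the site_scrapers dict instead of an if/else chain. A minimal standalone sketch of that table-driven dispatch, with hypothetical stub scrapers standing in for the real network calls:

def get_pirate_bay_torrents(keywords, index):
    # Hypothetical stub; the real function queries the Pirate Bay API.
    return [{"title": f"pb:{keywords}", "seeds": "10"}]


def get_1337x_torrents(keywords, index):
    # Hypothetical stub; the real function scrapes 1337x search pages.
    return [{"title": f"x1337:{keywords}", "seeds": "3"}]


site_scrapers = {
    "pirate_bay": get_pirate_bay_torrents,
    "1337x": get_1337x_torrents,
}

combined_results = []
for index, site_key in enumerate(["pirate_bay", "1337x"], start=1):
    if site_key in site_scrapers:
        combined_results.extend(site_scrapers[site_key]("ubuntu", index))

# Same ordering rule as sort_torrents_by_seeds above.
combined_results.sort(key=lambda t: int(t["seeds"]), reverse=True)
print([t["title"] for t in combined_results])  # ['pb:ubuntu', 'x1337:ubuntu']
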
@@ -103,53 +106,57 @@ async def get_1337x_torrents_async(keywords, torrents):
     async with aiohttp.ClientSession() as session:
         tasks = []
         for torrent in torrents:
-            magnet_url = torrent['magnet']
+            magnet_url = torrent["magnet"]
             if magnet_url:
-                task = asyncio.create_task(
-                    fetch_magnet(session, magnet_url, torrent))
+                task = asyncio.create_task(fetch_magnet(session, magnet_url, torrent))
                 tasks.append(task)
 
         # Limiting the number of parallel requests to 5 for now
         chunk_size = 5
         for i in range(0, len(tasks), chunk_size):
-            await asyncio.gather(*tasks[i:i + chunk_size])
+            await asyncio.gather(*tasks[i : i + chunk_size])
 
 
 def get_1337x_torrents(keywords, index):
     torrents = []
-    search_url = SiteURLs.X1337_BASE_URL + '/search/' + keywords + '/1/'
+    search_url = SiteURLs.X1337_BASE_URL + "/search/" + keywords + "/1/"
     try:
         # Add timeout of 10 seconds
         response = scraper.get(search_url, timeout=10)
         parsed_url = urlparse(response.url)
         query_params = parse_qs(parsed_url.query)
-        url_status_code = int(query_params.get('status', [None])[0])
+        url_status_code = int(query_params.get("status", [None])[0])
         if url_status_code:
             url_status_code = int(url_status_code)
-        if response.status_code == 200 and url_status_code is not None and url_status_code != 403:
+        if (
+            response.status_code == 200
+            and url_status_code is not None
+            and url_status_code != 403
+        ):
             logging.info(
-                f"Site #{index} - Initial request to site {search_url} was successful")
-            soup = BeautifulSoup(response.content, 'html.parser')
-            rows = soup.find_all('tr')
+                f"Site #{index} - Initial request to site {search_url} was successful"
+            )
+            soup = BeautifulSoup(response.content, "html.parser")
+            rows = soup.find_all("tr")
             for row in rows:
-                cols = row.find_all('td')
+                cols = row.find_all("td")
                 if cols:
-                    name_col = cols[0].find_all('a', href=True)
-                    if len(name_col) >= 2 and name_col[1]['href'].startswith(
-                            '/torrent/'):
+                    name_col = cols[0].find_all("a", href=True)
+                    if len(name_col) >= 2 and name_col[1]["href"].startswith(
+                        "/torrent/"
+                    ):
                         name = name_col[1].text.strip()
-                        href = SiteURLs.X1337_BASE_URL + name_col[1]['href']
+                        href = SiteURLs.X1337_BASE_URL + name_col[1]["href"]
                         seeds = cols[1].text
                         leeches = cols[2].text
-                        size_element = cols[4].find(
-                            text=True, recursive=False).strip()
+                        size_element = cols[4].find(text=True, recursive=False).strip()
                         size = size_element if size_element else None
                         torrent = {
-                            'title': name,
-                            'magnet': href,
-                            'seeds': seeds,
-                            'peers': leeches,
-                            'size': size
+                            "title": name,
+                            "magnet": href,
+                            "seeds": seeds,
+                            "peers": leeches,
+                            "size": size,
                         }
                         torrents.append(torrent)
 
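Note on the chunking above: the loop awaits asyncio.gather in slices of five to bound parallel magnet fetches. A minimal self-contained sketch of the same idea, assuming a hypothetical fetch stub and gathering plain coroutines (which only start when gathered) so at most chunk_size run at once:

import asyncio


async def fetch(i):
    # Hypothetical stand-in for fetch_magnet(session, magnet_url, torrent).
    await asyncio.sleep(0.1)
    return i


async def main():
    coros = [fetch(i) for i in range(12)]
    chunk_size = 5  # same cap as the diff above
    results = []
    # Each gather runs one chunk to completion before the next chunk starts.
    for i in range(0, len(coros), chunk_size):
        results.extend(await asyncio.gather(*coros[i : i + chunk_size]))
    print(results)  # [0, 1, 2, ..., 11]


asyncio.run(main())
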
@@ -159,11 +166,11 @@ def get_1337x_torrents(keywords, index):
             return torrents
         else:
             if url_status_code:
-                logging.error(
-                    f"Failed to scrape 1337x. Status code: {url_status_code}")
+                logging.error(f"Failed to scrape 1337x. Status code: {url_status_code}")
             else:
                 logging.error(
-                    f"Failed to scrape 1337x. Status code: {response.status_code}")
+                    f"Failed to scrape 1337x. Status code: {response.status_code}"
+                )
             return []
     except cloudscraper.requests.exceptions.ConnectionError as e:
         logging.error(f"Connection error occurred: {str(e)}")
@@ -187,33 +194,37 @@ def create_magnet_pirate_bay(info_hash, name):
     """
 
     trackers = [
-        'udp://tracker.coppersurfer.tk:6969/announce',
-        'udp://tracker.openbittorrent.com:6969/announce',
-        'udp://9.rarbg.to:2710/announce',
-        'udp://9.rarbg.me:2780/announce',
-        'udp://9.rarbg.to:2730/announce',
-        'udp://tracker.opentrackr.org:1337',
-        'http://p4p.arenabg.com:1337/announce',
-        'udp://tracker.torrent.eu.org:451/announce',
-        'udp://tracker.tiny-vps.com:6969/announce',
-        'udp://open.stealth.si:80/announce'
+        "udp://tracker.coppersurfer.tk:6969/announce",
+        "udp://tracker.openbittorrent.com:6969/announce",
+        "udp://9.rarbg.to:2710/announce",
+        "udp://9.rarbg.me:2780/announce",
+        "udp://9.rarbg.to:2730/announce",
+        "udp://tracker.opentrackr.org:1337",
+        "http://p4p.arenabg.com:1337/announce",
+        "udp://tracker.torrent.eu.org:451/announce",
+        "udp://tracker.tiny-vps.com:6969/announce",
+        "udp://open.stealth.si:80/announce",
     ]
 
-    tracker_str = ''.join(['&tr=' + urllib.parse.quote(tracker)
-                           for tracker in trackers])
-    magnet_link = f'magnet:?xt=urn:btih:{info_hash}&dn={urllib.parse.quote(name)}{tracker_str}'
+    tracker_str = "".join(
+        ["&tr=" + urllib.parse.quote(tracker) for tracker in trackers]
+    )
+    magnet_link = (
+        f"magnet:?xt=urn:btih:{info_hash}&dn={urllib.parse.quote(name)}{tracker_str}"
+    )
 
     return magnet_link
 
 
 def get_pirate_bay_torrents(keywords, index):
     torrents = []
-    search_url = SiteURLs.PIRATE_BAY_BASE_URL + 'q=' + keywords
+    search_url = SiteURLs.PIRATE_BAY_BASE_URL + "q=" + keywords
     response = scraper.get(search_url)
 
     if response.status_code == 200:
         logging.info(
-            f"Site #{index} - Initial request to site {search_url} was successful")
+            f"Site #{index} - Initial request to site {search_url} was successful"
+        )
         json_data = response.json()
         for item in json_data:
             name = item.get("name")
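
Note on the magnet URI built above: it follows the standard layout magnet:?xt=urn:btih:<info-hash>&dn=<encoded name>, followed by one &tr=<encoded tracker> per tracker. A quick illustrative check with a hypothetical info hash and a single tracker:

import urllib.parse

info_hash = "0123456789abcdef0123456789abcdef01234567"  # hypothetical 40-hex-digit hash
name = "Example Torrent"
tracker = "udp://tracker.opentrackr.org:1337"

magnet_link = (
    f"magnet:?xt=urn:btih:{info_hash}"
    f"&dn={urllib.parse.quote(name)}"
    f"&tr={urllib.parse.quote(tracker)}"
)
print(magnet_link)
# ...&dn=Example%20Torrent&tr=udp%3A//tracker.opentrackr.org%3A1337
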
@@ -223,16 +234,17 @@ def get_pirate_bay_torrents(keywords, index):
             # Converts bytes to human-readable format
             size = humanize.naturalsize(int(item.get("size")), binary=True)
             torrent = {
-                'title': name,
-                'seeds': seeders,
-                'peers': leechers,
-                'magnet': create_magnet_pirate_bay(info_hash, name),
-                'size': size
+                "title": name,
+                "seeds": seeders,
+                "peers": leechers,
+                "magnet": create_magnet_pirate_bay(info_hash, name),
+                "size": size,
             }
             torrents.append(torrent)
         logging.info(f"Site #{index} - Collected {len(torrents)} torrents")
     else:
         logging.error(
-            f"Site #{index} - Failed to scrape Pirate Bay. Status code: {response.status_code}")
+            f"Site #{index} - Failed to scrape Pirate Bay. Status code: {response.status_code}"
+        )
 
     return torrents
15 changes: 4 additions & 11 deletions torrscrapper/urls.py
@@ -8,17 +8,10 @@
     path("privacy_policy", views.privacy_policy, name="privacy_policy"),
     path("contact_us", views.contact_us, name="contact_us"),
     path(
-        "contact_us/submitform",
-        views.contact_form_submit,
-        name="contact_form_submit"),
+        "contact_us/submitform", views.contact_form_submit, name="contact_form_submit"
+    ),
     path("categories/movies", views.movies, name="movies"),
-    path(
-        "categories/movies/<int:movie_id>",
-        views.movies_single,
-        name="movies_single"),
+    path("categories/movies/<int:movie_id>", views.movies_single, name="movies_single"),
     path("categories/games", views.games, name="games"),
-    path(
-        "categories/games/<int:game_id>",
-        views.games_single,
-        name="games_single"),
+    path("categories/games/<int:game_id>", views.games_single, name="games_single"),
 ]