chore: run autopep8
iZooGooD committed Feb 26, 2024
1 parent 1030057 commit bacb4d3
Showing 11 changed files with 135 additions and 67 deletions.
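
Since the whole commit is mechanical reformatting, here is a minimal sketch of the kind of pass that produces diffs like the ones below, via autopep8's Python API. The exact command and flags used for this commit are not recorded in the diff, so the snippet and its options are assumptions for illustration only.

import autopep8

# Illustrative unformatted input; not taken from this repository.
source = "x=1\ny = {'a':2}\n"

# fix_code() returns PEP 8-formatted source. max_line_length mirrors the
# 79-column wrapping visible in the hunks below; the real flags are unknown.
fixed = autopep8.fix_code(source, options={'max_line_length': 79})
print(fixed)  # prints: x = 1
              #         y = {'a': 2}

The CLI equivalent would be along the lines of `autopep8 --in-place --recursive torrscrapper/`, again assuming default settings.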
2 changes: 1 addition & 1 deletion torrscrapper/constants.py
@@ -1,3 +1,3 @@
 class SiteURLs:
     PIRATE_BAY_BASE_URL = "https://pirate-proxy.dad/newapi/q.php?"
-    X1337_BASE_URL = "https://1337x.unblockit.ing"
+    X1337_BASE_URL = "https://1337x.unblockit.ing"
9 changes: 6 additions & 3 deletions torrscrapper/migrations/0001_initial.py
@@ -14,15 +14,18 @@ class Migration(migrations.Migration):
         migrations.CreateModel(
             name='Movies',
             fields=[
-                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('id', models.AutoField(auto_created=True,
+                                        primary_key=True, serialize=False, verbose_name='ID')),
                 ('title', models.TextField()),
                 ('image_url', models.TextField()),
                 ('release_date', models.CharField(max_length=20)),
                 ('synopsis', models.TextField()),
                 ('quality_720p', models.TextField(default='NULL')),
-                ('quality_720p_size', models.CharField(default='NULL', max_length=20)),
+                ('quality_720p_size', models.CharField(
+                    default='NULL', max_length=20)),
                 ('quality_1080p', models.TextField(default='NULL')),
-                ('quality_1080p_size', models.CharField(default='NULL', max_length=20)),
+                ('quality_1080p_size', models.CharField(
+                    default='NULL', max_length=20)),
             ],
         ),
     ]
Binary file modified torrscrapper/migrations/__pycache__/0001_initial.cpython-39.pyc
39 changes: 20 additions & 19 deletions torrscrapper/models.py
@@ -4,27 +4,28 @@


 class Movies(models.Model):
-    title=models.TextField()
-    image_url=models.TextField()
-    release_date=models.CharField(max_length=20)
-    synopsis=models.TextField()
-    quality_720p=models.TextField(default='NULL')
-    quality_720p_size=models.CharField(max_length=20,default='NULL')
-    quality_1080p=models.TextField(default='NULL')
-    quality_1080p_size=models.CharField(max_length=20,default='NULL')
+    title = models.TextField()
+    image_url = models.TextField()
+    release_date = models.CharField(max_length=20)
+    synopsis = models.TextField()
+    quality_720p = models.TextField(default='NULL')
+    quality_720p_size = models.CharField(max_length=20, default='NULL')
+    quality_1080p = models.TextField(default='NULL')
+    quality_1080p_size = models.CharField(max_length=20, default='NULL')


 class Games(models.Model):
-    title=models.TextField()
-    image_url=models.TextField()
-    release_date=models.CharField(max_length=20)
-    description=models.TextField()
-    size=models.CharField(max_length=40)
-    developer=models.CharField(max_length=100)
-    magnet=models.TextField()
+    title = models.TextField()
+    image_url = models.TextField()
+    release_date = models.CharField(max_length=20)
+    description = models.TextField()
+    size = models.CharField(max_length=40)
+    developer = models.CharField(max_length=100)
+    magnet = models.TextField()


 class Contact(models.Model):
-    name=models.CharField(max_length=100)
-    email=models.EmailField()
-    subject=models.CharField(max_length=200)
-    message=models.CharField(max_length=500)
+    name = models.CharField(max_length=100)
+    email = models.EmailField()
+    subject = models.CharField(max_length=200)
+    message = models.CharField(max_length=500)
62 changes: 44 additions & 18 deletions torrscrapper/scraping_utils.py
@@ -16,7 +16,9 @@
 logger.setLevel(logging.INFO)

 # Formatter for the log messages
-formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
+formatter = logging.Formatter(
+    '%(asctime)s - %(levelname)s - %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S')

 # File handler for outputting log messages to a file
 file_handler = logging.FileHandler('scraping_logs.log', encoding='utf-8')
@@ -33,6 +35,7 @@
 ## global variables
 scraper = cloudscraper.create_scraper(browser='chrome')

+
 def scrape_data(keywords, selected_sites):
     site_scrapers = {
         'pirate_bay': get_pirate_bay_torrents,
@@ -53,79 +56,93 @@ def scrape_data(keywords, selected_sites):
         if site_key in site_scrapers:
             scraper_function = site_scrapers[site_key]
             start_time = time.time()
-            logging.info(f"🌐 Site #{index} - Starting scraping for site: {site_key}")
+            logging.info(
+                f"🌐 Site #{index} - Starting scraping for site: {site_key}")

             # Append results from each site to the combined_results list
             combined_results.extend(scraper_function(keywords, index))

             end_time = time.time()
             time_taken = end_time - start_time
-            logging.info(f"Site #{index} - Completed scraping. Time taken: {time_taken:.2f} seconds")
+            logging.info(
+                f"Site #{index} - Completed scraping. Time taken: {time_taken:.2f} seconds")
             logging.info(f"Site #{index} - --------------------------------")

     overall_end_time = time.time()
     overall_time_taken = overall_end_time - overall_start_time
-    logging.info(f"🎉 Ending the scraping session. Total time taken: {overall_time_taken:.2f} seconds")
+    logging.info(
+        f"🎉 Ending the scraping session. Total time taken: {overall_time_taken:.2f} seconds")
     logging.info(f"Overall collected {len(combined_results)} torrents")
     logging.info("---------------------------------------------------")

     return sort_torrents_by_seeds(combined_results)

 # Function to sort torrents by the number of seeds
+
+
 def sort_torrents_by_seeds(torrents):
     return sorted(torrents, key=lambda x: int(x['seeds']), reverse=True)

+
 async def fetch_magnet(session, magnet_url, torrent):
     async with session.get(magnet_url) as response:
         if response.status == 200:
             magnet_content = await response.read()
             magnet_soup = BeautifulSoup(magnet_content, 'html.parser')
-            magnet_link = magnet_soup.find('a', href=lambda href: href and 'magnet:?' in href)
+            magnet_link = magnet_soup.find(
+                'a', href=lambda href: href and 'magnet:?' in href)
             if magnet_link:
                 torrent['magnet'] = magnet_link.get('href')
             else:
                 torrent['magnet'] = ""
         else:
             print(f"Error fetching magnet link for {torrent['title']}")

+
 async def get_1337x_torrents_async(keywords, torrents):
     async with aiohttp.ClientSession() as session:
         tasks = []
         for torrent in torrents:
             magnet_url = torrent['magnet']
             if magnet_url:
-                task = asyncio.create_task(fetch_magnet(session, magnet_url, torrent))
+                task = asyncio.create_task(
+                    fetch_magnet(session, magnet_url, torrent))
                 tasks.append(task)

         # Limiting the number of parallel requests to 5 for now
         chunk_size = 5
         for i in range(0, len(tasks), chunk_size):
             await asyncio.gather(*tasks[i:i + chunk_size])

+
 def get_1337x_torrents(keywords, index):
     torrents = []
     search_url = SiteURLs.X1337_BASE_URL + '/search/' + keywords + '/1/'
     try:
-        response = scraper.get(search_url, timeout=10)  # Add timeout of 10 seconds
+        # Add timeout of 10 seconds
+        response = scraper.get(search_url, timeout=10)
         parsed_url = urlparse(response.url)
         query_params = parse_qs(parsed_url.query)
         url_status_code = int(query_params.get('status', [None])[0])
         if url_status_code:
             url_status_code = int(url_status_code)
         if response.status_code == 200 and url_status_code is not None and url_status_code != 403:
-            logging.info(f"Site #{index} - Initial request to site {search_url} was successful")
+            logging.info(
+                f"Site #{index} - Initial request to site {search_url} was successful")
             soup = BeautifulSoup(response.content, 'html.parser')
             rows = soup.find_all('tr')
             for row in rows:
                 cols = row.find_all('td')
                 if cols:
                     name_col = cols[0].find_all('a', href=True)
-                    if len(name_col) >= 2 and name_col[1]['href'].startswith('/torrent/'):
+                    if len(name_col) >= 2 and name_col[1]['href'].startswith(
+                            '/torrent/'):
                         name = name_col[1].text.strip()
                         href = SiteURLs.X1337_BASE_URL + name_col[1]['href']
                         seeds = cols[1].text
                         leeches = cols[2].text
-                        size_element = cols[4].find(text=True, recursive=False).strip()
+                        size_element = cols[4].find(
+                            text=True, recursive=False).strip()
                         size = size_element if size_element else None
                         torrent = {
                             'title': name,
@@ -136,19 +153,23 @@ def get_1337x_torrents(keywords, index):
                         }
                         torrents.append(torrent)

-            asyncio.run(get_1337x_torrents_async(keywords, torrents))  # Call the asynchronous function
+            # Call the asynchronous function
+            asyncio.run(get_1337x_torrents_async(keywords, torrents))
             logging.info(f"Site #{index} - Collected {len(torrents)} torrents")
             return torrents
         else:
             if url_status_code:
-                logging.error(f"Failed to scrape 1337x. Status code: {url_status_code}")
+                logging.error(
+                    f"Failed to scrape 1337x. Status code: {url_status_code}")
             else:
-                logging.error(f"Failed to scrape 1337x. Status code: {response.status_code}")
+                logging.error(
+                    f"Failed to scrape 1337x. Status code: {response.status_code}")
             return []
     except cloudscraper.requests.exceptions.ConnectionError as e:
         logging.error(f"Connection error occurred: {str(e)}")
         return []

+
 def create_magnet_pirate_bay(info_hash, name):
     """
     Generates a magnet link for a torrent from The Pirate Bay.
@@ -178,25 +199,29 @@ def create_magnet_pirate_bay(info_hash, name):
         'udp://open.stealth.si:80/announce'
     ]

-    tracker_str = ''.join(['&tr=' + urllib.parse.quote(tracker) for tracker in trackers])
+    tracker_str = ''.join(['&tr=' + urllib.parse.quote(tracker)
+                           for tracker in trackers])
     magnet_link = f'magnet:?xt=urn:btih:{info_hash}&dn={urllib.parse.quote(name)}{tracker_str}'

     return magnet_link

+
 def get_pirate_bay_torrents(keywords, index):
     torrents = []
     search_url = SiteURLs.PIRATE_BAY_BASE_URL + 'q=' + keywords
     response = scraper.get(search_url)

     if response.status_code == 200:
-        logging.info(f"Site #{index} - Initial request to site {search_url} was successful")
+        logging.info(
+            f"Site #{index} - Initial request to site {search_url} was successful")
         json_data = response.json()
         for item in json_data:
             name = item.get("name")
             info_hash = item.get("info_hash")
             seeders = item.get("seeders")
             leechers = item.get("leechers")
-            size = humanize.naturalsize(int(item.get("size")), binary=True)  # Converts bytes to human-readable format
+            # Converts bytes to human-readable format
+            size = humanize.naturalsize(int(item.get("size")), binary=True)
             torrent = {
                 'title': name,
                 'seeds': seeders,
@@ -207,6 +232,7 @@
             torrents.append(torrent)
         logging.info(f"Site #{index} - Collected {len(torrents)} torrents")
     else:
-        logging.error(f"Site #{index} - Failed to scrape Pirate Bay. Status code: {response.status_code}")
+        logging.error(
+            f"Site #{index} - Failed to scrape Pirate Bay. Status code: {response.status_code}")

-    return torrents
+    return torrents
33 changes: 21 additions & 12 deletions torrscrapper/urls.py
@@ -1,15 +1,24 @@
-from django.urls import path,include
+from django.urls import path, include
 from . import views

-urlpatterns=[
-    path("",views.index,name="index"),
-    path("searchTorrents",views.searchTorrents,name="searchTorrents"),
-    path("dmca",views.dmca,name="dmca"),
-    path("privacy_policy",views.privacy_policy,name="privacy_policy"),
-    path("contact_us",views.contact_us,name="contact_us"),
-    path("contact_us/submitform",views.contact_form_submit,name="contact_form_submit"),
-    path("categories/movies",views.movies,name="movies"),
-    path("categories/movies/<int:movie_id>",views.movies_single,name="movies_single"),
-    path("categories/games",views.games,name="games"),
-    path("categories/games/<int:game_id>",views.games_single,name="games_single"),
+urlpatterns = [
+    path("", views.index, name="index"),
+    path("searchTorrents", views.searchTorrents, name="searchTorrents"),
+    path("dmca", views.dmca, name="dmca"),
+    path("privacy_policy", views.privacy_policy, name="privacy_policy"),
+    path("contact_us", views.contact_us, name="contact_us"),
+    path(
+        "contact_us/submitform",
+        views.contact_form_submit,
+        name="contact_form_submit"),
+    path("categories/movies", views.movies, name="movies"),
+    path(
+        "categories/movies/<int:movie_id>",
+        views.movies_single,
+        name="movies_single"),
+    path("categories/games", views.games, name="games"),
+    path(
+        "categories/games/<int:game_id>",
+        views.games_single,
+        name="games_single"),
 ]