-
Notifications
You must be signed in to change notification settings - Fork 0
/
screpin.py
114 lines (86 loc) · 2.53 KB
/
screpin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from collections import Counter
from operator import itemgetter

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
# kun_uz = BeautifulSoup(requests.get('https://kun.uz/').text, 'html.parser')
# v = kun_uz.find_all("dev", class_='big-news__content')[0].select_one('div.big-news__description')
# print(v)
def kun_uz(url, timeout=30):
    """Download *url* with an HTTP GET request and return the response.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded to ``requests.get``; without it a stalled server
            would block the script indefinitely.

    Returns:
        The response object produced by ``requests.get``. The HTTP status
        code is not checked here.
    """
    return requests.get(url, timeout=timeout)
def qator(page):
    """Parse the HTML in ``page.text`` and collect its content containers.

    Args:
        page: Object exposing the page markup as a ``text`` attribute.

    Returns:
        The result of ``find_all`` for every ``<div>`` whose class is
        ``single-content``.
    """
    parsed = BeautifulSoup(page.text, "html.parser")
    return parsed.find_all("div", class_="single-content")
def aylantirish(html_repos):
    """Turn each element's text into a sorted list of word tokens.

    For every element the text is split on whitespace and the tokens are
    sorted. The tokens are then joined with commas and split on commas
    again, which additionally breaks any token at the commas it contains
    (a token such as ``"a,"`` becomes ``"a"`` and ``""``).

    Args:
        html_repos: Iterable of objects exposing a ``text`` attribute.

    Returns:
        A list with one list of strings per input element. An element with
        no text produces ``['']``.
    """
    return [
        ",".join(sorted(element.text.split())).split(",")
        for element in html_repos
    ]
# A token that starts with any of these prefixes is discarded.
_SKIP_PREFIXES = ("(", "-", "«", "Анн", "У", "НАТОга", "НАТОнинг", "икки")

# A token that is exactly equal to one of these is discarded
# (the empty string is included on purpose).
_STOP_WORDS = frozenset({"/", "ва", "билан", "бу", "учун", "бир", "ҳам", ""})


def toza_data(repositories_data):
    """Filter unwanted tokens out of the first token list.

    Only ``repositories_data[0]`` is examined; any further entries are
    ignored. A token is dropped when it starts with one of
    ``_SKIP_PREFIXES``, consists solely of digits, or is one of
    ``_STOP_WORDS``. The order of the remaining tokens is kept.

    Args:
        repositories_data: List of token lists (lists of strings).

    Returns:
        A new list with the surviving tokens, or an empty list when
        ``repositories_data`` itself is empty.
    """
    # Nothing was scraped: return no tokens instead of failing on [0].
    if not repositories_data:
        return []
    return [
        token
        for token in repositories_data[0]
        if not (
            token.startswith(_SKIP_PREFIXES)
            or token.isdigit()
            or token in _STOP_WORDS
        )
    ]
def data_sanash(data, limit=16):
    """Count how often each item occurs and return the most frequent ones.

    Args:
        data: Iterable of hashable items (the cleaned word tokens).
        limit: Maximum number of ``(item, count)`` pairs to return.
            Defaults to 16; ``None`` returns every distinct item.

    Returns:
        A list of ``(item, count)`` tuples ordered from the highest count
        to the lowest. Items with equal counts keep the order in which
        they first appeared in ``data``.
    """
    # Counter tallies in a single pass instead of calling data.count()
    # once per element.
    return Counter(data).most_common(limit)
def vizual(tayyor):
    """Draw a bar chart of word counts and display it.

    Args:
        tayyor: Sequence of ``(word, count)`` pairs. Each pair becomes one
            row of the plotted table.

    Returns:
        The value returned by ``plt.show()``.
    """
    # Build the table directly with named columns; the input is a list of
    # pairs, not a dict, so the plain constructor is the right entry point.
    viz = pd.DataFrame(tayyor, columns=["words", "count"])
    sns.barplot(x="count", y="words", data=viz)
    return plt.show()
# Article analysed when no other address is supplied.
_DEFAULT_URL = (
    "https://kun.uz/news/2022/05/17/"
    "nato-kengaymoqda-kremlning-reaksiyasi-hayratlanarli-darajada-bosiq-nima-uchun"
)


def _main(url=_DEFAULT_URL):
    """Run the whole pipeline: fetch, parse, tokenise, clean, count, plot.

    Args:
        url: Address of the article to analyse. Defaults to
            ``_DEFAULT_URL``.
    """
    page = kun_uz(url)
    html_repos = qator(page)
    repositories_data = aylantirish(html_repos)
    data = toza_data(repositories_data)
    tayyor = data_sanash(data)
    # vizual() displays the chart itself; its return value carries no data,
    # so there is nothing useful to print afterwards.
    vizual(tayyor)
# Run the pipeline only when executed as a script, so importing this module
# does not trigger a network request and open a plot window.
if __name__ == "__main__":
    _main()