File tree 4 files changed +1221
-0
lines changed
4 files changed +1221
-0
lines changed Original file line number Diff line number Diff line change
1
+ /Library/Frameworks/Python.framework/Versions/3.5/Python
Original file line number Diff line number Diff line change
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+
4
# Scrape Yelp restaurant search results for one location and append each
# listing's name, address lines and phone number to a text file.
base_url = 'https://www.yelp.com/search?find_desc=Restaurants&find_loc='
loc = 'Newport+Beach,+CA,+United+States'

# Characters trimmed from both ends of every scraped text fragment.
STRIP_CHARS = " \n\t\r"
# Result pages are requested with "start" offsets 0, 10, ..., 200.
PAGE_STEP = 10
LAST_OFFSET = 200
# Seconds to wait on a request; without a timeout requests.get() can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 30


def split_address(contents):
    """Split the children of an ``<address>`` element into two lines.

    Args:
        contents: iterable of the element's children (text nodes and tags).

    Returns:
        A ``(first_line, second_line)`` tuple of stripped strings. Text
        children set the first line (the last one seen wins); the text of
        every tag child is appended to the second line. Both are ``""``
        when ``contents`` is empty.
    """
    first_line = ""
    second_line = ""
    for item in contents:
        # Text nodes are str subclasses, tags are not. Testing the type
        # (rather than looking for the substring "br" in the text) keeps
        # addresses such as "Cambridge St" from being mistaken for a tag.
        if isinstance(item, str):
            first_line = item.strip(STRIP_CHARS)
        else:
            second_line += item.getText().strip(STRIP_CHARS)
    return first_line, second_line


def format_record(title, address_1, address_2, phone):
    """Return the text written to the output file for one listing.

    The record is four lines (title, two address lines, phone) followed by
    a blank line. A ``phone`` of ``None`` is rendered as the text ``None``.
    """
    return "{title}\n{address_1}\n{address_2}\n{phone}\n\n".format(
        title=title,
        address_1=address_1,
        address_2=address_2,
        phone=phone,
    )


def write_listing(biz, textfile):
    """Print one search-result listing and append its record to ``textfile``.

    Args:
        biz: the listing's ``div`` element.
        textfile: writable text file object receiving the record.

    A listing without a ``biz-name`` link is skipped instead of aborting
    the whole run. A missing address leaves both address lines empty and a
    missing phone number is recorded as ``None``.
    """
    name_link = biz.find('a', {'class': 'biz-name'})
    if name_link is None:
        return
    title = name_link.text
    print(title)

    first_line = ""
    second_line = ""
    address = biz.find('address')
    if address is not None:
        first_line, second_line = split_address(address.contents)
        print(first_line)
        print(second_line)
    print('\n')

    phone_tag = biz.find('span', {'class': 'biz-phone'})
    phone = None
    if phone_tag is not None:
        phone = phone_tag.getText().strip(STRIP_CHARS)
    print(phone)

    textfile.write(format_record(title, first_line, second_line, phone))


def main():
    """Fetch every result page and append all listings to the output file."""
    file_path = 'yelp-{loc}-2.txt'.format(loc=loc)
    for current_page in range(0, LAST_OFFSET + 1, PAGE_STEP):
        print(current_page)
        url = base_url + loc + "&start=" + str(current_page)
        yelp_r = requests.get(url, timeout=REQUEST_TIMEOUT)
        yelp_soup = BeautifulSoup(yelp_r.text, 'html.parser')
        businesses = yelp_soup.findAll('div', {'class': 'biz-listing-large'})
        # The file is reopened in append mode for each page, so a page's
        # records are closed out to disk before the next request starts.
        # UTF-8 is set explicitly so non-ASCII business names do not depend
        # on the platform's default encoding.
        with open(file_path, "a", encoding="utf-8") as textfile:
            for biz in businesses:
                write_listing(biz, textfile)


if __name__ == "__main__":
    main()
50
+
51
+
52
+
53
+ '''
54
+ Working with Django
55
+ obj = SomeModel()
56
+ obj.title = title
57
+ obj.line_1 = first_line
58
+ obj.save()
59
+ '''
60
+
61
+
62
+
63
+
Original file line number Diff line number Diff line change
1
+ beautifulsoup4 == 4.5.1
2
+ requests == 2.11.1
You can’t perform that action at this time.
0 commit comments