Skip to content

Commit e1f9611

Browse files
Day 24 - Web Scraping Part 4
1 parent 35b86ae commit e1f9611

File tree

4 files changed

+1221
-0
lines changed

4 files changed

+1221
-0
lines changed

Day 24/scrape/.Python

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/Library/Frameworks/Python.framework/Versions/3.5/Python

Day 24/scrape/code/scrape.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import requests
2+
from bs4 import BeautifulSoup
3+
4+
# Scrape Yelp restaurant search results for one location, page by page,
# and append "title / address line 1 / address line 2 / phone" records
# to a text file named after the location.
base_url = 'https://www.yelp.com/search?find_desc=Restaurants&find_loc='
loc = 'Newport+Beach,+CA,+United+States'

# Characters trimmed from both ends of every scraped text fragment.
STRIP_CHARS = " \n\t\r"
# Seconds to wait for Yelp before giving up, so a stalled connection
# cannot hang the script forever.
REQUEST_TIMEOUT = 30

# The output path does not depend on the page, so compute it once.
file_path = 'yelp-{loc}-2.txt'.format(loc=loc)

# The "start" query parameter is stepped by 10 from 0 through 200.
for current_page in range(0, 201, 10):
    print(current_page)
    url = base_url + loc + "&start=" + str(current_page)
    yelp_r = requests.get(url, timeout=REQUEST_TIMEOUT)
    if not yelp_r.ok:
        # An error page contains no listings; report it instead of
        # silently parsing it as if it were a result page.
        print("Skipping start={start}: HTTP {status}".format(
            start=current_page, status=yelp_r.status_code))
        continue

    yelp_soup = BeautifulSoup(yelp_r.text, 'html.parser')
    businesses = yelp_soup.find_all('div', {'class': 'biz-listing-large'})

    # Append once per page so results from earlier pages are already on
    # disk if a later page fails. An explicit encoding keeps non-ASCII
    # business names from failing on platforms with a non-UTF-8 default.
    with open(file_path, "a", encoding="utf-8") as textfile:
        for biz in businesses:
            title_tag = biz.find('a', {'class': 'biz-name'})
            if title_tag is None:
                # A listing without a name link cannot be recorded;
                # skip it rather than abort the whole run.
                print("Skipping listing without a business name")
                continue
            title = title_tag.text
            print(title)

            first_line = ""
            second_line = ""
            address_tag = biz.find('address')
            if address_tag is not None:
                for item in address_tag.contents:
                    if isinstance(item, str):
                        # Plain text node (bs4 strings subclass str).
                        # As in the original, the last one seen wins.
                        first_line = item.strip(STRIP_CHARS)
                    else:
                        # Child tag (the original targeted <br>): its
                        # text is accumulated as the second line.
                        # Testing the node type instead of searching
                        # for the substring "br" keeps street names
                        # such as "Cambridge" from being misclassified.
                        second_line += item.get_text().strip(STRIP_CHARS)
                print(first_line)
                print(second_line)
            print('\n')

            phone_tag = biz.find('span', {'class': 'biz-phone'})
            if phone_tag is not None:
                phone = phone_tag.get_text().strip(STRIP_CHARS)
            else:
                # Kept as None (written out as "None") to match the
                # records already produced by earlier runs.
                phone = None
            print(phone)

            page_line = "{title}\n{address_1}\n{address_2}\n{phone}\n\n".format(
                title=title,
                address_1=first_line,
                address_2=second_line,
                phone=phone,
            )
            textfile.write(page_line)
50+
51+
52+
53+
'''
54+
Working with Django
55+
obj = SomeModel()
56+
obj.title = title
57+
obj.line_1 = first_line
58+
obj.save()
59+
'''
60+
61+
62+
63+

Day 24/scrape/requirements.txt

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
beautifulsoup4==4.5.1
2+
requests==2.11.1

0 commit comments

Comments
 (0)