Skip to content

Commit 3c313d2

Browse files
author
Daniel Tharp
committed
Bring current, turning off logs.
1 parent 5b0c5c4 commit 3c313d2

11 files changed

+94 -81 lines changed

2stacks-get-forum-categories.py

+6 -6
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,9 @@
99
import os
1010

1111
#temp
12-
import logging
13-
logger = logging.getLogger()
14-
logger.setLevel(logging.INFO)
12+
# import logging
13+
# logger = logging.getLogger()
14+
# logger.setLevel(logging.INFO)
1515

1616
def lambda_handler(event, context):
1717
for record in event['Records']:
@@ -57,6 +57,6 @@ def lambda_handler(event, context):
5757
# Send everything to SCUTTLE
5858
headers = {"Authorization": "Bearer " + config.scuttle_token, "Content-Type": "application/json"}
5959
r = requests.put(callback_url + '/2stacks/forum/metadata', data=output, headers=headers)
60-
if r.status_code == 500:
61-
logger.info('500:')
62-
logger.info(r.text)
60+
# if r.status_code == 500:
61+
# logger.info('500:')
62+
# logger.info(r.text)

2stacks-get-forum-threads.py

+6 -6
Original file line number | Diff line number | Diff line change
@@ -4,16 +4,16 @@
44
import config
55
import helpers
66

7-
import logging
8-
logger = logging.getLogger()
9-
logger.setLevel(logging.INFO)
7+
# import logging
8+
# logger = logging.getLogger()
9+
# logger.setLevel(logging.INFO)
1010

1111
def lambda_handler(event, context):
1212
for record in event['Records']:
1313
callback_url = record['messageAttributes']['callback_url']['stringValue']
1414
forum_id = record['messageAttributes']['forum_id']['stringValue']
1515
wikidot_site = record['messageAttributes']['wikidot_site']['stringValue']
16-
logger.info('Fetching forum ' + forum_id + ' for ' + wikidot_site)
16+
# logger.info('Fetching forum ' + forum_id + ' for ' + wikidot_site)
1717

1818
page_no = 1
1919

@@ -35,7 +35,7 @@ def lambda_handler(event, context):
3535
pages = re.findall('(?:<span class="pager-no">페이지: 1 / )(\d*)', haystack) # This technically returns 2 indistinguishable objects because Wikidot.
3636
else: # SCP-EN and English-speaking wikis (Some -INT sites didn't have this translated, like -RU, -UA, -CN...)
3737
pages = re.findall('(?:<span class="pager-no">page 1 of )(\d*)', haystack) # This technically returns 2 indistinguishable objects because Wikidot.
38-
logger.info('There are ' + str(pages) + ' pages of threads to look through.')
38+
# logger.info('There are ' + str(pages) + ' pages of threads to look through.')
3939
except: # This only really fails on a deleted page.
4040
# TODO Make scuttle handle this.
4141
return False
@@ -62,7 +62,7 @@ def lambda_handler(event, context):
6262
return False
6363
payload = {"wd_forum_id": forum_id, "threads": threads}
6464
output = json.dumps(payload)
65-
logger.info('Sending page ' + str(page_no) + ' to SCUTTLE')
65+
# logger.info('Sending page ' + str(page_no) + ' to SCUTTLE')
6666
# Send everything to SCUTTLE
6767
headers = {"Authorization": "Bearer " + config.scuttle_token, "Content-Type": "application/json"}
6868
r = requests.put(callback_url + '/2stacks/forum/threads', data=output, headers=headers)

2stacks-get-page-files.py

+7 -7
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,9 @@
99
from time import sleep
1010
from xmlrpc.client import ServerProxy
1111

12-
import logging
13-
logger = logging.getLogger()
14-
logger.setLevel(logging.INFO)
12+
# import logging
13+
# logger = logging.getLogger()
14+
# logger.setLevel(logging.INFO)
1515

1616
def lambda_handler(event, context):
1717
for record in event['Records']:
@@ -60,7 +60,7 @@ def lambda_handler(event, context):
6060
else:
6161
pass
6262
for idx, file in enumerate(files):
63-
logger.info(idx)
63+
# logger.info(idx)
6464
sanitizedfilename = urllib.parse.quote(file, safe='')
6565
info = s.files.get_meta({"site": wikidot_site, "page": slug, "files": [file]})
6666
# logger.info(info[file]['download_url'])
@@ -85,9 +85,9 @@ def lambda_handler(event, context):
8585
headers = {"Authorization": "Bearer " + config.scuttle_token, "Content-Type": "application/json"}
8686
j = json.dumps(payload)
8787
r = requests.put(callback_url + '/2stacks/page/files', data=j, headers=headers)
88-
if r.status_code == 500:
89-
logger.info('500:')
90-
logger.info(r.text)
88+
# if r.status_code == 500:
89+
# logger.info('500:')
90+
# logger.info(r.text)
9191
# Be Nice
9292
sleep(0.25)
9393

2stacks-get-page-metadata.py

+28 -14
Original file line number | Diff line number | Diff line change
@@ -5,47 +5,61 @@
55
import config
66
import helpers
77
import requests
8+
import xmlrpc.client
89
from time import sleep
910
from xmlrpc.client import ServerProxy
1011

1112
#temp
12-
import logging
13-
logger = logging.getLogger()
14-
logger.setLevel(logging.INFO)
13+
# import logging
14+
# logger = logging.getLogger()
15+
# logger.setLevel(logging.INFO)
1516

1617
def lambda_handler(event, context):
1718
for record in event['Records']:
1819
callback_url = record['messageAttributes']['callback_url']['stringValue']
1920
slug = record['messageAttributes']['page_slug']['stringValue']
2021
wikidot_site = record['messageAttributes']['wikidot_site']['stringValue']
21-
logger.info('get-page-metadata for ' + slug)
22+
# logger.info('get-page-metadata for ' + slug)
2223
# Hit Wikidot's API
2324
s = ServerProxy('https://' + config.wikidot_username + ':' + config.wikidot_api_key + '@www.wikidot.com/xml-rpc-api.php')
2425
try:
2526
wp = s.pages.get_one({'site': wikidot_site, 'page': slug})
26-
except:
27-
# Page has been deleted
28-
return False # Clean this up later.
27+
except xmlrpc.client.Fault as err:
28+
if err.faultCode == 403:
29+
# Certain pages can be forbidden from access via the API. Yes, this is dumb.
30+
# We'll get the page ID and send that to SCUTTLE
31+
wdpage = requests.get('http://' + wikidot_site + ".wikidot.com/" + slug + "/norender/true")
32+
# logger.info(wdpage)
33+
wd_page_id = re.search("(?:WIKIREQUEST.info.pageId = )([^;]*)", wdpage.text).group(1)
34+
payload = json.dumps({"slug": slug, "wd_page_id": wd_page_id, "api_status": 403})
35+
# Let SCUTTLE know we're not gonna get everything.
36+
# logger.info('Page protected 403, ending early, making SCUTTLE request at ' + callback_url)
37+
headers = {"Authorization": "Bearer " + config.scuttle_token, "Content-Type": "application/json"}
38+
r = requests.put(callback_url + '/2stacks/page/metadata', data=payload, headers=headers)
39+
return { "job": "complete-403" }
40+
# logger.info(r.text)
41+
# return r.text
2942

3043
# Be Nice
3144
sleep(0.25)
3245

3346
# Hit Wikidot's Frontend
34-
wdpage = requests.get('http://' + wikidot_site + ".wikidot.com/" + slug)
35-
logger.info(wdpage)
47+
wdpage = requests.get('http://' + wikidot_site + ".wikidot.com/" + slug + "/norender/true")
48+
# logger.info(wdpage.text)
3649

3750
try:
3851
wd_page_id = re.search("(?:WIKIREQUEST.info.pageId = )([^;]*)", wdpage.text).group(1)
39-
logger.info(wd_page_id)
52+
# logger.info(wd_page_id)
4053
except AttributeError: # The page we got from Wikidot doesn't have a page ID.
4154
# This can happen if they're returning a 5XX error, or the page has been deleted.
4255
# TODO: Look at wdpage for a 200 status to better determine why it's crapping out.
4356
for i in range(5):
4457
sleep(1) # Give wikidot a chance.
4558
try:
46-
wd_page_id = re.search("(?:WIKIREQUEST.info.pageId = )([^;]*)", wdpage).group(1)
59+
wd_page_id = re.search("(?:WIKIREQUEST.info.pageId = )([^;]*)", wdpage.text).group(1)
4760
except AttributeError:
48-
continue
61+
# It gone.
62+
return {"Slug": slug, "status": "No_page_ID"}
4963
else:
5064
pass
5165

@@ -64,9 +78,9 @@ def lambda_handler(event, context):
6478
# Send everything to SCUTTLE
6579
headers = {"Authorization": "Bearer " + config.scuttle_token, "Content-Type": "application/json"}
6680
j = json.dumps(payload)
67-
logger.info('Making SCUTTLE request at ' + callback_url)
81+
# logger.info('Making SCUTTLE request at ' + callback_url)
6882
r = requests.put(callback_url + '/2stacks/page/metadata', data=j, headers=headers)
69-
logger.info(r.text)
83+
# logger.info(r.text)
7084
return {
7185
'job': 'complete'
7286
}

2stacks-get-page-revisions.py

+12 -12
Original file line number | Diff line number | Diff line change
@@ -9,18 +9,18 @@
99
import os
1010

1111
#temp
12-
import logging
13-
logger = logging.getLogger()
14-
logger.setLevel(logging.INFO)
12+
# import logging
13+
# logger = logging.getLogger()
14+
# logger.setLevel(logging.INFO)
1515

1616

1717
def lambda_handler(event, context):
1818
for record in event['Records']:
1919
callback_url = record['messageAttributes']['callback_url']['stringValue']
2020
wd_page_id = record['messageAttributes']['page_id']['stringValue']
2121
wikidot_site = record['messageAttributes']['wikidot_site']['stringValue']
22-
logger.info(wikidot_site)
23-
logger.info(wd_page_id)
22+
# logger.info(wikidot_site)
23+
# logger.info(wd_page_id)
2424

2525
data = {'page_id': wd_page_id, 'moduleName': 'history/PageRevisionListModule', 'perpage': 99999}
2626
haystack = helpers.fetch(data, wikidot_site)
@@ -57,10 +57,10 @@ def lambda_handler(event, context):
5757
revision_type[idx] = ''.join(revision) # Flatten the tuple to one string object.
5858

5959
innerpayload = {}
60-
logger.info(str(len(revision_ids)) + " revisions.")
61-
logger.info(str(len(revision_type)) + " revision type rows.")
62-
for row in range(len(revision_type)):
63-
logger.info(revision_type[row])
60+
# logger.info(str(len(revision_ids)) + " revisions.")
61+
# logger.info(str(len(revision_type)) + " revision type rows.")
62+
# for row in range(len(revision_type)):
63+
# logger.info(revision_type[row])
6464
for row in range(len(revision_ids)):
6565
# logger.info("Processing revision " + revision_numbers[row])
6666
# We need to handle some edge cases for deleted and anonymous users.
@@ -85,9 +85,9 @@ def lambda_handler(event, context):
8585
# Send everything to SCUTTLE
8686
headers = {"Authorization": "Bearer " + config.scuttle_token, "Content-Type": "application/json"}
8787
r = requests.put(callback_url + '/2stacks/page/revisions', data=output, headers=headers)
88-
if r.status_code == 500:
89-
logger.info('500:')
90-
logger.info(r.text)
88+
# if r.status_code == 500:
89+
# logger.info('500:')
90+
# logger.info(r.text)
9191

9292
return {
9393
'job': 'complete'

2stacks-get-page-thread-id.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,9 @@
99
import os
1010

1111
#temp
12-
import logging
13-
logger = logging.getLogger()
14-
logger.setLevel(logging.INFO)
12+
# import logging
13+
# logger = logging.getLogger()
14+
# logger.setLevel(logging.INFO)
1515

1616

1717
def lambda_handler(event, context):
@@ -22,7 +22,7 @@ def lambda_handler(event, context):
2222

2323
data = {'pageId': wd_page_id, 'moduleName': 'forum/ForumCommentsListModule'}
2424
haystack = helpers.fetch(data, wikidot_site)
25-
logger.info(haystack)
25+
# logger.info(haystack)
2626
try:
2727
thread_id = re.search('(?:forumThreadId = )(\d*)', haystack).group(1)
2828
except: # This only really fails on a deleted page.

2stacks-get-revision-content.py

+9 -9
Original file line number | Diff line number | Diff line change
@@ -9,33 +9,33 @@
99
import os
1010

1111
#temp
12-
import logging
13-
logger = logging.getLogger()
14-
logger.setLevel(logging.INFO)
12+
# import logging
13+
# logger = logging.getLogger()
14+
# logger.setLevel(logging.INFO)
1515

1616

1717
def lambda_handler(event, context):
1818
for record in event['Records']:
19-
logger.info(record['messageAttributes'])
19+
# logger.info(record['messageAttributes'])
2020
callback_url = record['messageAttributes']['callback_url']['stringValue']
2121
wd_revision_id = record['messageAttributes']['revision_id']['stringValue']
2222
wikidot_site = record['messageAttributes']['wikidot_site']['stringValue']
2323
wd_url = record['messageAttributes']['wikidot_url']['stringValue']
2424

25-
logger.info(wd_revision_id)
26-
logger.info(wikidot_site)
25+
# logger.info(wd_revision_id)
26+
# logger.info(wikidot_site)
2727

2828
data = {'revision_id': wd_revision_id, 'moduleName': 'history/PageSourceModule'}
2929
haystack = helpers.fetch(data, wd_url)
3030
if haystack is None:
3131
return { 'revision': 'deleted ' }
3232
else:
33-
logger.info('haystack:')
34-
logger.info(haystack)
33+
# logger.info('haystack:')
34+
# logger.info(haystack)
3535
content = re.search('(?:<div class="page-source">)(.*)(?:<\/div>$)', haystack, re.DOTALL).group(1)
3636
payload = {"wd_revision_id": str(wd_revision_id), "content": content}
3737
output = json.dumps(payload)
38-
logger.info("got output")
38+
# logger.info("got output")
3939
# Send everything to SCUTTLE
4040
headers = {"Authorization": "Bearer " + config.scuttle_token, "Content-Type": "application/json"}
4141
r = requests.put(callback_url + '/2stacks/revision/content', data=output, headers=headers)

2stacks-get-thread-posts.py

+13 -13
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,9 @@
1010
from bs4 import BeautifulSoup
1111

1212
#temp
13-
import logging
14-
logger = logging.getLogger()
15-
logger.setLevel(logging.INFO)
13+
# import logging
14+
# logger = logging.getLogger()
15+
# logger.setLevel(logging.INFO)
1616

1717

1818
def lambda_handler(event, context):
@@ -54,7 +54,7 @@ def lambda_handler(event, context):
5454

5555
# Get the OP of the thread. This is Wikidot for a per-page discussion thread or a user id otherwise.
5656
attribution = descriptionblock.find("span", {"class": "printuser"})
57-
logger.info(attribution)
57+
# logger.info(attribution)
5858
if attribution.string == "Wikidot":
5959
op_user_id = 0
6060
op_username = "Wikidot"
@@ -99,7 +99,7 @@ def lambda_handler(event, context):
9999
# Let's handle things the same way for one page or many.
100100
for page in range(maxpages):
101101
actualpage = page + 1
102-
logger.info('On Page ' + str(actualpage))
102+
# logger.info('On Page ' + str(actualpage))
103103
innerpayload = {}
104104
haystack = get_thread_page(thread=wd_thread_id, page=actualpage, wikidot_site=wikidot_site) # I'm too lazy to not just increment this range by one to make it work.
105105
soup = BeautifulSoup(haystack.replace("\\","")[2:], 'html.parser')
@@ -108,7 +108,7 @@ def lambda_handler(event, context):
108108
# logger.info(len(posts))
109109
for idx, post in enumerate(posts):
110110
wd_post_id = int(re.search('(?:<div class="post" id="post-)(\d*)', str(post)).group(1))
111-
logger.info("Post " + str(idx) + ", ID " + str(wd_post_id))
111+
# logger.info("Post " + str(idx) + ", ID " + str(wd_post_id))
112112
subject = re.search('(?:<div class="title" id="post-title-\d*">\s*)([^\n]*)', str(post)).group(1)
113113
# On a blank subject this returns as "</div>"
114114
if subject == "</div>":
@@ -117,10 +117,10 @@ def lambda_handler(event, context):
117117
username = re.search('(?:return false;">)([^<]*)(?:<\/a><\/span>,)', str(post)).group(1)
118118
wd_user_id = int(re.search('(?:www\.wikidot\.com\/userkarma.php\?u=)([^\)]*)', str(post)).group(1))
119119
except AttributeError: #NoneType, deleted user.
120-
logger.info('thread:')
121-
logger.info(wd_thread_id)
122-
logger.info('post:')
123-
logger.info(wd_post_id)
120+
# logger.info('thread:')
121+
# logger.info(wd_thread_id)
122+
# logger.info('post:')
123+
# logger.info(wd_post_id)
124124
try:
125125
wd_user_id = int(re.search('(?:data-id=")(\d*)', str(post)).group(1))
126126
username = "Deleted Account " + str(wd_user_id)
@@ -135,7 +135,7 @@ def lambda_handler(event, context):
135135
wd_user_id = 0
136136
except AttributeError: # This is getting ridiculous. More guest account types.
137137
try:
138-
logger.info(str(post))
138+
# logger.info(str(post))
139139
username = re.search('(?:&amp;default=http:\/\/www.wikidot.com/common--images/avatars/default/a16.png&amp;size=16"\/><\/a>)([^>]*)(?:<\/span>,)', str(post)).group(1)
140140
wd_user_id = 0
141141
except AttributeError:
@@ -188,7 +188,7 @@ def lambda_handler(event, context):
188188
# logger.info('text is a ')
189189
# logger.info(type(body))
190190
# While we could wait and send one big payload, that's a risky proposition on threads with lots of posts so let's not.
191-
logger.info('out of the loop for a single page')
191+
# logger.info('out of the loop for a single page')
192192

193193
# Wrap the payload and send it, SCUTTLE can sort out posts it already has.
194194
outerpayload = {"wd_thread_id": int(wd_thread_id), "wd_forum_id": forum,
@@ -215,7 +215,7 @@ def lambda_handler(event, context):
215215
output = json.dumps(outerpayload)
216216
headers = {"Authorization": "Bearer " + config.scuttle_token, "Content-Type": "application/json"}
217217
r = requests.put(callback_url + '/2stacks/thread/posts', data=output, headers=headers)
218-
logger.info('Made a SCUTTLE Request!')
218+
# logger.info('Made a SCUTTLE Request!')
219219
# logger.info('DATA: ')
220220
# logger.info(outerpayload)
221221

0 commit comments

Comments
 (0)