|
| 1 | +#pip install beautifulsoup4 |
| 2 | + |
| 3 | +import email |
| 4 | +import imaplib #imap pop |
| 5 | +from bs4 import BeautifulSoup |
| 6 | + |
| 7 | +import os |
| 8 | +import mimetypes |
| 9 | + |
| 10 | + |
| 11 | + |
| 12 | + |
def _safe_path_component(value, default, max_length=150):
    """Turn an untrusted header value or filename into one safe path component.

    Every character that is not alphanumeric, a space, '.', '_' or '-' is
    replaced by '_', so the result can never contain a path separator.
    Leading/trailing spaces and dots are stripped (this also neutralises
    '.' and '..'). ``default`` is returned when nothing usable remains,
    including when ``value`` is None (header missing).
    """
    text = "" if value is None else str(value)
    cleaned = "".join(
        ch if ch.isalnum() or ch in " ._-" else "_" for ch in text
    )
    cleaned = cleaned[:max_length].strip(" .")
    return cleaned or default


def _fallback_extension(content_type):
    """Pick a file extension for a MIME part that carries no filename."""
    # 'html' must be tested before 'text': "text/html" contains both words and
    # would otherwise always be saved as .txt.
    if 'html' in content_type:
        return '.html'
    if 'text' in content_type:
        return '.txt'
    return mimetypes.guess_extension(content_type) or '.bin'


def save_message_parts(email_message, base_dir=None):
    """Write every non-multipart part of ``email_message`` to disk.

    Files are written to ``<base_dir>/<Date header>/<Subject header>/``, with
    both header values sanitised into single path components. ``base_dir``
    defaults to ``<current working directory>/emails``.

    Parts without a filename are named ``msg-part-<counter><ext>``, where the
    counter is the 1-based position of the part among the non-multipart parts.
    Parts whose decoded payload is None are skipped.

    Returns the list of file paths that were written.
    """
    if base_dir is None:
        base_dir = os.path.join(os.getcwd(), "emails")
    save_path = os.path.join(
        base_dir,
        _safe_path_component(email_message['date'], "unknown-date"),
        _safe_path_component(email_message['Subject'], "no-subject"),
    )

    written = []
    counter = 1
    for part in email_message.walk():
        if part.get_content_maintype() == "multipart":
            continue
        filename = part.get_filename()
        if filename:
            # Attachment names come from the sender: keep only the last path
            # segment (either separator style) before sanitising.
            filename = os.path.basename(str(filename).replace("\\", "/"))
            filename = _safe_path_component(
                filename, 'msg-part-%08d.bin' % counter
            )
        else:
            ext = _fallback_extension(part.get_content_type())
            filename = 'msg-part-%08d%s' % (counter, ext)
        counter += 1

        payload = part.get_payload(decode=True)
        if payload is None:
            # Nothing to write for this part; fp.write(None) would raise.
            continue

        os.makedirs(save_path, exist_ok=True)
        target = os.path.join(save_path, filename)
        with open(target, 'wb') as fp:
            fp.write(payload)
        written.append(target)
    return written


def main():
    """Log in to Gmail over IMAP and save the parts of every inbox message.

    Credentials are read from the IMAP_USERNAME and IMAP_PASSWORD environment
    variables instead of being stored in the source file.
    """
    try:
        username = os.environ["IMAP_USERNAME"]
        password = os.environ["IMAP_PASSWORD"]
    except KeyError as missing:
        raise SystemExit(
            "Set the IMAP_USERNAME and IMAP_PASSWORD environment variables "
            "(missing: %s)" % missing
        )

    # https://www.google.com/settings/security/lesssecureapps
    # The context manager logs out when the block exits, even on error.
    with imaplib.IMAP4_SSL("imap.gmail.com") as mail:
        mail.login(username, password)
        mail.select("inbox")

        result, data = mail.uid('search', None, "ALL")
        if result != "OK" or not data or not data[0]:
            return

        for item in data[0].split():
            result2, email_data = mail.uid('fetch', item, '(RFC822)')
            if result2 != "OK" or not email_data:
                continue
            if not isinstance(email_data[0], tuple):
                # No (envelope, raw bytes) pair came back for this UID.
                continue
            # Parse the raw bytes directly; decoding the whole message as
            # UTF-8 first fails on messages in other encodings.
            email_message = email.message_from_bytes(email_data[0][1])
            save_message_parts(email_message)


if __name__ == "__main__":
    main()
| 61 | + |
| 62 | + |
| 63 | + |
| 64 | + |
| 65 | +# if "plain" in content_type: |
| 66 | +# #print(part.get_payload()) |
| 67 | +# pass |
| 68 | +# elif "html" in content_type: |
| 69 | +# html_ = part.get_payload() |
| 70 | +# soup = BeautifulSoup(html_, "html.parser") |
| 71 | +# text = soup.get_text() |
| 72 | +# print(subject_) |
| 73 | +# print(text) |
| 74 | +# else: |
| 75 | +# pass |
| 76 | +# #print(content_type) |
| 77 | +# #email_message.get_payload() |
| 78 | + |
| 79 | + |
| 80 | + |
| 81 | + |
| 82 | + |
| 83 | + |
0 commit comments