#!/hint/python3
# -*- indent-tabs-mode: t -*-

import os
import os.path
import subprocess
import datetime
import re
import email.utils
#
# Mail attribute utilities

def parse_address(x):
	"""
	Return just the e-mail address part of a single-address header value.

	`x` is converted with str() first; the display name is discarded.
	"""
	_realname, address = email.utils.parseaddr(str(x))
	return address
def parse_addresses(x):
	"""
	Return the e-mail address part of every entry in an address-list header
	value, in order.  `x` is converted with str() first.
	"""
	parsed = email.utils.getaddresses([str(x)])
	return [address for _realname, address in parsed]

def match_re(x, pat):
	"""
	Match the whole of string `x` against regular expression `pat`,
	ignoring case.  Returns the match object, or None if `x` does not
	match in its entirety.
	"""
	pattern = re.compile(pat, re.IGNORECASE)
	return pattern.fullmatch(x)
def match_glob(x, glob):
	"""
	Case-insensitively test address `x` against the shorthand `glob`.

	Three forms of `glob` are understood:
	  "user@host"  - `x` must be exactly that address
	  "@host"      - `x` must end with "@host" (that exact host only)
	  "host"       - `x` must be at that host or any subdomain of it
	"""
	address = x.lower()
	wanted = glob.lower()
	if '@' not in wanted:
		# bare domain: accept the domain itself and anything below it
		return address.endswith(('@' + wanted, '.' + wanted))
	if wanted.startswith('@'):
		return address.endswith(wanted)
	return address == wanted

def originator_addresses(mail):
	"""
	Return the bare e-mail addresses found in the From, Sender and
	Reply-To headers of `mail`, in that order.

	`mail` only needs to support `mail[header_name]` with a result whose
	str() is the header text.  Entries without an address are dropped.
	"""
	addresses = []
	for name in ('From', 'Sender', 'Reply-To'):
		value = str(mail[name])
		# Skip blank headers and parse each header on its own: the strict
		# parser in recent Python releases rejects a whole getaddresses()
		# batch (returning [('', '')]) when the number of parsed entries
		# does not match the number of commas, which is what happens when
		# several empty header values are joined together.
		if not value.strip():
			continue
		addresses.extend(
			addr for _realname, addr in email.utils.getaddresses([value])
			if addr != '')
	return addresses

def destination_addresses(mail):
	"""
	Return the bare e-mail addresses found in the To, Cc and Bcc headers
	of `mail`, in that order.

	`mail` only needs to support `mail[header_name]` with a result whose
	str() is the header text.  Entries without an address are dropped.
	"""
	addresses = []
	for name in ('To', 'Cc', 'Bcc'):
		value = str(mail[name])
		# Skip blank headers and parse each header on its own: the strict
		# parser in recent Python releases rejects a whole getaddresses()
		# batch (returning [('', '')]) when the number of parsed entries
		# does not match the number of commas, which is what happens when
		# several empty header values are joined together.
		if not value.strip():
			continue
		addresses.extend(
			addr for _realname, addr in email.utils.getaddresses([value])
			if addr != '')
	return addresses
def all_addresses(mail):
	"""
	Return every originator address of `mail` followed by every
	destination address.
	"""
	senders = originator_addresses(mail)
	recipients = destination_addresses(mail)
	return senders + recipients

def mailman_domain(mail, domain):
	"""
	Return a string that is the list-name for a mailman domain, or None
	if `mail` does not appear to belong to a list at `domain`.

	The List-Id header is tried first: "<name>.<domain>" yields <name>.
	Otherwise each destination address "<name>@<domain>" is a candidate,
	accepted only if the Subject contains the "[<name>]" tag.
	"""
	# Raw string: "\." in a plain literal is an invalid escape sequence.
	m = match_re(parse_address(mail["List-Id"]), r"(.*)\."+re.escape(domain))
	if m:
		return m.group(1)
	# A List-Id that compares unequal to "" (comparison semantics come from
	# the header object, not visible here) names some other list: give up
	# rather than guess from the recipients.
	if mail["List-Id"] != "":
		return None
	for addr in destination_addresses(mail):
		m = match_re(addr, "(.*)@"+re.escape(domain))
		if m:
			list_name = m.group(1)
			if mail["Subject"].contains("["+list_name+"]"):
				return list_name
	return None

def ezmlm_domain(mail, domain):
	"""
	Return a string that is the list-name for a ezmlm domain, or None if
	`mail` does not appear to belong to a list at `domain`.

	Tried in order:
	  1. List-Id of the form "<name>.<domain>";
	  2. a Mailing-List header containing "run by ezmlm" and a
	     "contact <name>-help@<domain>" field;
	  3. any destination address "<name>@<domain>".
	"""
	# Raw string: "\." in a plain literal is an invalid escape sequence.
	m = match_re(parse_address(mail["List-Id"]), r"(.*)\."+re.escape(domain))
	if m:
		return m.group(1)
	# A List-Id that compares unequal to "" (comparison semantics come from
	# the header object, not visible here) names some other list.
	if mail["List-Id"] != "":
		return None
	fields = [s.strip() for s in str(mail["Mailing-List"]).split(";")]
	if 'run by ezmlm' in fields:
		prefix = 'contact '
		for field in fields:
			if not field.startswith(prefix):
				continue
			contact = field[len(prefix):].strip()
			m = match_re(contact, "(.+)-help@"+re.escape(domain))
			if m:
				return m.group(1)
	for addr in destination_addresses(mail):
		m = match_re(addr, "(.*)@"+re.escape(domain))
		if m:
			return m.group(1)
	return None

def majordomo_domain(mail, domain):
	"""
	Return a string that is the list-name for a majordomo domain, or None
	if `mail` does not appear to belong to a list at `domain`.

	The List-Id header is tried first: "<name>.<domain>" yields <name>.
	Otherwise the first destination address "<name>@<domain>" yields <name>.
	"""
	# Raw string: "\." in a plain literal is an invalid escape sequence.
	m = match_re(parse_address(mail["List-Id"]), r"(.*)\."+re.escape(domain))
	if m:
		return m.group(1)
	# A List-Id that compares unequal to "" (comparison semantics come from
	# the header object, not visible here) names some other list.
	if mail["List-Id"] != "":
		return None
	for addr in destination_addresses(mail):
		m = match_re(addr, "(.*)@"+re.escape(domain))
		if m:
			return m.group(1)
	return None

def is_from(mail, address):
	"""
	True if any originator address of `mail` matches the glob `address`
	(see match_glob for the accepted glob forms).
	"""
	for sender in originator_addresses(mail):
		if match_glob(sender, address):
			return True
	return False
def is_from_re(mail, address):
	"""
	True if any originator address of `mail` fully matches the regular
	expression `address` (case-insensitive, via match_re).
	"""
	for sender in originator_addresses(mail):
		if match_re(sender, address):
			return True
	return False

def is_to_or_from(mail, address):
	"""
	True if any originator or destination address of `mail` matches the
	glob `address` (see match_glob for the accepted glob forms).
	"""
	for party in all_addresses(mail):
		if match_glob(party, address):
			return True
	return False
def is_to_or_from_re(mail, address):
	"""
	True if any originator or destination address of `mail` fully matches
	the regular expression `address` (case-insensitive, via match_re).
	"""
	for party in all_addresses(mail):
		if match_re(party, address):
			return True
	return False
#
# bogofilter utilities

def bogofilter_auto(mail):
	"""
	Run `bogofilter -u -v -I <mail.path>` and return its exit status.

	The combined stdout/stderr is written to the processor log.  Any exit
	status other than 0, 1 or 2 is additionally logged as an error; the
	status is returned to the caller either way.
	"""
	argv = ["bogofilter", "-u", "-v", "-I", mail.path]
	proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
	output = proc.communicate()[0]
	status = proc.returncode
	processor.log("*** Bogofilter result: {0!r}".format(output.rstrip()))
	if status not in (0, 1, 2):
		processor.log_error(
			"Error running bogofilter: Return code = {0!r}".format(status))
	return status
def bogofilter_ham(mail):
	"""
	Run `bogofilter -S -n -I <mail.path>`; the exit status is ignored.
	"""
	argv = ["bogofilter", "-S", "-n", "-I", mail.path]
	subprocess.call(argv)
def bogofilter_spam(mail):
	"""
	Run `bogofilter -N -s -I <mail.path>`; the exit status is ignored.
	"""
	argv = ["bogofilter", "-N", "-s", "-I", mail.path]
	subprocess.call(argv)
#
# The core of my filters

def move_ham(mail, folder):
	"""
	Move `mail` into this year's ham folder, creating the maildir first
	if it does not exist yet.

	`folder` is a suffix appended to "MAIN/Ham.<current year>": pass ""
	for the top-level ham folder or a dot-prefixed name such as
	".software" for a subfolder.
	"""
	year = datetime.datetime.now().year
	folder = "MAIN/Ham."+str(year)+folder
	# expanduser() leaves already-absolute paths untouched; it only matters
	# if processor.maildir_base still holds a literal "~" (whether the
	# processor expands it itself is not visible from this file).
	maildir = os.path.expanduser(processor.maildir_base)+"/"+folder
	# The empty "maildirfolder" marker file doubles as the "already set up"
	# flag, so the setup below runs once per new folder.
	if not os.path.isfile(maildir+"/maildirfolder"):
		for sub in ("tmp", "new", "cur"):
			os.makedirs(maildir+"/"+sub, 0o777, exist_ok=True)
		open(maildir+"/maildirfolder", 'a').close()
		# XDG_CONFIG_HOME is optional; when unset or empty the XDG default
		# is ~/.config.  Indexing os.environ directly raised KeyError.
		config_home = os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
		# NOTE(review): presumably regenerates configuration that lists the
		# mail folders -- the Makefile is not part of this file.
		subprocess.call(['make', '-C', config_home])
	mail.move(folder)

def my_whitelist(mail):
	"""
	Return a truthy value if `mail` matches any trusted address, domain,
	List-Id pattern or Subject tag listed below, and a falsy value
	otherwise.

	The checks are evaluated in order and stop at the first hit.  The
	leading `False` only exists so that every real entry starts with "or".
	"""
	return (
		False
		# sender or recipient (is_to_or_from already covers senders, so
		# these domains need no separate is_from entry)
		or is_to_or_from(mail, "opengroup.org")
		or is_to_or_from(mail, "purestorage.com")
		or is_to_or_from(mail, "sourceware.org")
		or is_to_or_from(mail, "vger.kernel.org")
		or is_to_or_from(mail, "Bryan@ChankTunUnGi.onmicrosoft.com")
		or is_to_or_from(mail, "cacnedcomms@gmail.com")
		or is_to_or_from(mail, "fsf.org")
		or is_to_or_from(mail, "gnu.org")
		or is_to_or_from(mail, "parabola.nu")
		or is_to_or_from(mail, "parabolagnulinux.org")
		# sender only
		or is_from(mail, "3174451635@mms.att.net")
		or is_from(mail, "MAILER-DAEMON@yahoo.com")
		or is_from(mail, "careereco.com")
		or is_from(mail, "ciholas.com")
		or is_from(mail, "e.oldnational.com")
		or is_from(mail, "facebookmail.com")
		or is_from(mail, "gandi.net")
		or is_from(mail, "github.com")
		or is_from(mail, "goodwillindy.org")
		or is_from(mail, "kickstarter.com")
		or is_from(mail, "list.cr.yp.to")
		or is_from(mail, "lpi.org")
		or is_from(mail, "lulzbot.com")
		or is_from(mail, "mail.scribd.com")
		or is_from(mail, "massdrop.com")
		or is_from(mail, "msdlt.k12.in.us")
		or is_from(mail, "post.oreilly.com")
		or is_from(mail, "scouting.org")
		or is_from(mail, "solutionsinplastic.com")
		or is_from(mail, "startcom.org")
		or is_from(mail, "usfirst.org")
		or is_from(mail, "vectren.com")
		or is_from(mail, "vectrenemail.com")
		or is_from(mail, "wolframalpha.com")
		or is_from(mail, "Promo@email.newegg.com")
		or is_from(mail, "info@email2.mysimplemobile.com")
		or is_from(mail, "margieshu@sbcglobal.net")
		# mailing lists, by List-Id (raw strings: "\." is not a valid
		# escape sequence in a plain string literal)
		or match_re(parse_address(mail["List-Id"]), r".*\.(gnu|gnome|archlinux|parabolagnulinuxlibre|fedorahosted)\.org")
		or match_re(parse_address(mail["List-Id"]), r".*\.parabola\.nu")
		# mailing lists, by Subject tag
		or mail["Subject"].contains("[Dev]")
		or mail["Subject"].contains("[Maintenance]")
		or mail["Subject"].contains("[PATCH")
		or mail["Subject"].contains("[systemd-devel]")
		)

def my_filters(mail):
	"""
	File `mail` (already known to be ham) into a subfolder of this year's
	ham folder.

	The rules below are tried from top to bottom and the first one that
	matches wins: the mail is handed to move_ham() with that rule's folder
	suffix and the function returns.  Mail matching no rule goes to the
	top-level ham folder (suffix "").

	In the long conditions, the leading `False` (and the occasional
	`or False` separator) only exists so every real entry starts with "or".
	Regular expressions are written as raw strings so that "\\b", "\\s"
	etc. reach the regex engine instead of being interpreted as string
	escapes.
	"""
	if mail["From"].contains("Parabola Website Notification <nobody@parabola.nu>"):
		move_ham(mail, ".software.parabola.dev.web-notif")
		return

	# .software.POSIX (custom mlm, I think)
	if is_to_or_from(mail, "austin-group-l@opengroup.org"):
		move_ham(mail, ".software.POSIX")
		return
	# .software.* (GNU Mailman)
	for domain, group in [
			[ 'archlinux.org'                 , 'archlinux'           ], # @sbcglobal.net and @lukeshu.com ; problems delivering to Yahoo!
			[ 'gnome.org'                     , 'gnome'               ], # https://mail.gnome.org/mailman/options/networkmanager-list/lukeshu@lukeshu.com
			[ 'gnu.org'                       , 'gnu'                 ], # https://lists.gnu.org/mailman/options/bug-librejs/lukeshu@lukeshu.com
			[ 'lists.arthurdejong.org'        , 'arthurdejong'        ],
			[ 'lists.fedorahosted.org'        , 'fedorahosted'        ],
			[ 'lists.freedesktop.org'         , 'freedesktop'         ], # https://lists.freedesktop.org/mailman/options/systemd-devel/lukeshu@lukeshu.com
			[ 'lists.parabola.nu'             , 'parabola'            ], # https://lists.parabola.nu/mailman/options/dev/lukeshu@lukeshu.com
			[ 'lists.reproducible-builds.org' , 'reproducible-builds' ], # https://lists.reproducible-builds.org/options/rb-general/lukeshu@lukeshu.com
			[ 'nongnu.org'                    , 'nongnu'              ], # https://lists.nongnu.org/mailman/options/gnu-linux-libre/lukeshu@lukeshu.com
			[ 'redhat.com'                    , 'redhat'              ], # https://www.redhat.com/mailman/options/pam-list/lukeshu@lukeshu.com
			[ 'lists.stanford.edu'            , 'stanford'            ],
			[ 'mailman.stanford.edu'          , 'stanford'            ],
	]:
		list_name = mailman_domain(mail, domain)
		if list_name:
			move_ham(mail, ".software."+group+"."+list_name)
			return
	# .software.* (EZMLM)
	for domain, group in [
			[ 'list.cr.yp.to',  'djb'        ],
			[ 'sourceware.org', 'sourceware' ],
	]:
		list_name = ezmlm_domain(mail, domain)
		if list_name:
			move_ham(mail, ".software."+group+"."+list_name)
			return
	if is_from(mail, "sourceware-bugzilla@sourceware.org"):
		move_ham(mail, ".software.sourceware-bugzilla")
		return
	# .software.* (Majordomo)
	for domain, group in [ [ 'vger.kernel.org', 'kernel' ] ]:
		list_name = majordomo_domain(mail, domain)
		if list_name:
			move_ham(mail, ".software."+group+"."+list_name)
			return
	# .software.parabola
	if (
		False
		or is_to_or_from(mail, "ceata.org")
		or is_to_or_from(mail, "kiwwwi.com.ar")
		or is_to_or_from(mail, "endefensadelsl.org")
		or is_to_or_from(mail, "parabola.nu")
		or is_to_or_from(mail, "parabolagnulinux.org")
		or is_to_or_from(mail, "xylon.me.uk")
		or False
		or is_to_or_from(mail, "g4jc@openmailbox.org")
		or is_to_or_from(mail, "jon@whiteheat.org.uk")
		or is_to_or_from(mail, "srw@openmailbox.org")
		or is_to_or_from(mail, "eliotime3000@openmailbox.org")
		):
		move_ham(mail, ".software.parabola")
		return
	# .software.TravisCI
	if is_from(mail, "builds@travis-ci.org"):
		move_ham(mail, ".software.TravisCI")
		return
	# .software
	for address in [
		"archlinux.org",
		"canonical.org",
		"cnuk.org",
		"core3.amsl.com",
		"defectivebydesign.org",
		"eff.org",
		"fedorahosted.org",
		"foocorp.net",
		"fsf.org",
		"github.com",
		"gitorious.org",
		"gnome.org",
		"gnu.org",
		"ietf.org",
		"kde.org",
		"mozilla.org",
		"nongnu.org",
		"sourceforge.com",
		"thyrsus.com",
		]:
		if is_to_or_from(mail, address):
			move_ham(mail, ".software")
			return
	# .servers
	if (
		False
		or is_from(mail, "gandi.net")
		or is_from(mail, "ramhost.us")
		or is_from(mail, "startcom.org")
		or is_from(mail, "startssl.com")
		or is_from(mail, "vultr.com")
		or (is_from(mail, "@2co.com") and mail["Subject"].contains("RAM Host"))
		or is_from(mail, "localhost")
		or is_from(mail, "local")
		or is_from(mail, "lan")
		or is_from(mail, "lukeshu.com")
		):
		move_ham(mail, ".servers")
		return
	# .Social.*
	if is_from_re(mail, r".*[@.]facebook(|mail)\.com"):
		move_ham(mail, ".Social.Facebook")
		return
	if is_from(mail, "identi.ca"):
		move_ham(mail, ".Social.Identica")
		return
	if is_from(mail, "twitter.com"):
		move_ham(mail, ".Social.Twitter")
		return
	if is_from(mail, "xkcd.com"):
		move_ham(mail, ".Social.xkcd")
		return
	if is_from(mail, "linkedin.com"):
		move_ham(mail, ".Social.LinkedIn")
		return
	# .jobs.*
	if is_from(mail, "guru.com"):
		move_ham(mail, ".jobs.Guru")
		return
	if is_from(mail, "glassdoor.com"):
		move_ham(mail, ".jobs.Glassdoor")
		return
	# .BSA
	if (
		False
		or mail["List-Id"].contains("troopmailinglist.troop276.net")
		or mail["Subject"].matches("troop")
		or mail["Subject"].matches(r"merit\s*badge")
		or is_to_or_from(mail, "t276_announcements@att.net")
		or is_to_or_from(mail, "Bryan@ChankTunUnGi.onmicrosoft.com")
		or is_to_or_from(mail, "basu@maharjan.org")
		or is_to_or_from(mail, "cacnedcomms@gmail.com")
		or is_to_or_from(mail, "crossroadsbsa.org")
		or is_to_or_from(mail, "dhoyt@yourhomecompany.com")
		or is_to_or_from(mail, "dllargent@comcast.net")
		or is_to_or_from(mail, "eldredmac@comcast.net")# MacDonell
		or is_to_or_from(mail, "jsting@sbcglobal.net")
		or is_to_or_from(mail, "mitchprather@sbcglobal.net")
		or is_to_or_from(mail, "muellerindy@yahoo.com")
		or is_to_or_from(mail, "mytroop.us")
		or is_to_or_from(mail, "oa_wap@yahoo.com")
		or is_to_or_from(mail, "salupo_vincent_p@lilly.com")
		or is_to_or_from(mail, "scouting.org")
		or is_to_or_from(mail, "solorzano.luis@rocketmail.com")
		or is_to_or_from(mail, "trdindy@comcast.net")
		or is_to_or_from(mail, "wjensen111@aol.com")
		):
		move_ham(mail, ".BSA")
		return
	# .FRC
	for address in [ "ni.com", "usfirst.org", "firstinspires.org" ]:
		if is_to_or_from(mail, address):
			move_ham(mail, ".FRC")
			return
	# .FRC.829
	if (
		False
		# raw string: in a plain literal "\b" is a backspace character,
		# not a regex word boundary, so the rule could never match
		or mail["Subject"].matches(r"\b829\b")
		or is_to_or_from(mail, "wcxctrack829@aim.com") # Pat
		or is_to_or_from(mail, "william.walk@gmail.com")
		):
		move_ham(mail, ".FRC.829")
		return
	# .FRC.1024
	if (
		False
		or mail["Subject"].matches(r"\b1024\b")
		or mail["Subject"].matches("kil-?a-?bytes")
		or is_to_or_from(mail, "BBonahoom@stanleyworks.com")
		or is_to_or_from(mail, "bryanbonahoom@gmail.com")
		or is_to_or_from(mail, "allison.m.babcock@gmail.com")
		or is_to_or_from(mail, "cdewalt3@yahoo.com")
		or is_to_or_from(mail, "dave.nelson@ecolab.com")
		or is_to_or_from(mail, "dickaustin190@yahoo.com")
		or is_to_or_from(mail, "djnels1@comcast.net") # Dave and Julie Nelson
		or is_to_or_from(mail, "gamefreak207@gmail.com") # Brett Leedy
		or is_to_or_from(mail, "jason.zielke@gmail.com")
		or is_to_or_from(mail, "jeffreysmith@msdlt.k12.in.us")
		or is_to_or_from(mail, "sarahlittell@comcast.net")
		or is_to_or_from(mail, "silioso@gmail.com")
		or is_to_or_from(mail, "skiplittell@comcast.net")
		or is_to_or_from(mail, "tswilson4801@att.net")
		):
		move_ham(mail, ".FRC.1024")
		return
	# .FRC.4272
	if (
		False
		or mail["Subject"].matches(r"\b4272\b")
		or mail["Subject"].contains("[ME297]")
		or is_to_or_from(mail, "firstteam4272@gmail.com")
		or is_to_or_from(mail, "@tscstudents.net")
		or is_to_or_from(mail, "@tsc.k12.in.us")
		or is_to_or_from(mail, "ericjoelsells@gmail.com")
		or is_to_or_from(mail, "abenyeho@purdue.edu")
		or is_to_or_from(mail, "alexhenry@purdue.edu")
		or is_to_or_from(mail, "Henry65@purdue.edu")
		or is_to_or_from(mail, "chang282@purdue.edu")
		):
		move_ham(mail, ".FRC.4272")
		return
	# .Purdue.*
	if (
		False
		or mail["Subject"].contains("[PASE]")
		or is_to_or_from(mail, "Purduealumni@purdue.edu")
		or is_to_or_from(mail, "pase@purdue.edu")
		):
		move_ham(mail, ".Purdue.PASE")
		return
	if mail["Subject"].contains("[PLUG]"):
		move_ham(mail, ".Purdue.PLUG")
		return
	if is_to_or_from(mail, "@cerias.purdue.edu"):
		move_ham(mail, ".Purdue.CERIAS")
		return
	if (
		False
		or is_to_or_from(mail, "purduehackers@gmail.com")
		or is_to_or_from(mail, "royfu@purdue.edu")
		or is_to_or_from(mail, "usmannkhan@purdue.edu")
		):
		move_ham(mail, ".Purdue.Hackers")
		return
	if (
		False
		or mail["Subject"].contains("[CS Opportunity Update]")
		or mail["Subject"].contains("[CS Majors]")
		):
		move_ham(mail, ".Purdue.CS")
		return
	if (
		False
		or is_to_or_from(mail, "askcco@purdue.edu")
		or is_to_or_from_re(mail, r"pmx-auto-approve\+.*@purdue\.edu")
		or is_to_or_from(mail, "evertrue@purdue.edu")
		or is_to_or_from(mail, "college.response@purdue.edu")
		or is_to_or_from(mail, "purduepresident@purdue.edu")
		or is_to_or_from(mail, "@prf.org")
		):
		move_ham(mail, ".Purdue.misc")
		return
	# .misc.*
	if is_from(mail, "schwab.com"):
		move_ham(mail, ".misc.schwab")
		return
	if is_from(mail, "ebay.com"):
		move_ham(mail, ".misc.ebay")
		return
	if (
		False
		or is_to_or_from(mail, "margieshu@sbcglobal.net")
		or is_to_or_from(mail, "3174451635@mms.att.net")
		):
		move_ham(mail, ".misc.Mom")
		return
	if is_to_or_from(mail, "freelancer.com"):
		move_ham(mail, ".misc.Freelancer")
		return
	for address in [
		"lpi.org",
		"pearson.com",
		"ciscotraining-notify@cisco.com",
		]:
		if is_from(mail, address):
			move_ham(mail, ".misc.CompTIA")
			return
	if (
		False
		or mail["From"].contains("newsletter")
		or mail["From"].contains("announcements")
		or mail["Subject"].contains("newsletter")
		or mail["Message-Id"].contains("@sailthru.com")
		or False
		or (mail["From"].contains("@sparkfun.com") and mail["Message-Id"].contains("rsgsv.net"))
		or (mail["From"].contains("no-reply@kickstarter.com") and mail["Message-Id"].contains(".sendgrid.net"))
		or (mail["From"].contains("no-reply@kickstarter.com") and (
			False
			or mail["Subject"].contains("Projects We Love:")
			or mail["Subject"].contains("Project Update")))
		or mail["From"].contains("Info@mailing.jamendo.com")
		or mail["From"].contains("Promo@email.newegg.com")
		or mail["From"].contains("auto@comicsbyemail.com")
		or mail["From"].contains("info@demandprogress.org")
		or mail["From"].contains("info@email2.mysimplemobile.com")
		or mail["From"].contains("info@massdrop.com")
		or mail["From"].contains("info@lulzbot.com")
		or mail["From"].contains("oreilly.com")
		or mail["From"].contains("reply-to@e.digikey.com")
		or mail["From"].contains("communication@communications.bmv.in.gov")
		or mail["From"].contains("sales@solutionsinplastic.com")
		or mail["From"].contains("social@goodwillindy.org")
		or mail["From"].contains("support@support.digitalocean.com")
		or mail["From"].contains("@pardonsnowden.org")
		):
		move_ham(mail, ".misc.Newsletters")
		return
	if (
		False
		or mail["Subject"].contains("password")
		or mail["Subject"].contains("account")
		or mail["From"].contains("accounts")
		):
		move_ham(mail, ".misc.accounts")
		return

	# nothing matched: top-level ham folder
	move_ham(mail, "")
#
# call the above

def handle_incoming_ham_training(mail):
	"""
	Run bogofilter_ham() on `mail`, then file it with handle_incoming_ham().
	"""
	bogofilter_ham(mail)
	handle_incoming_ham(mail)
def handle_incoming_spam_training(mail):
	"""
	Run bogofilter_spam() on `mail`, then move it to "MAIN/Spam".
	"""
	bogofilter_spam(mail)
	mail.move("MAIN/Spam")

def handle_incoming_ham(mail):
	"""
	File `mail` into a ham folder via my_filters(), without running
	bogofilter on it.
	"""
	my_filters(mail)
def handle_incoming_spam(mail):
	"""
	Move `mail` to "MAIN/Spam", without running bogofilter on it.
	"""
	mail.move("MAIN/Spam")

def handle_incoming_unknown(mail):
	"""
	Classify and file a message of unknown status.

	Whitelisted mail (my_whitelist) is handed to
	handle_incoming_ham_training() without consulting the classifier.
	Everything else is dispatched on the exit status of bogofilter_auto():
	  0     -> handle_incoming_spam
	  1     -> handle_incoming_ham
	  2     -> moved to "MAIN/MysteryMeat"
	  other -> moved to "MAIN/BogoFail"
	"""
	if my_whitelist(mail):
		handle_incoming_ham_training(mail)
		return

	verdict = bogofilter_auto(mail)
	if verdict == 0:
		handle_incoming_spam(mail)
	elif verdict == 1:
		handle_incoming_ham(mail)
	else:
		fallback = "MAIN/MysteryMeat" if verdict == 2 else "MAIN/BogoFail"
		mail.move(fallback)

# Hook the above functions into the maildirproc processor.
# `processor` is not defined in this file; presumably maildirproc injects it
# into the rc-file's namespace before executing it -- confirm against the
# maildirproc documentation.
processor.maildir_base = "~/Maildir"
processor.auto_reload_rcfile = True
# Maps each watched maildir (as reported by mail.maildir) to the handler that
# processes mail arriving there.
handle_mapping = {
	"REMOTES/ATT/Inbox":     handle_incoming_unknown,
	"REMOTES/ATT/Bulk Mail": handle_incoming_unknown, # Yahoo!'s bulk folder is re-classified like an inbox
	"REMOTES/lukeshu/INBOX": handle_incoming_unknown,
	"QUEUES/Spam":           handle_incoming_spam_training,
	"QUEUES/Ham":            handle_incoming_ham_training,
	}
# Watch exactly the maildirs that have a handler.
processor.maildirs = handle_mapping.keys()
# Iterating the processor yields mails; dispatch each one on the maildir it
# was found in.
for mail in processor:
	handle_mapping[mail.maildir](mail)