feat: add rule-based spam detection engine
Heuristic spam detector with 50+ patterns including SPF/DKIM/DMARC auth checks, display name impersonation, URL analysis, attachment risk scoring, and advanced phishing detection (fake quarantine reports, voicemail scams, cold email solicitation). Configurable via SpamDetectorConfig.json with customizable weights and blocklist. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
72
EmailSearch/SpamDetection/AttachmentAnalyzer.cs
Normal file
72
EmailSearch/SpamDetection/AttachmentAnalyzer.cs
Normal file
@@ -0,0 +1,72 @@
|
||||
using NetOffice.OutlookApi;
|
||||
|
||||
namespace EmailSearch.SpamDetection;
|
||||
|
||||
/// <summary>
/// Scores the attachments of a message by how dangerous their file type is.
/// </summary>
internal static class AttachmentAnalyzer
{
    // Risk scores by extension type (0.0 = safe, 1.0 = very dangerous).
    // The comparer is case-insensitive, so lookups need no prior case folding.
    private static readonly Dictionary<string, double> AttachmentRiskScores = new(StringComparer.OrdinalIgnoreCase)
    {
        // Critical risk - direct executables
        { ".exe", 1.0 },
        { ".scr", 1.0 },
        { ".bat", 0.95 },
        { ".cmd", 0.95 },
        { ".com", 0.95 },
        { ".pif", 0.95 },
        { ".msi", 0.9 },
        { ".vbs", 0.9 },
        { ".js", 0.9 },
        { ".ps1", 0.9 },
        { ".wsf", 0.9 },

        // High risk - macro-enabled documents
        { ".docm", 0.8 },
        { ".xlsm", 0.8 },
        { ".pptm", 0.8 },
        { ".xlam", 0.8 },

        // Medium-high risk - can contain executables
        { ".iso", 0.7 },
        { ".img", 0.7 },
        { ".lnk", 0.75 },
        { ".hta", 0.7 },

        // Medium risk - HTML can be phishing
        { ".html", 0.6 },
        { ".htm", 0.6 },
        { ".svg", 0.5 },

        // Low-medium risk - archives
        { ".zip", 0.3 },
        { ".rar", 0.35 },
        { ".7z", 0.35 },
        { ".tar", 0.3 },
        { ".gz", 0.3 }
    };

    // Trailing spaces and dots are not part of the effective extension
    // ("invoice.exe." should still be scored as ".exe").
    private static readonly char[] IgnoredTrailingChars = { ' ', '.' };

    /// <summary>
    /// Returns the highest risk score among all attachments of <paramref name="mail"/>.
    /// </summary>
    /// <param name="mail">The message whose attachments are inspected.</param>
    /// <returns>
    /// A value between 0.0 and 1.0; 0.0 when there are no attachments or none has a
    /// listed extension.
    /// </returns>
    public static double GetAttachmentRiskScore(MailItem mail)
    {
        // Read the collection once instead of going back to the mail item for every check.
        var attachments = mail.Attachments;
        if (attachments == null || attachments.Count == 0)
            return 0.0;

        double maxRisk = 0.0;

        foreach (var attachment in attachments)
        {
            if (attachment is Attachment att)
                maxRisk = Math.Max(maxRisk, GetFileNameRisk(att.FileName));
        }

        return maxRisk;
    }

    // Looks up the risk of a single file name by its final extension; unknown or missing names score 0.
    private static double GetFileNameRisk(string? fileName)
    {
        if (string.IsNullOrWhiteSpace(fileName))
            return 0.0;

        var extension = System.IO.Path.GetExtension(fileName.Trim().TrimEnd(IgnoredTrailingChars));

        return !string.IsNullOrEmpty(extension) && AttachmentRiskScores.TryGetValue(extension, out var risk)
            ? risk
            : 0.0;
    }
}
|
||||
30
EmailSearch/SpamDetection/FeatureExtractors.cs
Normal file
30
EmailSearch/SpamDetection/FeatureExtractors.cs
Normal file
@@ -0,0 +1,30 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace EmailSearch.SpamDetection;
|
||||
|
||||
/// <summary>
/// Small text-extraction helpers (URLs, e-mail addresses, header values) shared by the spam checks.
/// </summary>
internal static class FeatureExtractors
{
    // The fixed patterns are built once instead of on every call.
    private static readonly Regex UrlPattern =
        new(@"https?://[^\s'""<>()]+", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private static readonly Regex EmailPattern =
        new(@"[A-Z0-9._%+\-]+@[A-Z0-9.\-]+\.[A-Z]{2,}", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // Sentence punctuation that the URL pattern swallows when a link ends a sentence
    // ("see http://example.com."); left in place it corrupts the host name.
    private static readonly char[] TrailingUrlPunctuation = { '.', ',', ';', ':', '!', '?' };

    /// <summary>
    /// Finds every http/https URL in <paramref name="text"/>, in order of appearance.
    /// </summary>
    /// <param name="text">Plain text or HTML; <c>null</c> is treated as empty.</param>
    /// <returns>The URLs with trailing sentence punctuation removed; empty when none are found.</returns>
    public static List<string> ExtractUrls(string text) =>
        UrlPattern.Matches(text ?? "")
            .Select(m => m.Value.TrimEnd(TrailingUrlPunctuation))
            .ToList();

    /// <summary>
    /// Returns the first e-mail address found in <paramref name="headerLine"/>.
    /// </summary>
    /// <param name="headerLine">A header value such as <c>Bob &lt;bob@example.com&gt;</c>; <c>null</c> is treated as empty.</param>
    /// <returns>The address, or an empty string when there is none.</returns>
    public static string ExtractFirstEmail(string headerLine)
    {
        var m = EmailPattern.Match(headerLine ?? "");
        return m.Success ? m.Value : "";
    }

    /// <summary>
    /// Applies <paramref name="pattern"/> (case-insensitive, multiline) to <paramref name="headers"/>
    /// and returns the text captured by its <c>val</c> group.
    /// </summary>
    /// <param name="headers">Raw header block; <c>null</c> or empty yields an empty result.</param>
    /// <param name="pattern">A regular expression containing a named group <c>val</c>.</param>
    /// <returns>The captured value without surrounding whitespace, or an empty string when nothing matches.</returns>
    public static string MatchHeader(string headers, string pattern)
    {
        if (string.IsNullOrEmpty(headers)) return string.Empty;

        var match = Regex.Match(
            headers,
            pattern,
            RegexOptions.IgnoreCase | RegexOptions.Multiline);

        // In multiline mode '$' matches before '\n', so on CRLF-terminated headers a
        // greedy ".+" captures the '\r' as well; trim it off.
        return match.Success ? match.Groups["val"].Value.Trim() : string.Empty;
    }
}
|
||||
25
EmailSearch/SpamDetection/SpamAnalysisResult.cs
Normal file
25
EmailSearch/SpamDetection/SpamAnalysisResult.cs
Normal file
@@ -0,0 +1,25 @@
|
||||
namespace EmailSearch.SpamDetection;
|
||||
|
||||
/// <summary>
/// Outcome of analysing one message: the computed scores, the spam verdict,
/// the extracted features and the human-readable warnings.
/// </summary>
public sealed class SpamAnalysisResult
{
    /// <summary>Score produced by the rule-based checks.</summary>
    public double RuleBasedScore { get; set; }

    /// <summary>Score used for the verdict and for <see cref="SpamLikelihood"/>.</summary>
    public double FinalScore { get; set; }

    /// <summary>Whether the message was classified as spam.</summary>
    public bool PredictedSpam { get; set; }

    /// <summary>The features the scores were derived from, when available.</summary>
    public SpamFeatures? Features { get; set; }

    /// <summary>Descriptions of the suspicious traits that were detected.</summary>
    public List<string> RedFlags { get; set; } = new List<string>();

    /// <summary>
    /// Maps <see cref="FinalScore"/> onto a coarse, human-readable category.
    /// </summary>
    public string SpamLikelihood
    {
        get
        {
            var score = FinalScore;

            if (score >= 0.9) return "Very High";
            if (score >= 0.7) return "High";
            if (score >= 0.5) return "Medium";
            if (score >= 0.3) return "Low";

            return "Very Low";
        }
    }
}
|
||||
686
EmailSearch/SpamDetection/SpamDetector.cs
Normal file
686
EmailSearch/SpamDetection/SpamDetector.cs
Normal file
@@ -0,0 +1,686 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using NetOffice.OutlookApi;
|
||||
using NetOffice.OutlookApi.Enums;
|
||||
|
||||
namespace EmailSearch.SpamDetection;
|
||||
|
||||
public sealed class SpamDetector
|
||||
{
|
||||
// Weights, keyword lists and domain tables consulted by the checks.
private readonly SpamDetectorConfig _config;

// Blocked sender addresses and domains, as returned by LoadBlocklist.
private readonly HashSet<string> _blocklistEmails;
private readonly HashSet<string> _blocklistDomains;

/// <summary>
/// Creates a detector without an explicit configuration; see
/// <see cref="SpamDetector(SpamDetectorConfig?)"/> for how one is chosen.
/// </summary>
public SpamDetector()
    : this(null)
{
}

/// <summary>
/// Creates a detector. When <paramref name="config"/> is <c>null</c> the configuration file
/// is tried first and the built-in defaults are used as a last resort.
/// </summary>
/// <param name="config">Configuration to use, or <c>null</c> to load one.</param>
public SpamDetector(SpamDetectorConfig? config)
{
    var effectiveConfig = config;
    if (effectiveConfig is null)
        effectiveConfig = LoadConfiguration();
    if (effectiveConfig is null)
        effectiveConfig = SpamDetectorConfig.GetDefault();
    _config = effectiveConfig;

    var blocklist = LoadBlocklist();
    _blocklistEmails = blocklist.emails;
    _blocklistDomains = blocklist.domains;
}
|
||||
|
||||
/// <summary>
/// Reads <c>SpamDetectorConfig.json</c> from the application base directory.
/// </summary>
/// <returns>
/// The deserialized configuration, or <c>null</c> when the file does not exist or cannot be
/// read or parsed (the caller then falls back to another configuration).
/// </returns>
private static SpamDetectorConfig? LoadConfiguration()
{
    try
    {
        var configPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "SpamDetectorConfig.json");
        if (!File.Exists(configPath))
            return null;

        var json = File.ReadAllText(configPath);
        return JsonSerializer.Deserialize<SpamDetectorConfig>(json, new JsonSerializerOptions
        {
            PropertyNameCaseInsensitive = true
        });
    }
    catch (System.Exception ex)
    {
        // Loading stays best-effort, but a broken configuration file is no longer
        // ignored without a trace: it would otherwise silently change every score.
        System.Diagnostics.Trace.TraceWarning(
            "SpamDetector: failed to load SpamDetectorConfig.json: {0}", ex.Message);
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Reads <c>BlockList.txt</c> from the application base directory.
/// Lines starting with <c>@</c> are blocked domains; other lines containing <c>@</c> are
/// blocked addresses; everything else is ignored.
/// </summary>
/// <returns>
/// Case-insensitive sets of blocked addresses and domains; both are empty when the file is
/// missing or cannot be read.
/// </returns>
private static (HashSet<string> emails, HashSet<string> domains) LoadBlocklist()
{
    var emails = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var domains = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    try
    {
        var blocklistPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "BlockList.txt");
        if (!File.Exists(blocklistPath))
            return (emails, domains);

        // The file is decoded as UTF-16 ("Unicode"), as in the original implementation.
        var lines = File.ReadAllLines(blocklistPath, System.Text.Encoding.Unicode);

        foreach (var line in lines)
        {
            var entry = line.Trim();
            if (entry.Length == 0)
                continue;

            if (entry[0] == '@')
            {
                // A lone "@" must not register the empty domain: senders whose domain
                // cannot be determined have an empty domain and would all be blocklisted.
                var domain = entry.Substring(1).Trim();
                if (domain.Length > 0)
                    domains.Add(domain.ToLowerInvariant());
            }
            else if (entry.Contains('@'))
            {
                emails.Add(entry.ToLowerInvariant());
            }
        }
    }
    catch (System.Exception ex)
    {
        // Best effort: an unreadable blocklist disables blocklisting but is reported.
        System.Diagnostics.Trace.TraceWarning(
            "SpamDetector: failed to load BlockList.txt: {0}", ex.Message);
    }

    return (emails, domains);
}
|
||||
|
||||
/// <summary>
/// Runs all rule-based checks against <paramref name="mailItem"/>.
/// </summary>
/// <param name="mailItem">The message to analyse.</param>
/// <returns>The scores, the verdict, the extracted features and the list of red flags.</returns>
public SpamAnalysisResult Analyze(MailItem mailItem)
{
    // Scores at or above this value are reported as spam.
    const double spamThreshold = 0.7;

    SpamFeatures extracted = BuildFeatures(mailItem);
    double ruleScore = CalculateScore(extracted);
    List<string> warnings = GetRedFlags(extracted);

    var result = new SpamAnalysisResult();
    result.RuleBasedScore = ruleScore;
    result.FinalScore = ruleScore;
    result.PredictedSpam = ruleScore >= spamThreshold;
    result.Features = extracted;
    result.RedFlags = warnings;
    return result;
}
|
||||
|
||||
// Matches an <img> tag whose width and height attributes are both 1 (optionally quoted,
// optionally followed by a unit). The look-aheads stop "1" from matching the first digit
// of larger sizes such as width="100" height="120".
private static readonly Regex TrackingPixelPattern = new(
    @"<img[^>]+(width=['""]?1(?![\d.])['""]?[^>]*height=['""]?1(?![\d.])['""]?|height=['""]?1(?![\d.])['""]?[^>]*width=['""]?1(?![\d.])['""]?)",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Extracts every feature used for scoring from <paramref name="m"/>: sender identity,
/// authentication results, links, attachments, content heuristics and the advanced patterns.
/// </summary>
/// <param name="m">The message to inspect.</param>
/// <returns>A populated <see cref="SpamFeatures"/> instance.</returns>
private SpamFeatures BuildFeatures(MailItem m)
{
    var f = new SpamFeatures();

    // Sender/display
    f.DisplayName = m.SenderName ?? "";
    f.FromAddress = GetSmtpAddress(m);
    f.FromDomain = DomainOf(f.FromAddress);

    // Subject/body/headers - each is read from the mail item once and reused below.
    var subject = m.Subject ?? "";
    var headers = GetInternetHeaders(m);
    var bodyPreview = (m.Body ?? "").Trim();
    var html = m.HTMLBody ?? "";
    var subjectAndBody = subject + " " + bodyPreview;

    // Auth results
    f.SpfFail = Contains(headers, "spf=fail");
    f.DkimFail = Contains(headers, "dkim=fail");
    f.DmarcFail = Contains(headers, "dmarc=fail");

    // Reply-To mismatch
    var replyTo = FeatureExtractors.MatchHeader(headers, @"(?im)^\s*Reply-To:\s*(?<val>.+)$");
    var replyToAddr = FeatureExtractors.ExtractFirstEmail(replyTo);
    f.ReplyToDomainMismatch = !string.IsNullOrEmpty(replyToAddr) &&
        !string.Equals(DomainOf(replyToAddr), f.FromDomain, StringComparison.OrdinalIgnoreCase);

    // Display name impersonation
    f.DisplayImpersonation = LooksLikeVendorName(f.DisplayName) && !IsKnownVendorDomain(f.FromDomain);

    // Subject domain impersonation
    f.SubjectDomainImpersonation = CheckSubjectDomainImpersonation(subject, f.FromDomain);

    // URLs (taken from the HTML body when there is one, otherwise from the plain text)
    var urls = FeatureExtractors.ExtractUrls(html.Length > 0 ? html : bodyPreview);
    f.UrlCount = urls.Count;
    f.HasUrl = f.UrlCount > 0;
    f.HasIpLink = urls.Any(u => UrlAnalyzer.IsIpUrl(u));
    f.UsesShortener = urls.Any(u => UrlAnalyzer.IsShortener(u));

    // Suspicious TLDs. The sender domain is compared by suffix so that multi-label
    // entries such as "com.br" can match; a single-label comparison never could.
    var senderDomain = f.FromDomain.TrimEnd('.');
    var senderHasBadTld = _config.BadTlds.Any(t =>
        !string.IsNullOrWhiteSpace(t) &&
        (senderDomain.Equals(t, StringComparison.OrdinalIgnoreCase) ||
         senderDomain.EndsWith("." + t.TrimStart('.'), StringComparison.OrdinalIgnoreCase)));
    f.SuspiciousTld = senderHasBadTld || urls.Any(url => HasSuspiciousTld(url));

    // Free-mail domains
    f.FreeMailboxDomain = _config.FreeDomains.Contains(f.FromDomain, StringComparer.OrdinalIgnoreCase);

    // Unknown domain
    f.UnknownDomain = !string.IsNullOrEmpty(f.FromDomain) &&
        !_config.Domains.Vendors.ContainsKey(f.FromDomain) &&
        !_config.Domains.Trusted.Any(d => f.FromDomain.Equals(d, StringComparison.OrdinalIgnoreCase));

    // Blocklist
    f.IsBlocklisted = IsBlocklisted(f.FromAddress, f.FromDomain);

    // Tracking pixel
    f.HasTrackingPixel = TrackingPixelPattern.IsMatch(html);

    // Attachments
    f.HasAttachment = m.Attachments?.Count > 0;
    f.AttachmentRiskScore = AttachmentAnalyzer.GetAttachmentRiskScore(m);
    f.HasRiskyAttachment = f.AttachmentRiskScore >= 0.5;

    // Keyword bait
    var lower = subjectAndBody.ToLowerInvariant();
    f.KeywordBait = _config.BaitKeywords.Any(k => lower.Contains(k, StringComparison.OrdinalIgnoreCase));

    // Placeholder text
    f.HasPlaceholderText = HasPlaceholderText(subjectAndBody);

    // Generic sender
    f.GenericSenderName = IsGenericSender(f.DisplayName, f.FromAddress);

    // Single link with minimal text
    var isMinimal = IsMinimalContent(bodyPreview, html);
    f.SingleLinkOnly = f.UrlCount == 1 && bodyPreview.Length < 2000 && isMinimal;

    // Unicode lookalikes
    f.UnicodeLookalike = HasHomoglyphs(f.FromDomain);

    // Reputation
    f.SenderReputation = _config.Domains.Vendors.TryGetValue(f.FromDomain, out var vendorInfo)
        ? vendorInfo.Reputation
        : 0;

    // List-Unsubscribe header
    f.HasListUnsub = Contains(headers, "List-Unsubscribe:");

    // Advanced patterns
    f.CompanySubdomainSpoof = CheckCompanySubdomainSpoof(f.FromDomain);
    f.FakeQuarantineReport = CheckFakeQuarantineReport(subject, bodyPreview, f.FromDomain);
    f.HasZeroWidthChars = HasZeroWidthCharacters(subject + " " + f.DisplayName + " " + bodyPreview);
    f.HasRandomRefId = HasRandomReferenceId(subject);
    f.HasTimestampInSubject = HasTimestampInSubject(subject);
    f.ColdEmailSolicitation = CheckColdEmailSolicitation(subject, bodyPreview);
    f.FakeVoicemailNotification = CheckFakeVoicemailNotification(subject, bodyPreview, f.FromDomain);
    f.FakeSystemNotification = CheckFakeSystemNotification(subject, bodyPreview, f.FromDomain);

    return f;
}
|
||||
|
||||
/// <summary>
/// Sums the weights of every feature that fired and limits the total to the range 0..1.
/// </summary>
/// <param name="f">The features extracted from a message.</param>
/// <returns>The rule-based spam score between 0 and 1.</returns>
private double CalculateScore(SpamFeatures f)
{
    var weights = _config.SpamScoreWeights;
    double total = 0;

    // Adds 'amount' to the running total when 'fired' is true.
    // Contributions are applied in a fixed order so the floating-point sum is reproducible.
    void AddIf(bool fired, double amount)
    {
        if (fired)
            total += amount;
    }

    // Auth & identity
    AddIf(f.SpfFail, weights.SpfFail);
    AddIf(f.DkimFail, weights.DkimFail);
    AddIf(f.DmarcFail, weights.DmarcFail);
    AddIf(f.ReplyToDomainMismatch, weights.ReplyToDomainMismatch);
    AddIf(f.DisplayImpersonation, weights.DisplayImpersonation);
    AddIf(f.UnicodeLookalike, weights.UnicodeLookalike);
    AddIf(f.GenericSenderName, 0.18);
    AddIf(f.SubjectDomainImpersonation, 0.35);
    AddIf(f.IsBlocklisted, 0.95);

    // Content/links - the per-URL part is capped at 0.10
    AddIf(f.HasUrl, weights.HasUrl + Math.Min(0.10, f.UrlCount * weights.UrlCountMultiplier));
    AddIf(f.HasIpLink, weights.HasIpLink);
    AddIf(f.UsesShortener, weights.UsesShortener);
    AddIf(f.SuspiciousTld, weights.SuspiciousTld);
    AddIf(f.HasTrackingPixel, weights.HasTrackingPixel);

    // Attachments & bait - the risky-attachment weight scales with the attachment risk
    AddIf(f.HasAttachment, weights.HasAttachment);
    total += f.AttachmentRiskScore * weights.HasRiskyAttachment;
    AddIf(f.KeywordBait, weights.KeywordBait);
    AddIf(f.HasPlaceholderText, 0.30);
    AddIf(f.SingleLinkOnly, 0.25);

    // Unknown domain, with an extra penalty when combined with bait or a shortener
    AddIf(f.UnknownDomain, weights.UnknownDomain);
    AddIf(f.UnknownDomain && (f.KeywordBait || f.UsesShortener), 0.15);

    // Freemail
    if (f.FreeMailboxDomain)
        total += f.HasUrl ? weights.FreeMailboxWithUrl : weights.FreeMailboxOnly;

    // Reputation - limited to +/- 0.25
    total += Math.Clamp(-weights.ReputationMultiplier * f.SenderReputation, -0.25, 0.25);

    // Legitimacy signals
    AddIf(f.HasListUnsub, weights.HasListUnsubscribe);

    // Advanced patterns
    AddIf(f.CompanySubdomainSpoof, weights.CompanySubdomainSpoof);
    AddIf(f.FakeQuarantineReport, weights.FakeQuarantineReport);
    AddIf(f.HasZeroWidthChars, weights.HasZeroWidthChars);
    AddIf(f.HasRandomRefId, weights.HasRandomRefId);
    AddIf(f.HasTimestampInSubject, weights.HasTimestampInSubject);
    AddIf(f.ColdEmailSolicitation, weights.ColdEmailSolicitation);
    AddIf(f.FakeVoicemailNotification, weights.FakeVoicemailNotification);
    AddIf(f.FakeSystemNotification, weights.FakeSystemNotification);

    var cappedAtOne = Math.Min(1, total);
    return Math.Max(0, cappedAtOne);
}
|
||||
|
||||
/// <summary>
/// Builds the human-readable warnings for every feature that fired.
/// </summary>
/// <param name="f">The features extracted from a message.</param>
/// <returns>The warnings, in a fixed order; empty when nothing fired.</returns>
private List<string> GetRedFlags(SpamFeatures f)
{
    // One row per warning: the condition and the text shown when it holds.
    var checks = new (bool Raised, string Message)[]
    {
        (f.IsBlocklisted, "Sender is blocklisted"),
        (f.SpfFail, "SPF authentication failed"),
        (f.DkimFail, "DKIM authentication failed"),
        (f.DmarcFail, "DMARC authentication failed"),
        (f.ReplyToDomainMismatch, "Reply-To domain doesn't match sender"),
        (f.DisplayImpersonation, "Display name may impersonate known vendor"),
        (f.SubjectDomainImpersonation, "Subject mentions known domain but sender differs"),
        (f.UnicodeLookalike, "Domain contains suspicious Unicode characters"),
        (f.GenericSenderName, "Generic/automated sender name"),
        (f.HasIpLink, "Contains IP address-based URL"),
        (f.UsesShortener, "Uses URL shortener service"),
        (f.SuspiciousTld, "Suspicious top-level domain"),
        (f.HasRiskyAttachment, $"Risky attachment type (risk: {f.AttachmentRiskScore:P0})"),
        (f.KeywordBait, "Contains spam/phishing keywords"),
        (f.HasPlaceholderText, "Contains placeholder/merge field text"),
        (f.SingleLinkOnly, "Minimal content with single link"),
        (f.CompanySubdomainSpoof, "Subdomain spoofing detected"),
        (f.FakeQuarantineReport, "Fake quarantine/spam report"),
        (f.HasZeroWidthChars, "Contains zero-width characters (filter evasion)"),
        (f.HasRandomRefId, "Random reference ID in subject"),
        (f.HasTimestampInSubject, "Automated timestamp in subject"),
        (f.ColdEmailSolicitation, "Cold email solicitation"),
        (f.FakeVoicemailNotification, "Fake voicemail notification"),
        (f.FakeSystemNotification, "Fake system notification"),
        (f.FreeMailboxDomain && f.HasUrl, "Free email with links (potential phishing)"),
    };

    return checks
        .Where(check => check.Raised)
        .Select(check => check.Message)
        .ToList();
}
|
||||
|
||||
// ---- Helper Methods ----
|
||||
|
||||
/// <summary>
/// Resolves the sender's SMTP address. SMTP senders are read directly; Exchange senders are
/// resolved through the SMTP-address property and then the Exchange user; as a last resort the
/// raw sender address is used, replaced by the From header when it is an "/O=" style address.
/// </summary>
/// <param name="m">The message whose sender is resolved.</param>
/// <returns>The trimmed address, or an empty string when none is available.</returns>
private static string GetSmtpAddress(MailItem m)
{
    const string SmtpAddressProperty = "http://schemas.microsoft.com/mapi/proptag/0x39FE001E";

    try
    {
        var sender = m.Sender;
        if (sender != null)
        {
            var senderType = sender.AddressEntryUserType;

            if (senderType == OlAddressEntryUserType.olSmtpAddressEntry)
                return Safe(sender.Address);

            var isExchangeSender =
                senderType == OlAddressEntryUserType.olExchangeUserAddressEntry ||
                senderType == OlAddressEntryUserType.olExchangeRemoteUserAddressEntry;

            if (isExchangeSender)
            {
                // First attempt: the SMTP-address property on the address entry.
                string? fromProperty = null;
                try
                {
                    if (sender.PropertyAccessor.GetProperty(SmtpAddressProperty) is string raw &&
                        !string.IsNullOrEmpty(raw))
                    {
                        fromProperty = raw.Trim();
                    }
                }
                catch { }

                if (fromProperty != null)
                    return fromProperty;

                // Second attempt: the primary SMTP address of the Exchange user.
                string? fromExchangeUser = null;
                try
                {
                    var exchangeUser = sender.GetExchangeUser();
                    if (exchangeUser != null)
                    {
                        var primary = exchangeUser.PrimarySmtpAddress;
                        if (!string.IsNullOrEmpty(primary))
                            fromExchangeUser = primary.Trim();
                    }
                }
                catch { }

                if (fromExchangeUser != null)
                    return fromExchangeUser;
            }
        }

        var rawSenderAddress = m.SenderEmailAddress ?? "";

        // "/O=..." addresses are not SMTP addresses; try the From header instead.
        if (rawSenderAddress.StartsWith("/O=", StringComparison.OrdinalIgnoreCase))
        {
            var fromLine = FeatureExtractors.MatchHeader(
                GetInternetHeaders(m),
                @"(?im)^\s*From:\s*(?<val>.+)$");
            var headerAddress = FeatureExtractors.ExtractFirstEmail(fromLine);
            if (!string.IsNullOrEmpty(headerAddress))
                return headerAddress;
        }

        return Safe(rawSenderAddress);
    }
    catch
    {
        return Safe(m.SenderEmailAddress);
    }
}
|
||||
|
||||
/// <summary>
/// Reads the message's header block through its property accessor (proptag 0x007D001E).
/// </summary>
/// <param name="m">The message to read.</param>
/// <returns>The header text, or an empty string when the property is missing, not a string, or the read fails.</returns>
private static string GetInternetHeaders(MailItem m)
{
    const string HeadersProperty = "http://schemas.microsoft.com/mapi/proptag/0x007D001E";

    try
    {
        return m.PropertyAccessor.GetProperty(HeadersProperty) as string ?? "";
    }
    catch
    {
        return "";
    }
}
|
||||
|
||||
// Trims a possibly-null string; null becomes the empty string.
private static string Safe(string? s)
{
    return s is null ? "" : s.Trim();
}
|
||||
// Case-insensitive (ordinal) substring test; a null haystack never contains anything.
private static bool Contains(string hay, string needle)
{
    if (hay is null)
        return false;

    return hay.Contains(needle, StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Returns the lower-cased domain part of an e-mail address.
/// </summary>
/// <param name="email">The address; <c>null</c> or empty is allowed.</param>
/// <returns>The text after the last <c>@</c>, trimmed and lower-cased; empty when there is no <c>@</c>.</returns>
private static string DomainOf(string email)
{
    if (string.IsNullOrEmpty(email))
        return "";

    // The domain follows the LAST '@': a quoted local part may itself contain '@',
    // and "user@trusted.com@evil.example" must resolve to evil.example.
    var at = email.LastIndexOf('@');
    if (at < 0)
        return "";

    return email[(at + 1)..].Trim().ToLowerInvariant();
}
|
||||
|
||||
/// <summary>
/// Returns the last label of a domain name.
/// </summary>
/// <param name="domain">A host or domain name; <c>null</c> or empty is allowed.</param>
/// <returns>The text after the last dot, or the whole name when it contains no dot.</returns>
private static string TldOf(string domain)
{
    if (string.IsNullOrEmpty(domain))
        return "";

    // A fully-qualified name may end with the root dot ("example.com.");
    // without trimming it the result would be empty.
    var host = domain.TrimEnd('.');
    var dot = host.LastIndexOf('.');
    return dot >= 0 ? host[(dot + 1)..] : host;
}
|
||||
|
||||
/// <summary>
/// Tests whether a display name matches any configured vendor display-name pattern.
/// The patterns are treated as case-insensitive regular expressions.
/// </summary>
/// <param name="name">The sender's display name.</param>
/// <returns><c>true</c> when at least one pattern matches; <c>false</c> for an empty name or when no pattern matches.</returns>
private bool LooksLikeVendorName(string name)
{
    if (string.IsNullOrEmpty(name))
        return false;

    var patterns = _config.Domains.Vendors.Values
        .SelectMany(v => v.DisplayNamePatterns)
        .Where(p => !string.IsNullOrEmpty(p));

    // The display name is sender-controlled, so matching is bounded in time.
    var matchTimeout = System.TimeSpan.FromMilliseconds(250);

    // Each pattern is evaluated on its own: a single malformed entry in the
    // configuration must not make every analysis throw.
    foreach (var pattern in patterns)
    {
        try
        {
            if (Regex.IsMatch(name, pattern, RegexOptions.IgnoreCase, matchTimeout))
                return true;
        }
        catch (System.ArgumentException)
        {
            // Invalid regular expression in the configuration - skip it.
        }
        catch (RegexMatchTimeoutException)
        {
            // Pathological pattern/input combination - treat as "no match".
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Tests whether <paramref name="domain"/> is one of the configured vendor domains.
/// </summary>
/// <param name="domain">The sender domain.</param>
/// <returns><c>true</c> when the domain is a vendor key, ignoring case; <c>false</c> for an empty domain.</returns>
private bool IsKnownVendorDomain(string domain)
{
    if (string.IsNullOrEmpty(domain))
        return false;

    var vendors = _config.Domains.Vendors;

    // Exact lookup first; the scan covers keys written with different casing,
    // matching the case-insensitive vendor comparisons used by the other checks.
    return vendors.ContainsKey(domain) ||
           vendors.Keys.Any(key => string.Equals(key, domain, StringComparison.OrdinalIgnoreCase));
}
|
||||
|
||||
/// <summary>
/// Tests whether a domain contains non-ASCII characters, either literally or in
/// Punycode form (a label starting with <c>xn--</c>).
/// </summary>
/// <param name="domain">The sender domain; <c>null</c> or empty is allowed.</param>
/// <returns><c>true</c> when the domain carries non-ASCII characters.</returns>
private static bool HasHomoglyphs(string domain)
{
    if (string.IsNullOrEmpty(domain))
        return false;

    if (domain.Any(ch => ch > 127))
        return true;

    // Internationalized names are often written as ASCII "xn--" labels,
    // which the character test above cannot see.
    return domain
        .Split('.')
        .Any(label => label.StartsWith("xn--", StringComparison.OrdinalIgnoreCase));
}
|
||||
|
||||
/// <summary>
/// Decides whether a message has very little visible text.
/// Without HTML the plain text must be shorter than 200 characters; with HTML the visible
/// text must contain fewer than 50 words of two or more characters.
/// </summary>
/// <param name="bodyText">The plain-text body.</param>
/// <param name="html">The HTML body; may be empty.</param>
/// <returns><c>true</c> when the content is minimal.</returns>
private static bool IsMinimalContent(string bodyText, string html)
{
    bodyText ??= "";

    if (string.IsNullOrWhiteSpace(html))
        return bodyText.Length < 200;

    try
    {
        // Remove regions that are never shown to the reader before counting words.
        // Otherwise style sheets, scripts and comments inflate the word count and
        // HTML mails are almost never considered "minimal".
        var visibleHtml = Regex.Replace(
            html,
            @"<!--.*?-->|<(style|script|head|title)\b[^>]*>.*?</\1\s*>",
            " ",
            RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Strip the remaining tags, decode entities and normalise whitespace.
        var plainText = Regex.Replace(visibleHtml, "<[^>]+>", " ");
        plainText = System.Net.WebUtility.HtmlDecode(plainText);
        plainText = Regex.Replace(plainText, @"\s+", " ").Trim();

        var wordCount = plainText
            .Split(new[] { ' ', '\t', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries)
            .Count(w => w.Length > 1);

        return wordCount < 50;
    }
    catch (System.Exception)
    {
        // Fall back to the plain-text rule if the HTML cannot be processed.
        return bodyText.Length < 200;
    }
}
|
||||
|
||||
/// <summary>
/// Tests whether the display name or the address contains a typical automated or role
/// sender term such as "noreply", "support" or "voicemail".
/// </summary>
/// <param name="displayName">The sender's display name.</param>
/// <param name="fromAddress">The sender's address.</param>
/// <returns><c>true</c> when either value contains one of the terms, ignoring case.</returns>
private static bool IsGenericSender(string displayName, string fromAddress)
{
    string[] genericTerms =
    {
        "noreply", "no-reply", "no.reply", "donotreply", "do-not-reply",
        "notification", "notify", "alert", "system", "admin", "administrator",
        "support", "helpdesk", "help.desk", "technical support", "tech support",
        "voice message", "voicemail", "fax", "scanner", "document center",
        "storage center", "help center", "incident", "ticket"
    };

    var nameLower = displayName.ToLowerInvariant();
    var addressLower = fromAddress.ToLowerInvariant();

    foreach (var term in genericTerms)
    {
        if (nameLower.Contains(term, StringComparison.OrdinalIgnoreCase))
            return true;

        if (addressLower.Contains(term, StringComparison.OrdinalIgnoreCase))
            return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Tests whether the host of <paramref name="url"/> ends in one of the configured bad TLDs.
/// </summary>
/// <param name="url">An absolute URL.</param>
/// <returns><c>true</c> when the host matches a bad TLD; <c>false</c> otherwise or when the URL cannot be parsed.</returns>
private bool HasSuspiciousTld(string url)
{
    if (!System.Uri.TryCreate(url, System.UriKind.Absolute, out var uri))
        return false;

    // Ignore a trailing root dot ("example.xyz.").
    var host = uri.Host.TrimEnd('.');
    if (host.Length == 0)
        return false;

    // Suffix comparison on a label boundary, so that multi-label entries such as
    // "com.br" can match; comparing only the last label never could.
    return _config.BadTlds.Any(tld =>
        !string.IsNullOrWhiteSpace(tld) &&
        (host.Equals(tld, StringComparison.OrdinalIgnoreCase) ||
         host.EndsWith("." + tld.TrimStart('.'), StringComparison.OrdinalIgnoreCase)));
}
|
||||
|
||||
// Words that typically appear inside an unfilled mail-merge placeholder.
private const string PlaceholderKeywordGroup =
    "(?:email|name|user|recipient|customer|client|address|company|account|localpart|domain)";

private const RegexOptions PlaceholderRegexOptions =
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

// One pattern per placeholder syntax. The text between the delimiters is limited in length
// and may not contain the delimiter itself, so a match cannot stretch across unrelated text.
private static readonly Regex[] PlaceholderPatterns =
{
    // [Name], [Your Email]
    new Regex(@"\[[^\[\]\r\n]{0,40}" + PlaceholderKeywordGroup + @"[^\[\]\r\n]{0,40}\]", PlaceholderRegexOptions),

    // {name}, {{first_name}}, ${user.email}
    new Regex(@"\{[^{}\r\n]{0,40}" + PlaceholderKeywordGroup + @"[^{}\r\n]{0,40}\}", PlaceholderRegexOptions),

    // <Customer Name> - but not <https://...> or <bob@example.com>, which are common in plain text
    new Regex(@"<[^<>\r\n:/@]{0,40}" + PlaceholderKeywordGroup + @"[^<>\r\n:/@]{0,40}>", PlaceholderRegexOptions),

    // %EMAIL%, %first_name% - a single token, so that "50% off your account ... 20%" does not match
    new Regex(@"%[A-Za-z0-9_.\-]{0,30}" + PlaceholderKeywordGroup + @"[A-Za-z0-9_.\-]{0,30}%", PlaceholderRegexOptions),
};

/// <summary>
/// Detects unfilled template placeholders such as <c>[Name]</c>, <c>{{email}}</c>,
/// <c>${user}</c>, <c>&lt;Customer Name&gt;</c> or <c>%EMAIL%</c>.
/// </summary>
/// <param name="text">Subject and body text; <c>null</c> or blank yields <c>false</c>.</param>
/// <returns><c>true</c> when at least one placeholder is found.</returns>
private static bool HasPlaceholderText(string text)
{
    if (string.IsNullOrWhiteSpace(text))
        return false;

    foreach (var pattern in PlaceholderPatterns)
    {
        if (pattern.IsMatch(text))
            return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Tests whether the sender is on the blocklist, either by full address or by domain.
/// </summary>
/// <param name="fromAddress">The sender's address.</param>
/// <param name="fromDomain">The sender's domain.</param>
/// <returns><c>true</c> when the address or the domain is blocklisted.</returns>
private bool IsBlocklisted(string fromAddress, string fromDomain)
{
    var addressKey = fromAddress.ToLowerInvariant();
    if (_blocklistEmails.Contains(addressKey))
        return true;

    var domainKey = fromDomain.ToLowerInvariant();
    return _blocklistDomains.Contains(domainKey);
}
|
||||
|
||||
/// <summary>
/// Detects subjects that mention a known vendor or trusted domain although the message
/// was not sent from that domain.
/// </summary>
/// <param name="subject">The message subject.</param>
/// <param name="fromDomain">The sender's domain.</param>
/// <returns><c>true</c> when the subject names a known domain the sender does not belong to.</returns>
private bool CheckSubjectDomainImpersonation(string subject, string fromDomain)
{
    if (string.IsNullOrWhiteSpace(subject))
        return false;

    fromDomain ??= "";
    var subjectLower = subject.ToLowerInvariant();

    // Vendor domains first, then trusted domains - both are checked the same way.
    var knownDomains = _config.Domains.Vendors.Keys.Concat(_config.Domains.Trusted);

    foreach (var knownDomain in knownDomains)
    {
        // An empty entry would produce a pattern that matches every subject.
        if (string.IsNullOrWhiteSpace(knownDomain))
            continue;

        // The sender legitimately belongs to this domain, either directly or through
        // a subdomain such as mail.vendor.com.
        if (fromDomain.Equals(knownDomain, StringComparison.OrdinalIgnoreCase) ||
            fromDomain.EndsWith("." + knownDomain, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        var pattern = $@"\b{Regex.Escape(knownDomain)}\b";
        if (Regex.IsMatch(subjectLower, pattern, RegexOptions.IgnoreCase))
            return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Detects sender domains whose first label imitates a vendor name
/// (for example <c>vendor.attacker.net</c>), and senders under a known fake service domain.
/// Only domains with at least three labels are considered.
/// </summary>
/// <param name="fromDomain">The sender's domain.</param>
/// <returns><c>true</c> when the domain looks like a subdomain spoof.</returns>
private bool CheckCompanySubdomainSpoof(string fromDomain)
{
    if (string.IsNullOrEmpty(fromDomain))
        return false;

    var parts = fromDomain.Split('.');
    if (parts.Length < 3)
        return false;

    var subdomain = parts[0];

    foreach (var vendorDomain in _config.Domains.Vendors.Keys)
    {
        if (string.IsNullOrWhiteSpace(vendorDomain))
            continue;

        // A host that really is the vendor, or sits under the vendor's domain, is not a spoof.
        if (fromDomain.Equals(vendorDomain, StringComparison.OrdinalIgnoreCase) ||
            fromDomain.EndsWith("." + vendorDomain, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // An empty first label would be "contained" in every subdomain.
        var vendorName = vendorDomain.Split('.')[0];
        if (vendorName.Length == 0)
            continue;

        if (subdomain.Contains(vendorName, StringComparison.OrdinalIgnoreCase))
            return true;
    }

    var fakeServiceDomains = _config.FakeServiceDomains.Count > 0
        ? _config.FakeServiceDomains
        : GetDefaultFakeServiceDomains();

    // Everything after the first label.
    var baseDomain = string.Join(".", parts.Skip(1));
    return fakeServiceDomains.Any(fsd => baseDomain.EndsWith(fsd, StringComparison.OrdinalIgnoreCase));
}
|
||||
|
||||
/// <summary>
/// Built-in list of fake service domains, used when the configuration provides none.
/// </summary>
/// <returns>A new list on every call.</returns>
private static List<string> GetDefaultFakeServiceDomains()
{
    var defaults = new List<string>();

    // Voice / audio themed
    defaults.Add("voiceservicing.net");
    defaults.Add("audios.net");
    defaults.Add("voicemail.net");
    defaults.Add("audioservices.net");

    // Mail themed
    defaults.Add("mailservicing.net");
    defaults.Add("emailservicing.net");
    defaults.Add("securemail.net");
    defaults.Add("mailprotect.net");

    return defaults;
}
|
||||
|
||||
/// <summary>
/// Detects quarantine or spam-report notifications that do not come from a recognised
/// mail-security provider or a configured vendor domain.
/// </summary>
/// <param name="subject">The message subject.</param>
/// <param name="body">The plain-text body.</param>
/// <param name="fromDomain">The sender's domain.</param>
/// <returns><c>true</c> when quarantine wording is present and the sender is not recognised.</returns>
private bool CheckFakeQuarantineReport(string subject, string body, string fromDomain)
{
    var text = (subject + " " + body).ToLowerInvariant();

    var quarantineKeywords = _config.QuarantineKeywords.Count > 0
        ? _config.QuarantineKeywords
        : new List<string> { "quarantine summary", "spam report", "quarantine folder", "email quarantine" };

    if (!quarantineKeywords.Any(k => text.Contains(k, StringComparison.OrdinalIgnoreCase)))
        return false;

    var legitimateQuarantineDomains = new[]
    {
        "microsoft.com", "office365.com", "mimecast.com", "proofpoint.com",
        "barracuda.com", "sophos.com", "fortinet.com", "cisco.com"
    };

    var sender = fromDomain ?? "";

    var isRecognisedSender =
        legitimateQuarantineDomains.Any(d => IsSameOrSubdomain(sender, d)) ||
        _config.Domains.Vendors.Keys.Any(v => sender.Equals(v, StringComparison.OrdinalIgnoreCase));

    return !isRecognisedSender;

    // True when 'domain' is 'parent' or ends with ".parent". A plain suffix test would
    // accept look-alikes such as "evilmicrosoft.com" for "microsoft.com".
    static bool IsSameOrSubdomain(string domain, string parent) =>
        domain.Equals(parent, StringComparison.OrdinalIgnoreCase) ||
        domain.EndsWith("." + parent, StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Tests whether the text contains any character from a fixed list of invisible code points
/// (zero-width spaces and joiners, direction marks, soft hyphen, fillers and similar).
/// </summary>
/// <param name="text">The text to scan; <c>null</c> or empty yields <c>false</c>.</param>
/// <returns><c>true</c> when at least one listed character is present.</returns>
private static bool HasZeroWidthCharacters(string text)
{
    if (string.IsNullOrEmpty(text))
        return false;

    foreach (var ch in text)
    {
        switch (ch)
        {
            case '\u200B':
            case '\u200C':
            case '\u200D':
            case '\u200E':
            case '\u200F':
            case '\u2060':
            case '\uFEFF':
            case '\u00AD':
            case '\u034F':
            case '\u061C':
            case '\u115F':
            case '\u1160':
            case '\u17B4':
            case '\u17B5':
            case '\u180E':
                return true;
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Tests whether the subject contains a long machine-generated looking reference,
/// e.g. <c>Ref: …</c>, <c>ID: …</c>, <c>#&lt;8 digits&gt;-…</c> or any very long alphanumeric run.
/// </summary>
/// <param name="subject">The message subject; <c>null</c> or empty yields <c>false</c>.</param>
/// <returns><c>true</c> when one of the reference patterns matches (case-insensitive).</returns>
private static bool HasRandomReferenceId(string subject)
{
    if (string.IsNullOrEmpty(subject))
        return false;

    string[] referencePatterns =
    {
        @"Ref[:#]?\s*[A-Za-z0-9]{20,}",   // "Ref" followed by 20+ alphanumerics
        @"#\d{8}[-_]?[A-Za-z0-9]{8,}",    // "#" + 8 digits + 8+ alphanumerics
        @"ID[:#]?\s*[A-Za-z0-9]{15,}",    // "ID" followed by 15+ alphanumerics
        @"[A-Za-z0-9]{25,}",              // any run of 25+ alphanumerics
        @"_[A-Za-z0-9]{20,}"              // "_" followed by 20+ alphanumerics
    };

    foreach (var referencePattern in referencePatterns)
    {
        if (Regex.IsMatch(subject, referencePattern, RegexOptions.IgnoreCase))
            return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Tests whether the subject carries a machine-style time stamp: <c>hh:mm:ss</c> in square
/// brackets or parentheses, <c>hh:mm:ss.fff</c> anywhere, or <c>&lt;hh:mm:ss.fff dd/dd/yyyy&gt;</c>.
/// </summary>
/// <param name="subject">The message subject; <c>null</c> or empty yields <c>false</c>.</param>
/// <returns><c>true</c> when one of the time-stamp patterns matches.</returns>
private static bool HasTimestampInSubject(string subject)
{
    if (string.IsNullOrEmpty(subject))
        return false;

    string[] timestampPatterns =
    {
        @"<\d{2}:\d{2}:\d{2}\.\d{3}\s+\d{2}/\d{2}/\d{4}>",
        @"\[\d{2}:\d{2}:\d{2}\]",
        @"\(\d{2}:\d{2}:\d{2}\)",
        @"\d{2}:\d{2}:\d{2}\.\d{3}"
    };

    foreach (var timestampPattern in timestampPatterns)
    {
        if (Regex.IsMatch(subject, timestampPattern))
            return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Detects unsolicited sales mail: at least two of the cold-email phrases must occur
/// in the subject or body.
/// </summary>
/// <param name="subject">The message subject.</param>
/// <param name="body">The plain-text body.</param>
/// <returns><c>true</c> when two or more phrases are found.</returns>
private bool CheckColdEmailSolicitation(string subject, string body)
{
    var haystack = (subject + " " + body).ToLowerInvariant();

    // Configured phrases take precedence; a small built-in list is the fallback.
    List<string> phrases = _config.ColdEmailKeywords;
    if (phrases.Count <= 0)
    {
        phrases = new List<string> { "seo services", "website design", "reaching out", "hope this finds you" };
    }

    var hits = 0;
    foreach (var phrase in phrases)
    {
        if (haystack.Contains(phrase, StringComparison.OrdinalIgnoreCase))
            hits++;
    }

    return hits >= 2;
}
|
||||
|
||||
/// <summary>
/// Detects voicemail or missed-call notifications that do not come from a recognised
/// telephony provider or configured vendor, or that come from a spoofed subdomain.
/// </summary>
/// <param name="subject">The message subject.</param>
/// <param name="body">The plain-text body.</param>
/// <param name="fromDomain">The sender's domain.</param>
/// <returns><c>true</c> when voicemail wording is present and the sender is unrecognised or spoofed.</returns>
private bool CheckFakeVoicemailNotification(string subject, string body, string fromDomain)
{
    var text = (subject + " " + body).ToLowerInvariant();

    var voicemailKeywords = _config.VoicemailKeywords.Count > 0
        ? _config.VoicemailKeywords
        : new List<string> { "voicemail", "voice message", "missed call" };

    if (!voicemailKeywords.Any(k => text.Contains(k, StringComparison.OrdinalIgnoreCase)))
        return false;

    var legitimateVoicemailDomains = new[]
    {
        "ringcentral.com", "vonage.com", "grasshopper.com", "dialpad.com",
        "8x8.com", "goto.com", "zoom.us", "microsoft.com", "office365.com"
    };

    var sender = fromDomain ?? "";

    var isFromLegitimate =
        legitimateVoicemailDomains.Any(d => IsSameOrSubdomain(sender, d)) ||
        _config.Domains.Vendors.Keys.Any(v => sender.Equals(v, StringComparison.OrdinalIgnoreCase));

    var isSubdomainSpoof = CheckCompanySubdomainSpoof(fromDomain);

    return !isFromLegitimate || isSubdomainSpoof;

    // True when 'domain' is 'parent' or ends with ".parent". A plain suffix test would
    // accept look-alikes such as "fakezoom.us" for "zoom.us".
    static bool IsSameOrSubdomain(string domain, string parent) =>
        domain.Equals(parent, StringComparison.OrdinalIgnoreCase) ||
        domain.EndsWith("." + parent, StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Detects account or system notifications ("verify your email", "account suspended", …)
/// that do not come from a recognised service provider or a configured vendor domain.
/// </summary>
/// <param name="subject">The message subject.</param>
/// <param name="body">The plain-text body.</param>
/// <param name="fromDomain">The sender's domain.</param>
/// <returns><c>true</c> when system-notification wording is present and the sender is not recognised.</returns>
private bool CheckFakeSystemNotification(string subject, string body, string fromDomain)
{
    var text = (subject + " " + body).ToLowerInvariant();

    var systemNotificationKeywords = _config.SystemNotificationKeywords.Count > 0
        ? _config.SystemNotificationKeywords
        : new List<string> { "verify your email", "account suspended", "storage limit" };

    if (!systemNotificationKeywords.Any(k => text.Contains(k, StringComparison.OrdinalIgnoreCase)))
        return false;

    var legitimateSystemDomains = new[]
    {
        "microsoft.com", "office365.com", "google.com", "godaddy.com",
        "intermedia.net", "hostpilot.com", "networksolutions.com", "namecheap.com",
        "cloudflare.com", "amazon.com", "aws.amazon.com"
    };

    var sender = fromDomain ?? "";

    var isRecognisedSender =
        legitimateSystemDomains.Any(d => IsSameOrSubdomain(sender, d)) ||
        _config.Domains.Vendors.Keys.Any(v => sender.Equals(v, StringComparison.OrdinalIgnoreCase));

    return !isRecognisedSender;

    // True when 'domain' is 'parent' or ends with ".parent". A plain suffix test would
    // accept look-alikes such as "notgoogle.com" for "google.com".
    static bool IsSameOrSubdomain(string domain, string parent) =>
        domain.Equals(parent, StringComparison.OrdinalIgnoreCase) ||
        domain.EndsWith("." + parent, StringComparison.OrdinalIgnoreCase);
}
|
||||
}
|
||||
140
EmailSearch/SpamDetection/SpamDetectorConfig.cs
Normal file
140
EmailSearch/SpamDetection/SpamDetectorConfig.cs
Normal file
@@ -0,0 +1,140 @@
|
||||
namespace EmailSearch.SpamDetection;
|
||||
|
||||
/// <summary>
/// Settings for the rule-based spam detector: keyword lists, domain lists and score weights.
/// </summary>
/// <remarks>
/// Every property is a mutable get/set member that starts out as an empty collection or a
/// default-constructed object. <see cref="GetDefault"/> returns an instance filled with the
/// built-in lists.
/// </remarks>
public sealed class SpamDetectorConfig
{
    /// <summary>Domains of free mailbox providers (the defaults include gmail.com, outlook.com, ...).</summary>
    public List<string> FreeDomains { get; set; } = new();

    /// <summary>Top-level-domain suffixes, written without a leading dot (for example "xyz" or "com.br").</summary>
    public List<string> BadTlds { get; set; } = new();

    /// <summary>Lower-case bait phrases; the defaults cover financial, urgency, account, notification, SEO and cold-sales wording.</summary>
    public List<string> BaitKeywords { get; set; } = new();

    /// <summary>Vendor and trusted sender domains.</summary>
    public DomainConfiguration Domains { get; set; } = new();

    /// <summary>Per-feature score weights.</summary>
    public SpamScoreWeights SpamScoreWeights { get; set; } = new();

    /// <summary>Phrases typical of quarantine/spam-summary notices.</summary>
    public List<string> QuarantineKeywords { get; set; } = new();

    /// <summary>Phrases typical of voicemail notifications.</summary>
    public List<string> VoicemailKeywords { get; set; } = new();

    /// <summary>Phrases typical of account/system notifications. Some entries are stems (for example "password expir").</summary>
    public List<string> SystemNotificationKeywords { get; set; } = new();

    /// <summary>Phrases typical of unsolicited sales outreach.</summary>
    public List<string> ColdEmailKeywords { get; set; } = new();

    /// <summary>Domains listed as fake service senders.</summary>
    public List<string> FakeServiceDomains { get; set; } = new();

    /// <summary>
    /// Builds a configuration populated with the built-in keyword lists, domain lists and
    /// default weights.
    /// </summary>
    /// <returns>
    /// A new <see cref="SpamDetectorConfig"/> on every call. <c>Domains.Vendors</c> is empty;
    /// all other collections carry the built-in entries.
    /// </returns>
    public static SpamDetectorConfig GetDefault() => new SpamDetectorConfig
    {
        FreeDomains = new List<string>
        {
            "gmail.com", "outlook.com", "hotmail.com", "yahoo.com",
            "icloud.com", "aol.com", "proton.me", "protonmail.com",
            "live.com", "msn.com", "ymail.com", "mail.com"
        },

        // "net" is intentionally absent: it is a mainstream gTLD, and SpamDetectorConfig.json
        // does not list it either, so the fallback now matches the shipped file.
        BadTlds = new List<string>
        {
            "icu", "top", "click", "xyz", "mom", "quest", "work",
            "shop", "rest", "tokyo", "pics", "zip", "com.br",
            "buzz", "cam", "link", "loan", "online", "site", "website"
        },

        BaitKeywords = new List<string>
        {
            // Financial
            "invoice", "overdue", "wire", "zelle", "gift card", "payroll",
            "remit", "ach", "payment", "past due", "bank transfer",
            // Urgency/Action
            "review & sign", "sign now", "action required", "urgent",
            "verify", "confirm your", "suspended", "expire", "limited time",
            // Account/System
            "storage limit", "storage quota", "account", "password",
            "security alert", "unusual activity", "locked",
            // Messages/Notifications
            "voice message", "voicemail", "fax", "document", "shared with you",
            // Domain/SEO spam
            "domain for sale", "premium domain", "seo", "website design",
            // Cold sales
            "setup request", "follow up", "checking in", "quick question"
        },

        Domains = new DomainConfiguration
        {
            Vendors = new Dictionary<string, VendorDomainInfo>(),
            Trusted = new List<string>
            {
                "microsoft.com", "office365.com", "google.com", "amazon.com",
                "apple.com", "github.com", "linkedin.com"
            }
        },

        SpamScoreWeights = new SpamScoreWeights(),

        QuarantineKeywords = new List<string>
        {
            "quarantine summary", "spam report", "quarantine folder",
            "email quarantine", "quarantined email", "spam summary",
            "junk summary", "blocked messages", "held messages"
        },

        VoicemailKeywords = new List<string>
        {
            "voicemail", "voice message", "voice mail", "audio message",
            "new voicemail", "play voicemail", "missed call", "phone message"
        },

        SystemNotificationKeywords = new List<string>
        {
            "verify your email", "email verification", "verify now",
            "confirm your email", "account suspended", "account locked",
            "storage limit", "storage quota", "mailbox full",
            "password expir", "credentials expir", "unusual activity",
            "security alert", "suspicious activity", "action required"
        },

        ColdEmailKeywords = new List<string>
        {
            "seo services", "seo affordable", "search engine optimization",
            "website ranking", "google ranking", "backlinks", "link building",
            "website redesign", "web development", "web developer",
            "website design", "graphic designer", "mobile app", "app development",
            "reaching out", "hope this finds you", "i came across your",
            "outsource", "offshore", "dedicated team", "cost-effective"
        },

        FakeServiceDomains = new List<string>
        {
            "voiceservicing.net", "audios.net", "voicemail.net",
            "audioservices.net", "mailservicing.net", "emailservicing.net",
            "securemail.net", "mailprotect.net", "docuservices.net"
        }
    };
}
|
||||
|
||||
/// <summary>
/// Sender-domain lists used by the spam detector configuration.
/// </summary>
public sealed class DomainConfiguration
{
    /// <summary>Per-vendor settings keyed by the vendor's domain name. Empty until populated.</summary>
    public Dictionary<string, VendorDomainInfo> Vendors { get; set; } = new Dictionary<string, VendorDomainInfo>();

    /// <summary>Domains listed as trusted. Empty until populated.</summary>
    public List<string> Trusted { get; set; } = new List<string>();
}
|
||||
|
||||
/// <summary>
/// Settings stored for one vendor domain.
/// </summary>
public sealed class VendorDomainInfo
{
    /// <summary>Integer reputation value for the vendor; 0 when not set.</summary>
    public int Reputation { get; set; }

    /// <summary>Display-name fragments associated with the vendor. Empty until populated.</summary>
    public List<string> DisplayNamePatterns { get; set; } = new List<string>();
}
|
||||
|
||||
/// <summary>
/// Numeric weight for each spam signal, with built-in defaults.
/// </summary>
/// <remarks>
/// Most property names mirror flags on <see cref="SpamFeatures"/>. How a weight is combined
/// into a final score is decided by the scoring code, not by this type. The defaults below
/// are the same numbers that appear under "spamScoreWeights" in SpamDetectorConfig.json.
/// </remarks>
public sealed class SpamScoreWeights
{
    // --- Authentication and headers ---
    public double SpfFail { get; set; } = 0.28;
    public double DkimFail { get; set; } = 0.25;
    public double DmarcFail { get; set; } = 0.30;
    public double ReplyToDomainMismatch { get; set; } = 0.20;

    // --- Impersonation and lookalikes ---
    public double DisplayImpersonation { get; set; } = 0.22;
    public double UnicodeLookalike { get; set; } = 0.20;

    // --- Links ---
    public double HasUrl { get; set; } = 0.06;

    // NOTE(review): presumably scaled by the number of URLs found; confirm in the scorer.
    public double UrlCountMultiplier { get; set; } = 0.02;
    public double HasIpLink { get; set; } = 0.18;
    public double UsesShortener { get; set; } = 0.12;
    public double SuspiciousTld { get; set; } = 0.10;

    // --- Content and attachments ---
    public double HasTrackingPixel { get; set; } = 0.06;
    public double HasAttachment { get; set; } = 0.06;
    public double HasRiskyAttachment { get; set; } = 0.22;
    public double KeywordBait { get; set; } = 0.22;

    // --- Sender and domain traits ---
    public double FreeMailboxWithUrl { get; set; } = 0.18;
    public double FreeMailboxOnly { get; set; } = 0.08;

    // The only negative default in this class.
    public double HasListUnsubscribe { get; set; } = -0.04;

    // NOTE(review): presumably scaled by the sender's reputation value; confirm in the scorer.
    public double ReputationMultiplier { get; set; } = 0.05;
    public double UnknownDomain { get; set; } = 0.15;

    // --- Advanced patterns ---
    public double CompanySubdomainSpoof { get; set; } = 0.45;
    public double FakeQuarantineReport { get; set; } = 0.40;
    public double HasZeroWidthChars { get; set; } = 0.35;
    public double HasRandomRefId { get; set; } = 0.18;
    public double HasTimestampInSubject { get; set; } = 0.15;
    public double ColdEmailSolicitation { get; set; } = 0.30;
    public double FakeVoicemailNotification { get; set; } = 0.42;
    public double FakeSystemNotification { get; set; } = 0.38;
}
|
||||
57
EmailSearch/SpamDetection/SpamFeatures.cs
Normal file
57
EmailSearch/SpamDetection/SpamFeatures.cs
Normal file
@@ -0,0 +1,57 @@
|
||||
namespace EmailSearch.SpamDetection;
|
||||
|
||||
/// <summary>
/// Bag of signals extracted from a single email for spam analysis.
/// </summary>
/// <remarks>
/// Plain mutable data holder: strings default to empty, flags to <c>false</c>, numbers to zero.
/// </remarks>
public sealed class SpamFeatures
{
    // ----- Sender identity -----
    public string DisplayName { get; set; } = string.Empty;
    public string FromAddress { get; set; } = string.Empty;
    public string FromDomain { get; set; } = string.Empty;

    // ----- Authentication results and headers -----
    public bool SpfFail { get; set; }
    public bool DkimFail { get; set; }
    public bool DmarcFail { get; set; }
    public bool ReplyToDomainMismatch { get; set; }
    public bool HasListUnsub { get; set; }

    // ----- Impersonation and lookalike signals -----
    public bool DisplayImpersonation { get; set; }
    public bool UnicodeLookalike { get; set; }
    public bool GenericSenderName { get; set; }
    public bool SubjectDomainImpersonation { get; set; }

    // ----- Link signals -----
    public bool HasUrl { get; set; }
    public int UrlCount { get; set; }
    public bool HasIpLink { get; set; }
    public bool UsesShortener { get; set; }
    public bool SuspiciousTld { get; set; }

    // ----- Sender and domain traits -----
    public bool FreeMailboxDomain { get; set; }
    public bool UnknownDomain { get; set; }
    public bool IsBlocklisted { get; set; }
    public int SenderReputation { get; set; }

    // ----- Body content and attachments -----
    public bool HasTrackingPixel { get; set; }
    public bool HasAttachment { get; set; }
    public bool HasRiskyAttachment { get; set; }
    public double AttachmentRiskScore { get; set; }
    public bool KeywordBait { get; set; }
    public bool SingleLinkOnly { get; set; }
    public bool HasPlaceholderText { get; set; }

    // ----- Advanced phishing patterns -----
    public bool CompanySubdomainSpoof { get; set; }
    public bool FakeQuarantineReport { get; set; }
    public bool HasZeroWidthChars { get; set; }
    public bool HasRandomRefId { get; set; }
    public bool HasTimestampInSubject { get; set; }
    public bool ColdEmailSolicitation { get; set; }
    public bool FakeVoicemailNotification { get; set; }
    public bool FakeSystemNotification { get; set; }
}
|
||||
31
EmailSearch/SpamDetection/UrlAnalyzer.cs
Normal file
31
EmailSearch/SpamDetection/UrlAnalyzer.cs
Normal file
@@ -0,0 +1,31 @@
|
||||
namespace EmailSearch.SpamDetection;
|
||||
|
||||
/// <summary>
/// Host-based checks for a single URL string.
/// </summary>
/// <remarks>
/// Both checks only understand absolute URLs. Anything that cannot be parsed as one
/// (null, empty, relative or malformed text) yields <c>false</c> rather than an exception.
/// </remarks>
internal static class UrlAnalyzer
{
    // Known link-shortener hosts. The comparer makes set lookups case-insensitive.
    private static readonly HashSet<string> Shorteners = new(StringComparer.OrdinalIgnoreCase)
    {
        "bit.ly", "tinyurl.com", "t.co", "goo.gl", "is.gd", "buff.ly",
        "ow.ly", "rb.gy", "rebrand.ly", "cutt.ly", "soo.gd", "tiny.cc",
        "short.io", "bl.ink", "shorte.st", "clicky.me"
    };

    /// <summary>
    /// Tells whether the host part of <paramref name="url"/> is an IP address literal.
    /// </summary>
    /// <param name="url">Absolute URL to inspect.</param>
    /// <returns><c>true</c> when the host parses as an IP address; otherwise <c>false</c>.</returns>
    public static bool IsIpUrl(string url)
    {
        return TryGetHost(url, out var host)
            && System.Net.IPAddress.TryParse(host, out _);
    }

    /// <summary>
    /// Tells whether <paramref name="url"/> points at a known link shortener, either the
    /// shortener's own host or any subdomain of it.
    /// </summary>
    /// <param name="url">Absolute URL to inspect.</param>
    /// <returns><c>true</c> when the host is, or ends with a dot followed by, a known shortener host.</returns>
    public static bool IsShortener(string url)
    {
        if (!TryGetHost(url, out var host))
        {
            return false;
        }

        // Try the full host, then each parent domain ("a.b.c" -> "b.c" -> "c").
        // Matching whole labels this way keeps "notbit.ly" from matching "bit.ly".
        var candidate = host;
        while (true)
        {
            if (Shorteners.Contains(candidate))
            {
                return true;
            }

            var dot = candidate.IndexOf('.');
            if (dot < 0)
            {
                return false;
            }

            candidate = candidate[(dot + 1)..];
        }
    }

    // Extracts the host of an absolute URL without throwing; false when the text is not one.
    private static bool TryGetHost(string url, out string host)
    {
        if (Uri.TryCreate(url, UriKind.Absolute, out var parsed))
        {
            host = parsed.Host;
            return true;
        }

        host = string.Empty;
        return false;
    }
}
|
||||
94
EmailSearch/SpamDetectorConfig.json
Normal file
94
EmailSearch/SpamDetectorConfig.json
Normal file
@@ -0,0 +1,94 @@
|
||||
{
|
||||
"freeDomains": [
|
||||
"gmail.com", "outlook.com", "hotmail.com", "yahoo.com",
|
||||
"icloud.com", "aol.com", "proton.me", "protonmail.com",
|
||||
"live.com", "msn.com", "ymail.com", "mail.com"
|
||||
],
|
||||
"badTlds": [
|
||||
"icu", "top", "click", "xyz", "mom", "quest", "work",
|
||||
"shop", "rest", "tokyo", "pics", "zip", "com.br",
|
||||
"buzz", "cam", "link", "loan", "online", "site", "website"
|
||||
],
|
||||
"baitKeywords": [
|
||||
"invoice", "overdue", "wire", "zelle", "gift card", "payroll",
|
||||
"remit", "ach", "payment", "past due", "bank transfer",
|
||||
"review & sign", "sign now", "action required", "urgent",
|
||||
"verify", "confirm your", "suspended", "expire", "limited time",
|
||||
"storage limit", "storage quota", "account", "password",
|
||||
"security alert", "unusual activity", "locked",
|
||||
"voice message", "voicemail", "fax", "document", "shared with you",
|
||||
"domain for sale", "premium domain", "seo", "website design",
|
||||
"setup request", "follow up", "checking in", "quick question"
|
||||
],
|
||||
"domains": {
|
||||
"vendors": {
|
||||
"example-vendor.com": {
|
||||
"reputation": 5,
|
||||
"displayNamePatterns": ["example", "vendor"]
|
||||
}
|
||||
},
|
||||
"trusted": [
|
||||
"microsoft.com", "office365.com", "google.com", "amazon.com",
|
||||
"apple.com", "github.com", "linkedin.com"
|
||||
]
|
||||
},
|
||||
"quarantineKeywords": [
|
||||
"quarantine summary", "spam report", "quarantine folder",
|
||||
"email quarantine", "quarantined email", "spam summary",
|
||||
"junk summary", "blocked messages", "held messages"
|
||||
],
|
||||
"voicemailKeywords": [
|
||||
"voicemail", "voice message", "voice mail", "audio message",
|
||||
"new voicemail", "play voicemail", "missed call", "phone message"
|
||||
],
|
||||
"systemNotificationKeywords": [
|
||||
"verify your email", "email verification", "verify now",
|
||||
"confirm your email", "account suspended", "account locked",
|
||||
"storage limit", "storage quota", "mailbox full",
|
||||
"password expir", "credentials expir", "unusual activity",
|
||||
"security alert", "suspicious activity", "action required"
|
||||
],
|
||||
"coldEmailKeywords": [
|
||||
"seo services", "seo affordable", "search engine optimization",
|
||||
"website ranking", "google ranking", "backlinks", "link building",
|
||||
"website redesign", "web development", "web developer",
|
||||
"website design", "graphic designer", "mobile app", "app development",
|
||||
"reaching out", "hope this finds you", "i came across your",
|
||||
"outsource", "offshore", "dedicated team", "cost-effective"
|
||||
],
|
||||
"fakeServiceDomains": [
|
||||
"voiceservicing.net", "audios.net", "voicemail.net",
|
||||
"audioservices.net", "mailservicing.net", "emailservicing.net",
|
||||
"securemail.net", "mailprotect.net", "docuservices.net"
|
||||
],
|
||||
"spamScoreWeights": {
|
||||
"spfFail": 0.28,
|
||||
"dkimFail": 0.25,
|
||||
"dmarcFail": 0.30,
|
||||
"replyToDomainMismatch": 0.20,
|
||||
"displayImpersonation": 0.22,
|
||||
"unicodeLookalike": 0.20,
|
||||
"hasUrl": 0.06,
|
||||
"urlCountMultiplier": 0.02,
|
||||
"hasIpLink": 0.18,
|
||||
"usesShortener": 0.12,
|
||||
"suspiciousTld": 0.10,
|
||||
"hasTrackingPixel": 0.06,
|
||||
"hasAttachment": 0.06,
|
||||
"hasRiskyAttachment": 0.22,
|
||||
"keywordBait": 0.22,
|
||||
"freeMailboxWithUrl": 0.18,
|
||||
"freeMailboxOnly": 0.08,
|
||||
"hasListUnsubscribe": -0.04,
|
||||
"reputationMultiplier": 0.05,
|
||||
"unknownDomain": 0.15,
|
||||
"companySubdomainSpoof": 0.45,
|
||||
"fakeQuarantineReport": 0.40,
|
||||
"hasZeroWidthChars": 0.35,
|
||||
"hasRandomRefId": 0.18,
|
||||
"hasTimestampInSubject": 0.15,
|
||||
"coldEmailSolicitation": 0.30,
|
||||
"fakeVoicemailNotification": 0.42,
|
||||
"fakeSystemNotification": 0.38
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user