Format Email Messages Using NLP
NLP can automate email formatting, tone analysis, and content improvement — useful for building writing assistants, customer service tools, and email productivity applications.
Analyzing Email Tone with VADER
Before reformatting, understand the current tone:
import re

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# The analyzer created below relies on the 'vader_lexicon' resource,
# so request it before constructing the analyzer.
nltk.download('vader_lexicon')

# Module-level analyzer instance used by analyze_email_tone().
sia = SentimentIntensityAnalyzer()
def _tone_label(compound):
    """Map a compound sentiment score to 'positive', 'negative' or 'neutral'."""
    if compound > 0.05:
        return "positive"
    if compound < -0.05:
        return "negative"
    return "neutral"


def analyze_email_tone(email_text):
    """Score the tone of an email as a whole and sentence by sentence.

    Args:
        email_text: Full text of the email.

    Returns:
        A dict with:
          - "overall_tone": 'positive', 'negative' or 'neutral' for the whole text.
          - "overall_score": compound score of the whole text, rounded to 3 places.
          - "sentences": one dict per analyzed sentence with keys
            "text", "sentiment" and "score".
    """
    # Split after sentence-ending punctuation that is followed by whitespace.
    sentences = re.split(r'(?<=[.!?])\s+', email_text)

    sentence_scores = []
    for sentence in sentences:
        text = sentence.strip()
        # Fragments of five characters or fewer are skipped.
        if len(text) <= 5:
            continue
        compound = sia.polarity_scores(sentence)['compound']
        sentence_scores.append({
            "text": text,
            "sentiment": _tone_label(compound),
            "score": round(compound, 3),
        })

    overall = sia.polarity_scores(email_text)['compound']
    return {
        "overall_tone": _tone_label(overall),
        "overall_score": round(overall, 3),
        "sentences": sentence_scores,
    }
# Sample email mixing negative and positive sentences.
email = """Hi team,

I'm extremely frustrated with the ongoing delays in the project. This is the third
deadline we've missed and I'm very disappointed. However, I do appreciate the hard
work everyone has put in. Let's find a solution together.

Best regards,
Sarah"""

result = analyze_email_tone(email)
print(f"Overall tone: {result['overall_tone']} (score: {result['overall_score']})")
print("\nSentence-level analysis:")
for s in result['sentences']:
    # Label is left-aligned to 9 columns; text is cut at 60 characters.
    print(f" [{s['sentiment']:<9} {s['score']:+.3f}] {s['text'][:60]}")


# Extracting Email Components
import re
# Salutation at the start of a line. The trailing \b stops ordinary words
# such as "History" or "Heya" from being read as "Hi" / "Hey".
_GREETING_RE = re.compile(
    r'^(hi|hello|dear|hey|good\s+(morning|afternoon|evening))\b',
    re.IGNORECASE,
)

# Sign-off phrase at the start of a line. The trailing \b stops words such as
# "Bestsellers" or "Thanksgiving" from being read as "Best" / "Thanks".
_CLOSING_RE = re.compile(
    r'^(best|regards|thanks|thank you|sincerely|cheers|kind regards)\b',
    re.IGNORECASE,
)


def parse_email(raw_email):
    """Split a raw email into greeting, body, closing and signature.

    Blank lines are dropped and every remaining line is stripped before
    classification.

    Args:
        raw_email: Email text with newline-separated lines.

    Returns:
        A dict with:
          - "greeting": the first line if it starts with a salutation, else "".
          - "body_paragraphs": list of the remaining non-blank lines (one entry
            per line) that are neither greeting, closing nor signature.
          - "closing": the last line that starts with a sign-off phrase, else "".
          - "signature": all lines after the closing joined by spaces, else "".
    """
    components = {
        "greeting": "",
        "body_paragraphs": [],
        "closing": "",
        "signature": "",
    }

    lines = [line.strip() for line in raw_email.strip().split('\n') if line.strip()]

    # Only the very first line may be the greeting.
    if lines and _GREETING_RE.match(lines[0]):
        components["greeting"] = lines[0]
        lines = lines[1:]

    # Scan from the bottom so the last sign-off in the email wins.
    for idx in range(len(lines) - 1, -1, -1):
        if _CLOSING_RE.match(lines[idx]):
            components["closing"] = lines[idx]
            components["signature"] = ' '.join(lines[idx + 1:])
            lines = lines[:idx]
            break

    # Whatever is left is the body.
    components["body_paragraphs"] = lines
    return components
# Sample email with a greeting, three body lines, a closing and a signature.
raw = """Dear John,

I wanted to follow up on our meeting last Tuesday.
The project timeline looks good and we're on track.
Please let me know if you have any concerns.

Best regards,
Emily Chen"""

parsed = parse_email(raw)
for key, value in parsed.items():
    print(f"{key}: {value}")


# Grammar and Clarity Improvement with LanguageTool
# pip install language-tool-python
import language_tool_python

# Module-level checker configured for 'en-US'; used by check_email_grammar().
tool = language_tool_python.LanguageTool('en-US')
def check_email_grammar(text):
    """Run the module-level LanguageTool checker over ``text``.

    Args:
        text: The email text to check.

    Returns:
        A dict with:
          - "original": the input text, unchanged.
          - "corrected": the text returned by
            ``language_tool_python.utils.correct`` for the found matches.
          - "issues": one dict per match with keys "error" (the flagged
            substring), "message", "suggestion" (first replacement, or
            "No suggestion") and "category" (holds the match's ``ruleId``).
    """
    found = tool.check(text)

    issues = [
        {
            # Slice the flagged span out of the input by offset and length.
            "error": text[m.offset:m.offset + m.errorLength],
            "message": m.message,
            "suggestion": m.replacements[0] if m.replacements else "No suggestion",
            "category": m.ruleId,
        }
        for m in found
    ]

    fixed_text = language_tool_python.utils.correct(text, found)
    return {"original": text, "corrected": fixed_text, "issues": issues}
# Sample email; the grammar and capitalization mistakes are intentional so
# the checker has something to report.
email_text = """Hi john,

I want to inform you that the the project deadline have been moved to next friday.
Please make sure everyone on you're team are aware of this change.

Best,
Michael"""

result = check_email_grammar(email_text)
print("Original:")
print(result["original"])
print("\nCorrected:")
print(result["corrected"])
print("\nIssues found:", len(result["issues"]))
for issue in result["issues"]:
    # The message is cut at 50 characters to keep each line short.
    print(f" '{issue['error']}' → {issue['suggestion']} ({issue['message'][:50]})")


# Generating a Subject Line
from transformers import pipeline
# Summarization pipeline backed by the "facebook/bart-large-cnn" model,
# created once at module level and called by generate_subject_line().
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
)
def generate_subject_line(email_body, max_length=15):
    """Produce a short subject line by summarizing the email body.

    Args:
        email_body: Text of the email to summarize.
        max_length: Upper bound passed to the summarizer as ``max_length``.

    Returns:
        The summary text with surrounding whitespace and any trailing
        periods removed.
    """
    summary = summarizer(
        email_body,
        max_length=max_length,
        # Keep the lower bound at or below the cap so a caller-supplied
        # max_length smaller than 5 does not produce min_length > max_length.
        min_length=min(5, max_length),
        do_sample=False,
    )
    subject = summary[0]['summary_text']

    # Strip whitespace first: otherwise a final "." followed by a space
    # survives, because rstrip('.') only sees the trailing space.
    return subject.strip().rstrip('.').rstrip()
# Sample body text to condense into a subject line.
email_body = """I wanted to let you know that we've completed the technical review of the new
NLP pipeline and everything looks great. The accuracy metrics have improved by
15% compared to our baseline and the processing speed is three times faster.
We're ready to schedule a demo for the stakeholders next week."""

subject = generate_subject_line(email_body)
print(f"Suggested subject: {subject}")


# LLM-Powered Email Rewriting
from openai import OpenAI
# Module-level OpenAI client used by reformat_email().
# NOTE(review): constructed with no arguments, so credentials presumably come
# from the environment (e.g. OPENAI_API_KEY) — confirm for your setup.
client = OpenAI()
def reformat_email(original_email, style="professional"):
    """Ask the chat model to rewrite an email in the requested style.

    Args:
        original_email: Text of the email to rewrite.
        style: One of "professional", "friendly", "concise" or "diplomatic".
            Any other value falls back to the "professional" instructions.

    Returns:
        The ``content`` of the first choice's message in the chat completion
        response.
    """
    presets = {
        "professional": "Make it formal, concise, and professional. Use clear structure.",
        "friendly": "Make it warm, approachable, and conversational while staying professional.",
        "concise": "Remove all unnecessary words. Get to the point immediately. Use bullet points if helpful.",
        "diplomatic": "Soften any harsh language. Make critical feedback constructive and respectful.",
    }

    # Unknown style names use the professional preset.
    if style in presets:
        directive = presets[style]
    else:
        directive = presets["professional"]

    system_prompt = f"You are an email writing assistant. Reformat the email as follows: {directive}. Preserve the original intent and key information. Return only the reformatted email."
    user_prompt = f"Reformat this email:\n\n{original_email}"

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=400,
        temperature=0.5,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content
# Sample email with harsh wording, used to exercise the "diplomatic" style.
harsh_email = """John,

This is completely unacceptable. The report was supposed to be done yesterday
and I have NO IDEA what you've been doing all week. This is the third time
this has happened. I need this done TODAY or there will be consequences.

Mark"""

print("=== Original ===")
print(harsh_email)
print("\n=== Reformatted (diplomatic) ===")
print(reformat_email(harsh_email, style="diplomatic"))


# Email Summarization
from transformers import pipeline
# Summarization pipeline for summarize_long_email(). This repeats the earlier
# module-level `summarizer` definition with the same task and model, so this
# section can be run on its own.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
)
def summarize_long_email(email_text, max_length=100, min_length=30):
    """Summarize the body of an email, ignoring greeting and sign-off.

    The greeting (first line, if it starts with a salutation) and everything
    from the last standalone sign-off line onward are removed by pattern
    rather than by fixed line positions, so emails without a greeting or
    signature keep all of their content.

    Args:
        email_text: Full text of the email.
        max_length: Passed to the summarizer as ``max_length``.
        min_length: Passed to the summarizer as ``min_length``.

    Returns:
        The body itself when it has fewer than 30 words, otherwise the
        summarizer's ``summary_text`` for the body.
    """
    import re  # local import so this block does not rely on earlier sections

    greeting_re = re.compile(
        r'^(hi|hello|dear|hey|good\s+(morning|afternoon|evening))\b',
        re.IGNORECASE,
    )
    # A sign-off must be the whole line (plus optional punctuation) so that a
    # body sentence like "Thanks for the update..." is not cut off.
    signoff_re = re.compile(
        r'^(best regards|kind regards|warm regards|best wishes|best|regards|'
        r'thanks|thank you|sincerely|cheers)[\s,.!]*$',
        re.IGNORECASE,
    )

    stripped = (line.strip() for line in email_text.strip().split('\n'))
    lines = [line for line in stripped if line]

    if lines and greeting_re.match(lines[0]):
        lines = lines[1:]

    # Scan from the bottom; drop the sign-off and the signature after it.
    for idx in range(len(lines) - 1, -1, -1):
        if signoff_re.match(lines[idx]):
            lines = lines[:idx]
            break

    body = ' '.join(lines)

    if len(body.split()) < 30:
        return body  # Too short to summarize

    summary = summarizer(
        body,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
    )
    return summary[0]['summary_text']
# Sample multi-paragraph email, long enough to go through the summarizer.
long_email = """Hi team,

I hope this message finds you well. I wanted to provide an update on the Q3 NLP platform
project. Over the past two weeks, our engineering team has successfully integrated the
new BERT-based classification model into the production pipeline. The model processes
customer support tickets and automatically categorizes them by urgency and department.

Initial results are very promising. The model achieves 94% accuracy on our test set,
compared to 78% with our previous rule-based system. Processing time is also significantly
faster, handling 500 tickets per minute versus 50 with the old system.

The next phase involves deploying the sentiment analysis component and building the
monitoring dashboard. We expect to complete this by end of Q3 and are on track
with our original timeline.

Please let me know if you have any questions or need more details on the technical implementation.

Best regards,
Jennifer Park"""

summary = summarize_long_email(long_email)
print("Summary:", summary)