Co-creation-projects/bichchibui5-hub-EmailSmartAssistant/main.ipynb
from hello_agents import SimpleAgent, HelloAgentsLLM, ToolRegistry
from hello_agents.tools import Tool, ToolParameter
from typing import Dict, Any, List
import json
import re
import os
from datetime import datetime, timedelta
from collections import Counter
import jieba
from langdetect import detect
import pandas as pd
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
# Shared rich Console instance; the rest of this script prints all of its
# styled terminal output through it.
console = Console()
# LLM connection settings, published as environment variables.
# ``setdefault`` is used instead of plain assignment so that values already
# exported in the environment (most importantly a real LLM_API_KEY) are not
# overwritten by the placeholders below. Edit these defaults, or export the
# variables before launching, to point at your own account.
os.environ.setdefault("LLM_MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")
os.environ.setdefault("LLM_API_KEY", "your_api_key_here")
os.environ.setdefault("LLM_BASE_URL", "https://api-inference.modelscope.cn/v1/")
os.environ.setdefault("LLM_TIMEOUT", "60")
class EmailClassificationTool(Tool):
    """Keyword-based email classifier.

    Scores the lower-cased subject and body against keyword lists and reports
    an email type (work/customer/personal/spam/other), a priority
    (high/medium/low) and a sender type (colleague/system/customer/external).

    Keyword lists are read from ``config/email_config.json`` when that file
    contains a JSON object. Any list the file does not provide falls back to
    the built-in defaults, so a missing, partial or malformed config can no
    longer make ``run`` fail with ``KeyError``.
    """

    _CONFIG_PATH = 'config/email_config.json'

    # Built-in keyword lists (all lower-case, because the analysed text is lower-cased).
    _DEFAULT_CLASSIFICATION_RULES = {
        'work_keywords': ['会议', '项目', '工作', '任务', '汇报', 'meeting', 'project', 'work', 'task', 'urgent'],
        'customer_keywords': ['客户', '咨询', '购买', '服务', 'customer', 'inquiry', 'purchase', 'service'],
        'personal_keywords': ['个人', '家庭', '朋友', 'personal', 'family', 'friend', '聚餐'],
        'spam_keywords': ['广告', '推广', '营销', '优惠', 'advertisement', 'promotion', 'marketing', '折扣'],
    }
    _DEFAULT_PRIORITY_RULES = {
        'high_priority_keywords': ['紧急', 'urgent', 'asap', '重要', 'important'],
        'low_priority_keywords': ['通知', 'newsletter', 'notification', '订阅'],
    }

    # Mail domains whose senders are treated as colleagues.
    _COLLEAGUE_DOMAINS = ('company.com', 'corp.com')

    # Number of distinct spam keywords that marks an email as spam.
    _SPAM_THRESHOLD = 2

    def __init__(self):
        super().__init__(
            name="email_classification",
            description="根据邮件内容自动分类邮件类型、优先级和发件人类型"
        )
        config = self._load_config(self._CONFIG_PATH)
        self.classification_rules = self._merge_rules(
            self._DEFAULT_CLASSIFICATION_RULES, config.get('classification_rules')
        )
        self.priority_rules = self._merge_rules(
            self._DEFAULT_PRIORITY_RULES, config.get('priority_rules')
        )

    @staticmethod
    def _load_config(path: str) -> Dict[str, Any]:
        """Return the JSON object stored at *path*, or ``{}`` if it is missing or unusable."""
        try:
            with open(path, 'r', encoding='utf-8') as f:
                config = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            return {}
        return config if isinstance(config, dict) else {}

    @staticmethod
    def _merge_rules(defaults: Dict[str, List[str]], overrides: Any) -> Dict[str, List[str]]:
        """Overlay configured keyword lists on top of the defaults.

        Configured keywords are lower-cased (the text they are matched against
        is lower-cased too) and blank entries are dropped, because an empty
        keyword is a substring of every text and would match everything.
        """
        merged = {key: list(words) for key, words in defaults.items()}
        if isinstance(overrides, dict):
            for key, words in overrides.items():
                if isinstance(words, (list, tuple)):
                    merged[key] = [str(word).lower() for word in words if str(word).strip()]
        return merged

    def _count_hits(self, rule_key: str, text: str) -> int:
        """Count how many keywords of one classification rule occur in *text*."""
        return sum(1 for keyword in self.classification_rules.get(rule_key, []) if keyword in text)

    def _pick_priority(self, text: str) -> str:
        """Return 'high', 'low' or the default 'medium'; high-priority keywords win."""
        if any(word in text for word in self.priority_rules.get('high_priority_keywords', [])):
            return 'high'
        if any(word in text for word in self.priority_rules.get('low_priority_keywords', [])):
            return 'low'
        return 'medium'

    @staticmethod
    def _sender_domain(sender: str) -> str:
        """Extract the lower-cased domain from ``user@host`` or ``Name <user@host>``."""
        address = sender.strip().lower()
        if '<' in address and address.endswith('>'):
            address = address[address.rfind('<') + 1:-1]
        return address.rpartition('@')[2].strip()

    def _classify_sender(self, sender: str, email_type: str) -> str:
        """Map the sender address (and the email type as a fallback) to a sender type."""
        domain = self._sender_domain(sender)
        # Compare whole domains (or their sub-domains) rather than substrings, so
        # look-alikes such as "mycompany.com" or "company.com.evil.org" do not
        # pass as colleagues.
        if any(domain == d or domain.endswith('.' + d) for d in self._COLLEAGUE_DOMAINS):
            return 'colleague'
        sender_lower = sender.lower()
        if 'noreply' in sender_lower or 'no-reply' in sender_lower:
            return 'system'
        if email_type == 'customer':
            return 'customer'
        return 'external'

    def run(self, parameters: Dict[str, Any]) -> str:
        """Classify one email.

        Args:
            parameters: mapping with optional ``subject`` and ``body`` and the
                sender address under ``sender``. ``None`` values are treated
                as empty strings.

        Returns:
            A JSON string with the keys ``type``, ``priority`` and
            ``sender_type``, or an error message string when both subject and
            body are empty.
        """
        subject = str(parameters.get("subject") or "")
        body = str(parameters.get("body") or "")
        sender = str(parameters.get("sender") or "")
        if not subject and not body:
            return "错误: 邮件主题和内容不能同时为空"

        # Subject and body are analysed together, case-insensitively.
        text_content = f"{subject} {body}".lower()

        # Spam short-circuits every other rule.
        if self._count_hits('spam_keywords', text_content) >= self._SPAM_THRESHOLD:
            classification = {'type': 'spam', 'priority': 'low', 'sender_type': 'external'}
        else:
            # Insertion order doubles as the tie-break: work > customer > personal.
            scores = {
                'work': self._count_hits('work_keywords', text_content),
                'customer': self._count_hits('customer_keywords', text_content),
                'personal': self._count_hits('personal_keywords', text_content),
            }
            email_type = max(scores, key=scores.get) if max(scores.values()) > 0 else 'other'
            classification = {
                'type': email_type,
                'priority': self._pick_priority(text_content),
                'sender_type': self._classify_sender(sender, email_type),
            }
        return json.dumps(classification, ensure_ascii=False, indent=2)

    def get_parameters(self) -> List[ToolParameter]:
        """Describe the parameters accepted by ``run``."""
        return [
            ToolParameter(
                name="subject",
                type="string",
                description="邮件主题",
                required=False
            ),
            ToolParameter(
                name="body",
                type="string",
                description="邮件正文内容",
                required=False
            ),
            ToolParameter(
                name="sender",
                type="string",
                description="发件人邮箱地址",
                required=True
            )
        ]
class InfoExtractionTool(Tool):
    """Regex-based extractor for dates, times, contact details and to-do sentences.

    All results are de-duplicated while keeping the order of first appearance,
    so the JSON output is deterministic for a given email body.
    """

    # Digit/separator guards stop the short forms from matching fragments of a
    # longer date (e.g. "24-01" inside "2024-01-16").
    _DATE_PATTERNS = tuple(re.compile(p) for p in (
        r'(?<!\d)\d{4}-\d{1,2}-\d{1,2}(?!\d)',      # 2024-01-15
        r'(?<!\d)\d{4}/\d{1,2}/\d{1,2}(?!\d)',      # 2024/01/15
        r'\d{1,2}月\d{1,2}日',                       # 1月15日
        r'(?<![\d/])\d{1,2}/\d{1,2}(?![\d/])',      # 1/15
        r'(?<![\d-])\d{1,2}-\d{1,2}(?![\d-])',      # 1-15
    ))

    # An optional AM/PM suffix is kept together with "hh:mm"; the bare
    # "3 PM" form must not start in the middle of an "hh:mm" match.
    _TIME_PATTERNS = tuple(re.compile(p) for p in (
        r'(?<![\d:])\d{1,2}:\d{2}(?!\d)(?:\s*[AaPp][Mm]\b)?',   # 14:30, 2:30 PM
        r'\d{1,2}点\d{0,2}分?',                                  # 2点30分
        r'(?<![\d:])\d{1,2}\s*[AaPp][Mm]\b',                    # 2 PM, 9 am
    ))

    # Mainland-China mobile numbers; the guards reject matches inside longer digit runs.
    _PHONE_PATTERN = re.compile(r'(?<!\d)1[3-9]\d{9}(?!\d)')

    # The TLD class is [A-Za-z]; a "|" inside a character class is a literal pipe.
    _EMAIL_PATTERN = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')

    # Sentence boundaries: CJK/ASCII terminators and newlines, plus a period only
    # when it ends a token (so "a@b.com" and "3.5" stay in one piece).
    _SENTENCE_SPLIT = re.compile(r'[。!?!?\n]+|\.(?=\s|$)')

    _TODO_KEYWORDS = ('需要', '请', '准备', 'need', 'please', 'prepare', '确认', '完成', '提交')
    _MAX_TODOS = 5
    _MIN_TODO_LENGTH = 5   # sentences of this length or shorter are ignored

    def __init__(self):
        super().__init__(
            name="info_extraction",
            description="从邮件内容中提取日期、时间、联系方式、待办事项等关键信息"
        )

    @staticmethod
    def _find_unique(patterns, text: str) -> List[str]:
        """Return all matches of *patterns* in *text*, de-duplicated in first-seen order."""
        found = (m.group(0) for pattern in patterns for m in pattern.finditer(text))
        return list(dict.fromkeys(found))

    def _extract_todos(self, text: str) -> List[str]:
        """Return up to ``_MAX_TODOS`` sentences that contain a to-do keyword."""
        todos = []
        for sentence in self._SENTENCE_SPLIT.split(text):
            sentence = sentence.strip()
            if len(sentence) <= self._MIN_TODO_LENGTH:
                continue
            # Keywords are lower-case; compare case-insensitively so "Please ..." counts.
            lowered = sentence.lower()
            if any(keyword in lowered for keyword in self._TODO_KEYWORDS):
                todos.append(sentence)
                if len(todos) == self._MAX_TODOS:
                    break
        return todos

    def run(self, parameters: Dict[str, Any]) -> str:
        """Extract key information from an email body.

        Args:
            parameters: mapping whose ``body`` entry holds the email text.

        Returns:
            A JSON string with the lists ``dates``, ``times``, ``phones``,
            ``emails`` and ``todos``, or an error message string when the body
            is empty or missing.
        """
        body = parameters.get("body") or ""
        if not isinstance(body, str):
            body = str(body)
        if not body:
            return "错误: 邮件内容不能为空"

        extracted_info = {
            'dates': self._find_unique(self._DATE_PATTERNS, body),
            'times': self._find_unique(self._TIME_PATTERNS, body),
            'phones': self._find_unique((self._PHONE_PATTERN,), body),
            'emails': self._find_unique((self._EMAIL_PATTERN,), body),
            'todos': self._extract_todos(body),
        }
        return json.dumps(extracted_info, ensure_ascii=False, indent=2)

    def get_parameters(self) -> List[ToolParameter]:
        """Describe the parameters accepted by ``run``."""
        return [
            ToolParameter(
                name="body",
                type="string",
                description="邮件正文内容",
                required=True
            )
        ]
class ReplyGenerationTool(Tool):
    """Template-based reply draft generator.

    Picks a formal template by email type and language (Chinese or English),
    fills in the subject and returns the draft as JSON. Templates come from
    ``templates/reply_templates.json`` when that file holds a JSON object;
    the built-in templates are used otherwise and also serve as a fallback
    for entries the file does not define.
    """

    _TEMPLATE_PATH = 'templates/reply_templates.json'
    _FALLBACK_KEY = 'general_acknowledgment'

    # Email type -> template key; every other type uses the fallback key.
    _TEMPLATE_KEYS = {'work': 'work_meeting', 'customer': 'customer_inquiry'}

    def __init__(self):
        super().__init__(
            name="reply_generation",
            description="根据邮件内容和分类结果生成合适的回复草稿"
        )
        try:
            with open(self._TEMPLATE_PATH, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            loaded = None
        # Anything other than a JSON object cannot be a template table.
        self.templates = loaded if isinstance(loaded, dict) else self._default_templates()

    @staticmethod
    def _default_templates() -> Dict[str, Any]:
        """Return a fresh copy of the built-in reply templates."""
        return {
            'work_meeting': {
                'formal': {
                    'zh': '感谢您的邮件。关于{subject},我已收到您的信息。我将在24小时内回复您详细的反馈。如有紧急事项,请随时联系我。\n\n此致\n敬礼',
                    'en': 'Thank you for your email regarding {subject}. I have received your information and will provide detailed feedback within 24 hours. Please feel free to contact me if there are any urgent matters.\n\nBest regards'
                }
            },
            'customer_inquiry': {
                'formal': {
                    'zh': '尊敬的客户,\n\n感谢您对我们产品/服务的关注。关于您咨询的{subject},我们将安排专业人员在24小时内为您提供详细解答。\n\n如有其他问题,欢迎随时联系我们。\n\n此致\n敬礼',
                    'en': 'Dear Valued Customer,\n\nThank you for your interest in our products/services. Regarding your inquiry about {subject}, we will arrange for a professional to provide you with detailed answers within 24 hours.\n\nPlease feel free to contact us if you have any other questions.\n\nBest regards'
                }
            },
            'general_acknowledgment': {
                'formal': {
                    'zh': '您好,\n\n已收到您的邮件,我将仔细阅读并在24小时内回复。\n\n谢谢!',
                    'en': 'Hello,\n\nI have received your email and will read it carefully and reply within 24 hours.\n\nThank you!'
                }
            }
        }

    @staticmethod
    def _detect_language(text: str) -> str:
        """Return 'zh' for Chinese text and 'en' for everything else."""
        # Any CJK ideograph settles the question without consulting langdetect.
        if any('\u4e00' <= char <= '\u9fff' for char in text):
            return 'zh'
        try:
            detected = detect(text)
        except Exception:
            # langdetect raises on text without usable features; default to English.
            return 'en'
        # Covers 'zh-cn' as well as 'zh-tw'.
        return 'zh' if str(detected).startswith('zh') else 'en'

    @staticmethod
    def _lookup(templates: Any, key: str, lang: str) -> str:
        """Return ``templates[key]['formal'][lang]`` or '' if any level is missing."""
        node = templates
        for part in (key, 'formal', lang):
            if not isinstance(node, dict):
                return ''
            node = node.get(part)
        return node if isinstance(node, str) else ''

    def _find_template(self, template_key: str, lang: str) -> str:
        """Resolve a template, falling back to the general one and then to the built-ins."""
        for source in (self.templates, self._default_templates()):
            for key in (template_key, self._FALLBACK_KEY):
                template = self._lookup(source, key, lang)
                if template:
                    return template
        return ''

    @staticmethod
    def _render(template: str, subject: str, timeframe: str) -> str:
        """Fill the placeholders of *template*.

        Custom templates may contain placeholders or braces ``str.format``
        cannot handle; in that case only the known placeholders are replaced
        instead of raising.
        """
        try:
            return template.format(subject=subject, timeframe=timeframe)
        except (KeyError, IndexError, ValueError):
            return template.replace('{subject}', subject).replace('{timeframe}', timeframe)

    def run(self, parameters: Dict[str, Any]) -> str:
        """Generate a reply draft.

        Args:
            parameters: mapping with optional ``subject``, ``body`` and
                ``email_type`` (work/customer/personal/spam/other) and the
                sender address under ``sender``. ``None`` values are treated
                as empty; the email type is matched case-insensitively.

        Returns:
            A JSON string with ``to``, ``subject``, ``content``, ``language``
            and ``template_type``; a JSON ``message`` object for spam; or an
            error message string when both subject and body are empty.
        """
        subject = str(parameters.get("subject") or "")
        body = str(parameters.get("body") or "")
        sender = str(parameters.get("sender") or "")
        email_type = str(parameters.get("email_type") or "other").strip().lower()
        if not subject and not body:
            return "错误: 邮件主题和内容不能同时为空"

        # Spam never gets a reply.
        if email_type == 'spam':
            return json.dumps({'message': '垃圾邮件,不生成回复'}, ensure_ascii=False)

        lang = self._detect_language(f"{subject} {body}")
        template_key = self._TEMPLATE_KEYS.get(email_type, self._FALLBACK_KEY)
        template = self._find_template(template_key, lang)

        reply_content = self._render(
            template,
            subject,
            '24小时' if lang == 'zh' else '24 hours',
        )

        # Do not stack prefixes when the incoming subject is already a reply.
        if subject.lstrip().lower().startswith('re:'):
            reply_subject = subject
        else:
            reply_subject = f"Re: {subject}"

        reply_draft = {
            'to': sender,
            'subject': reply_subject,
            'content': reply_content,
            'language': lang,
            'template_type': template_key
        }
        return json.dumps(reply_draft, ensure_ascii=False, indent=2)

    def get_parameters(self) -> List[ToolParameter]:
        """Describe the parameters accepted by ``run``."""
        return [
            ToolParameter(
                name="subject",
                type="string",
                description="邮件主题",
                required=False
            ),
            ToolParameter(
                name="body",
                type="string",
                description="邮件正文内容",
                required=False
            ),
            ToolParameter(
                name="sender",
                type="string",
                description="发件人邮箱地址",
                required=True
            ),
            ToolParameter(
                name="email_type",
                type="string",
                description="邮件分类类型 (work/customer/personal/spam/other)",
                required=False
            )
        ]
# Build the tool registry: each tool class is instantiated and registered in turn.
tool_registry = ToolRegistry()
for _tool_cls in (EmailClassificationTool, InfoExtractionTool, ReplyGenerationTool):
    tool_registry.register_tool(_tool_cls())

# LLM client, created with its default constructor arguments.
llm = HelloAgentsLLM()

# System prompt (Chinese): tells the agent to run the three tools and then
# write a structured processing report in Chinese.
system_prompt = """你是一位专业的邮件处理助手。你的任务是:
1. 使用email_classification工具分析邮件类型、优先级和发件人类型
2. 使用info_extraction工具提取邮件中的关键信息(日期、时间、联系方式、待办事项)
3. 使用reply_generation工具生成合适的回复草稿
4. 基于分析结果,提供详细的邮件处理报告
处理报告应包括:
- 邮件分类结果
- 提取的关键信息
- 生成的回复草稿
- 处理建议和提醒事项
请以结构化的格式输出报告,使用中文进行说明。"""

# Wire the LLM, the prompt and the tools into a single agent.
agent = SimpleAgent(
    tool_registry=tool_registry,
    system_prompt=system_prompt,
    llm=llm,
    name="智能邮件助手",
)

console.print("✅ 智能邮件助手初始化完成!", style="green")
# Demo inbox: a Chinese work email, a Chinese customer inquiry and an English
# meeting request. Each entry carries 'subject', 'sender' and 'body'.
sample_emails = [
    dict(
        subject='紧急:项目进度汇报会议安排',
        sender='[email protected]',
        body='各位同事,请准备明天下午2点的项目进度汇报会议。需要准备本周工作总结和下周计划。截止时间:2024-01-16 14:00。请确认参会。',
    ),
    dict(
        subject='客户咨询:产品功能详情',
        sender='[email protected]',
        body='您好,我对贵公司的产品很感兴趣,希望了解更多功能详情。请问可以安排一次产品演示吗?我的联系方式:13800138000。期待您的回复。',
    ),
    dict(
        subject='Urgent: Meeting Request',
        sender='[email protected]',
        body='Hi team, we need to schedule an urgent meeting tomorrow at 3 PM to discuss the quarterly results. Please prepare your reports and confirm attendance by 5 PM today.',
    ),
]

# Announce how many sample emails are about to be processed.
console.print(
    Panel.fit(
        "\n".join([
            f"📧 准备处理 {len(sample_emails)} 封示例邮件",
            "包含工作邮件、客户咨询和英文邮件",
        ]),
        title="邮件处理开始",
        style="blue",
    )
)
# Run every sample email through the agent and collect one record per email:
# {'email': <input dict>, 'result': <agent output or error text>, 'status': 'success' | 'error'}.
results = []
for i, email in enumerate(sample_emails, 1):
    # Subjects are untrusted text: print them with markup/emoji parsing off so
    # bracketed or ":code:" fragments are shown literally instead of being
    # interpreted (or rejected) as Rich console markup.
    console.print(
        f"\n🔄 处理邮件 {i}/{len(sample_emails)}: {email['subject'][:30]}...",
        style="cyan",
        markup=False,
        emoji=False,
    )
    # Request sent to the agent for this email.
    email_content = f"""
请处理以下邮件:
发件人: {email['sender']}
主题: {email['subject']}
内容: {email['body']}
请进行完整的邮件分析和处理。
"""
    try:
        result = agent.run(email_content)
    except Exception as e:
        # One failing email must not abort the batch; record the error and go on.
        results.append({
            'email': email,
            'result': f"处理失败: {e}",
            'status': 'error'
        })
        # Exception text may contain brackets as well, so markup stays off here too.
        console.print(f"❌ 邮件 {i} 处理失败: {e}", style="red", markup=False, emoji=False)
    else:
        results.append({
            'email': email,
            'result': result,
            'status': 'success'
        })
        console.print(f"✅ 邮件 {i} 处理完成", style="green")
console.print("\n🎉 所有邮件处理完成!", style="bold green")
# Print a summary of the processing run.
console.print("\n" + "=" * 60)
console.print("📊 邮件处理结果汇总", style="bold blue")
console.print("=" * 60)

# Success/failure counters (also used by the report written afterwards).
success_count = sum(1 for r in results if r['status'] == 'success')
error_count = len(results) - success_count

# Statistics table: total, succeeded, failed.
stats_table = Table(title="处理统计")
stats_table.add_column("项目", style="cyan")
stats_table.add_column("数量", style="white")
stats_table.add_row("总邮件数", str(len(results)))
stats_table.add_row("成功处理", str(success_count))
stats_table.add_row("处理失败", str(error_count))
console.print(stats_table)

# Per-email details. Subjects, sender addresses and agent output are external
# text, so they are printed with markup/emoji parsing disabled: otherwise a
# bracketed fragment would be consumed as a Rich style tag, and an unmatched
# closing tag such as "[/x]" would raise a MarkupError.
for i, result in enumerate(results, 1):
    if result['status'] == 'success':
        console.print(f"\n📧 邮件 {i} 处理结果:", style="bold yellow")
        console.print(f"主题: {result['email']['subject']}", markup=False, emoji=False)
        console.print(f"发件人: {result['email']['sender']}", markup=False, emoji=False)
        console.print("\n处理报告:")
        console.print(result['result'], markup=False, emoji=False)
        console.print("-" * 50)
    else:
        console.print(f"\n❌ 邮件 {i} 处理失败:", style="bold red")
        console.print(result['result'], markup=False, emoji=False)
# Write the processing report to a timestamped Markdown file.
import os
from datetime import datetime

# Make sure the output directory exists.
os.makedirs('output/reports', exist_ok=True)

# Take the clock reading once so the file name and the "generated at" line
# inside the report always agree.
generated_at = datetime.now()
timestamp = generated_at.strftime("%Y%m%d_%H%M%S")
report_filename = f"output/reports/email_processing_report_{timestamp}.md"

# Blocks are separated by blank lines: without them Markdown merges the
# metadata lines into one paragraph, and a "---" directly below the result
# text would turn its last line into a heading instead of drawing a rule.
report_sections = [
    "# 智能邮件助手处理报告\n\n"
    f"**生成时间**: {generated_at.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
    f"**处理邮件数量**: {len(results)}\n\n"
    f"**成功处理**: {success_count}\n\n"
    f"**处理失败**: {error_count}\n\n"
    "## 处理结果详情\n\n"
]
for i, result in enumerate(results, 1):
    status_label = '✅ 成功' if result['status'] == 'success' else '❌ 失败'
    report_sections.append(
        f"### 邮件 {i}\n\n"
        f"**主题**: {result['email']['subject']}\n\n"
        f"**发件人**: {result['email']['sender']}\n\n"
        f"**状态**: {status_label}\n\n"
        "**处理结果**:\n\n"
        f"{result['result']}\n\n"
        "---\n\n"
    )
report_content = "".join(report_sections)

# Save the report.
with open(report_filename, 'w', encoding='utf-8') as f:
    f.write(report_content)

console.print(f"\n📄 处理报告已保存到: {report_filename}", style="green")
console.print("\n💡 下一步操作建议:", style="blue")
console.print("1. 查看生成的回复草稿")
console.print("2. 根据提取的关键信息设置提醒")
console.print("3. 配置真实邮箱进行实际邮件处理")
console.print("4. 调整分类规则和回复模板以适应具体需求")