# 处理25年政企市场提效增产通报脚本。
# Author: subk
# Time: 2025/7/24 17:07
# Desc: 处理25年集团市场提效增产通报数据
# Version: 1.0.1 (Updated summary card item alignment)
import requests
import pandas as pd
import re
import smtplib
import schedule
import time
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
# ========== User configuration ==========
# SMTP account: used both as the login name and as the "From" address.
username = "su@e.com"
# NOTE(review): the SMTP password is stored in plain text in the source file;
# consider loading it from the environment or a secrets store.
password = "74A33600"
# Mail server host and port; the port is used with an implicit-TLS
# (SMTP_SSL) connection in send_email.
smtp_server = "smtp.xx.om"
smtp_port = 465
# Addresses that receive the report (joined into the "To" header).
recipients = [
"suXXXXile.com"
]
# Region names that get a column in the front/back/key-weak item tables.
REGIONS = ["XX"]
# Metric names treated as weekly key weak items; bottom-three metrics of a
# region are matched against this list.
KEY_WEAK = ["战客客户净增", ]
# "%Y%m%d" string of the day the scheduled report was last sent, or None if
# nothing has been sent yet; updated by job_wrapper and main.
last_sent_date = None
# ========== 工具函数 ==========
def get_dates(now=None):
    """Return the date strings used by the report.

    Both strings are derived from a single timestamp so that they cannot
    disagree when the call happens right around midnight.

    Args:
        now: Reference ``datetime``. Defaults to ``datetime.now()``; passing
            an explicit value is useful for re-running a past day.

    Returns:
        A ``(today, yesterday)`` tuple where ``today`` is formatted as
        ``%Y年%m月%d日`` and ``yesterday`` (one day before ``now``) is
        formatted as ``%Y%m%d``.
    """
    if now is None:
        now = datetime.now()
    today = now.strftime("%Y年%m月%d日")
    yesterday = (now - timedelta(days=1)).strftime("%Y%m%d")
    return today, yesterday
# def get_dates():
# today = "2025年07月24日"
# yesterday = "20250723"
# return today,yesterday
def fetch_web_content(url: str) -> str:
    """Download a page and return its body as text.

    Args:
        url: Address to fetch with an HTTP GET (10 second timeout).

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    # When the server sends text/* without a charset, requests falls back to
    # ISO-8859-1, which would garble Chinese text and break the later regex
    # and column-name matching. Use the detected encoding in that case only;
    # an explicitly declared charset is left untouched.
    if response.encoding is None or response.encoding.lower() == "iso-8859-1":
        response.encoding = response.apparent_encoding
    return response.text
def send_email(subject: str, html_body: str):
    """Send an HTML e-mail to the configured recipients.

    The message is sent from ``username`` to every address in ``recipients``
    over an implicit-TLS connection to ``smtp_server:smtp_port``.

    Args:
        subject: Subject line of the message.
        html_body: HTML document used as the (UTF-8) message body.

    Raises:
        smtplib.SMTPException / OSError: Propagated from connecting, logging
            in or sending; callers in this file catch ``Exception``.
    """
    msg = MIMEMultipart()
    msg["From"] = username
    msg["To"] = ", ".join(recipients)
    msg["Subject"] = subject
    msg.attach(MIMEText(html_body, "html", "utf-8"))
    # A timeout keeps a stalled mail server from blocking the scheduler loop
    # forever; without it the socket operations may wait indefinitely.
    with smtplib.SMTP_SSL(smtp_server, smtp_port, timeout=30) as server:
        server.login(username, password)
        server.sendmail(username, recipients, msg.as_string())
def highlight_keyword(content: str, keyword: str) -> str:
    """Wrap every occurrence of ``keyword`` in a yellow-background span.

    Args:
        content: Text (typically HTML) to search.
        keyword: Literal substring to highlight.

    Returns:
        ``content`` with each occurrence of ``keyword`` replaced by a
        ``<span>`` carrying a ``#FFF59D`` background. ``content`` is returned
        unchanged when ``keyword`` is empty.
    """
    # str.replace("", x) would insert the span between every character.
    if not keyword:
        return content
    return content.replace(keyword, f"<span style='background-color:#FFF59D'>{keyword}</span>")
def generate_styled_html_report(title: str, summary: str, main_table_html: str, back_table_html: str, top_table_html: str, match_table_html: str, original_content: str) -> str:
    """Assemble the complete styled HTML report.

    Args:
        title: Text shown in the page header (``<h1>``).
        summary: Summary sentence; the counts, score and ranks shown in the
            summary card are parsed out of it with the patterns
            ``前三项目数N,排名N``, ``后三项目数N,排名N`` and ``模拟得分X,排名N``.
            A group that is not found falls back to zeros.
        main_table_html: HTML of the main table, inserted verbatim.
        back_table_html: HTML of the bottom-three item table, inserted verbatim.
        top_table_html: HTML of the top-three item table, inserted verbatim.
        match_table_html: HTML of the key-weak-item table, inserted verbatim.
        original_content: Original page content, inserted verbatim inside a
            ``<p>`` element.

    Returns:
        The full HTML document as a string. None of the inserted fragments
        is escaped.
    """
    # One timestamp is taken up front so the header and the footer agree.
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M")
    html_template = f"""
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>政企三季度提产增效通报</title>
<style>
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
body {{
font-family: 'Microsoft YaHei', Arial, sans-serif;
background-color: #f5f5f5;
min-height: 100vh;
padding: 20px;
color: #333;
}}
.container {{
max-width: 1200px;
margin: 0 auto;
background: white;
border-radius: 8px;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.05);
overflow: hidden;
}}
.header {{
background: #3498d8;
color: white;
padding: 25px;
text-align: center;
}}
.header h1 {{
font-size: 1.8em;
margin-bottom: 10px;
}}
.header p {{
font-size: 0.95em;
opacity: 0.9;
}}
.content {{
padding: 25px;
}}
.section {{
margin-bottom: 30px;
}}
.section-header {{
background: #3498db;
color: white;
padding: 12px 15px;
font-size: 1.1em;
font-weight: bold;
border-radius: 4px 4px 0 0;
}}
.section-content {{
padding: 20px;
background: white;
border: 1px solid #e0e0e0;
border-top: none;
border-radius: 0 0 4px 4px;
}}
.summary-card {{
background: #e8f4fc;
border: 1px solid #bedcf0;
border-radius: 6px;
padding: 20px;
margin-bottom: 25px;
}}
.summary-card h2 {{
font-size: 1.3em;
margin-bottom: 15px;
color: #2c3e50;
/* 标题保持默认/左对齐,不居中 */
}}
.summary-stats {{
display: flex;
justify-content: space-around;
flex-wrap: wrap;
gap: 15px;
}}
/* 关键修改:确保 stat-item 内的所有内容(数值、标签、排名)都居中 */
.stat-item {{
display: flex;
flex-direction: column;
align-items: center; /* 子元素水平居中 */
justify-content: center; /* 子元素垂直居中 */
text-align: center; /* 文本居中(作为后备) */
min-width: 120px;
}}
.stat-number {{
font-size: 2em;
font-weight: bold;
color: #e74c3c;
/* display: block; 移除,因为 flex 已处理布局 */
margin-bottom: 4px; /* 与标签保持间距 */
}}
.stat-label {{
font-size: 0.9em;
color: #555;
margin-bottom: 2px; /* 与排名信息保持间距 */
}}
.rank-info {{
font-size: 0.75em;
color: #777;
/* margin-top: 4px; 移除,因为 margin-bottom 已处理间距 */
}}
/* 确保表格内容居中 */
table {{
width: 100%;
border-collapse: collapse;
margin: 15px 0;
}}
th {{
background: #f8f9fa;
color: #2c3e50;
padding: 10px;
text-align: center;
font-weight: bold;
font-size: 0.95em;
border: 1px solid #ddd;
}}
td {{
padding: 8px 10px;
text-align: center;
border: 1px solid #eee;
font-size: 0.9em;
word-wrap: break-word;
}}
tr:nth-child(even) {{
background-color: #fafafa;
}}
tr:hover td {{
background-color: #f0f7ff;
}}
tr:last-child td {{
background-color: #f0f0f0;
font-weight: bold;
}}
.table-container {{
overflow-x: auto;
margin: 15px 0;
}}
.original-content {{
background: #f9f9f9;
border: 1px solid #e0e0e0;
border-radius: 6px;
padding: 15px;
margin-top: 20px;
font-size: 0.9em;
}}
.original-content h3 {{
color: #2c3e50;
margin-bottom: 10px;
font-size: 1.1em;
}}
.footer {{
text-align: center;
padding: 15px;
background: #f8f9fa;
color: #666;
font-size: 0.85em;
border-top: 1px solid #eee;
}}
@media (max-width: 768px) {{
.summary-stats {{
flex-direction: column;
gap: 15px;
}}
.stat-item {{
width: 100%;
}}
.header h1 {{
font-size: 1.5em;
}}
.section-header {{
font-size: 1.05em;
}}
}}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>{title}</h1>
<p>数据更新时间:{current_time}</p>
</div>
<div class="content">
<!-- 描述段落 -->
<div class="section">
<div class="section-header">
📊 通报概览
</div>
<div class="section-content">
<p>
根据政企市场提产增效通报,现将三季度XX分公司情况进行通报。请政企部各管理员高度重视,针对弱项制定改进措施,确保完成全年目标任务。
</p>
</div>
</div>
<!-- XX数据摘要 -->
<div class="summary-card">
<h2>🎯 XX地区数据摘要</h2>
<div class="summary-stats">
"""
    # The card values are parsed back out of the summary sentence so that the
    # card always shows exactly what the subject line states.
    front_match = re.search(r'前三项目数(\d+),排名(\d+)', summary)
    back_match = re.search(r'后三项目数(\d+),排名(\d+)', summary)
    score_match = re.search(r'模拟得分([\d.]+),排名(\d+)', summary)
    front_count, front_rank = front_match.groups() if front_match else ("0", "0")
    back_count, back_rank = back_match.groups() if back_match else ("0", "0")
    score_value, score_rank = score_match.groups() if score_match else ("0.00", "0")
    # Normalise the score to two decimals regardless of how it was written.
    score_value = f"{float(score_value):.2f}"
    html_template += f"""
<!-- 前三项目数及排名 -->
<div class="stat-item">
<span class="stat-number">{front_count}</span>
<span class="stat-label">前三项目数</span>
<div class="rank-info">排名: {front_rank}</div>
</div>
<!-- 后三项目数及排名 -->
<div class="stat-item">
<span class="stat-number">{back_count}</span>
<span class="stat-label">后三项目数</span>
<div class="rank-info">排名: {back_rank}</div>
</div>
<!-- 模拟得分及排名 -->
<div class="stat-item">
<span class="stat-number">{score_value}</span>
<span class="stat-label">模拟得分</span>
<div class="rank-info">排名: {score_rank}</div>
</div>
"""
    html_template += """
</div>
</div>
<!-- 主数据表格 -->
<div class="section">
<div class="section-header">
📈 各区县弱项数、得分排名情况
</div>
<div class="section-content">
<div class="table-container">
"""
    html_template += main_table_html
    html_template += """
</div>
</div>
</div>
<!-- 后三项目列表 -->
<div class="section">
<div class="section-header">
⚠️ 后三项目列表
</div>
<div class="section-content">
<div class="table-container">
"""
    html_template += back_table_html
    html_template += """
</div>
</div>
</div>
<!-- 前三项目列表 -->
<div class="section">
<div class="section-header">
🏆 前三项目列表
</div>
<div class="section-content">
<div class="table-container">
"""
    html_template += top_table_html
    html_template += """
</div>
</div>
</div>
<!-- 重点弱项匹配 -->
<div class="section">
<div class="section-header">
🔍 周调度重点弱项匹配
</div>
<div class="section-content">
<div class="table-container">
"""
    html_template += match_table_html
    html_template += """
</div>
</div>
</div>
<!-- 原始页面内容 -->
<div class="section">
<div class="section-header">
📋 原始页面内容(含XX高亮)
</div>
<div class="section-content">
<div class="original-content">
<h3>页面原始数据</h3>
"""
    html_template += f"<p>{original_content}</p>"
    # This segment must be an f-string: as a plain string the footer showed
    # the literal text "{current_time}" instead of the timestamp.
    html_template += f"""
</div>
</div>
</div>
</div>
<div class="footer">
<p>© 2025 中国移动通信集团江苏有限公司XX分公司 | 数据更新时间:{current_time}</p>
</div>
</div>
</body>
</html>
"""
    return html_template
def print_styled_table(df: pd.DataFrame, title: str) -> str:
last = df.index[-1]
styler = (
df.style
.set_table_attributes('style="margin-left:auto;margin-right:auto;border-collapse:collapse;"')
.set_caption(f"<strong style='font-size:1.3em;color:#2E86C1'>{title}</strong>")
.set_table_styles([
{"selector":"thead th",
"props":[("background-color","#F8F9FA"),("font-weight","bold"),
("font-size","0.95em"),("border","1px solid #ddd"),("padding","10px")]},
{"selector":"tbody th", "props":[("border","1px solid #eee"),("padding","8px 10px")]}
])
.set_properties(**{"border":"1px solid #eee","padding":"8px 10px","text-align":"center","word-wrap":"break-word"})
.apply(lambda row: ["background-color:#f0f0f0" if row.name==last else "" for _ in row], axis=1)
)
return f"<div style='margin:15px 0;'>{styler.to_html()}</div>"
def enhance_table_border(table_html: str) -> str:
    """Add inline CSS to a table fragment so it renders without a stylesheet.

    The first ``<table>`` gets ``border="0"`` plus width/collapse/margin
    styles; every ``<th>`` and ``<td>`` gets padding, alignment and border
    styles; every ``<tr>`` at an even position (0, 2, 4, ... counting all
    rows in document order) gets a light background. Existing inline styles
    are kept and the new declarations are appended after them.

    Args:
        table_html: HTML fragment to decorate.

    Returns:
        The decorated HTML as a string.
    """
    soup = BeautifulSoup(table_html, "html.parser")

    def extend_style(tag, extra_css):
        # Append to whatever inline style the tag already carries.
        tag['style'] = tag.get('style', '') + extra_css

    table = soup.find("table")
    if table:
        table['border'] = "0"
        extend_style(table, ';width:100%;border-collapse:collapse;margin:15px 0;')

    for header_cell in soup.find_all('th'):
        extend_style(
            header_cell,
            ';background:#f8f9fa;color:#2c3e50;padding:10px;text-align:center;font-weight:bold;font-size:0.95em;border:1px solid #ddd;',
        )

    for data_cell in soup.find_all('td'):
        extend_style(
            data_cell,
            ';padding:8px 10px;text-align:center;border:1px solid #eee;font-size:0.9em;word-wrap: break-word;',
        )

    for position, table_row in enumerate(soup.find_all('tr')):
        if position % 2:
            continue
        extend_style(table_row, ';background-color:#fafafa;')

    return str(soup)
# ========== 提取函数 ==========
def parse_main_table(soup: BeautifulSoup) -> pd.DataFrame:
    """Parse the ``displayTable`` table of the page into a DataFrame.

    The second table row is used as the header (``header=1``), whitespace
    and newlines are removed from column names, and rows without a value in
    the ``县区`` column are dropped. Known numeric columns are converted:
    ``模拟得分`` is rounded to two decimals, the count/rank columns become
    integers with unparsable values replaced by 0.

    Args:
        soup: Parsed page.

    Returns:
        The cleaned table.

    Raises:
        ValueError: If the page contains no ``table.displayTable``.
        KeyError: If the table has no ``县区`` column.
    """
    # Local import keeps this block self-contained; newer pandas deprecates
    # passing literal HTML to read_html and asks for a file-like object.
    from io import StringIO

    table = soup.find("table", class_="displayTable")
    if table is None:
        raise ValueError("未找到主表 displayTable")
    df = pd.read_html(StringIO(str(table)), header=1)[0]
    # str() guards against non-string labels (e.g. numeric header cells).
    df.columns = [str(c).strip().replace("\n", "") for c in df.columns]
    # Copy so the column assignments below modify an independent frame
    # rather than a slice of the parsed one.
    df = df[df["县区"].notnull() & (df["县区"] != "")].copy()
    for col in ["前三项目数", "后三项目数", "模拟得分", "落后时序项目数", "排名"]:
        if col not in df.columns:
            continue
        df[col] = pd.to_numeric(df[col], errors="coerce")
        if col == "模拟得分":
            df[col] = df[col].round(2)
        else:
            df[col] = df[col].fillna(0).astype(int)
    return df
def extract_lianshui(df: pd.DataFrame, region: str = "XX") -> dict:
    """Extract one region's counts, score and ranks from the main table.

    Ranks are competition ranks in descending order: a region's rank is one
    plus the number of rows with a strictly greater value, so tied regions
    share the same rank and the result does not depend on row order. For the
    bottom-three count this means a larger count ranks higher, as before.

    Args:
        df: Main table with the columns ``县区``, ``前三项目数``,
            ``后三项目数`` and ``模拟得分``.
        region: Value of ``县区`` to look up (first matching row is used).

    Returns:
        A dict with the keys ``front``, ``back``, ``score``, ``front_rank``,
        ``back_rank`` and ``score_rank``.

    Raises:
        ValueError: If no row has ``县区 == region``.
    """
    sub = df[df["县区"] == region]
    if sub.empty:
        raise ValueError(f"未找到{region}数据")

    def rank_desc(column: str) -> int:
        value = sub[column].iloc[0]
        if pd.isna(value):
            # A missing value ranks behind every row that has a value.
            return int(df[column].notna().sum()) + 1
        return int((df[column] > value).sum()) + 1

    return {
        'front': int(sub["前三项目数"].iloc[0]),
        'back': int(sub["后三项目数"].iloc[0]),
        'score': float(sub["模拟得分"].iloc[0]),
        'front_rank': rank_desc("前三项目数"),
        'back_rank': rank_desc("后三项目数"),
        'score_rank': rank_desc("模拟得分"),
    }
def extract_metrics(soup: BeautifulSoup, color: str) -> dict:
    """Collect, per configured region, the metrics in which it is listed.

    Every ``div.title_content`` whose text looks like ``<number>、<metric>
    三季度...`` names a metric. Inside such a div, each ``<font>`` element
    with the given ``color`` attribute is scanned for ``name(...)`` items
    (Chinese characters followed by an ASCII-parenthesised part); when a
    name is one of ``REGIONS`` the metric is appended to that region's list.

    Args:
        soup: Parsed page.
        color: Value of the ``color`` attribute of the ``<font>`` tags to read.

    Returns:
        A dict mapping every region in ``REGIONS`` to its list of metric
        names (possibly empty, possibly with repeats).
    """
    metrics_by_region = {name: [] for name in REGIONS}
    for block in soup.find_all("div", class_="title_content"):
        heading = re.match(r'^\d+、(.+?)三季度', block.get_text(strip=True))
        if heading is None:
            # Not a numbered metric paragraph.
            continue
        metric_name = heading.group(1).strip()
        for font_tag in block.find_all("font", attrs={"color": color}):
            for region_name in re.findall(r'([\u4e00-\u9fa5]+)\([^)]*\)', font_tag.get_text()):
                bucket = metrics_by_region.get(region_name)
                if bucket is not None:
                    bucket.append(metric_name)
    return metrics_by_region
# ========== 主报表生成函数 ==========
def _build_region_table(items_by_region: dict, row_prefix: str, count_label: str, caption: str) -> str:
    """Render one column per region listing its items, closed by a count row."""
    depth = max(len(items) for items in items_by_region.values())
    columns = {}
    for region in REGIONS:
        items = items_by_region[region]
        # Pad shorter columns with empty cells, then add the item count.
        columns[region] = items + [''] * (depth - len(items)) + [len(items)]
    labels = [f"{row_prefix}{i + 1}" for i in range(depth)] + [count_label]
    return print_styled_table(pd.DataFrame(columns, index=labels), caption)


def generate_report():
    """Fetch yesterday's report page and build the e-mail subject and body.

    The page is downloaded, the main table is parsed to obtain the summary
    figures, and the bottom-three, top-three and key-weak-item tables are
    built from the red/blue highlighted regions of the page.

    Returns:
        A ``(subject, html_body)`` tuple.

    Raises:
        ValueError: If the page has no ``table.displayTable`` or the
            expected region row is missing.
        Exception: Whatever the download or parsing raises is propagated.
    """
    _, yesterday = get_dates()
    url = f"http://10.33.222.52:31002/hamobile/table/mailReport?flag=jtReport5&cfg_id=207&day_time={yesterday}"
    html = fetch_web_content(url)
    soup = BeautifulSoup(html, "html.parser")

    main_table = soup.find("table", class_="displayTable")
    if not main_table:
        raise ValueError("主表未找到")
    df_main = parse_main_table(soup)
    lianshui = extract_lianshui(df_main)
    # The summary sentence carries counts, score and ranks; the HTML builder
    # parses these values back out of it for the summary card.
    summary = (f"XX前三项目数{lianshui['front']},排名{lianshui['front_rank']};"
               f"后三项目数{lianshui['back']},排名{lianshui['back_rank']}。"
               f"模拟得分{lianshui['score']:.2f},排名{lianshui['score_rank']}。")
    subject = f"【抢先版】政企提效增产通报:{summary}"
    main_table_html = enhance_table_border(str(main_table))

    # Red marks bottom-three regions, blue marks top-three regions.
    back = extract_metrics(soup, "red")
    top = extract_metrics(soup, "blue")
    # Bottom-three metrics that are also on the weekly key-weak list.
    matched = {r: [m for m in back[r] if m in KEY_WEAK] for r in REGIONS}

    back_table_html = _build_region_table(back, "后三项目", "后三项目个数", "后三项目列表")
    top_table_html = _build_region_table(top, "前三项目", "前三项目个数", "前三项目列表")
    match_table_html = _build_region_table(matched, "重点弱项", "重点弱项个数", "周调度重点弱项匹配")

    highlighted = highlight_keyword(html, "XX")
    report_html = generate_styled_html_report(
        subject,
        summary,
        main_table_html,
        back_table_html,
        top_table_html,
        match_table_html,
        highlighted,
    )
    return subject, report_html
# ========== 调度逻辑 ==========
def job_wrapper():
    """Scheduled job: send the report once per day, from 15:00 onwards.

    Does nothing before 15:00 or when ``last_sent_date`` already equals
    today's date. Any failure while generating or sending is printed and
    the job is retried on its next scheduled run, because
    ``last_sent_date`` is only updated after a successful send.
    """
    global last_sent_date
    now = datetime.now()
    today_str = now.strftime("%Y%m%d")
    if now.hour < 15 or last_sent_date == today_str:
        return
    try:
        report_subject, report_body = generate_report()
        send_email(report_subject, report_body)
        last_sent_date = today_str
        print(f"[{now}] 报告已发送")
    except Exception as exc:
        print(f"[{now}] 生成失败:{exc}")
def main():
    """Send one report immediately, then run the scheduler until interrupted.

    The immediate run only marks the day as sent when it happens at or after
    15:00, so an earlier manual run does not suppress the scheduled one.
    Afterwards ``job_wrapper`` is checked every five minutes until Ctrl+C.
    """
    global last_sent_date

    # --- Immediate (manual) run ---
    try:
        report_subject, report_body = generate_report()
        send_email(report_subject, report_body)
        if datetime.now().hour >= 15:
            last_sent_date = datetime.now().strftime("%Y%m%d")
        print("手动执行成功,已发送邮件。")
    except Exception as exc:
        print("手动执行失败:", exc)

    # --- Scheduler loop ---
    schedule.every(5).minutes.do(job_wrapper)
    print("调度任务已启动,按 Ctrl+C 退出...")
    try:
        while True:
            schedule.run_pending()
            time.sleep(1)
    except KeyboardInterrupt:
        print("\n程序已收到中断信号,正在退出...")


if __name__ == "__main__":
    main()