|
|
加上时间段筛选的版本:
- import csv
- from collections import defaultdict
- from datetime import datetime
def _parse_row_date(date_str, formats=('%Y-%m-%d', '%Y/%m/%d', '%Y年%m月%d日', '%Y.%m.%d')):
    """Return ``date_str`` parsed with the first matching format, or None if none match."""
    for fmt in formats:
        try:
            return datetime.strptime(date_str, fmt)
        except ValueError:
            continue
    return None


def count_names_with_date_filter(csv_file, date_column, start_date=None, end_date=None, encoding='gbk'):
    """Count how often each name appears in the '人员' column, filtered by date.

    Each cell of the '人员' column may hold several names separated by the
    ideographic comma '、'. A row contributes to the counts only when its
    date cell parses in one of the supported formats (YYYY-MM-DD, YYYY/MM/DD,
    YYYY年MM月DD日, YYYY.MM.DD) and falls inside the inclusive range
    [start_date, end_date]. Rows with an empty, missing or unparseable date,
    or with an empty name cell, are skipped.

    Args:
        csv_file: Path of the CSV file to read.
        date_column: Header of the column that holds the row date.
        start_date: Inclusive lower bound as 'YYYY-MM-DD'; falsy means no bound.
        end_date: Inclusive upper bound as 'YYYY-MM-DD'; falsy means no bound.
        encoding: Encoding used to open the file. If decoding fails, the file
            is read once more as 'utf-8-sig'.

    Returns:
        A dict mapping each name to its number of occurrences, or None
        (after printing a diagnostic) when a required column is missing.

    Raises:
        ValueError: If start_date or end_date is not in 'YYYY-MM-DD' format.
        UnicodeDecodeError: If the file cannot be decoded with 'utf-8-sig'
            either.
    """
    name_column = '人员'
    fallback_encoding = 'utf-8-sig'

    # Convert the textual bounds once, before touching the file.
    start_dt = datetime.strptime(start_date, '%Y-%m-%d') if start_date else None
    end_dt = datetime.strptime(end_date, '%Y-%m-%d') if end_date else None

    name_counts = defaultdict(int)

    try:
        # newline='' lets the csv module handle embedded line breaks itself.
        with open(csv_file, mode='r', encoding=encoding, newline='') as file:
            reader = csv.DictReader(file)

            # fieldnames is None for an empty file; treat that as "no columns".
            fieldnames = reader.fieldnames or []
            missing_columns = [col for col in (name_column, date_column) if col not in fieldnames]
            if missing_columns:
                print(f"错误:CSV文件中缺少必要的列: {missing_columns}")
                print(f"可用的列有:{fieldnames}")
                return None

            for row in reader:
                # Short rows yield None for the missing cells, hence the `or ''`.
                date_str = (row.get(date_column) or '').strip()
                if not date_str:
                    continue

                row_date = _parse_row_date(date_str)
                if row_date is None:
                    continue

                # Both bounds are inclusive.
                if start_dt is not None and row_date < start_dt:
                    continue
                if end_dt is not None and row_date > end_dt:
                    continue

                names_str = (row.get(name_column) or '').strip()
                if not names_str:
                    continue

                # The previous replace() call substituted '、' with itself, which
                # did nothing; treat the halfwidth form (U+FF64) as the same
                # separator instead.
                for name in names_str.replace('\uff64', '、').split('、'):
                    name = name.strip()
                    if name:
                        name_counts[name] += 1

    except UnicodeDecodeError:
        # Retry only once: if the fallback encoding itself failed, re-raise
        # instead of recursing until the interpreter's recursion limit.
        if str(encoding).lower().replace('_', '-') == fallback_encoding:
            raise
        print(f"使用{encoding}编码失败,尝试utf-8-sig...")
        return count_names_with_date_filter(csv_file, date_column, start_date, end_date, fallback_encoding)

    return dict(name_counts)
# Example usage.
csv_path = 'your_file.csv'  # replace with the path to your CSV file
date_column = '日期'  # replace with the header of your date column
# Optional date range; each bound is formatted as 'YYYY-MM-DD'.
start_date = '2023-01-01'
end_date = '2023-12-31'
result = count_names_with_date_filter(csv_path, date_column, start_date=start_date, end_date=end_date)
if result:
    # Most frequent names first.
    sorted_counts = sorted(result.items(), key=lambda pair: pair[1], reverse=True)

    print(f"姓名统计结果 (日期范围: {start_date or '最早'} 至 {end_date or '最新'}):")
    print("------------------")
    print(f"{'姓名':<15} {'出现次数':<10}")
    for name, count in sorted_counts:
        print(f"{name:<15} {count:<10}")

    # Optional: save the result to a CSV file.
    # with open('name_counts_result.csv', 'w', encoding='utf-8-sig', newline='') as f:
    #     writer = csv.writer(f)
    #     writer.writerow(['姓名', '出现次数'])
    #     writer.writerows(sorted_counts)
复制代码
|
|