|
|
二、用python读取excel,然后插入到数据库
- import pandas as pd
- import pymysql
- import numpy as np
- from datetime import datetime
- def insert_excel_to_mysql_complete():
- """
- 完整的Excel导入MySQL程序
- """
- # ========== 配置参数 ==========
- excel_path = 'd:\\210750.xlsx' # 修改为你的Excel文件路径
- host = 'localhost'
- user = 'root'
- password = '12345678' # 修改为你的密码
- database = 'banma' # 你的数据库名是banma
- table_name = 'shuju'
- port = 3306
- connection = None
- try:
- # ========== 1. 读取Excel ==========
- print("=" * 60)
- print("步骤1: 读取Excel文件")
- print("=" * 60)
- df = pd.read_excel(excel_path)
- print(f"✅ 读取成功: {len(df)} 行数据")
- print(f"列名: {df.columns.tolist()}")
- print(f"\n前3行原始数据:")
- print(df.head(3))
- # ========== 2. 数据清洗和转换 ==========
- print("\n" + "=" * 60)
- print("步骤2: 数据清洗和转换")
- print("=" * 60)
- # 处理日期列
- if 'riqi' in df.columns:
- print("处理日期列...")
- # 查看日期列原始数据
- print(f"日期列原始数据类型: {df['riqi'].dtype}")
- print(f"日期列前5个值:\n{df['riqi'].head()}")
- # 转换为日期格式
- df['riqi'] = pd.to_datetime(df['riqi'], errors='coerce')
- # 只保留日期部分
- df['riqi'] = df['riqi'].dt.date
- print(f"转换后日期列前5个值:\n{df['riqi'].head()}")
- # 处理其他文本字段(确保是字符串类型)
- text_fields = ['field1', 'field2', 'field3', 'field4']
- for field in text_fields:
- if field in df.columns:
- # 转换为字符串,NaN保持为NaN
- df[field] = df[field].astype(object)
- # 将NaN转换为None(MySQL的NULL)
- df = df.where(pd.notna(df), None)
- print(f"\n清洗后的前3行数据:")
- print(df.head(3))
- # ========== 3. 准备插入数据 ==========
- print("\n" + "=" * 60)
- print("步骤3: 准备插入数据")
- print("=" * 60)
- # 转换为列表格式
- data_list = df.values.tolist()
- print(f"准备插入 {len(data_list)} 行数据")
- print(f"第1行数据示例: {data_list[0] if data_list else '无数据'}")
- # 检查是否有空数据
- empty_count = sum(1 for row in data_list if all(v is None for v in row))
- if empty_count > 0:
- print(f"⚠️ 警告: 有 {empty_count} 行全为空数据")
- # ========== 4. 连接数据库并插入 ==========
- print("\n" + "=" * 60)
- print("步骤4: 连接数据库并插入数据")
- print("=" * 60)
- connection = pymysql.connect(
- host=host,
- user=user,
- password=password,
- database=database,
- port=port,
- charset='utf8mb4'
- )
- print("✅ 数据库连接成功")
- with connection.cursor() as cursor:
- # 检查表是否存在
- cursor.execute(f"SHOW TABLES LIKE '{table_name}'")
- if not cursor.fetchone():
- print(f"❌ 错误: 表 {table_name} 不存在")
- return
- # 查看插入前的记录数
- cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
- before_count = cursor.fetchone()[0]
- print(f"插入前表中有 {before_count} 条记录")
- # 构建INSERT语句
- columns = ['riqi', 'field1', 'field2', 'field3', 'field4']
- placeholders = ', '.join(['%s'] * len(columns))
- sql = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
- print(f"SQL语句: {sql}")
- # 逐行插入(便于调试)
- success_count = 0
- error_count = 0
- for idx, row in enumerate(data_list):
- try:
- # 确保row有5个元素
- if len(row) != 5:
- print(f"⚠️ 第{idx + 1}行数据长度不正确: {len(row)},跳过")
- error_count += 1
- continue
- # 执行插入
- cursor.execute(sql, row)
- success_count += 1
- # 每100行提交一次
- if success_count % 100 == 0:
- connection.commit()
- print(f"已插入 {success_count}/{len(data_list)} 行")
- except Exception as e:
- print(f"❌ 第{idx + 1}行插入失败: {e}")
- print(f" 数据: {row}")
- error_count += 1
- continue
- # 提交剩余数据
- connection.commit()
- print(f"\n✅ 成功插入 {success_count} 行")
- if error_count > 0:
- print(f"❌ 失败 {error_count} 行")
- # 验证插入结果
- cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
- after_count = cursor.fetchone()[0]
- print(f"\n插入后表中有 {after_count} 条记录")
- print(f"实际增加: {after_count - before_count} 条")
- # 显示刚插入的数据
- if after_count > before_count:
- cursor.execute(f"SELECT * FROM {table_name} LIMIT 5")
- sample_data = cursor.fetchall()
- print(f"\n表中前5条数据:")
- for row in sample_data:
- print(f" {row}")
- except FileNotFoundError:
- print(f"❌ 错误: 找不到文件 {excel_path}")
- print("请确认文件路径是否正确")
- except pymysql.Error as e:
- print(f"❌ 数据库错误: {e}")
- if connection:
- connection.rollback()
- except Exception as e:
- print(f"❌ 其他错误: {e}")
- import traceback
- traceback.print_exc()
- finally:
- if connection:
- connection.close()
- print("\n数据库连接已关闭")
- print("=" * 60)
- # ========== 运行程序 ==========
- if __name__ == "__main__":
- insert_excel_to_mysql_complete()
复制代码
|
|