# Copy every table from a MySQL database ('insurance') into the Hive
# database 'insurance_b' using Spark's JDBC reader.
#
# Steps:
#   1. List the table names in MySQL via pymysql ('show tables').
#   2. Build a Hive-enabled SparkSession pointed at the cluster metastore.
#   3. For each table: read it over JDBC, register a temp view, and
#      materialize it in Hive with CREATE TABLE ... AS SELECT.
#
# NOTE(review): host/user/password are hard-coded; consider moving them to
# configuration or environment variables before this leaves a lab setting.
import pymysql

# 1. Fetch the list of table names from MySQL.
coon = pymysql.connect(host='192.168.88.100', user='root',
                       password='123456', database='insurance')
try:
    # Cursor is a context manager: it is closed even if execute() raises.
    with coon.cursor() as cursor:
        cursor.execute('show tables')
        # Tuple of 1-tuples, e.g. (('claims',), ('policies',), ...)
        data = cursor.fetchall()
finally:
    # Close the connection as soon as the names are fetched — the long
    # Spark job below does not need it, and this avoids leaking it on error.
    coon.close()

# 2. Build a Hive-enabled SparkSession against the cluster metastore.
from pyspark.sql import SparkSession
ss = SparkSession.builder. \
    config('spark.sql.warehouse.dir', 'hdfs://node1:8020/user/hive/warehouse'). \
    config('hive.metastore.uris', 'thrift://node1:9083'). \
    enableHiveSupport(). \
    getOrCreate()

# 3. Copy each MySQL table into Hive.
for row in data:
    table_name = row[0]  # each fetchall() row is a 1-tuple: (name,)
    df = ss.read.jdbc(url='jdbc:mysql://node1:3306/insurance?characterEncoding=UTF-8',
                      table=table_name,
                      properties={'user': 'root', 'password': '123456',
                                  'driver': 'com.mysql.jdbc.Driver'})
    # createOrReplaceTempView is safe on reruns (createTempView raises if
    # the view name already exists in this session).
    df.createOrReplaceTempView(table_name)
    # Materialize the data as a managed Hive table in insurance_b.
    # Table names come from 'show tables', so interpolation here is safe.
    ss.sql(f'create table insurance_b.{table_name} as select * from {table_name} ')
# NOTE(review): removed non-code footer text ("Logo", a Tencent Cloud blurb,
# "更多推荐") that was accidentally pasted from the web page this script was
# copied from — it was not valid Python and broke the file.