利用 pydoop 将本地 CSV 文件导入 HDFS

首先配置环境变量:运行 nano ~/.bashrc,添加下面这几行:

# Hadoop installation root; everything below derives from it.
export HADOOP_HOME=/usr/local/hadoop
export PATH="$HADOOP_HOME/bin:$PATH"
# NOTE: a bare directory on a Java classpath only loads .class files.
# Jar directories must end in /* (a literal wildcard expanded by the JVM,
# not the shell) or the Hadoop jars are never picked up.
export HADOOP_CLASSPATH="$HADOOP_HOME/etc/hadoop:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*"
# Native libraries (libhdfs) required by pydoop.
export LD_LIBRARY_PATH="$HADOOP_HOME/lib/native"

source ~/.bashrc

例如,在服务器上运行如下 API 服务:

from flask import Flask, request, jsonify
import pandas as pd
from pydoop.hdfs import hdfs  # pydoop HDFS client class
import os

# WSGI application exposing the CSV-upload endpoint below.
app = Flask(__name__)

@app.route('/upload_csv', methods=['POST'])
def upload_csv():
    """Read a local CSV file and write its contents into HDFS via pydoop.

    Expects a JSON body: {"local_file_path": ..., "hdfs_path": ...}.
    Returns 200 with the HDFS path on success, 400 on any error.
    """
    # Tolerate a missing/non-JSON body instead of raising before validation.
    data = request.get_json(silent=True) or {}
    local_file_path = data.get('local_file_path')  # path on the server's disk
    hdfs_path = data.get('hdfs_path')  # destination path inside HDFS
    if not local_file_path or not hdfs_path:
        return jsonify({"error": "local_file_path 和 hdfs_path 均为必填"}), 400

    try:
        # Verify the local file exists before doing any work.
        if not os.path.exists(local_file_path):
            return jsonify({"error": "本地文件不存在"}), 400
        df = pd.read_csv(local_file_path)

        # BUG FIX: the original `df.to_csv(hdfs_path)` wrote to a *local*
        # path — nothing ever reached HDFS.  Serialize the CSV in memory and
        # push the bytes through pydoop's HDFS client instead.
        payload = df.to_csv(index=False).encode('utf-8')
        fs = hdfs()  # connects to the default NameNode from core-site.xml
        try:
            # NOTE(review): 'wb' assumed to open an HDFS file for binary
            # write in pydoop 2.x — confirm against the installed version.
            with fs.open_file(hdfs_path, 'wb') as f:
                f.write(payload)
        finally:
            fs.close()

        return jsonify({"message": "文件已成功上传到 HDFS", "hdfs_path": hdfs_path}), 200

    except Exception as e:
        # Surface the failure reason to the caller rather than a bare 500.
        return jsonify({"error": str(e)}), 400

if __name__ == '__main__':
    # Listen on all interfaces, port 5000 (Flask's built-in dev server).
    app.run(host='0.0.0.0', port=5000)

输入命令 hadoop fs -ls /user/hadoop/ 即可查看 HDFS 文件系统中已上传的文件:

root@hadoop01:~# hadoop fs -ls /user/hadoop/
Found 2 items
-rw-r--r-- 3 root supergroup 32 2024-11-02 13:36 /user/hadoop/1.csv
-rw-r--r-- 3 root supergroup 32 2024-11-02 13:39 /user/hadoop/1.xlsx
root@hadoop01:~#
暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇