首先配置环境变量：运行 `nano ~/.bashrc`，添加下面这几行：
# Hadoop installation root; adjust if Hadoop is installed elsewhere.
export HADOOP_HOME=/usr/local/hadoop
# Put the Hadoop CLI tools (hadoop, hdfs, ...) on PATH.
export PATH=$HADOOP_HOME/bin:$PATH
# Classpath for compiling/running Hadoop Java programs (config + common + mapreduce jars).
export HADOOP_CLASSPATH=$HADOOP_HOME/etc/hadoop:$HADOOP_HOME/share/hadoop/common/:$HADOOP_HOME/share/hadoop/mapreduce/
# Native Hadoop libraries (e.g. libhdfs), used by HDFS client bindings.
# NOTE(review): this OVERWRITES any existing LD_LIBRARY_PATH — confirm that is acceptable.
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native
# Reload the shell configuration so the variables take effect in the current session.
source ~/.bashrc
例如，服务器端运行如下 API 服务：
from flask import Flask, request, jsonify
import pandas as pd
from pydoop.hdfs import hdfs
import os
app = Flask(__name__)


@app.route('/upload_csv', methods=['POST'])
def upload_csv():
    """Upload a local CSV file to HDFS.

    Expects a JSON request body with:
        local_file_path: path of the CSV file on the server's local disk
        hdfs_path:       destination path in HDFS

    Returns:
        200 with a success message and the HDFS path on success,
        400 with an error message on any failure.
    """
    data = request.json or {}
    local_file_path = data.get('local_file_path')  # 本地文件路径
    hdfs_path = data.get('hdfs_path')              # HDFS 路径
    # Indexing data['...'] outside the try block raised KeyError -> HTTP 500
    # when a key was missing; report a clean 400 instead.
    if not local_file_path or not hdfs_path:
        return jsonify({"error": "缺少 local_file_path 或 hdfs_path 参数"}), 400
    try:
        # 检查文件是否存在 — verify the source before touching HDFS.
        if not os.path.exists(local_file_path):
            return jsonify({"error": "本地文件不存在"}), 400
        # Parse with pandas so a malformed CSV is rejected with a 400.
        df = pd.read_csv(local_file_path)
        # BUG FIX: df.to_csv(hdfs_path) wrote to the LOCAL filesystem — pandas
        # has no HDFS support. Write through the pydoop hdfs client instead
        # (the `hdfs` class was imported but never used in the original code).
        fs = hdfs()  # connect to the default namenode from the Hadoop config
        try:
            with fs.open_file(hdfs_path, 'wt') as out:
                df.to_csv(out, index=False)
        finally:
            fs.close()
        return jsonify({"message": "文件已成功上传到 HDFS", "hdfs_path": hdfs_path}), 200
    except Exception as e:
        # Surface any pandas/HDFS error to the caller as a 400.
        return jsonify({"error": str(e)}), 400


if __name__ == '__main__':
    # Bind to all interfaces so the API is reachable from other machines.
    app.run(host='0.0.0.0', port=5000)
输入如下命令 `hadoop fs -ls /user/hadoop/` 查看 HDFS 文件系统：
root@hadoop01:~# hadoop fs -ls /user/hadoop/
Found 2 items
-rw-r--r-- 3 root supergroup 32 2024-11-02 13:36 /user/hadoop/1.csv
-rw-r--r-- 3 root supergroup 32 2024-11-02 13:39 /user/hadoop/1.xlsx
root@hadoop01:~#