代码如下
import contextlib

import pandas as pd
from pyhdfs import HdfsClient

# Copy a ratings file from HDFS to a local CSV, then load that CSV with pandas.
client = HdfsClient(hosts='192.168.1.1:50070')

# Original author's note:
#   TypeError: cannot use a string pattern on a bytes-like object
# NOTE(review): presumably this is the error raised when the raw HDFS bytes
# are handed to pandas directly, which is why the data is decoded and staged
# in a local file first -- confirm.

# Read the whole file from HDFS. The handle is closed as soon as the read
# finishes so the underlying connection is not leaked.
with contextlib.closing(client.open("/data/movielens/train/ra.train")) as hdfs_file:
    content = hdfs_file.read()

# The HDFS handle yields bytes; decode them to text as UTF-8.
s = str(content, "utf-8")

# Write the text to the local CSV. The `with` block guarantees the data is
# flushed and the file closed before pandas reads the same path below;
# the explicit encoding keeps the bytes on disk UTF-8 regardless of locale.
with open("/home/data/data.csv", "w", encoding="utf-8") as local_file:
    local_file.write(s)

# Load the first three columns of the local CSV as user / item / rating.
# NOTE(review): `self.sep` is not defined at module level; this snippet
# presumably belongs inside a method of a class exposing `sep` -- confirm.
train_data = pd.read_csv(
    "/home/data/data.csv",
    sep=self.sep,
    header=None,
    usecols=[0, 1, 2],
    names=['user', 'item', 'rating'],
)