MapReduce报错：「MKDirs failed to create file」

2023-05-16

MapReduce报错：「MKDirs failed to create file」

文章目录

MapReduce报错：「MKDirs failed to create file」
- 0. 写在前面
- 1. 程序代码及报错信息
  - 输入、输出路径
  - 程序代码
  - 报错信息
- 2. 查找资料
- 3. 原因分析
- 4. 参考

在这里插入图片描述

0. 写在前面

Linux：Ubuntu Kylin16.04
Hadoop：Hadoop2.7.2

1. 程序代码及报错信息

输入、输出路径

zhangsan@hadoop01:/$ ll | grep input
drwxr-xr-x   3 zhangsan  zhangsan   4096 9月  20 03:35 input/
zhangsan@hadoop01:/$ ll | grep output
zhangsan@hadoop01:/$

程序代码

package com.mr.ch07.maxmin;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class MinMaxValueDemo7_2_1 {

	/**
	 * Mapper that reads lines of the form {@code "<date> <value>"} (fields separated by a
	 * single space) and emits {@code (date, MinMaxWritable)}, where both the min and the max
	 * of the emitted tuple are set to the parsed integer value.
	 */
	public static class MinMaxMapper extends Mapper<Object, Text, Text, MinMaxWritable> {

		// Single tuple instance that is refilled and written for every record.
		private MinMaxWritable outTuple = new MinMaxWritable();

		/**
		 * Parses one input line and writes its date together with the value as min and max.
		 *
		 * @param key     input key, not used
		 * @param value   one line of input text
		 * @param context task context that receives the output pair
		 * @throws NumberFormatException if the second field is not a valid integer
		 */
		@Override
		public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
			String[] strs = value.toString().split(" ");

			// split() never returns null elements, so a null check cannot detect a bad
			// record. Skip lines with an empty date or no value field (e.g. blank lines)
			// instead of failing with ArrayIndexOutOfBoundsException.
			if (strs.length < 2 || strs[0].isEmpty()) {
				return;
			}

			String strDate = strs[0];
			int number = Integer.parseInt(strs[1]);

			// A single observation is both the smallest and the largest value seen so far.
			outTuple.setMin(number);
			outTuple.setMax(number);

			context.write(new Text(strDate), outTuple);
		}

	}



	/**
	 * Reducer that folds all {@link MinMaxWritable} tuples of one key into a single tuple
	 * holding the smallest min and the largest max. The driver registers this class as
	 * both the combiner and the reducer.
	 */
	public static class MinMaxReducer extends Reducer<Text, MinMaxWritable, Text, MinMaxWritable> {

		// Single result instance that is refilled and written for every key.
		private MinMaxWritable result = new MinMaxWritable();

		/**
		 * Computes the overall min and max for {@code key} and writes them to the context.
		 *
		 * @param key     the grouping key
		 * @param values  the tuples collected for this key
		 * @param context task context that receives the output pair
		 */
		@Override
		public void reduce(Text key, Iterable<MinMaxWritable> values, Context context)
			throws IOException, InterruptedException {

			// Track "nothing seen yet" with a flag rather than treating 0 as unset:
			// 0 is a legitimate value and must not be overwritten by later values.
			boolean seen = false;
			int min = Integer.MAX_VALUE;
			int max = Integer.MIN_VALUE;

			for (MinMaxWritable val : values) {
				seen = true;
				min = Math.min(min, val.getMin());
				max = Math.max(max, val.getMax());
			}

			// With no values at all, fall back to 0/0.
			result.setMin(seen ? min : 0);
			result.setMax(seen ? max : 0);

			context.write(key, result);
		}

	}

	/**
	 * Configures the min/max job and runs it to completion.
	 *
	 * <p>With two arguments they are used as input and output path. With no arguments the
	 * built-in paths {@code /input/ch07/minmax} and {@code /output/ch07/minmax} are used.
	 * Any other argument count prints a usage message and exits with status 2.
	 *
	 * <p>The process exits with status 0 when the job succeeds and 1 when it fails.
	 *
	 * @param args optional {@code <in> <out>} paths
	 * @throws Exception if the job cannot be set up or submitted
	 */
	public static void main(String[] args) throws Exception {

		Configuration conf = new Configuration();

		if (args.length != 0 && args.length != 2) {
			System.err.println("Usage: MinMaxValueDemo7_2_1 [<in> <out>]");
			System.exit(2);
		}

		// Fall back to the original hard-coded paths when none are given.
		// NOTE(review): the user running the job needs write access to the parent of the
		// output path, otherwise the output directory cannot be created.
		String[] otherArgs = args.length == 2
				? args
				: new String[] { "/input/ch07/minmax", "/output/ch07/minmax" };

		Job job = Job.getInstance(conf);
		job.setJarByClass(MinMaxValueDemo7_2_1.class);
		job.setMapperClass(MinMaxMapper.class);
		// The reducer doubles as the combiner.
		job.setCombinerClass(MinMaxReducer.class);
		job.setReducerClass(MinMaxReducer.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(MinMaxWritable.class);

		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
		FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}




	/**
	 * Value type carrying a minimum and a maximum integer.
	 *
	 * <p>Binary layout: {@code min} followed by {@code max}, each written as an int.
	 * {@link #write(DataOutput)} and {@link #readFields(DataInput)} must use the same field
	 * order, otherwise the two values are swapped on every serialization round trip.
	 */
	public static class MinMaxWritable implements Writable {
		private int min; // smallest value
		private int max; // largest value

		/** Returns the stored minimum. */
		public int getMin() {
			return min;
		}

		/** Sets the stored minimum. */
		public void setMin(int min) {
			this.min = min;
		}

		/** Returns the stored maximum. */
		public int getMax() {
			return max;
		}

		/** Sets the stored maximum. */
		public void setMax(int max) {
			this.max = max;
		}

		/**
		 * Restores both fields from {@code in}, reading min first and max second.
		 */
		@Override
		public void readFields(DataInput in) throws IOException {
			min = in.readInt();
			max = in.readInt();
		}

		/**
		 * Writes both fields to {@code out} in the order expected by
		 * {@link #readFields(DataInput)}: min first, max second.
		 */
		@Override
		public void write(DataOutput out) throws IOException {
			out.writeInt(min);
			out.writeInt(max);
		}

		/**
		 * Text form: max, a tab, then min.
		 */
		@Override
		public String toString() {
			return max + "\t" + min;
		}

	}

}

报错信息

java.lang.Exception: java.io.IOException: Mkdirs failed to create file:/output/ch07/minmax/_temporary/0/_temporary/attempt_local391816241_0001_r_000000_0 (exists=false, cwd=file:/home/zhangsan/Java_Eclipse/eclipse-workspace/MapReduce)
	at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:529)
Caused by: java.io.IOException: Mkdirs failed to create file:/output/ch07/minmax/_temporary/0/_temporary/attempt_local391816241_0001_r_000000_0 (exists=false, cwd=file:/home/zhangsan/Java_Eclipse/eclipse-workspace/MapReduce)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:449)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:435)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:909)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:890)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:787)
	at org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.getRecordWriter(TextOutputFormat.java:132)
	at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.<init>(ReduceTask.java:540)
	at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:614)
	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
	at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:319)
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
2022-09-21 20:42:24,059 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1360)) - Job job_local391816241_0001 running in uber mode : false
2022-09-21 20:42:24,062 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) -  map 100% reduce 0%
2022-09-21 20:42:24,064 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1380)) - Job job_local391816241_0001 failed with state FAILED due to: NA
2022-09-21 20:42:24,080 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Counters: 30
	File System Counters
		FILE: Number of bytes read=247
		FILE: Number of bytes written=269461
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
	Map-Reduce Framework
		Map input records=8
		Map output records=8
		Map output bytes=128
		Map output materialized bytes=60
		Input split bytes=97
		Combine input records=8
		Combine output records=3
		Reduce input groups=0
		Reduce shuffle bytes=60
		Reduce input records=0
		Reduce output records=0
		Spilled Records=3
		Shuffled Maps =1
		Failed Shuffles=0
		Merged Map outputs=1
		GC time elapsed (ms)=0
		Total committed heap usage (bytes)=193986560
	Shuffle Errors
		BAD_ID=0
		CONNECTION=0
		IO_ERROR=0
		WRONG_LENGTH=0
		WRONG_MAP=0
		WRONG_REDUCE=0
	File Input Format Counters 
		Bytes Read=96
	File Output Format Counters 
		Bytes Written=0

2. 查找资料

网上有一个帖子提到了这个报错信息

链接

https://community.cloudera.com/t5/Support-Questions/MKDirs-failed-to-create-file/td-p/35041

根据 @snm1523 的回答，我尝试在 mapred-site.xml 中添加如下内容

<property>
    <name>mapreduce.jobtracker.address</name>
    <value>localhost:9101</value>
</property>

3. 原因分析

输出路径不能create，那就先创建输出路径

zhangsan@hadoop01:/$ sudo mkdir /output
[sudo] zhangsan 的密码： 
zhangsan@hadoop01:/$ ll | grep output
drwxr-xr-x   2 root root  4096 9月  21 20:43 output/

依旧跟前面一样的报错信息

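创建了路径还是报错，那应该是涉及到权限问题：/output 是用 sudo 创建的，所有者是 root，而 MR 程序以 zhangsan 用户在本地文件系统上运行（报错路径为 file:/output/...），对该目录没有写权限，因此无法在其中创建子目录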

将 /output 目录的所有者修改为当前用户「即 hadoop 安装目录的所有者」

zhangsan@hadoop01:/$ sudo chown -R zhangsan:zhangsan output/
zhangsan@hadoop01:/$ ll | grep output
drwxr-xr-x   2 zhangsan  zhangsan   4096 9月  21 20:43 output/

重新执行MR程序

运行成功

2022-09-21 20:44:53,945 INFO  [Thread-15] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete.
2022-09-21 20:44:54,597 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1360)) - Job job_local2141955672_0001 running in uber mode : false
2022-09-21 20:44:54,600 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) -  map 100% reduce 100%
2022-09-21 20:44:54,602 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1378)) - Job job_local2141955672_0001 completed successfully
2022-09-21 20:44:54,615 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Counters: 30
	File System Counters
		FILE: Number of bytes read=646
		FILE: Number of bytes written=541974
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
	Map-Reduce Framework
		Map input records=8
		Map output records=8
		Map output bytes=128
		Map output materialized bytes=60
		Input split bytes=97
		Combine input records=8
		Combine output records=3
		Reduce input groups=3
		Reduce shuffle bytes=60
		Reduce input records=3
		Reduce output records=3
		Spilled Records=6
		Shuffled Maps =1
		Failed Shuffles=0
		Merged Map outputs=1
		GC time elapsed (ms)=0
		Total committed heap usage (bytes)=387973120
	Shuffle Errors
		BAD_ID=0
		CONNECTION=0
		IO_ERROR=0
		WRONG_LENGTH=0
		WRONG_MAP=0
		WRONG_REDUCE=0
	File Input Format Counters 
		Bytes Read=96
	File Output Format Counters 
		Bytes Written=60

查看结果

zhangsan@hadoop01:/output/ch07/minmax$ ll
总用量 20
drwxrwxr-x 2 zhangsan zhangsan 4096 9月  21 20:44 ./
drwxrwxr-x 3 zhangsan zhangsan 4096 9月  21 20:44 ../
-rw-r--r-- 1 zhangsan zhangsan   48 9月  21 20:44 part-r-00000
-rw-r--r-- 1 zhangsan zhangsan   12 9月  21 20:44 .part-r-00000.crc
-rw-r--r-- 1 zhangsan zhangsan    0 9月  21 20:44 _SUCCESS
-rw-r--r-- 1 zhangsan zhangsan    8 9月  21 20:44 ._SUCCESS.crc

4. 参考

https://community.cloudera.com/t5/Support-Questions/MKDirs-failed-to-create-file/td-p/35041

顺利结束

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系:hwhale#tublm.com(使用前将#替换为@)

MapReduce

mkdirs

Failed

create

file

MapReduce报错：「MKDirs failed to create file」的相关文章

如何从命令行运行scala文件？

scala是否支持scala run xxx scala go语言支持这样运行 go my go 并且Python支持 python my py 但看来 scala xxx scala 仅进行语法检查未观察到任何输出或运行行为那么有没有
从 Eclipse 在 AWS-EMR 上运行 MapReduce 作业

我在 Eclipse 中有 WordCount MapReduce 示例我将其导出到 Jar 然后将其复制到 S3 然后我在 AWS EMR 上运行它成功地然后我读到了这篇文章 http docs aws amazon com El
遍历 ArrayWritable - NoSuchMethodException

我刚刚开始使用 MapReduce 并且遇到了一个奇怪的错误我无法通过 Google 回答该错误我正在使用 ArrayWritable 制作一个基本程序但是当我运行它时在Reduce过程中出现以下错误 java lang Runti
如何使用 Amazon 的 EMR 在 CLI 中使用自定义 jar 指定 mapred 配置和 java 选项？

我想知道如何指定mapreduce配置例如mapred task timeout mapred min split size等等当使用自定义 jar 运行流作业时当我们使用 ruby 或 python 等外部脚本语言运行时我们可以使
找不到文件异常..但它就在那里

嘿这将是那些愚蠢的问题之一我试图在本地系统上获取一个文件但我不断收到FileNotFoundException thrown 请有人纠正我 if File Exists C logs hw healthways prod 2009 0
用 C 更快地读取文件

嗯我想知道是否有一种比使用 fscanf 更快地读取文件的方法例如假设我有这个文本 4 55 k 52 o 24 l 523 i 首先我想读取第一个数字它给出了接下来的行数令这个数称为N N 之后我想读取 N 行其中有一个整数
如何在 Windows 7 中使用 Python 廉价地创建非常大的文件？ [复制]

这个问题在这里已经有答案了可能的重复在Windows系统上快速创建大文件 https stackoverflow com questions 982659 quickly create large file on a windows s
fputc() 之后 c fgetc() 中的文件处理问题

我有一个带有文件名的文本文件in txt 其中包含以下内容 1111 1100 0000 我正在尝试使用以下程序更改此文件的内容 include
在 C# 中打开文本文件

我正在编写一个 Windows 窗体应用程序其中在程序运行时记录一些数据并使用以下代码将其写入临时文件中 string path Path GetTempFileName byte text new UTF8Encoding true
PHP将数据写入文件中间而不重写文件的最佳方法是什么

我正在 php 1GB 中处理大型文本文件我正在使用 file get contents file txt NULL NULL 100000000 100 要从文件中间获取数据但如果我想将文件中的数据更改为与原始数据不同的更改我将不得
动态创建和下载Doc文件

因此我尝试动态创建 doc 文件并让用户在单击按钮时下载该文件这些是我找到的用于下载文件的标头 header Content Description File Transfer header Content Type applicati
PHP：是否可以从文件内容（字符串）创建 SplFileObject 对象？

例如 contents file get contents image png 是否可以从 contents 创建 SplFileObject 对象 Thanks php 有一些特殊的流包装器 http www php net manual
如果文件为空，如何跳过文件行

python 3中的程序这是我的第一个涉及文件的程序我需要忽略注释行以开头和空行然后拆分这些行以便它们可迭代但我不断收到 IndexError 消息指出字符串索引超出范围并且程序在空行处崩溃 import os path
PHP 文件上传帮助

div align center div 这是我的代码
如何根据扩展名获取文件类型信息？（不是 MIME）在 c# 中

如何获取基于扩展名的一般文件类型描述如资源管理器所以不是 MIME 而是最终用户看到的信息 doc Microsoft Office Word 97 2003 文档 zip ZIP 文件 avi 视频文件我怎样才能获得似乎可用的辅助
监听文件夹和文件（更改）

可以直接在 PHP 或 Node 上监听文件夹和文件的更改通过事件还是我需要创建自己的方法来执行此操作 Example 我需要听文件夹 user 如果我将一些文件添加到该目录中 PHP 或 Node 会收到信息并运行PathEvent
Sqoop - 绑定到 YARN 队列

因此使用 MapReduce v2 您可以使用绑定到某些 YARN 队列来管理资源和优先级基本上通过使用 hadoop jar xyz jar D mapreduce job queuename QUEUE1 input output
Python 读取未格式化的直接访问 Fortran 90 给出不正确的输出

这是数据的写入方式它是一个二维浮点矩阵我不确定大小 open unit 51 file rmsd nn output form unformatted access direct status replace recl Npoints
获取给定类文件的目录路径

我遇到的代码尝试从类本身的 class 文件所在的同一目录中读取一些配置文件 File configFiles new File this getClass getResource getPath listFiles new Filenam
使用Git记录文件复制操作

当我使用 git mv 在 git 中移动文件时状态显示该文件已被重命名即使我更改了某些部分它仍然被认为几乎是相同的东西这很好因为它让我可以跟踪它的历史记录当我复制文件时原始文件有一些历史记录我想将其与新副本关联起来我尝试

随机推荐

Intellij Idea配置Tomcat并创建JavaWeb项目

Intellij IDEA配置Tomcat并创建JavaWeb项目文章目录 Intellij IDEA配置Tomcat并创建JavaWeb项目一实验环境二说明三 Web项目搭建1 创建一个空项目2 点击next3 设置空项目名称4 设
我的2013这一年 -- 唯一关键词变化

又一年，又是一个年终总结，2013的关键词就一个变化，貌似去年是三个起初没怎么注意已到了2013年末，圣诞前后，CSDN上开始扎堆出现年终总结的帖子或博客文章，还有
大数据技术基础综合项目——牛客网招聘岗位统计分析

大数据技术基础综合项目牛客网招聘岗位统计分析文章目录大数据技术基础综合项目牛客网招聘岗位统计分析零实验环境说明一数据集来源及说明二数据预处理阶段 2 1 删除空行的数据 2 2 区分实习与非实习岗位 2 3 岗位预处理
Structured Streaming报错记录：Overloaded method foreachBatch with alternatives

Structured Streaming报错记录：Overloaded method foreachBatch with alternatives 文章目录 Structured Streaming报错记录：Over
Windows10安装netcat

文章目录 Windows10安装netcat下载地址安装操作不配置环境变量直接使用netcat Windows10安装netcat 下载地址 netcat win32 1 12 https eternallybored org misc n
DataGrip连接Hive执行DDL操作报错：「FAILED: ParseException line 1:5 cannot recognize input near ‘show‘ ‘indexe

DataGrip连接Hive执行DDL操作报错：FAILED ParseException line 1 5 cannot recognize input near show indexes on in ddl statemen
Superset执行「superset db upgrade」命令的报错记录

Superset执行 superset db upgrade 命令的报错记录文章目录 Superset执行 superset db upgrade 命令的报错记录零写在前面一 superset db upgrade 报错记录0 按照尚硅
Kylin启动失败||启动成功但Web界面显示404

Kylin启动失败启动成功但Web界面显示404 文章目录 Kylin启动失败启动成功但Web界面显示404 0 写在前面 1 原因一环境变量配置问题 2 原因二 Kylin兼容性问题没有得到解决 3 原因三 hive site xm
关于Presto对lzo压缩的表查询使用记录

关于Presto对lzo压缩的表查询使用记录文章目录关于Presto对lzo压缩的表查询使用记录 0 写在前面 1 正文 0 提前说明 1 查询ads层表 2 查询dwd dws dwt层表 3 查询ods层表 0 写在前面实验背景
Linux重新执行某个历史命令

Linux重新执行某个历史命令文章目录 Linux重新执行某个历史命令0 前置芝士1 Linux重新执行某个历史命令 0 前置芝士 Linux执行过的命令存储在家目录下的 bash history文件中 1 Linux重新执行某个历史命令
Ubuntu安装Anaconda及注意事项

虚拟机Ubuntu安装Anaconda及注意事项文章目录虚拟机Ubuntu安装Anaconda及注意事项0 写在前面1 安装步骤0 下载 Anaconda1 安装 Anaconda2 确认安装的路径3 确认conda命令是否可以正常使用
MapReduce关于类型转换报错记录

MapReduce关于类型转换报错记录文章目录 MapReduce关于类型转换报错记录0 写在前面1 程序代码2 参考 0 写在前面实验环境：Ubuntu Kylin16 04Hadoop版本：2 7 2IDE
android 信息(mms)的故事（二） -- 存储

关于android存储的有三种方式：SharedPreferences、文件系统和数据库，这些信息里都用到了，数据库provider存储短信和彩信的基本信息，SharedPreferences存储关于信
使用Python3操作HBase的两种方法

使用Python3操作HBase 文章目录使用Python3操作HBase0 写在前面1 安装conda2 安装hbase thrift 0 20 0 patch新建一个Python3 9的anaconda环境激活新建的anaconda环
执行MapReduce报错：无法分配内存 (errno=12)

执行MapReduce报错：无法分配内存 (errno=12) 文章目录执行MapReduce报错：无法分配内存 (errno=12) 0 写在前面1 程序介绍2 报错解决3 参考 0 写在前面 Linux U
离线数仓之Kerberos基本使用及问题记录

离线数仓之Kerberos基本使用及问题记录文章目录离线数仓之Kerberos基本使用及问题记录0 写在前面1 Kerberos基本使用0 启动Kerberos相关服务1 安全模式下启动Hadoop集群 2 安装Kerberos客户端访
jps查看进程出现「xxxx -- process information unavailable」

jps查看进程出现 xxxx process information unavailable 文章目录 jps查看进程出现 xxxx process information unavailable 0 写在前面1 报错2 参考 0 写在前面
CentOS7.X时间调整为系统时间之后，重新开机就无效了

CentOS7 X时间调整为系统时间之后，重新开机就无效了文章目录 CentOS7 X时间调整为系统时间之后，重新开机就无效了0 原因分析1 时间修改2 参考 0 原因分析系统时区非上海没有同步网络时间 1 时
MongoDB的「Linux」安装及基本使用

MongoDB的 Linux 安装及基本使用文章目录 MongoDB的 Linux 安装及基本使用0 写在前面1 下载并安装MongoDB2 启动方式2 1 直接启动2 2 以配置文件方式启动2 2 1 使用默认配置文件2 2 2 自
MapReduce报错：「MKDirs failed to create file」

MapReduce报错：MKDirs failed to create file 文章目录 MapReduce报错：MKDirs failed to create file 0 写在前面1 程序代码及报错信息输入输

MapReduce报错：「MKDirs failed to create file」

MapReduce报错：「MKDirs failed to create file」

文章目录

0. 写在前面

1. 程序代码及报错信息

输入、输出路径

程序代码

报错信息

2. 查找资料

3. 原因分析

4. 参考

MapReduce报错：「MKDirs failed to create file」 的相关文章

随机推荐

热门标签

MapReduce报错：「MKDirs failed to create file」的相关文章