我正在我的机器上实现 Hadoop 单节点集群,方法如下迈克尔·诺尔的教程 http://www.michael-noll.com/wiki/Running_Hadoop_On_Ubuntu_Linux_%28Single-Node_Cluster%29并遇到数据复制错误:
这是完整的错误消息:
> hadoop@laptop:~/hadoop$ bin/hadoop dfs -copyFromLocal
> tmp/testfiles testfiles
>
> 12/05/04 16:18:41 WARN hdfs.DFSClient: DataStreamer Exception:
> org.apache.hadoop.ipc.RemoteException: java.io.IOException: File
> /user/hadoop/testfiles/testfiles/file1.txt could only be replicated to
> 0 nodes, instead of 1 at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:1271)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNode.addBlock(NameNode.java:422)
> at sun.reflect.GeneratedMethodAccessor7.invoke(Unknown Source) at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
> at java.lang.reflect.Method.invoke(Method.java:597) at
> org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:959) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:955) at
> java.security.AccessController.doPrivileged(Native Method) at
> javax.security.auth.Subject.doAs(Subject.java:396) at
> org.apache.hadoop.ipc.Server$Handler.run(Server.java:953)
>
> at org.apache.hadoop.ipc.Client.call(Client.java:740) at
> org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220) at
> $Proxy0.addBlock(Unknown Source) at
> sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
> at java.lang.reflect.Method.invoke(Method.java:597) at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
> at $Proxy0.addBlock(Unknown Source) at
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.locateFollowingBlock(DFSClient.java:2937)
> at
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2819)
> at
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
> at
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)
>
> 12/05/04 16:18:41 WARN hdfs.DFSClient: Error Recovery for block null
> bad datanode[0] nodes == null 12/05/04 16:18:41 WARN hdfs.DFSClient:
> Could not get block locations. Source file
> "/user/hadoop/testfiles/testfiles/file1.txt" - Aborting...
> copyFromLocal: java.io.IOException: File
> /user/hadoop/testfiles/testfiles/file1.txt could only be replicated to
> 0 nodes, instead of 1 12/05/04 16:18:41 ERROR hdfs.DFSClient:
> Exception closing file /user/hadoop/testfiles/testfiles/file1.txt :
> org.apache.hadoop.ipc.RemoteException: java.io.IOException: File
> /user/hadoop/testfiles/testfiles/file1.txt could only be replicated to
> 0 nodes, instead of 1 at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:1271)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNode.addBlock(NameNode.java:422)
> at sun.reflect.GeneratedMethodAccessor7.invoke(Unknown Source) at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
> at java.lang.reflect.Method.invoke(Method.java:597) at
> org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:959) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:955) at
> java.security.AccessController.doPrivileged(Native Method) at
> javax.security.auth.Subject.doAs(Subject.java:396) at
> org.apache.hadoop.ipc.Server$Handler.run(Server.java:953)
>
> org.apache.hadoop.ipc.RemoteException: java.io.IOException: File
> /user/hadoop/testfiles/testfiles/file1.txt could only be replicated to
> 0 nodes, instead of 1 at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:1271)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNode.addBlock(NameNode.java:422)
> at sun.reflect.GeneratedMethodAccessor7.invoke(Unknown Source) at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
> at java.lang.reflect.Method.invoke(Method.java:597) at
> org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:959) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:955) at
> java.security.AccessController.doPrivileged(Native Method) at
> javax.security.auth.Subject.doAs(Subject.java:396) at
> org.apache.hadoop.ipc.Server$Handler.run(Server.java:953)
>
> at org.apache.hadoop.ipc.Client.call(Client.java:740) at
> org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220) at
> $Proxy0.addBlock(Unknown Source) at
> sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
> at java.lang.reflect.Method.invoke(Method.java:597) at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
> at $Proxy0.addBlock(Unknown Source) at
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.locateFollowingBlock(DFSClient.java:2937)
> at
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2819)
> at
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
> at
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)
另外当我执行时:
bin/stop-all.sh
它表示 datanode 尚未启动,因此无法停止。虽然,输出jps说数据节点存在。
I tried 格式化名称节点, 更改所有者权限,但似乎不起作用。希望我没有错过任何其他相关信息。
提前致谢。
对我有用的解决方案是逐一运行名称节点和数据节点,而不是一起使用bin/start-all.sh
。使用这种方法会发生的情况是,如果您在网络上设置数据节点时遇到一些问题,并且 stackoverflow 上的许多帖子表明 namenode 需要一些时间来启动,则错误是清晰可见的,因此,应该给它一些时间在启动数据节点之前启动。另外,在这种情况下,我遇到了名称节点和数据节点不同ID的问题,为此我必须更改与名称节点具有相同ID的数据节点的ID。
分步程序将是:
- 启动名称节点
bin/hadoop namenode
。检查是否有错误(如果有)。
- 启动数据节点
bin/hadoop datanode
。检查是否有错误(如果有)。
- 现在使用“bin/start-mapred.sh”启动任务跟踪器、作业跟踪器
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)