Jetson TX1 TensorFlow 编译配置过程

作者:zj573453769

首先要声明一下,本篇博文编译的是tensorflow r0.9。如果你是想跑tensorflow版本的facenet,由于它最新的model是基于tensorflow r0.11的,用r0.9是不会运行成功的。本文也是踩了这个坑,只编译成功了r0.9。

参看链接:http://www.yuthon.com/2016/12/04/Installation-of-TensorFlow-r0-11-on-TX1/

http://stackoverflow.com/questions/39783919/tensorflow-on-nvidia-tx1/

Tensorflow编译:

你需要尽可能的删除一切东西,否则如果在本机编译,空间会不够,报很多错误,要么就给板子加个固态硬盘,要么就在移动硬盘下编译。建议编译的过程在代理下进行,如果你想编译r0.11版本,可参考上边第一个链接,本人没成功。


删除一切能删的:

    # get rid of libreoffice, games, libvisionworks, perfkit, multimedia api, opencv4tegra, etc.
    sudo apt-get purge libreoffice*
    sudo apt-get purge aisleriot gnome-sudoku mahjongg ace-of-penguins gnomine gbrainy
    sudo apt-get clean
    sudo apt-get autoremove

    rm -rf libvision*
    rm -rf PerfKit*

    # something along these lines; might be different for you
    # delete all libvision-works and opencv4tegra stuff
    cd var && rm -rf libopencv4tegra* && rm -rf libvision*

    # I deleted practically everything. Almost as if I shouldn't have even installed JetPack in the first place
    # delete all deb files, Firefox, chrome, all the stuff I really didn't need that was taking up memory. 
    # find big files and remove them assuming they're not important. Google is your friend.
    find / -size +10M -ls

另外在var下,有很多deb的包,可以都删除,会节约很多空间的。

安装protobuf,bazel与tensorflow:

    # install deps
    cd ~
    sudo add-apt-repository ppa:webupd8team/java
    sudo apt-get update
    sudo apt-get install oracle-java8-installer
    sudo apt-get install git zip unzip autoconf automake libtool curl zlib1g-dev maven swig bzip2

    # build protobuf 3.0.0-beta-2 jar
    git clone https://github.com/google/protobuf.git
    cd protobuf
    # autogen.sh downloads broken gmock.zip in d5fb408d
    git checkout master
    ./autogen.sh
    git checkout d5fb408d
    ./configure --prefix=/usr
    make -j 4
    sudo make install
    cd java
    mvn package

    # Get bazel version 0.2.1, it doesn't require gRPC
    # (go back to ~ first so that bazel/ sits next to protobuf/, as the cp below expects)
    cd ~
    git clone https://github.com/bazelbuild/bazel.git
    cd bazel
    git checkout 0.2.1
    cp /usr/bin/protoc third_party/protobuf/protoc-linux-arm32.exe
    cp ../protobuf/java/target/protobuf-java-3.0.0-beta-2.jar third_party/protobuf/protobuf-java-3.0.0-beta-1.jar

编辑bazel使其识别aarch64:

    --- a/src/main/java/com/google/devtools/build/lib/util/CPU.java
    +++ b/src/main/java/com/google/devtools/build/lib/util/CPU.java
    @@ -25,7 +25,7 @@ import java.util.Set;
     public enum CPU {
       X86_32("x86_32", ImmutableSet.of("i386", "i486", "i586", "i686", "i786", "x86")),
       X86_64("x86_64", ImmutableSet.of("amd64", "x86_64", "x64")),
    -  ARM("arm", ImmutableSet.of("arm", "armv7l")),
    +  ARM("arm", ImmutableSet.of("arm", "armv7l", "aarch64")),
       UNKNOWN("unknown", ImmutableSet.of());

编译:

    ./compile.sh

获取tensorflow源码(git clone):

    git clone -b r0.9 https://github.com/tensorflow/tensorflow.git
    cd tensorflow
    ./configure
    # this will fail, but that's ok
    bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_package

下载config,更新.cache:

    cd ~
    wget -O config.guess 'http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD'
    wget -O config.sub 'http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD'

    # below are commands Dwight Crowe ran, yours will vary depending on .cache details.
    # look for '_bazel_ubuntu', 'farmhash_archive', and 'farmhash'
    cp config.guess ./.cache/bazel/_bazel_ubuntu/742c01ff0765b098544431b60b1eed9f/external/farmhash_archive/farmhash-34c13ddfab0e35422f4c3979f360635a8c050260/config.guess
    cp config.sub ./.cache/bazel/_bazel_ubuntu/742c01ff0765b098544431b60b1eed9f/external/farmhash_archive/farmhash-34c13ddfab0e35422f4c3979f360635a8c050260/config.sub
    

修改tensorflow源文件:

    --- a/tensorflow/core/kernels/BUILD
    +++ b/tensorflow/core/kernels/BUILD
    @@ -985,7 +985,7 @@ tf_kernel_libraries(
             "reduction_ops",
             "segment_reduction_ops",
             "sequence_ops",
    -        "sparse_matmul_op",
    +        #DC "sparse_matmul_op",
         ],
         deps = [
             ":bounds_check",

    --- a/tensorflow/python/BUILD
    +++ b/tensorflow/python/BUILD
    @@ -1110,7 +1110,7 @@ medium_kernel_test_list = glob([
         "kernel_tests/seq2seq_test.py",
         "kernel_tests/slice_op_test.py",
         "kernel_tests/sparse_ops_test.py",
    -    "kernel_tests/sparse_matmul_op_test.py",
    +    #DC "kernel_tests/sparse_matmul_op_test.py",
         "kernel_tests/sparse_tensor_dense_matmul_op_test.py",
     ])
    

    --- a/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
    +++ b/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
    @@ -43,8 +43,14 @@ struct BatchSelectFunctor {
         const int all_but_batch = then_flat_outer_dims.dimension(1);

     #if !defined(EIGEN_HAS_INDEX_LIST)
    -    Eigen::array<int, 2> broadcast_dims{{ 1, all_but_batch }};
    -    Eigen::Tensor<int, 2>::Dimensions reshape_dims{{ batch, 1 }};
    +    //DC Eigen::array<int, 2> broadcast_dims{{ 1, all_but_batch }};
    +    Eigen::array<int, 2> broadcast_dims;
    +    broadcast_dims[0] = 1;
    +    broadcast_dims[1] = all_but_batch;
    +    //DC Eigen::Tensor<int, 2>::Dimensions reshape_dims{{ batch, 1 }};
    +    Eigen::Tensor<int, 2>::Dimensions reshape_dims;
    +    reshape_dims[0] = batch;
    +    reshape_dims[1] = 1;
     #else
         Eigen::IndexList<Eigen::type2index<1>, int> broadcast_dims;
         broadcast_dims.set(1, all_but_batch);
    
    --- a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
    +++ b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
    @@ -104,9 +104,17 @@ struct SparseTensorDenseMatMulFunctor {
         int n = (ADJ_B) ? b.dimension(0) : b.dimension(1);

     #if !defined(EIGEN_HAS_INDEX_LIST)
    -    Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz{{ 1, nnz }};
    -    Eigen::array<int, 2> n_by_1{{ n, 1 }};
    -    Eigen::array<int, 1> reduce_on_rows{{ 0 }};
    +    //DC Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz{{ 1, nnz }};
    +    Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz;
    +    matrix_1_by_nnz[0] = 1;
    +    matrix_1_by_nnz[1] = nnz;
    +    //DC Eigen::array<int, 2> n_by_1{{ n, 1 }};
    +    Eigen::array<int, 2> n_by_1;
    +    n_by_1[0] = n;
    +    n_by_1[1] = 1;
    +    //DC Eigen::array<int, 1> reduce_on_rows{{ 0 }};
    +    Eigen::array<int, 1> reduce_on_rows;
    +    reduce_on_rows[0] = 0;
     #else
         Eigen::IndexList<Eigen::type2index<1>, int> matrix_1_by_nnz;
         matrix_1_by_nnz.set(1, nnz);

    --- a/tensorflow/stream_executor/cuda/cuda_blas.cc
    +++ b/tensorflow/stream_executor/cuda/cuda_blas.cc
    @@ -25,6 +25,12 @@ limitations under the License.
     #define EIGEN_HAS_CUDA_FP16
     #endif

    +#if CUDA_VERSION >= 8000
    +#define SE_CUDA_DATA_HALF CUDA_R_16F
    +#else
    +#define SE_CUDA_DATA_HALF CUBLAS_DATA_HALF
    +#endif
    +
     #include "tensorflow/stream_executor/cuda/cuda_blas.h"

     #include <dlfcn.h>
    @@ -1680,10 +1686,10 @@ bool CUDABlas::DoBlasGemm(
       return DoBlasInternal(
           dynload::cublasSgemmEx, stream, true /* = pointer_mode_host */,
           CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k, &alpha,
    -      CUDAMemory(a), CUBLAS_DATA_HALF, lda,
    -      CUDAMemory(b), CUBLAS_DATA_HALF, ldb,
    +      CUDAMemory(a), SE_CUDA_DATA_HALF, lda,
    +      CUDAMemory(b), SE_CUDA_DATA_HALF, ldb,
           &beta,
    -      CUDAMemoryMutable(c), CUBLAS_DATA_HALF, ldc);
    +      CUDAMemoryMutable(c), SE_CUDA_DATA_HALF, ldc);
     #else
       LOG(ERROR) << "fp16 sgemm is not implemented in this cuBLAS version "
                  << "(need at least CUDA 7.5)";

    --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
    +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
    @@ -888,6 +888,9 @@ CudaContext* CUDAExecutor::cuda_context() { return context_; }
     // For anything more complicated/prod-focused than this, you'll likely want to
     // turn to gsys' topology modeling.
     static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
    +  // DC - make this clever later. ARM has no NUMA node, just return 0
    +  LOG(INFO) << "ARM has no NUMA node, hardcoding to return zero";
    +  return 0;
     #if defined(__APPLE__)
       LOG(INFO) << "OS X does not support NUMA - returning NUMA node zero";
       return 0;
    
编译:

    bazel build -c opt --config=cuda --local_resources 2048,4.0,1.0 --verbose_resources //tensorflow/tools/pip_package:build_pip_package --jobs 4

安装:

    bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg

    # The name of the .whl file will depend on your platform and the branch you built (r0.9 here).
    sudo pip install /tmp/tensorflow_pkg/tensorflow-0.9.0-py2-none-any.whl

下面是我编译好的tensorflow0.9版本:

链接: https://pan.baidu.com/s/1hrPd4FE 密码: qe5x


编译过程中可能会报错,可以多尝试几次,并将 --jobs 换为3或4。编译过程中可能会报的错:

Error: unexpected EOF from Bazel server.

gcc: internal compiler error: Killed (program cc1plus)

这些都是因为内存不够,我是将deb全部删掉,在移动硬盘中编译,还碰到了cross_tool的错误,多换了几次jobs就成功了。

总之,这次的编译过程让我累死了,没啥收获。为了编译r0.11,还把0.9删了,结果0.9的安装包我还没保存,最后啥都没剩下。

点个赞咯,草稿快要写完时,没保存,火狐就崩了,又写了一遍,真倒霉,股市又因为加息的事大跌,虽然我两个月前就知道会这样,最近太忙,忘了卖,又亏惨了,我的人生啊!!!。。。。。

如果有人编译成功r0.11,请告诉下哈,本人不甘心。

发表评论

3个评论

  • xiji321

    博主,r0.11编过了嘛

    2017-03-09 16:35:28回复

  • moses1994

    同遇到这个坑,已经两周了,两块 TX1 刚刚又刷机了,准备重新再装。有一些问题不同,想和楼主交流一下,谢谢。Q:1002100760

    2016-12-13 16:52:44回复

  • zj573453769

    回复moses1994: 可以啊,我的Q:573453769

    2016-12-14 18:44:41回复

我要留言×

技术领域:

我要留言×

留言成功,我们将在审核后加至投票列表中!

提示x

人工智能开发框架知识库已成功保存至我的图谱现在你可以用它来管理自己的知识内容了

删除图谱提示×

你保存在该图谱下的知识内容也会被删除,建议你先将内容移到其他图谱中。你确定要删除知识图谱及其内容吗?

删除节点提示×

无法删除该知识节点,因该节点下仍保存有相关知识内容!

删除节点提示×

你确定要删除该知识节点吗?