tensorflow数据清洗

2023-05-16

import tensorflow as tf
import numpy as np
import random
import os
import math

from matplotlib import pyplot as plt

def get_files(file_dir, ratio=0.3):
    """
    Build shuffled training / validation lists of image paths and labels.

    Every entry found in the five class sub-directories of ``file_dir``
    (daisy, dandelion, roses, sunflowers, tulips) is paired with the index of
    its class (0-4, in that order). The pairs are shuffled together and then
    split: the first part is the training set, the remainder is the
    validation set.

    :param file_dir: directory that contains the five class sub-directories
    :param ratio: fraction of all samples used for validation; the validation
        count is rounded up with ``math.ceil``
    :return: tra_images, tra_labels, val_images, val_labels -- lists of path
        strings and of int labels, aligned index by index
    """
    # step 1: collect every file path together with the label of its class
    class_names = ("daisy", "dandelion", "roses", "sunflowers", "tulips")
    image_paths = []
    labels = []
    for class_index, class_name in enumerate(class_names):
        class_dir = os.path.join(file_dir, class_name)
        for file_name in os.listdir(class_dir):
            image_paths.append(os.path.join(class_dir, file_name))
            labels.append(class_index)

    # step 2: shuffle paths and labels with one shared permutation so that
    # every path keeps its own label
    order = np.random.permutation(len(image_paths))
    image_list = [image_paths[i] for i in order]
    label_list = [labels[i] for i in order]

    # step 3: split into a training part and a validation part
    n_sample = len(label_list)
    n_val = int(math.ceil(n_sample * ratio))  # number of validation samples
    n_train = n_sample - n_val  # number of training samples
    tra_images = image_list[:n_train]
    tra_labels = label_list[:n_train]
    # Slice to the end of the list: an end index of -1 would silently drop
    # the last sample from the validation set.
    val_images = image_list[n_train:]
    val_labels = label_list[n_train:]
    return tra_images, tra_labels, val_images, val_labels

#return image_list,label_list_new

def get_batch(image, label, image_W, image_H,channel, batch_size, capacity):
    """
    Build the TensorFlow 1.x queue-based input pipeline for JPEG images.

    :param image: list of image file paths
    :param label: list of integer labels, aligned with ``image``
    :param image_W: first entry of the resize target and of the static shape
    :param image_H: second entry of the resize target and of the static shape
    :param channel: number of colour channels to decode and to declare in the
        static shape
    :param batch_size: number of samples per batch
    :param capacity: capacity passed to ``tf.train.batch``
    :return: image_batch (float32, declared per-image shape
        ``[image_W, image_H, channel]``) and label_batch (int32, reshaped to
        ``[batch_size]``)

    NOTE(review): ``tf.image.resize_images`` documents ``size`` as
    ``[new_height, new_width]``, so ``image_W`` is effectively used as the
    height here. The order is kept as is so the output shape seen by existing
    callers does not change; it only matters for non-square targets.
    """
    # step 1: convert the Python lists to tensors and build an input queue
    # that yields one (path, label) pair at a time
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
    input_queue = tf.train.slice_input_producer([image, label])

    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])

    # step 2: decode the image; all files must be JPEG. Requesting `channel`
    # channels explicitly keeps the decoded tensor consistent with the static
    # shape declared below (a grayscale file would otherwise decode to one
    # channel and break batching).
    images_value = tf.image.decode_jpeg(image_contents, channels=channel)

    # step 3: preprocessing -- resize to a fixed size, declare the static
    # shape, then standardize each image
    image = tf.image.resize_images(images_value, size=[image_W, image_H])
    image.set_shape(shape=[image_W, image_H, channel])
    image = tf.image.per_image_standardization(image)

    # step 4: assemble batches
    image_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=1,
        capacity=capacity,
    )
    # flatten the labels to a vector of length batch_size
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch

if __name__=="__main__":
    # Smoke test: build the pipeline and display a few training batches.
    BATCH_SIZE = 2
    CAPACITY = 256
    IMG_W = 208
    IMG_H = 208
    CHANNEL = 3  # images are decoded as RGB
    N_BATCHES = 2  # number of batches to pull and display
    # directory that holds the flower class sub-directories
    mypath = "/home/sunxiaoming/PycharmProjects/data/flower_photos"
    # get_files returns four lists (train/validation images and labels)
    tra_images, tra_labels, val_images, val_labels = get_files(mypath)
    print(len(tra_images))
    print(len(tra_labels))
    image_batch, label_batch = get_batch(
        tra_images, tra_labels, IMG_W, IMG_H, CHANNEL, BATCH_SIZE, CAPACITY
    )
    print(image_batch)
    with tf.Session() as sess:
        # start the queue-runner threads under a coordinator
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for _ in range(N_BATCHES):
                images, labels = sess.run([image_batch, label_batch])
                for j in range(BATCH_SIZE):
                    plt.imshow(images[j, :, :, :])
                    plt.show()
        finally:
            # always stop and join the threads, even if sess.run raised
            coord.request_stop()
            coord.join(threads)

    #with tf.Session() as sess:
        # 开启线程
        # 线程协调元
        #coord = tf.train.Coordinator()
        #threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        #i=0
        #while not coord.should_stop() and i < 2:

            #lable, image = sess.run([image_batch,label_batch])
            #print(type(image))
            #"""
                        #for j in np.arange(BATCH_SIZE):
               # print('label: %d' % lable[j])

                #plt.imshow(image[j, :, :, :])
                #plt.show()
            #i += 1

#"""

        # 回收线程
        #coord.request_stop()
        #coord.join(threads)

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系:hwhale#tublm.com(使用前将#替换为@)

tensorflow

数据清洗

tensorflow数据清洗的相关文章

TensorFlow：有没有办法将冻结图转换为检查点模型？

可以将检查点模型转换为冻结图 ckpt 文件转换为 pb 文件但是是否有反向方法将 pb 文件再次转换为检查点文件我想它需要将常量转换回变量有没有办法将正确的常量识别为变量并将它们恢复回检查点模型目前支持将变量转换为常量 http
如何在Google机器学习中将jpeg图像转换为json文件

我正在研究 Google Cloud ML 我想对 jpeg 图像进行预测为此我想使用 gcloud beta ml 预测 instances INSTANCES model MODEL version VERSION https cl
Tensorflow `tf.layers.batch_normalization` 不会向 `tf.GraphKeys.UPDATE_OPS` 添加更新操作

以下代码复制粘贴可运行说明了如何使用tf layers batch normalization import tensorflow as tf bn tf layers batch normalization tf constant
在 keras 中集成采样的 softmax 失败

基于如何在 Keras 模型中使用 TensorFlow 的采样 softmax 损失函数 https stackoverflow com questions 47892380 how can i use tensorflows sampl
NotImplementedError：尚未为未构建的模型子类启用“fit_generator”

我正在使用以下代码 import tensorflow as tf traindata tf keras preprocessing image ImageDataGenerator rescale 1 255 shear range 0
Keras ZeroDivisionError：整数除法或以零为模

我正在尝试使用 Keras 和 Tensorflow 实现卷积神经网络我有以下代码 from keras models import Sequential from keras layers import Conv2D MaxPoolin
在 GPU 支持下对高维数据进行更快的 Kmeans 聚类

我们一直在使用 Kmeans 来对日志进行聚类典型的数据集有 10 mill 具有 100k 特征的样本为了找到最佳 k 我们并行运行多个 Kmeans 并选择轮廓得分最佳的一个在 90 的情况下我们最终得到的 k 介于 2 到 1
TensorFlow：在训练时更改变量

如果我将输入管道从 feed dict 更改为 tf data dataset 如何在每次迭代后的训练期间更改网络内参数的值澄清一下旧代码看起来像这样 Define Training Step model is some class t
pip：需要将包名称tensorflow-gpu更改为tensorflow

我正在尝试将具有 GPU 支持的张量流安装到 conda 环境中我使用命令 pip install ignore installed upgrade https storage googleapis com tensorflow linu
如何手动计算分类交叉熵？

当我手动计算二元交叉熵时我应用 sigmoid 来获取概率然后使用交叉熵公式并平均结果 logits tf constant 1 1 0 1 2 labels tf constant 0 0 1 1 1 probs tf nn sigm
如何将张量流模型部署到azure ml工作台

我在用Azure ML Workbench执行二元分类到目前为止一切正常我有很好的准确性我想将模型部署为用于推理的 Web 服务我真的不知道从哪里开始 azure 提供了这个doc https learn microsoft co
tf.gather_nd 直观上是做什么的？

你能直观地解释一下或者举更多例子吗tf gather nd用于在 Tensorflow 中索引和切片为高维张量我读了API https www tensorflow org api docs python tf gather nd 但它保
如何使用一个模型中间层的输出作为另一个模型的输入？

我训练一个模型A并尝试使用中间层的输出name layer x 作为模型的附加输入B 我尝试像 Keras 文档一样使用中间层的输出https keras io getting started faq how can i obtain th
使用预训练的 word2vec 初始化 Seq2seq 嵌入

我对使用预训练的 word2vec 初始化tensorflow seq2seq 实现感兴趣我已经看过代码了嵌入似乎已初始化 with tf variable scope scope or embedding attention deco
为 TFliteconverter 创建代表性数据集的正确方法是什么？

我正在尝试推断tinyYOLO V2 with INT8权重和激活我可以使用 TFliteConverter 将权重转换为 INT8 为了INT8激活我必须提供代表性数据集来估计缩放因子我创建此类数据集的方法似乎是错误的正确的程序是
Ray：如何在一个 GPU 上运行多个 Actor？

我只有一个 GPU 我想在该 GPU 上运行许多 Actor 这是我使用的方法ray 下列的https ray readthedocs io en latest actors html https ray readthedocs io en
TensorFlow的./configure在哪里以及如何启用GPU支持？

在我的 Ubuntu 上安装 TensorFlow 时我想将 GPU 与 CUDA 结合使用但我却停在了这一步官方教程 http www tensorflow org get started os setup md 这到底是哪里 con
为什么我的结果仍然无法重现？

我想要为 CNN 获得可重复的结果我使用带有 GPU 的 Keras 和 Google Colab 除了建议插入某些代码片段这应该允许再现性之外我还在层中添加了种子 This is the first code snipped to
Keras：如何保存模型或权重？

如果这个问题看起来很简单我很抱歉但是阅读 Keras 保存和恢复帮助页面 https www tensorflow org beta tutorials keras save and restore models https www t
在张量流的卷积层中使用自定义过滤器

我一直在从各种教程中学习 Tensorflow 并且想知道是否可以定义一个自定义过滤器供卷积网络使用例如如果我知道特征中有有意义的结构使得每个其他特征都是相关的我想定义一个看起来像 0 1 0 1 0 1 的过滤器 tf nn co

随机推荐

ESP8266 Arduino开发环境从零开始配置

目录 1 安装Arduino IDE2 添加 ESP8266 支持3 添加依赖库4 完毕 1 安装Arduino IDE Arduino IDE 是由 Arduino 官方提供的支持 C 语言的集成开发环境 xff0c 主要是针对 Ardu
使用HDFS的Java接口对文件基本操作

使用HDFS的Java接口进行文件的读写 FileSystem对象要从Hadoop文件系统中读取文件 xff0c 最简单的办法是使用java net URL对象打开数据流 xff0c 从中获取数据不过这种方法一般要使用FsUrlStre
elastic学习笔记（一）

elasticsearch安装 xff08 V7 10 0 xff09 踩坑笔记一 elasticsearch在6 X版本后不允许使用root账户启动创建其他账户 adduser es 创建密码 passwd es 二生产环境安装完毕
主机可以ping通虚拟机但无法访问虚拟机某一端口

问题描述主机可以ping通虚拟机，但是服务器启动状态下访问 192 168 200 152 8888失败背景 UFW防火墙已经开放 8888 端口原因和解法 firewall这个防火墙的8888端口没有开放，使
WSL安装ubuntu18.04+2016VCS+Verdi——记录安装过程错误

前言 xff1a 安装过程花了一个星期 xff0c 最开始是wsl的图形化界面一直没搞出来 xff0c 等图形化界面安装好之后 xff0c 安装vcs和verdi的过程又是一波三折 xff08 这段可不看 xff0c 总结一下自己踩过的坑
【stm32】ST-LINK Connection error解决方法之一

ST LINK报错原因之一 xff1a 固件丢失好久没有用开发板和stlink xff0c 今天发现下载报错 xff0c 明明电脑已经装了stlink驱动猜测可能是stlink固件丢失 span class token number 1
云服务器VNC安装教程

文章目录 1 VNC Viewer 注册及安装2 云服务器安装配置2 1 ssh安装和配置2 2 Tightvncserver 安装2 2 安装 gnome 桌面2 3 远程连接端口开启2 4 Xstartup文件编辑2 5 云服务器端口配
浅谈STP协议

1 STP协议解决什么问题 xff1f 解决网络拓扑中冗余链路产生的环路问题 2 STP协议有什么优缺点 xff1f 解决树形接口的可靠性问题 xff0c 有主备线路 xff0c 当主链路出现故障后 xff0c 可以感知网络的变化 xff0
Linux下CMake学习笔记和在QT的应用

一 cmake的介绍 CMak可以让我们通过编写简单的配置文件去生成本地的Makefile xff0c 这个配置文件是独立于运行平台和编译器的 xff0c 这样就不用亲自去编写Makefile了 xff0c 而且配置文件可以直接拿到其它平台
WinIO：一个底层的键盘事件模拟工具

当我们需要程序模拟键盘事件时 xff0c 通常会这样写 xff1a C xff1a keybd event JAVA xff1a robot keyPress 但是在某些情况下 xff0c 这么写并不能实现需求 xff0c 例如用程序在密码
gcc: error: unrecognized command line option ‘-std=c++14’ 问题解决

问题描述：安装一些python包（实际场景为py3安装infomap）时报错：gcc: error: unrecognized command line option ‘-std=c++14’ 环境 cento
无监督预训练 & 有监督预训练

本文参考了几篇其他博客 xff0c 具体链接在对应部分有提供无监督预训练 xff08 unsupervised pre training xff09 无监督预训练策略 xff0c 主要应用于复杂任务 43 少量标记数据集 xff0c 即
VSCode 是什么

VSCode 是什么 xff0c VS Code的全称是Visual Studio Code xff0c 但这全名实在是太长了 xff0c 很多用户喜欢叫它VS Code 说起VS Code xff0c 官方定义它是一个免费的开源的跨平台
SDN控制器之OVN实验二：使用OVN配置路由器

概览基于我上一篇文章中的实验环境 xff0c 我现在将第三层网络基础功能添加到OVN中最终呈现出来的将是由逻辑路由器连接的一对逻辑交换机另外 xff0c 路由器将被配置为通过OVN中内置的DHCP服务来提供IP地址重构逻辑组件由于
树莓派自带VNC黑屏（灰屏）个人解决办法

最近购买了树莓派4B，看了许多教程，但VNC第一次还正常显示，后来就黑屏，还有什么现在不能显示桌面什么的，下面给大家说下vnc黑屏我的解决办法，我发现是vncv
Neutron基础知识学习1

Neutron OpenStack通过Neutron项目在物理网络环境之上提供满足多租户要求的虚拟网络和服务 Neutron提供的网络虚拟化能力包括 xff1a xff08 1 xff09 二层到七层网络的虚拟化 xff1a L2 xff0
使用百度echarts制作可视化大屏——最终效果和动态数据刷新

最终效果如下图 xff1a 接下来就是数据动态刷新了 xff0c 这个没什么好说的 xff0c 就是一个 post的事 xff0c 传递一个json给自定义的refresh函数就行了 post url null function d ref
简单记录ESP8266WIFI模块网络调试过程，测试通过。

本文目的 xff0c 使用WIFI模块连接阿里云飞燕平台 xff0c 调试之前我想先测试一下买回来的WIFI模块是否能正常使用 xff0c 可以通过网络调试助手进行测试以手机WIFI提供局域网环境 xff0c PC端和WIFI模块连接手机
在vue-element-ui的时候如果同时使用了animated会出现模态框层级显示错误问题

同时使用element ui 和animated的时候模态框的层级会出问题，这个时候需要做动画最好还是用vue原带的动画组件transition和transition group，然后自定义动画
tensorflow数据清洗

import tensorflow as tf import numpy as np import random import os import math from matplotlib import pyplot as plt def

tensorflow数据清洗

tensorflow数据清洗 的相关文章

随机推荐

热门标签

tensorflow数据清洗的相关文章