逻辑回归案例

2023-05-16

应用案例

之前学习了逻辑回归，我们现在来做一个案例。
这是一个图片验证码识别的案例：怎样从图片中准确地识别出正确的数字。
我们分了三步。
第一步：先生成150张验证码图片，每张图片有5个数字，图片中带有随机的噪点。
具体代码实现：

"""
Date: 2019--10 09:39
User: yz
Email: 1147570523@qq.com
Desc:

"""
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
import random

def getRandomStr():
    """Return one random decimal digit ('0'..'9') as a one-character string."""
    return str(random.randint(0, 9))

def getRandomColor():
    """Return a random colour as a 3-tuple of ints, each drawn from 0..255 inclusive."""
    # Three independent draws, in order, one per colour channel.
    return tuple(random.randint(0, 255) for _ in range(3))

def generate_captcha():
    """Render one random 5-digit captcha and save it as ``img/<digits>.png``.

    A 150x50 white RGB canvas receives five random digits (one every 30 px,
    each in a random colour), three black interference lines, and five
    random-colour points plus five small black arcs as noise.

    The file name is the digit string itself, so two captchas that happen to
    contain the same digits overwrite each other.
    """
    # Local import: this script does not import ``os`` at module level.
    import os

    image = Image.new("RGB", (150, 50), (255, 255, 255))
    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype("LiberationSans-Bold.ttf", size=32)

    label = ""
    for i in range(5):
        # Draw one random digit and remember it as part of the label.
        random_char = getRandomStr()
        label += random_char
        draw.text((10 + i * 30, 0), random_char, getRandomColor(), font=font)

    # Bounds used for the noise coordinates.
    # NOTE(review): height is 30 although the canvas is 50 px tall, so the
    # noise never reaches the bottom rows -- confirm this is intended.
    width = 150
    height = 30

    # Interference lines.
    for i in range(3):
        x1 = random.randint(0, width)
        x2 = random.randint(0, width)
        y1 = random.randint(0, height)
        y2 = random.randint(0, height)
        draw.line((x1, y1, x2, y2), fill=(0, 0, 0))

    # Noise points and short arcs.
    for i in range(5):
        draw.point([random.randint(0, width), random.randint(0, height)], fill=getRandomColor())
        x = random.randint(0, width)
        y = random.randint(0, height)
        draw.arc((x, y, x + 4, y + 4), 0, 90, fill=(0, 0, 0))

    # Save. The output folder is created on demand and the file handle is
    # closed deterministically instead of being left to the garbage collector.
    os.makedirs('img', exist_ok=True)
    with open(''.join(['img/', label, '.png']), 'wb') as out_file:
        image.save(out_file, 'png')

if __name__ == '__main__':
    # Step one of the article: produce 150 captcha images.
    for _ in range(150):
        generate_captcha()

在这里插入图片描述
第二步： 对150张验证码图像进行处理。
（1）二值化：首先把图像从RGB3通道转化成Gray1通道，然后把灰度图（0～255）转化成二值图（0，1）
（2）降噪：通过处理孤立点，对二值化的图进行降噪
（3）图片切割：根据像素格，把图片中的所有（5个）数字，分别保存到对应的0～9文件夹下
具体代码实现：

"""
Date: 2019--10 10:49
User: yz
Email: 1147570523@qq.com
Desc:
"""
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os

def binarizaion(path):
    """Load the image at *path* and threshold it into a 0/1 array.

    The image is converted to 8-bit greyscale; every pixel whose grey level
    is <= 230 becomes 0 and every other pixel becomes 1.

    Side effect: the binary image is also drawn (axes hidden) on the pyplot
    figure labelled ``''``.

    Returns:
        A new NumPy array with the greyscale image's shape and dtype,
        containing only 0 and 1.
    """
    img = Image.open(path)
    gray = np.array(img.convert('L'))
    # One vectorised comparison replaces the per-pixel Python loop; the
    # resulting values and dtype are the same as the loop produced.
    img_gray = np.where(gray <= 230, 0, 1).astype(gray.dtype)
    plt.figure('')
    plt.imshow(img_gray, cmap='gray')
    plt.axis('off')
    return img_gray

def noiseReduction(img_gray, label):
    """Remove isolated dark pixels from a 0/1 image in place, then save a plot of it.

    The array is scanned row by row. A pixel that is not 1 is set to 1 when
    it lies on the outer border, or when fewer than 4 of its 8 neighbours
    are 0. Changes are written straight into *img_gray*, so pixels cleared
    earlier in the scan influence the neighbour counts of later pixels.

    Afterwards the array is drawn (axes hidden) on the pyplot figure labelled
    ``''`` and written to ``imgs_logistic/<label>.png`` with ``plt.savefig``.

    Args:
        img_gray: 2-D array of 0/1 values; modified in place.
        label: file-name stem used for the saved figure.
    """
    # Relative positions of the eight surrounding pixels.
    neighbour_offsets = (
        (0, -1), (0, 1), (1, 0), (-1, 0),
        (-1, 1), (-1, -1), (1, -1), (1, 1),
    )
    rows, cols = img_gray.shape
    last_row, last_col = rows - 1, cols - 1
    for r in range(rows):
        for c in range(cols):
            if img_gray[r, c] == 1:
                continue
            # Border pixels are always cleared.
            if r == 0 or c == 0 or r == last_row or c == last_col:
                img_gray[r, c] = 1
                continue
            dark_neighbours = sum(
                1 for dr, dc in neighbour_offsets if img_gray[r + dr, c + dc] == 0
            )
            if dark_neighbours < 4:
                img_gray[r, c] = 1
    plt.figure('')
    plt.imshow(img_gray, cmap='gray')
    plt.axis('off')
    plt.savefig(''.join(['imgs_logistic/', label, '.png']))

def cutImg(label):
    """Cut ``imgs_logistic/<label>.png`` into five tiles and file each under its digit.

    Tile *i* (0..4) is the box x in [100*(i+1), 100*(i+1)+100), y in [170, 280).
    It is saved as ``cut_number/<digit>/<seq>.png``, where <digit> is the i-th
    character of *label* and <seq> comes from ``get_save_seq``.
    NOTE(review): the fixed coordinates presumably match the layout of the
    figure written by ``plt.savefig`` -- confirm against the actual output size.
    """
    digits = list(label)
    source = Image.open('imgs_logistic/' + label + '.png')
    for idx in range(5):
        left = 100 * (idx + 1)
        tile = source.crop((left, 170, left + 100, 280))
        plt.imshow(tile)
        digit = digits[idx]
        seq = get_save_seq(digit)
        tile.save('cut_number/' + str(digit) + '/' + str(seq) + '.png')

def get_save_seq(num):
    """Return the next free numeric file index inside ``cut_number/<num>/``.

    Files are expected to be named ``<int>.<ext>``. Entries whose stem is not
    an integer (for example hidden files such as ``.DS_Store``) are ignored
    instead of raising ``ValueError``.

    Args:
        num: name of the digit sub-folder, as a string.

    Returns:
        0 when the folder holds no numbered file, otherwise the largest index
        found (never less than 0) plus one.
    """
    indices = []
    for entry in os.listdir(''.join(['cut_number/', num, '/'])):
        stem = entry.split('.')[0]
        try:
            indices.append(int(stem))
        except ValueError:
            # Not one of our numbered tiles; skip it.
            continue
    if not indices:
        return 0
    return max(0, max(indices)) + 1

def create_dir():
    """Create the ten digit folders ``cut_number/0`` .. ``cut_number/9``.

    Missing parent folders are created as well, and folders that already
    exist are left untouched, so the function can be called repeatedly.
    """
    for i in range(10):
        os.makedirs(''.join(['cut_number/', str(i)]), exist_ok=True)

def img_2_clean():
    """Binarise and denoise every file found in ``img/``.

    The text before the first '.' of each file name is used as the label that
    ``noiseReduction`` saves its output under.
    """
    for file_name in os.listdir('img/'):
        label = file_name.split('.')[0]
        # Binarise first, then clean up isolated pixels and save.
        binary = binarizaion('img/' + file_name)
        noiseReduction(binary, label)

def clean_to_cut():
    """Run ``cutImg`` for every file in ``imgs_logistic/``, using the text
    before the first '.' of the file name as the label."""
    for file_name in os.listdir('imgs_logistic/'):
        cutImg(file_name.split('.')[0])

if __name__ == '__main__':
    # Clean every captcha first, then cut the cleaned images into digits.
    for stage in (img_2_clean, clean_to_cut):
        stage()

在这里插入图片描述
第三步： 利用逻辑回归做模型，来测试一张图片验证码。这里生成模型过程比较漫长，我们可以先生成模型，再做测试。

把数据带入逻辑回归进行建模
（1）把切割好的数据，按照X（二维数组）、Y（一维数组）的方式传入LogisticRegression.fit()函数进行拟合
我们可以通过网格搜索（GridSearch）来进行调参
（2）通过joblib包，把模型保存到本地
得到模型后，进行图像验证
（1）按照第二步的流程（二值化、降噪、切割），对新的图像重复同样的处理
（2）对切割好的每个图像，独立的进行预测
（3）把最后预测结果进行拼接
具体代码实现：

"""
Date: 2019--10 14:26
User: yz
Email: 1147570523@qq.com
Desc:
"""
import os
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import  LogisticRegression
from sklearn.externals import joblib
from captcha_logistic import *

def load_data():
    """Build the training set from the tiles stored under ``cut_number/``.

    Every sub-folder whose name consists of digits is treated as a class.
    Each image inside it is converted to greyscale, thresholded (grey level
    <= 240 becomes 0, anything else 1) and flattened into one feature row.
    Entries of ``cut_number/`` that are not digit-named are skipped rather
    than crashing ``int()``.

    Returns:
        (X, Y): X is an array with one flattened 0/1 image per row, Y is the
        array of integer class labels taken from the folder names.
    """
    X, Y = [], []
    for numC in os.listdir('cut_number/'):
        if not str(numC).isdigit():
            continue
        num_list_dir = ''.join(['cut_number/', str(numC), '/'])
        for num_file in os.listdir(num_list_dir):
            with Image.open(''.join([num_list_dir, num_file])) as img:
                img_array = np.array(img.convert('L'))
            # Vectorised threshold; same values and dtype as a per-pixel loop.
            binary = np.where(img_array <= 240, 0, 1).astype(img_array.dtype)
            X.append(binary.reshape(-1))
            Y.append(int(numC))
    return np.array(X), np.array(Y)

def generate_model(X, Y):
    """Train a one-vs-rest logistic regression and save it to disk.

    70% of the samples are used for fitting. The remaining 30% are used to
    measure accuracy, which is returned so the caller can judge the model.
    The fitted model is written to ``captcha_model/captcha_model.model``;
    the folder is created when missing.

    NOTE(review): this script imports joblib from ``sklearn.externals``,
    which recent scikit-learn releases no longer provide -- confirm the
    installed version or switch to the standalone ``joblib`` package.

    Args:
        X: 2-D array, one flattened image per row.
        Y: 1-D array of integer labels.

    Returns:
        Mean accuracy of the fitted model on the held-out 30% split.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

    log_clf = LogisticRegression(multi_class='ovr', solver='sag', max_iter=10000)
    log_clf.fit(X_train, Y_train)

    os.makedirs('captcha_model', exist_ok=True)
    joblib.dump(log_clf, 'captcha_model/captcha_model.model')
    return log_clf.score(X_test, Y_test)

def get_model():
    """Load and return the model stored at ``captcha_model/captcha_model.model``."""
    return joblib.load('captcha_model/captcha_model.model')

def captcha_predict():
    """Predict the five digits of ``captcha_predict/unknown.png``.

    Steps:
      1. Binarise and denoise the captcha; ``noiseReduction`` writes the
         result to ``imgs_logistic/unknown.png``.
      2. Cut that file into five tiles with the same boxes ``cutImg`` uses and
         save them as ``captcha_predict/0.png`` .. ``captcha_predict/4.png``.
      3. Threshold each tile (grey level <= 240 becomes 0, else 1), flatten
         it, and classify it with the saved model.

    Returns:
        The five predicted labels concatenated into one string.
    """
    path = 'captcha_predict/unknown.png'
    pre_img_gray = binarizaion(path)
    noiseReduction(pre_img_gray, 'unknown')
    labels = ['0', '1', '2', '3', '4']

    img = Image.open(''.join(['imgs_logistic/unknown.png']))
    for i in range(5):
        pic = img.crop((100 * (1 + i), 170, 100 * (1 + i) + 100, 280))
        plt.imshow(pic)
        pic.save(''.join(['captcha_predict/', labels[i], '.png']))

    model = get_model()
    predicted = []
    for name in labels:
        tile = Image.open(''.join(['captcha_predict/', name, '.png']))
        img_array = np.array(tile.convert('L'))
        # Vectorised threshold; same values and dtype as a per-pixel loop.
        features = np.where(img_array <= 240, 0, 1).astype(img_array.dtype).reshape(-1)
        Y_pre = model.predict([features])
        predicted.append(str(Y_pre[0]))
    return ''.join(predicted)

if __name__ == '__main__':
    # Build the local model first; this only has to be done once.
    # X,Y=load_data()
    # generate_model(X,Y)
    # Run the prediction. captcha_predict() loads the saved model itself, so
    # no separate get_model() call is needed here.
    result = captcha_predict()
    print(result)

在这里插入图片描述
我们的测试结果是很准确的。

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系:hwhale#tublm.com(使用前将#替换为@)

逻辑回归案例

逻辑回归案例的相关文章

周周复始 |python实现日历遇到的错误

错误总结 pip 问题使用openpyxl的问题 pip 问题安装或更新都会出现以下问题：ERROR Exception Traceback most recent call last 解决办法就是找个镜像：推荐…
ValueError: not enough values to unpack (expected 3, got 0)

运行github上的大佬的代码：https://github.com/YunYang1994/tensorflow-yolov3，在进行python train.py的时候 ValueError: not enough v…
迁移博客中

本博客正在迁移到：周周的苜蓿园
粘贴代码规整漂亮的网站（可在word中规整

可以生成比较好看的代码截图 xff1a https carbon now sh 可以在Word中粘贴出比较整齐的代码 xff1a http www planetb ca syntax highlight word 或者用Notepad 43
Ubuntu Server版安装Gnome图形桌面

1 安装全部桌面环境 xff0c 其实Ubuntu系列桌面实际上有几种桌面应用程序 xff0c 包括Ubuntu desktop Kubunut desktop和Xubuntu desktop 我们就安装了Ubuntu desktop还有G
c++ sort函数cmp的有关情况(由浅入深)

sort first last cmp sort默认是从小到大排序 xff0c 要改成从大到小排序则可修改cmp span class token keyword bool span span class token function cm
qemu-kvm中debian-11.3.0虚拟机 root密码忘记处理方法

1 启动虚拟机 virsh start debian 11 3 0 2 连接虚拟机 xff0c 按 34 e 34 virsh console debian 11 3 0 3 移动光标在 34 linux boot vmlinuz 5 10
SQL语句练习题

一表结构创建数据库 school 脚本 drop database school create database school 创建数据库 go CREATE DATABASE school ON NAME 61 school dat
Python+Tesseract-OCR实现图像中的文字识别

开发环境 xff1a 运行平台 win10 编程语言 xff1a python IDE pycharm 三方库模块 xff1a pytessseract Tesseract OCR pytesseract库安装 xff1a 打开cmd xf
Ubuntu上网功能的配置

虽然用了Ubuntu系统时间不短了 xff0c 但每次重装或者升级后都会出现无法上网的问题 xff0c 所以在此做下记录 xff0c 方便以后配置 xff0c 同时也希望能给不会配置的朋友一些帮助网络配置流程 xff1a 前提准备工作 x
you-get使用命令

安装 xff1a pip install you get you get支持的网站 xff1a 网站URL哔哩哔哩http www bilibili com 优酷http www youku com 腾讯视频http v qq com 爱奇
【OpenVINO】 Windows10环境下载安装

OpenVINO官方中文文档 xff1a OpenVINO 工具套件概述 OpenVINO 工具套件 https docs openvino ai cn latest index html OpenVINO下载地址 xff1a Downlo
《计算机应用研究》投稿经历和时间节点

记录四川计算机研究院计算机应用研究期刊投稿经历和时间节点日期状态周期2022 11 09上传稿件当天显示编辑部已接收稿件 xff0c 开始初审2022 11 09 2022 11 15初审6天2022 11 15 2022 12 21
项目管理工具：GitHub，GitLab，Azure DevOps，Gitea版本控制系统

1 版本控制系统是什么 xff1f 版本控制系统是一种记录一个或若干文件内容变化 xff0c 方便查阅特定版本修订情况的系统 2 为什么要用版本控制系统 xff1f 工作上 xff0c 当你处理一个共享文件夹的时候 xff0c 必须告知办公
Linux Debian安装或管理多个Python版本

在Debian安装或管理多个Python版本 2021 05 13 19 04 55 43 08 字数 xff1a 4772 标签 xff1a Linux Python Ubuntu 20 04的Python默认版本是3 8 xff0c 符
Fl Studio20切换中文教程汉化补丁包

FL studio 简称FL xff0c 全称 Fruity Loops Studio xff0c 因此国人习惯叫它 34 水果 34 目前最新版本是FL studio20 xff0c 它让你的计算机就像是全功能的录音室 xff0c 大混音
Promox VE（PVE）重启后GUI不能登录

ssh能连接 xff0c 但是gui打不开因为集群节点不正确移除 xff0c 留下配置信息 xff0c 启动后 xff0c 一直找集群的其它机器 ssh远程指令 xff1a pvecm expected 1 上面的命令是告诉系统 xff0
虚拟机已经显示了已连接的图标但不能上网的解决办法+虚拟机显示网络连接激活失败

虚拟机已经显示了已连接的图标但不能上网的解决办法 43 虚拟机显示网络连接激活失败问题叙述解决办法问题叙述解决办法 1 https blog csdn net big rotor article details 70163858 用w

随机推荐

【递归】CodeForces - 768B

题意 xff1a 给定一个数N xff0c 对大于1 的数在原来的位置拆分为N 2 xff0c N 2 xff0c N 2 三个数对拆分出的大于1 的数同样进行拆分 xff0c 直至所有的数均为0 或1 对拆分后的0 1 序列 xff0c
1025. 除数博弈

2020 7 24 LeetCode 题目描述爱丽丝和鲍勃一起玩游戏 xff0c 他们轮流行动爱丽丝先手开局最初 xff0c 黑板上有一个数字 N 在每个玩家的回合 xff0c 玩家需要执行以下操作 xff1a 选出任一 x xff0
VMware安装Arch Linux

目录一新建虚拟机二安装系统1 下载镜像2 将镜像文件导入空白虚拟机中3 查看启动模式4 查看网络5 查看系统时间6 分区6 1 查看磁盘信息6 2 进入图形化分区工具进行分区6 3 格式化分区6 4 挂载分区 7 修改镜像源8 安装l
HDU 1880 魔咒词典（Hash+二分）

题目链接哈利波特在魔法学校的必修课之一就是学习魔咒据说魔法世界有100000种不同的魔咒 xff0c 哈利很难全部记住 xff0c 但是为了对抗强敌 xff0c 他必须在危急时刻能够调用任何一个需要的魔咒 xff0c 所以他需要你的帮助
Python「pytesseract」：中文识别模块

在处理 ttf 文件时 xff0c 遇到了识别图片中中文的情况 xff0c 常见的方式是调用百度的语言识别接口 xff0c 但是这里为了大批量的识别 xff0c 首先试了试 python 自带的语言识别模块 pytesseract xff0
总结rce（远程代码执行各种sao姿势）绕过bypass

空格绕过 xff1a 在bash下可以用 IFS IFS IFS 9 09 lt gt lt gt xff08 例如 cat etc passwd xff09 20 space 09 tab xff0c 这儿跟sql注入有点相似 xff0c
docker-compose教程（安装，使用, 快速入门）

此文章为转载文章 xff0c 原文地址 xff1a https blog csdn net pushiqiang article details 78682323 目录 1 Compose介绍2 Compose和Docker兼容性3 安装d
Ubuntu查看和设置环境变量

文章目录 1 查看环境变量2 设置方式2 1 把你的路径加入PATH2 2 命名一个新的环境变量 3 作用域3 1 用于当前终端3 2 用于当前用户3 3 用于所有用户转载 1 查看环境变量查看环境变量有三个命令 xff1a env x
CorelDRAW2022下载附带序列号安装教程

CorelDRAW2022作为图形设计软件的代表 xff0c 以其杰出和革新的特性赢得了长期的声誉和用户的赞赏 xff0c 是一套屡获殊荣的图像编辑软件 CorelDRAW 2022包含程序 xff1a CorelDRAW 2022主程序矢
ASCII码、Unicode编码对照表 —— ASCII控制字符 Unicode编码字符编码的前世此生

ASCII控制字符 Unicode编码 ASCII xff08 American Standard Code for Information Interchange xff0c 美国信息互换标准代码 xff0c ASC xff09 是基于拉
RealSR真实场景超分

一 Camera Lens Super Resolution 本文主要解决RealSR的数据问题 xff0c 通过控制镜头到物体的距离产生成对的真实数据 xff08 Real paired SR data xff09 xff08 1 xff
5374. 长度为 n 的开心字符串中字典序第 k 小的字符串(回溯算法)

5374 长度为 n 的开心字符串中字典序第 k 小的字符串 List lt String gt res 答案集合不能定义为StringBuilder类型剩下的就是回溯算法 span class token keyword class s
宝塔忘记端口,解决办法

1 xff0c 登陆远程连接 xff0c 输入ll 2 输入cd后再ll 3 清下屏 xff0c 输入clear 4 xff0c 输入cd www server panel data 找到port pl 5 输入cat port pl查看端
幽冥传奇

JAVA环境添加 setx M JAVA HOME D YM cnmmm com bl20166 Java jdk1 8 0 144 setx M CLASSPATH JAVA HOME lib dt jar JAVA HOME lib t
TOR下载教程

不想用自己浏览器ip访问可用以下设置 xff0c 当然有很多其他方法 1 xff0c 官网https www torproject org 2 xff0c 下载对应版本 3 xff0c 打开tor 洋葱浏览器并选择config 4 lin
几步搞懂cobalt strike启动

很多人问Cobalt strike怎么启动 xff0c 总结几句 1 cmd管理员身份运2 切换到CS所在目录3 输入ipconf找到自己ip地址4 输入teamserver 自己ip 密码回车即可5 打开start bat文件再点击确定
TOR下载和网桥配置教程

不想用自己浏览器ip访问可用以下设置 xff0c 当然有很多其他方法 1 xff0c 官网https www torproject org 2 xff0c 下载对应版本安装即可本节以windows为例 xff08 苹果安卓手机都有对应a
XSS漏洞,通过XSS实现网页挂马

今天讲下通过XSS实现网页挂马 xff0c 目的是了解安全方面知识 xff0c 提升生活网络中辨别度原理 xff1a 实验分为两部分 xff1a 1 通过Kali linux xff0c 利用MS14 064漏洞 xff0c 制作一个木马
qt样式有时不生效问题分析

qt 中的样式表非常方便 xff0c 可以自定义自己想要的控件但是有时候会发现使用样式表时 xff0c 样式不会生效接下来分析一下主要原因 xff1a 1 样表格式不正确 2 样式表的样式被子对象覆盖 xff0c 设置时注意作用对象 x
逻辑回归案例

应用案例 之前学习了逻辑回归，我们现在来做一个案例。一个图片验证码识别的案例：怎么从图片中准确地识别出正确的数字。我们分了三步。第一步：先生成150张验证码图片，每张图片有5个数字。图片中有随机…

逻辑回归案例

应用案例

逻辑回归案例 的相关文章

随机推荐

热门标签

逻辑回归案例的相关文章