Comments (16)
有没有大牛帮我指导一下,验证码是我自己生成的。
from captcha_break.
用原始的验证码生成方式时准确率很高,但换成自己的生成方式后准确率变得很差?
from captcha_break.
可以详细描述一下你的输入输出吗?比如提供输入数据的生成器和图片输出,提供模型构建代码,提供模型输出的结果,以及你的预期。
from captcha_break.
我的验证码生成器代码如下:
import random
import string
from PIL import Image, ImageFont, ImageDraw
import numpy as np
import math
from scipy import misc
import os
# Character set used for captcha text: digits first, then uppercase, then
# lowercase letters. The order is significant for any index-based encoding.
characters = ''.join((string.digits, string.ascii_uppercase, string.ascii_lowercase))
class GenerateImageCaptcha(object):
    """Render captcha text into an image with rotation, noise and wave distortion.

    :param width: width of the generated image in pixels.
    :param height: height of the generated image in pixels.
    :param fonts: iterable of TrueType font file paths. When empty or ``None``
        Pillow's built-in default font is used instead.
    :param font_sizes: iterable of font sizes; defaults to ``(40, 45, 50)``.
    """

    def __init__(self, width=160, height=70, fonts=None, font_sizes=None):
        self._width = width
        self._height = height
        self._fonts = fonts
        self._font_sizes = font_sizes or (40, 45, 50)
        # Lazily populated cache of loaded font objects (see ``truefonts``).
        self._truefonts = []

    @property
    def truefonts(self):
        """Return the loaded fonts, one per (font file, size) combination.

        The fonts are loaded on first access and cached afterwards.
        """
        if self._truefonts:
            return self._truefonts
        if self._fonts:
            self._truefonts = tuple(
                ImageFont.truetype(name, size)
                for name in self._fonts
                for size in self._font_sizes
            )
        else:
            # No font files were supplied (``fonts=None`` is the default):
            # fall back to Pillow's bundled font instead of failing while
            # iterating ``None``.
            self._truefonts = (ImageFont.load_default(),)
        return self._truefonts

    @staticmethod
    def _text_size(font, text):
        """Return ``(width, height)`` needed to draw *text* at the origin.

        ``font.getsize()`` was removed in Pillow 10, so ``getbbox()`` is
        preferred when the font object provides it; older Pillow versions
        fall back to ``getsize()``.
        """
        if hasattr(font, 'getbbox'):
            # The right/bottom edges are measured from the drawing origin,
            # which is the extent the glyph needs when drawn at (0, 0).
            _, _, right, bottom = font.getbbox(text)
            return int(math.ceil(right)), int(math.ceil(bottom))
        return font.getsize(text)

    # Curve noise
    @staticmethod
    def create_noise_curve(image, color):
        """Draw a random poly-line across *image* (in place) and return it.

        A random line width in ``0..4`` is chosen; widths 0 and 1 mean
        "no curve" and leave the image untouched.
        """
        curve_width = random.randint(0, 4)
        if curve_width in (0, 1):
            return image
        w, h = image.size
        draw = ImageDraw.Draw(image)
        x1 = random.randint(0, w // 5)
        y1 = random.randint(h // 5, h - h // 5)
        for _ in range(4):
            x2 = x1 + random.randint(w // 5, w // 2)
            y2 = random.randint(h // 5, h - h // 5)
            if x2 <= w:
                draw.line([x1, y1, x2, y2], fill=color, width=curve_width)
            else:
                # Segment would leave the image: it is not drawn, and the
                # next start point is clamped to the right edge.
                x2 = w
            x1, y1 = x2, y2
        return image

    # Dot noise
    @staticmethod
    def create_noise_dots(image, color, width=3, number=30):
        """Draw *number* short diagonal strokes ("dots") on *image* in place.

        :param width: stroke width of each dot.
        :param number: how many dots to draw; values <= 0 draw nothing.
        :return: the same image object.
        """
        draw = ImageDraw.Draw(image)
        w, h = image.size
        # ``range`` (rather than counting down to zero) also terminates for
        # negative counts.
        for _ in range(number):
            x1 = random.randint(0, w)
            y1 = random.randint(0, h)
            draw.line(((x1, y1), (x1 - 1, y1 - 1)), fill=color, width=width)
        return image

    # Horizontal distortion
    def distort_x_img(self, im_array):
        """Shift every row circularly by a sine-dependent horizontal offset.

        :param im_array: image array whose first two axes are (height, width).
        :return: a new float array (``np.zeros`` default dtype) with the
            same shape as *im_array*.
        """
        im_height = im_array.shape[0]
        im_tmp = np.zeros(shape=im_array.shape)
        factor = random.randint(1, 5)
        phase = random.random()
        move_direction = random.choice(['left', 'right'])
        sign = 1 if move_direction == 'right' else -1
        for i in range(im_height):
            dx = factor * math.sin(phase + 2 * math.pi * i / im_height)
            dx = abs(int(dx))
            # np.roll performs the wrap-around shift and stays valid even
            # when the offset exceeds the row length.
            im_tmp[i] = np.roll(im_array[i], sign * dx, axis=0)
        return im_tmp

    # Vertical distortion
    def distort_y_img(self, im_array):
        """Shift every column circularly by a sine-dependent vertical offset.

        :param im_array: image array whose first two axes are (height, width).
        :return: a new float array (``np.zeros`` default dtype) with the
            same shape as *im_array*.
        """
        im_width = im_array.shape[1]
        im_tmp = np.zeros(shape=im_array.shape)
        factor = random.randint(4, 8)
        period = random.randint(1, 3)
        phase = random.random()
        move_direction = random.choice(['up', 'down'])
        sign = -1 if move_direction == 'up' else 1
        for i in range(im_width):
            dx = factor * (phase + math.sin(2 * math.pi * i * period / im_width))
            dx = abs(int(dx))
            # The offset can reach 15 rows; np.roll wraps modulo the column
            # length, so short images no longer raise a broadcast error.
            im_tmp[:, i] = np.roll(im_array[:, i], sign * dx, axis=0)
        return im_tmp

    # Rotated text
    def draw_image_rotate(self, chars, color, background):
        """Create the CAPTCHA image with each character rotated individually.

        :param chars: text to be generated.
        :param color: accepted for interface compatibility; every character
            is drawn in its own freshly chosen random colour.
        :param background: color of the background.

        The color should be a tuple of 3 numbers, such as (0, 255, 255).
        """
        image = Image.new('RGB', (self._width, self._height), background)
        offset = random.randint(8, 16)
        for c in chars:
            font = random.choice(self.truefonts)
            w, h = self._text_size(font, c)
            im = Image.new('RGBA', (w, h), background)
            color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            ImageDraw.Draw(im).text((0, 0), c, font=font, fill=color)
            # Rotate the glyph; expand=1 grows the canvas to fit the result.
            im = im.rotate(random.uniform(-30, 30), Image.BILINEAR, expand=1)
            # White image of the rotated size, used to fill the four corners
            # that the rotation left empty.
            fff = Image.new('RGBA', im.size, (255,) * 4)
            # Composite the glyph over the white image using its own alpha.
            im = Image.composite(im, fff, im)
            w, h = im.size
            image.paste(im, (offset, int((self._height - h) / 2)))
            # Advance to the next glyph, allowing up to 6 px of overlap.
            offset = offset + w + random.randint(-6, 0)
        return image

    # Plain (unrotated) text
    def draw_img(self, chars, color, background):
        """Create the CAPTCHA image with upright characters.

        :param chars: text to be generated.
        :param color: accepted for interface compatibility; every character
            is drawn in its own freshly chosen random colour.
        :param background: color of the background.
        """
        image = Image.new('RGB', (self._width, self._height), background)
        draw_im = ImageDraw.Draw(image)
        x, y = random.randint(8, 16), random.randint(8, 12)
        for ch in chars:
            color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            font = random.choice(self.truefonts)
            draw_im.text(xy=(x, y), text=ch, fill=color, font=font)
            # Character spacing
            x = x + self._text_size(font, ch)[0] + random.randint(5, 10)
            y = random.randint(8, 12)
        return image

    def generate_image(self, chars):
        """Generate the image of the given characters.

        :param chars: text to be generated.
        :return: a 3-tuple of arrays: the image before distortion, the image
            after horizontal distortion, and the image after horizontal
            followed by vertical distortion.
        """
        background = (255, 255, 255)
        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        # Image with rotated characters
        im = self.draw_image_rotate(chars, color, background)
        # Image with plain characters (alternative renderer)
        # im = self.draw_img(chars, color, background)
        # Dot noise (disabled)
        # self.create_noise_dots(im, color)
        # Curve noise
        self.create_noise_curve(im, color)
        im_array = np.array(im)
        # Horizontal distortion
        im_x_distort = self.distort_x_img(im_array)
        # Vertical distortion applied on top of the horizontal one
        im_y_distort = self.distort_y_img(im_x_distort)
        return np.array(im), im_x_distort, im_y_distort
def generate_captcha(font_dir=r'C:/Windows/Fonts', output_dir='D:/spiders/picture'):
    """Generate one random 4-character captcha and save it as a JPEG.

    :param font_dir: directory containing ``arial.ttf``; defaults to the
        Windows system font directory.
    :param output_dir: directory the image is written to. The file is named
        after the captcha text.
    :return: path of the saved image.
    """
    # Image width, height and number of characters
    width, height, n_len = 160, 70, 4
    # Draw each character independently so that repeated characters can
    # occur; random.sample() would never produce a repeat.
    random_str = ''.join(random.choice(characters) for _ in range(n_len))
    # Font files to use (other candidates: 'Carlito-Regular.ttf',
    # 'DejaVuSansMono_0.ttf', 's8514fix.fon')
    fonts = [os.path.join(font_dir, font) for font in ['arial.ttf']]
    # Font sizes. NOTE: range(40, 45, 50) would yield only 40 because the
    # third argument is the step, so the sizes are listed explicitly.
    font_sizes = (40, 45, 50)
    # Render the captcha and take the fully distorted variant (index 2)
    generator = GenerateImageCaptcha(width=width, height=height, font_sizes=font_sizes, fonts=fonts)
    pixels = generator.generate_image(random_str)[2]
    # Save the image. scipy.misc.imsave was removed from SciPy, so the array
    # is converted back to 8-bit pixels and written with Pillow.
    img_name = '{0}'.format(random_str)
    img_path = '{0}/{1}.jpg'.format(output_dir, img_name)
    Image.fromarray(np.asarray(pixels).astype(np.uint8)).save(img_path)
    return img_path
if __name__ == '__main__':
    # Script entry point: generate a fixed number of sample captchas.
    sample_count = 1
    for _ in range(sample_count):
        generate_captcha()
from captcha_break.
之后 我把gru的输出增加了一倍。
from captcha_break.
这个生成器生成的验证码会比较难一点,看您有没有方法能提高准确率
from captcha_break.
我大概训练了30个epoch左右,loss在0.1左右,但测试结果不太理想
from captcha_break.
还有就是我想问一下你,ctc输入20个序列为什么总是只有前两个序列和最后两个序列有结果,其他都为空?
from captcha_break.
我这里没有 Windows,所以没办法运行你的代码,你可以贴一些图片样例吗?
另外你也可以提供模型构建代码以及模型输出的结果。
我注意到你的图片尺寸是 160, 70,你可以减少一个池化层,看看效果怎么样。
from captcha_break.
还有就是我想问一下你,ctc输入20个序列为什么总是只有前两个序列和最后两个序列有结果,其他都为空?
我不太明白你这句话的意思,你可以提供一个模型输出的样例吗?
from captcha_break.
谢谢您 我找到原因了 原本的生成的验证码都是以三色彩通道输入的,我把输入全部进行了二值化处理,这样训练完的模型,在识别读取的图片像素矩阵时,准确率会很高。
from captcha_break.
各个网站的验证码图片种类太多,需要针对性训练,效果才会好,本身模型还是很不错的。
from captcha_break.
谢谢您 我找到原因了 原本的生成的验证码都是以三色彩通道输入的,我把输入全部进行了二值化处理,这样训练完的模型,在识别读取的图片像素矩阵时,准确率会很高。
请问Hbaianni,你的意思是:
1)训练图片都要做二值化处理,
2)还是测试图片要做二值化处理,
3)两者都要做二值化处理
from captcha_break.
在我看来,不需要手工做二值化处理,模型可以学会彩色验证码。
from captcha_break.
谢谢您 我找到原因了 原本的生成的验证码都是以三色彩通道输入的,我把输入全部进行了二值化处理,这样训练完的模型,在识别读取的图片像素矩阵时,准确率会很高。
请问Hbaianni,你的意思是:
1)训练图片都要做二值化处理,
2)还是测试图片要做二值化处理,
3)两者都要做二值化处理
是这个意思 因为生成的验证码图片像素值在读取图片时会发生变化(三色彩通道颜色是随机组合生成),所以影响了模型识别的准确率,如果都做二值化处理的话可以解决这个问题。这是我做完测试的理解,不对请指正!
from captcha_break.
在我看来,不需要手工做二值化处理,模型可以学会彩色验证码。
你说的没错,模型其实是可以学习到色彩特征的,主要是在读取图片识别时,像素数值会发生变化,影响了模型识别准确率。
from captcha_break.
Related Issues (20)
- 如何把保存下来的ctc模型载入继续训练呢?
- 您可否告知下这几个文件是独立运行的吗,如何训练自己的中文验证码呢 HOT 5
- RNN分类之后, 在评估处怎么获得每个字符的概率呢 HOT 1
- 楼主您好,请问3500常用汉字的验证码识别,该模型大小够吗? HOT 5
- 多行验证码如何识别呢? HOT 4
- loss为负数且不断减小
- cannot import name '_imaging' from 'PIL'
- 变长标签怎么处理
- 如果验证码最后两位相同,似乎一定识别错误 HOT 4
- 运行winpy/main.py遇到的问题 HOT 5
- train和val的acc都可以到99%,但是eval,只有0.00265,这是怎么回事呢
- 尝试把n_class+1程序可以运行,但是不知道对不对 HOT 1
- 请问如果是不定长的验证码 可以使用吗 HOT 2
- 使用CTC, 识别时不限制4个字符长度,识别率如何? HOT 3
- CTC模型不定长输出问题 HOT 1
- 效果不理想
- 训练完了怎么用啊,纯小白 HOT 2
- cnn_2019.ipynb(防止 tensorflow 占用所有显存)tensorflow2.0要怎么改 HOT 1
- tensorflow 2.0 训练的时候 日志不显示 不知道 训练到哪一步 HOT 1
- 请问我该如何替换掉ctc_2019中的lambda方法呢 因为lambda在加载保存的模型会有错误 HOT 1
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Something interesting about the web. A new door to the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Something interesting about visualization, using data art.
-
Game
Something interesting about games, making everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from captcha_break.