{"id":5440,"date":"2022-04-25T17:56:07","date_gmt":"2022-04-25T09:56:07","guid":{"rendered":"https:\/\/blog.iyatt.com\/?p=5440"},"modified":"2024-05-05T14:19:06","modified_gmt":"2024-05-05T06:19:06","slug":"ocr-%e6%96%87%e5%ad%97%e6%8f%90%e5%8f%96","status":"publish","type":"post","link":"https:\/\/blog.iyatt.com\/?p=5440","title":{"rendered":"OCR \u6587\u5b57\u63d0\u53d6"},"content":{"rendered":"\n<p>\u6d4b\u8bd5\u73af\u5883\uff1a<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">Ubuntu 20.04 x86_64\nPython 3.9.10\nopencv-python 4.5.5.64\npytesseract 0.3.9\njupyter 1.0.0\nmatplotlib 3.5.1<\/pre>\n\n\n\n<p>pytesseract \u4f9d\u8d56 tesseract-ocr\uff0c\u8fd9\u662f\u4e00\u4e2a\u5f00\u6e90\u7684 OCR \u9879\u76ee\uff0c\u9879\u76ee\u5730\u5740\uff1a<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/tesseract-ocr\/tesseract\" target=\"_blank\">https:\/\/github.com\/tesseract-ocr\/tesseract<\/a><\/p>\n\n\n\n<p>\u6211\u8fd9\u91cc\u4f7f\u7528\u7684\u7248\u672c\u662f 5.1.0\uff0c\u57fa\u4e8e\u6e90\u7801\u7f16\u8bd1\u5b89\u88c5\uff0c\u6d41\u7a0b\u5982\u4e0b\uff1a<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># \u5b89\u88c5\u4e00\u4e9b\u4f9d\u8d56\nsudo apt update\nsudo apt install -y git build-essential autoconf automake libtool pkg-config libpng-dev libjpeg8-dev libtiff5-dev zlib1g-dev libicu-dev libpango1.0-dev libcairo2-dev\n\n# \u83b7\u53d6\u6e90\u7801\ncd \/tmp\ngit clone https:\/\/github.com\/tesseract-ocr\/tesseract.git --depth=1 --branch=5.1.0\n\n# \u7f16\u8bd1\u5b89\u88c5\ncd tesseract\n.\/autogen.sh\n.\/configure --prefix=$HOME\/local\/\nmake -j8\nmake install<\/pre>\n\n\n\n<p>\u5c06 tesseract \u547d\u4ee4\u6dfb\u52a0\u5230\u73af\u5883\u53d8\u91cf<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">echo \"export PATH=$PATH:$HOME\/local\/bin\/\" >> ~\/.bashrc\nsource ~\/.bashrc<\/pre>\n\n\n\n<p>\u7136\u540e\u6dfb\u52a0\u6a21\u578b\u6587\u4ef6\uff0c\u5b98\u65b9\u63d0\u4f9b\u4e86\u4e24\u79cd\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\"><li>\u6700\u4f73\uff08\u6700\u51c6\u786e\uff09\u8bad\u7ec3\u7684 LSTM \u6a21\u578b\uff1a<a href=\"https:\/\/github.com\/tesseract-ocr\/tessdata_best\" target=\"_blank\" rel=\"noreferrer noopener\">https:\/\/github.com\/tesseract-ocr\/tessdata_best<\/a><\/li><li>\u652f\u6301\u65e7\u7248\u548c LSTM OCR \u5f15\u64ce\u7684\u8bad\u7ec3\u6a21\u578b\uff1a<a href=\"https:\/\/github.com\/tesseract-ocr\/tessdata\" target=\"_blank\" rel=\"noreferrer noopener\">https:\/\/github.com\/tesseract-ocr\/tessdata<\/a><\/li><\/ul>\n\n\n\n<p>\u6211\u4f7f\u7528\u7684\u6700\u4f73\u6a21\u578b\uff0c\u5176\u5b9e\u4e5f\u4e0d\u9700\u8981\u4e0b\u8f7d\u6240\u6709\u7684\u6a21\u578b\uff0c\u4e00\u822c\u800c\u8a00\u53ea\u9700\u8981\u7528\u5230\u4e2d\u6587\u548c\u82f1\u6587\u8bc6\u522b\uff0c\u56e0\u6b64\u4e0b\u8f7d <strong>chi_sim.traineddata<\/strong> \u548c <strong>eng.traineddata<\/strong>\uff08\u672c\u6587\u8d44\u6e90\u4e2d\u4e5f\u6709\u63d0\u4f9b\uff09\uff0c\u7136\u540e\u5c06\u8fd9\u4e24\u4e2a\u6587\u4ef6\u62f7\u8d1d\u5230 <strong>$HOME\/local\/share\/tessdata<\/strong> \u8def\u5f84\u4e0b<\/p>\n\n\n\n<p>________________________________________________________________________________________<\/p>\n\n\n\n<p>\u4f7f\u7528\u793a\u4f8b\uff1a<\/p>\n\n\n\n<p>\u672c\u6587\u8d44\u6e90\u6587\u4ef6\u4e0b\u8f7d\uff1a<a href=\"https:\/\/pan.baidu.com\/s\/12BXjUnWrCHn3zIM_gV8Ybg?pwd=4nf8\" target=\"_blank\" rel=\"noreferrer noopener\">https:\/\/pan.baidu.com\/s\/12BXjUnWrCHn3zIM_gV8Ybg?pwd=4nf8<\/a><\/p>\n\n\n\n<p>\u7528 jupyter \u6253\u5f00 ocr.ipynb \u5e76\u8fd0\u884c\u6574\u4e2a\u7b14\u8bb0\u672c\uff0c\u53ef\u4ee5\u67e5\u770b OCR \u6548\u679c<\/p>\n\n\n\n<p>\u56fe\u7247\u9884\u89c8<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img decoding=\"async\" width=\"927\" height=\"920\" data-src=\"https:\/\/blog.iyatt.com\/wp-content\/uploads\/2022\/04\/image-264.png\" alt=\"\" class=\"wp-image-5455 lazyload\" data-srcset=\"https:\/\/blog.iyatt.com\/wp-content\/uploads\/2022\/04\/image-264.png 927w, https:\/\/blog.iyatt.com\/wp-content\/uploads\/2022\/04\/image-264-300x298.png 300w, https:\/\/blog.iyatt.com\/wp-content\/uploads\/2022\/04\/image-264-150x150.png 150w, https:\/\/blog.iyatt.com\/wp-content\/uploads\/2022\/04\/image-264-768x762.png 768w\" data-sizes=\"(max-width: 927px) 100vw, 927px\" src=\"data:image\/svg+xml;base64,PHN2ZyB3aWR0aD0iMSIgaGVpZ2h0PSIxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPjwvc3ZnPg==\" style=\"--smush-placeholder-width: 927px; --smush-placeholder-aspect-ratio: 927\/920;\" \/><\/figure>\n\n\n\n<p>\u6587\u5b57\u63d0\u53d6<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img decoding=\"async\" width=\"1024\" height=\"752\" data-src=\"https:\/\/blog.iyatt.com\/wp-content\/uploads\/2022\/04\/image-265-1024x752.png\" alt=\"\" class=\"wp-image-5456 lazyload\" data-srcset=\"https:\/\/blog.iyatt.com\/wp-content\/uploads\/2022\/04\/image-265-1024x752.png 1024w, https:\/\/blog.iyatt.com\/wp-content\/uploads\/2022\/04\/image-265-300x220.png 300w, https:\/\/blog.iyatt.com\/wp-content\/uploads\/2022\/04\/image-265-768x564.png 768w, https:\/\/blog.iyatt.com\/wp-content\/uploads\/2022\/04\/image-265.png 1037w\" data-sizes=\"(max-width: 1024px) 100vw, 1024px\" src=\"data:image\/svg+xml;base64,PHN2ZyB3aWR0aD0iMSIgaGVpZ2h0PSIxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPjwvc3ZnPg==\" style=\"--smush-placeholder-width: 1024px; --smush-placeholder-aspect-ratio: 1024\/752;\" \/><\/figure>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6d4b\u8bd5\u73af\u5883\uff1a pytesseract \u4f9d\u8d56 tesseract-ocr\uff0c\u8fd9\u662f\u4e00\u4e2a\u5f00\u6e90\u7684 OCR \u9879\u76ee\uff0c\u9879\u76ee\u5730\u5740\uff1a [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"zakra_page_container_layout":"customizer","zakra_page_sidebar_layout":"customizer","zakra_remove_content_margin":false,"zakra_sidebar":"customizer","zakra_transparent_header":"customizer","zakra_logo":0,"zakra_main_header_style":"default","zakra_menu_item_color":"","zakra_menu_item_hover_color":"","zakra_menu_item_active_color":"","zakra_menu_active_style":"","zakra_page_header":true,"_lmt_disableupdate":"","_lmt_disable":"","footnotes":""},"categories":[1],"tags":[],"class_list":["post-5440","post","type-post","status-publish","format-standard","hentry","category-all"],"modified_by":"IYATT-yx","_links":{"self":[{"href":"https:\/\/blog.iyatt.com\/index.php?rest_route=\/wp\/v2\/posts\/5440","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/blog.iyatt.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blog.iyatt.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blog.iyatt.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.iyatt.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=5440"}],"version-history":[{"count":0,"href":"https:\/\/blog.iyatt.com\/index.php?rest_route=\/wp\/v2\/posts\/5440\/revisions"}],"wp:attachment":[{"href":"https:\/\/blog.iyatt.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=5440"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blog.iyatt.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=5440"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/blog.iyatt.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=5440"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}