Windows和Linux下使用Python搭建一个图片OCR工具
2026-04-07 07:07:01
浏览次数:0
ocr-tool/
├── main.py # 主程序
├── requirements.txt # 依赖包
├── config.yaml # 配置文件
├── gui.py # 图形界面
├── cli.py # 命令行界面
├── utils/
│ ├── image_processor.py
│ └── ocr_engine.py
└── README.md
Pillow>=9.0.0
pytesseract>=0.3.10
opencv-python>=4.8.0
paddlepaddle>=2.5.0
paddleocr>=2.7.0
pyqt5>=5.15.0
pyyaml>=6.0
numpy>=1.24.0
python-dotenv>=1.0.0
Windows:
# 安装Tesseract OCR
# 下载地址: https://github.com/UB-Mannheim/tesseract/wiki
# 安装后需要将Tesseract添加到系统PATH
# 安装Python依赖
pip install -r requirements.txt
Linux (Ubuntu/Debian):
# 安装系统依赖
sudo apt update
sudo apt install -y tesseract-ocr tesseract-ocr-chi-sim
# 注意: 较新的发行版(如 Debian 12、Ubuntu 24.04)已移除 libgl1-mesa-glx,此时请改为安装 libgl1
sudo apt install -y libgl1-mesa-glx libglib2.0-0
# 安装Python依赖
pip install -r requirements.txt
#!/usr/bin/env python3
"""
跨平台OCR工具主程序
支持Windows和Linux系统
"""
import sys
import os
from pathlib import Path
def setup_environment():
    """Prepare the process environment before the CLI or GUI is started.

    On Windows, when Tesseract exists at its default install location, the
    path is exported through the ``TESSERACT_CMD`` environment variable and
    is also assigned to ``pytesseract.pytesseract.tesseract_cmd``.
    pytesseract locates the executable through that module attribute (or
    PATH), not through the environment variable, so setting only the
    variable would leave an off-PATH installation undiscovered.

    The ``output`` and ``logs`` directories are created relative to the
    current working directory if they do not exist yet.
    """
    if sys.platform == 'win32':
        tesseract_path = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
        if os.path.exists(tesseract_path):
            os.environ['TESSERACT_CMD'] = tesseract_path
            try:
                import pytesseract
            except ImportError:
                # Best effort: without pytesseract there is nothing to
                # configure here; the OCR engine reports the missing
                # dependency when it is actually used.
                pass
            else:
                pytesseract.pytesseract.tesseract_cmd = tesseract_path

    # Create the working directories used by the tool.
    for directory in ('output', 'logs'):
        Path(directory).mkdir(exist_ok=True)
def main():
    """Program entry point: start the CLI when ``--cli`` is given, else the GUI.

    ``--cli`` must be the first command-line argument. It is removed from
    ``sys.argv`` before the CLI is started, because the CLI parses
    ``sys.argv`` with its own argparse parser, which does not define
    ``--cli`` and would abort with an "unrecognized arguments" error.
    The CLI's return value is used as the process exit status.
    """
    setup_environment()
    if len(sys.argv) > 1 and sys.argv[1] == '--cli':
        from cli import main as cli_main
        # Hide the mode switch from the CLI's own argument parser.
        del sys.argv[1]
        sys.exit(cli_main())
    else:
        from gui import main as gui_main
        gui_main()
if __name__ == '__main__':
main()
import pytesseract
import cv2
import numpy as np
from PIL import Image
import logging
from typing import List, Tuple, Optional, Dict
import os
logger = logging.getLogger(__name__)
class OCREngine:
    """Wrapper that runs OCR through either Tesseract or PaddleOCR."""

    def __init__(self, lang='eng+chi_sim', engine='tesseract'):
        """
        Set up the requested OCR backend.

        Args:
            lang: Tesseract language string such as 'eng', 'chi_sim' or
                'eng+chi_sim'. It is only passed to Tesseract; the
                PaddleOCR instance is always created with lang='ch'.
            engine: Backend to use, 'tesseract' or 'paddle'. When 'paddle'
                is requested but paddleocr cannot be imported, the engine
                falls back to Tesseract.
        """
        self.lang = lang
        self.engine_type = engine
        self.paddle_ocr = None
        if engine == 'paddle':
            try:
                from paddleocr import PaddleOCR
                self.paddle_ocr = PaddleOCR(use_angle_cls=True, lang='ch')
                logger.info("PaddleOCR引擎初始化成功")
            except ImportError:
                logger.warning("PaddleOCR未安装,将使用Tesseract")
                self.engine_type = 'tesseract'
        logger.info(f"OCR引擎初始化完成,类型: {self.engine_type}, 语言: {lang}")

    @staticmethod
    def _mean_confidence(conf_values) -> float:
        """Average the positive confidence values reported by Tesseract.

        Values may arrive as ints, floats or numeric strings depending on
        the pytesseract/Tesseract version, so each one is parsed with
        ``float``. Non-numeric entries and non-positive values (Tesseract
        uses -1 for rows that are not words) are ignored.

        Returns:
            The mean of the remaining values, or 0.0 when there are none.
        """
        scores = []
        for raw in conf_values:
            try:
                score = float(raw)
            except (TypeError, ValueError):
                continue
            if score > 0:
                scores.append(score)
        return float(np.mean(scores)) if scores else 0.0

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """
        Binarize and denoise an image for Tesseract.

        Args:
            image_path: Path of the image (str or os.PathLike), or an
                already loaded image array, which is used as-is.

        Returns:
            The image after grayscale conversion, adaptive Gaussian
            thresholding and a 3x3 median blur.

        Raises:
            ValueError: If the image could not be read.
        """
        try:
            # Paths are loaded from disk; anything else is treated as an
            # already decoded image array.
            if isinstance(image_path, (str, os.PathLike)):
                img = cv2.imread(os.fspath(image_path))
            else:
                img = image_path
            if img is None:
                raise ValueError("无法读取图像")

            # Three-dimensional arrays are treated as BGR colour images.
            if len(img.shape) == 3:
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            else:
                gray = img

            processed = cv2.adaptiveThreshold(
                gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY, 11, 2
            )
            processed = cv2.medianBlur(processed, 3)
            return processed
        except Exception as e:
            logger.error(f"图像预处理失败: {e}")
            raise

    def ocr_with_tesseract(self, image_path: str) -> Dict:
        """
        Recognize text with Tesseract.

        Args:
            image_path: Image path or loaded image array (see
                ``preprocess_image``).

        Returns:
            Dict with the keys 'text', 'confidence' (mean of the positive
            per-word confidences, 0.0 if there are none), 'engine',
            'language' and 'details' (the ``image_to_data`` dict).
        """
        try:
            processed_img = self.preprocess_image(image_path)
            pil_img = Image.fromarray(processed_img)

            custom_config = r'--oem 3 --psm 6'

            text = pytesseract.image_to_string(
                pil_img,
                lang=self.lang,
                config=custom_config
            )
            # Per-word data, used here for the confidence values.
            data = pytesseract.image_to_data(
                pil_img,
                lang=self.lang,
                config=custom_config,
                output_type=pytesseract.Output.DICT
            )

            result = {
                'text': text.strip(),
                'confidence': self._mean_confidence(data['conf']),
                'engine': 'tesseract',
                'language': self.lang,
                'details': data
            }
            logger.info(f"Tesseract OCR识别完成,置信度: {result['confidence']:.2f}")
            return result
        except Exception as e:
            logger.error(f"Tesseract OCR失败: {e}")
            raise

    def ocr_with_paddle(self, image_path: str) -> Dict:
        """
        Recognize text with PaddleOCR.

        Args:
            image_path: Image path or loaded image array.

        Returns:
            Dict with the keys 'text' (recognized lines joined by
            newlines), 'confidence' (mean line confidence, 0.0 when nothing
            was recognized), 'engine', 'language' and 'details' (a list of
            dicts with 'bbox', 'text' and 'confidence').

        Raises:
            ValueError: If PaddleOCR was not initialized.
        """
        try:
            if self.paddle_ocr is None:
                raise ValueError("PaddleOCR未初始化")

            # Hand path-like objects over as plain strings.
            source = os.fspath(image_path) if isinstance(image_path, os.PathLike) else image_path
            result = self.paddle_ocr.ocr(source, cls=True)

            text_blocks = []
            all_text = []
            # Guard against an empty result as well as a None first page.
            if result and result[0]:
                for line in result[0]:
                    box = line[0]
                    text = line[1][0]
                    confidence = line[1][1]
                    text_blocks.append({
                        'bbox': box,
                        'text': text,
                        'confidence': confidence
                    })
                    all_text.append(text)

            result_text = '\n'.join(all_text)
            return {
                'text': result_text,
                'confidence': float(np.mean([b['confidence'] for b in text_blocks])) if text_blocks else 0.0,
                'engine': 'paddle',
                'language': 'ch',
                'details': text_blocks
            }
        except Exception as e:
            logger.error(f"PaddleOCR失败: {e}")
            raise

    def recognize(self, image_path: str) -> Dict:
        """
        Run OCR with the engine selected at construction time.

        Args:
            image_path: Image path or loaded image array.

        Returns:
            The result dict of ``ocr_with_paddle`` or ``ocr_with_tesseract``.
        """
        if isinstance(image_path, (str, os.PathLike)):
            source = image_path
        else:
            # Do not dump pixel data into the log for in-memory images.
            source = f"<array shape={getattr(image_path, 'shape', None)}>"
        logger.info(f"开始OCR识别: {source}")

        if self.engine_type == 'paddle':
            return self.ocr_with_paddle(image_path)
        else:
            return self.ocr_with_tesseract(image_path)

    def batch_recognize(self, image_paths: List[str]) -> List[Dict]:
        """
        Recognize a list of images one after another.

        Args:
            image_paths: Image paths to process.

        Returns:
            One dict per input, in input order. Successful entries are the
            ``recognize`` result plus 'filename'; failed entries contain
            'filename', 'error' and an empty 'text'.
        """
        results = []
        for i, img_path in enumerate(image_paths, 1):
            try:
                logger.info(f"处理第 {i}/{len(image_paths)} 张图片: {img_path}")
                result = self.recognize(img_path)
                result['filename'] = os.path.basename(img_path)
                results.append(result)
            except Exception as e:
                # A failing image must not abort the rest of the batch.
                logger.error(f"处理图片失败 {img_path}: {e}")
                results.append({
                    'filename': os.path.basename(img_path),
                    'error': str(e),
                    'text': ''
                })
        return results
import cv2
import numpy as np
from PIL import Image
import io
from typing import Union, Tuple
import logging
logger = logging.getLogger(__name__)
class ImageProcessor:
    """Collection of static image helper functions built on OpenCV."""

    @staticmethod
    def resize_image(image: np.ndarray, max_size: Tuple[int, int] = (2000, 2000)) -> np.ndarray:
        """
        Shrink an image so that it fits within ``max_size``.

        Args:
            image: Input image array.
            max_size: Maximum size as (width, height).

        Returns:
            The input object itself when it already fits; otherwise a
            resized copy with the aspect ratio preserved.
        """
        height, width = image.shape[:2]
        max_width, max_height = max_size
        if width > max_width or height > max_height:
            scale = min(max_width / width, max_height / height)
            # Clamp to one pixel: for extreme aspect ratios int() would
            # otherwise truncate the short side to 0 and cv2.resize fails.
            new_width = max(1, int(width * scale))
            new_height = max(1, int(height * scale))
            resized = cv2.resize(image, (new_width, new_height),
                                 interpolation=cv2.INTER_AREA)
            logger.info(f"图像从 {width}x{height} 调整到 {new_width}x{new_height}")
            return resized
        return image

    @staticmethod
    def enhance_image(image: np.ndarray) -> np.ndarray:
        """
        Enhance contrast and sharpness.

        Args:
            image: Input image; three-dimensional arrays are converted
                from BGR to grayscale first.

        Returns:
            Grayscale image after histogram equalization and a 3x3
            sharpening filter.
        """
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image.copy()

        enhanced = cv2.equalizeHist(gray)

        # Sharpening kernel: centre weight 9, all neighbours -1.
        kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
        enhanced = cv2.filter2D(enhanced, -1, kernel)
        return enhanced

    @staticmethod
    def deskew_image(image: np.ndarray) -> np.ndarray:
        """
        Rotate the image to compensate for skew.

        The skew is estimated from the minimum-area rectangle around all
        foreground pixels of an inverted Otsu binarization.

        Args:
            image: Input image (colour or grayscale).

        Returns:
            The rotated image when the estimated angle exceeds 1 degree in
            magnitude; otherwise the input object unchanged.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image.copy()
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # Coordinates of all foreground pixels, in (row, col) order as
        # returned by np.where.
        coords = np.column_stack(np.where(binary > 0))
        if len(coords) > 0:
            angle = cv2.minAreaRect(coords)[-1]

            # Fold the angle into [-45, 45].
            # NOTE(review): the sign/range of the minAreaRect angle depends
            # on the OpenCV version and the points are passed as (row, col);
            # confirm the resulting rotation direction on a skewed sample.
            if angle < -45:
                angle = 90 + angle
            elif angle > 45:
                angle = angle - 90

            # Only rotate when the image is noticeably skewed.
            if abs(angle) > 1.0:
                (h, w) = image.shape[:2]
                center = (w // 2, h // 2)
                M = cv2.getRotationMatrix2D(center, angle, 1.0)
                rotated = cv2.warpAffine(image, M, (w, h),
                                         flags=cv2.INTER_CUBIC,
                                         borderMode=cv2.BORDER_REPLICATE)
                logger.info(f"图像纠偏: 旋转 {angle:.2f} 度")
                return rotated
        return image

    @staticmethod
    def bytes_to_image(image_bytes: bytes) -> np.ndarray:
        """
        Decode an encoded image byte string into an OpenCV image.

        Args:
            image_bytes: Encoded image data.

        Returns:
            The result of ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.
        """
        nparr = np.frombuffer(image_bytes, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        return img

    @staticmethod
    def image_to_bytes(image: np.ndarray, format: str = 'PNG') -> bytes:
        """
        Encode an OpenCV image into bytes.

        Args:
            image: OpenCV image array.
            format: Image format name; it is lower-cased and used as the
                file extension passed to ``cv2.imencode``.

        Returns:
            The encoded image data.

        Raises:
            ValueError: If encoding was not successful.
        """
        success, encoded_image = cv2.imencode(f'.{format.lower()}', image)
        if success:
            return encoded_image.tobytes()
        raise ValueError(f"图像编码失败: {format}")
# OCR tool configuration
ocr:
  default_language: "eng+chi_sim"  # default OCR language
  default_engine: "tesseract"      # default engine (tesseract/paddle)
  tesseract_path: ""               # path to Tesseract (needs to be set on Windows)
image:
  max_width: 2000
  max_height: 2000
  enable_preprocess: true
  enable_deskew: true
output:
  default_format: "txt"            # output format (txt/json)
  save_to_file: true
  output_dir: "./output"
gui:
  window_width: 1200
  window_height: 800
  theme: "light"                   # light/dark
#!/usr/bin/env python3
"""
OCR工具命令行界面
"""
import argparse
import sys
import os
import json
from pathlib import Path
from datetime import datetime
import logging
from utils.ocr_engine import OCREngine
from utils.image_processor import ImageProcessor
# 配置日志
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def save_results(results, output_dir='./output', format='txt'):
    """Write OCR results to a timestamped file in ``output_dir``.

    Args:
        results: List of result dicts. For text output the keys
            'filename', 'text' and 'error' are written when present.
        output_dir: Target directory; it is created (including parents)
            when it does not exist yet.
        format: 'json' dumps ``results`` as a JSON document, 'txt' writes
            one section per result. Any other value writes nothing.
    """
    target_dir = Path(output_dir)
    # Make the function usable on its own, not only after the caller has
    # already created the directory.
    target_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    if format == 'json':
        output_file = target_dir / f'ocr_results_{timestamp}.json'
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        logger.info(f"结果已保存到: {output_file}")
    elif format == 'txt':
        output_file = target_dir / f'ocr_results_{timestamp}.txt'
        with open(output_file, 'w', encoding='utf-8') as f:
            for result in results:
                if 'filename' in result:
                    f.write(f"=== 文件: {result['filename']} ===\n")
                if 'text' in result:
                    f.write(result['text'])
                    f.write("\n\n")
                if 'error' in result:
                    f.write(f"错误: {result['error']}\n\n")
        logger.info(f"结果已保存到: {output_file}")
def main():
    """Command-line entry point: parse arguments, run OCR, save results.

    Inputs may be image files or directories. Directories are scanned
    (non-recursively) for files whose suffix, compared case-insensitively,
    is one of .jpg, .jpeg, .png, .bmp or .tiff. The ``--batch`` flag is
    accepted but not consulted by the processing below.

    Returns:
        1 when no image was found or the OCR engine could not be
        initialized, otherwise 0.
    """
    # cv2 is needed for reading the images below; it is imported here
    # because this script's module-level imports do not provide it.
    import cv2

    parser = argparse.ArgumentParser(description='OCR图片识别工具')
    parser.add_argument('input', nargs='+', help='输入图片路径或目录')
    parser.add_argument('-l', '--language', default='eng+chi_sim',
                        help='OCR语言 (默认: eng+chi_sim)')
    parser.add_argument('-e', '--engine', default='tesseract',
                        choices=['tesseract', 'paddle'],
                        help='OCR引擎 (默认: tesseract)')
    parser.add_argument('-o', '--output', default='./output',
                        help='输出目录 (默认: ./output)')
    parser.add_argument('-f', '--format', default='txt',
                        choices=['txt', 'json'],
                        help='输出格式 (默认: txt)')
    parser.add_argument('--preprocess', action='store_true',
                        help='启用图像预处理')
    parser.add_argument('--deskew', action='store_true',
                        help='启用图像纠偏')
    parser.add_argument('--batch', action='store_true',
                        help='批量处理模式')
    args = parser.parse_args()

    # Create the output directory.
    Path(args.output).mkdir(parents=True, exist_ok=True)

    # Collect the images to process. Suffixes are compared in lower case so
    # that e.g. "scan.JPG" is found on case-sensitive file systems too.
    image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'}
    image_paths = []
    for input_path in args.input:
        path = Path(input_path)
        if path.is_dir():
            image_paths.extend(sorted(
                entry for entry in path.iterdir()
                if entry.is_file() and entry.suffix.lower() in image_suffixes
            ))
        elif path.is_file():
            image_paths.append(path)

    if not image_paths:
        logger.error("未找到图片文件")
        return 1

    # Initialize the OCR engine.
    try:
        ocr = OCREngine(lang=args.language, engine=args.engine)
    except Exception as e:
        logger.error(f"OCR引擎初始化失败: {e}")
        return 1

    results = []
    processor = ImageProcessor()
    for img_path in image_paths:
        try:
            logger.info(f"处理图片: {img_path}")
            image = cv2.imread(str(img_path))
            if image is None:
                logger.error(f"无法读取图片: {img_path}")
                continue

            # Optional preprocessing steps.
            if args.preprocess:
                image = processor.enhance_image(image)
            if args.deskew:
                image = processor.deskew_image(image)
            image = processor.resize_image(image)

            result = ocr.recognize(image)
            result['filename'] = img_path.name
            results.append(result)

            # Echo the result on the console.
            print(f"\n文件: {img_path.name}")
            print(f"置信度: {result.get('confidence', 0):.2f}")
            print(f"识别结果:\n{result.get('text', '')}")
            print("-" * 50)
        except Exception as e:
            # Record the failure and continue with the next image.
            logger.error(f"处理失败 {img_path}: {e}")
            results.append({
                'filename': img_path.name,
                'error': str(e),
                'text': ''
            })

    if results:
        save_results(results, args.output, args.format)
    return 0
if __name__ == '__main__':
sys.exit(main())
#!/usr/bin/env python3
"""
OCR工具图形界面
"""
import sys
import os
from pathlib import Path
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
QHBoxLayout, QPushButton, QTextEdit, QLabel,
QFileDialog, QComboBox, QProgressBar, QMessageBox,
QSplitter, QGroupBox, QCheckBox, QSpinBox, QTabWidget)
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from PyQt5.QtGui import QFont, QPixmap, QImage
import cv2
import json
from datetime import datetime
from utils.ocr_engine import OCREngine
from utils.image_processor import ImageProcessor
class OCRWorker(QThread):
    """Background thread that runs OCR over a list of images.

    Signals:
        progress(int): percentage, emitted before each image and 100 after
            the loop.
        result_ready(dict): one result (or error) dict per image.
        finished(): emitted at the end of ``run``; this class-level
            declaration takes the place of the ``finished`` attribute
            inherited from QThread.
        error(str): emitted when ``run`` fails outside the per-image
            handling (for example during engine creation).
    """
    progress = pyqtSignal(int)
    result_ready = pyqtSignal(dict)
    finished = pyqtSignal()
    error = pyqtSignal(str)

    def __init__(self, image_paths, language, engine, preprocess, deskew):
        """Store the job parameters; processing starts with ``start()``.

        Args:
            image_paths: Image paths to process (objects with ``.name``).
            language: Language string passed to ``OCREngine``.
            engine: Engine name passed to ``OCREngine``.
            preprocess: Whether to run ``ImageProcessor.enhance_image``.
            deskew: Whether to run ``ImageProcessor.deskew_image``.
        """
        super().__init__()
        self.image_paths = image_paths
        self.language = language
        self.engine = engine
        self.preprocess = preprocess
        self.deskew = deskew
        self.canceled = False

    def run(self):
        """Process every image and report each outcome through signals."""
        try:
            ocr = OCREngine(lang=self.language, engine=self.engine)
            processor = ImageProcessor()
            total = len(self.image_paths)
            for i, img_path in enumerate(self.image_paths):
                if self.canceled:
                    break
                try:
                    self.progress.emit(int((i / total) * 100))

                    image = cv2.imread(str(img_path))
                    if image is None:
                        # Report unreadable files instead of dropping them
                        # silently, so the user sees which inputs failed.
                        self.result_ready.emit({
                            'filename': str(img_path.name),
                            'error': '无法读取图片',
                            'text': ''
                        })
                        continue

                    if self.preprocess:
                        image = processor.enhance_image(image)
                    if self.deskew:
                        image = processor.deskew_image(image)
                    image = processor.resize_image(image)

                    result = ocr.recognize(image)
                    result['filename'] = str(img_path.name)
                    self.result_ready.emit(result)
                except Exception as e:
                    # A failing image is reported and the loop continues.
                    self.result_ready.emit({
                        'filename': str(img_path.name),
                        'error': str(e),
                        'text': ''
                    })
            self.progress.emit(100)
        except Exception as e:
            self.error.emit(str(e))
        finally:
            self.finished.emit()

    def cancel(self):
        """Ask ``run`` to stop before it starts the next image."""
        self.canceled = True
class OCREditor(QMainWindow):
    """Main window: image selection, OCR options, result views and saving."""

    def __init__(self):
        """Create the window with no images selected and no worker running."""
        super().__init__()
        self.image_paths = []
        self.current_image = None
        self.worker = None
        # All results of the current run, kept so that a JSON export can
        # contain every processed image and not only the last one.
        self.results = []
        self.init_ui()

    def init_ui(self):
        """Build the window layout: toolbar, image/result splitter, progress bar."""
        self.setWindowTitle('OCR图片识别工具')
        self.setGeometry(100, 100, 1200, 800)

        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        main_layout = QVBoxLayout(central_widget)

        # Toolbar row.
        toolbar = self.create_toolbar()
        main_layout.addLayout(toolbar)

        splitter = QSplitter(Qt.Horizontal)

        # Left side: image preview and image information.
        left_widget = QWidget()
        left_layout = QVBoxLayout(left_widget)
        self.image_label = QLabel('点击"选择图片"加载图片')
        self.image_label.setAlignment(Qt.AlignCenter)
        self.image_label.setStyleSheet('border: 1px solid #ccc;')
        self.image_label.setMinimumSize(400, 300)
        left_layout.addWidget(self.image_label)

        info_group = QGroupBox('图片信息')
        info_layout = QVBoxLayout()
        self.info_text = QLabel('')
        info_layout.addWidget(self.info_text)
        info_group.setLayout(info_layout)
        left_layout.addWidget(info_group)
        splitter.addWidget(left_widget)

        # Right side: result tabs and statistics.
        right_widget = QWidget()
        right_layout = QVBoxLayout(right_widget)
        self.tab_widget = QTabWidget()

        # Plain-text result tab.
        text_tab = QWidget()
        text_layout = QVBoxLayout(text_tab)
        self.result_text = QTextEdit()
        self.result_text.setFont(QFont('Consolas', 10))
        text_layout.addWidget(self.result_text)
        self.tab_widget.addTab(text_tab, '文本结果')

        # JSON result tab.
        json_tab = QWidget()
        json_layout = QVBoxLayout(json_tab)
        self.json_text = QTextEdit()
        self.json_text.setFont(QFont('Consolas', 10))
        json_layout.addWidget(self.json_text)
        self.tab_widget.addTab(json_tab, 'JSON视图')
        right_layout.addWidget(self.tab_widget)

        # Recognition statistics.
        stats_group = QGroupBox('识别统计')
        stats_layout = QHBoxLayout()
        self.confidence_label = QLabel('置信度: --')
        self.language_label = QLabel('语言: --')
        self.engine_label = QLabel('引擎: --')
        stats_layout.addWidget(self.confidence_label)
        stats_layout.addWidget(self.language_label)
        stats_layout.addWidget(self.engine_label)
        stats_group.setLayout(stats_layout)
        right_layout.addWidget(stats_group)
        splitter.addWidget(right_widget)

        # Initial splitter proportions.
        splitter.setSizes([400, 800])
        main_layout.addWidget(splitter)

        # Progress bar, hidden until a run starts.
        self.progress_bar = QProgressBar()
        self.progress_bar.setVisible(False)
        main_layout.addWidget(self.progress_bar)

    def create_toolbar(self):
        """Create the toolbar row and return it as a QHBoxLayout."""
        toolbar = QHBoxLayout()

        # Select image files.
        self.select_btn = QPushButton('选择图片')
        self.select_btn.clicked.connect(self.select_images)
        toolbar.addWidget(self.select_btn)

        # Select a folder.
        self.select_dir_btn = QPushButton('选择文件夹')
        self.select_dir_btn.clicked.connect(self.select_directory)
        toolbar.addWidget(self.select_dir_btn)

        # Language selection (defaults to English+Chinese).
        toolbar.addWidget(QLabel('语言:'))
        self.lang_combo = QComboBox()
        self.lang_combo.addItems(['英文', '中文', '英文+中文', '日语', '韩语'])
        self.lang_combo.setCurrentIndex(2)
        toolbar.addWidget(self.lang_combo)

        # OCR engine selection.
        toolbar.addWidget(QLabel('引擎:'))
        self.engine_combo = QComboBox()
        self.engine_combo.addItems(['Tesseract', 'PaddleOCR'])
        toolbar.addWidget(self.engine_combo)

        # Preprocessing options.
        self.preprocess_check = QCheckBox('图像预处理')
        self.preprocess_check.setChecked(True)
        toolbar.addWidget(self.preprocess_check)
        self.deskew_check = QCheckBox('自动纠偏')
        toolbar.addWidget(self.deskew_check)

        # Start button.
        self.start_btn = QPushButton('开始识别')
        self.start_btn.clicked.connect(self.start_ocr)
        self.start_btn.setStyleSheet('background-color: #4CAF50; color: white;')
        toolbar.addWidget(self.start_btn)

        # Stop button, enabled only while a run is active.
        self.stop_btn = QPushButton('停止')
        self.stop_btn.clicked.connect(self.stop_ocr)
        self.stop_btn.setEnabled(False)
        toolbar.addWidget(self.stop_btn)

        # Save button.
        self.save_btn = QPushButton('保存结果')
        self.save_btn.clicked.connect(self.save_results)
        toolbar.addWidget(self.save_btn)

        toolbar.addStretch()
        return toolbar

    def select_images(self):
        """Let the user pick image files and preview the first one."""
        files, _ = QFileDialog.getOpenFileNames(
            self, '选择图片文件', '',
            '图片文件 (*.png *.jpg *.jpeg *.bmp *.tiff)'
        )
        if files:
            self.image_paths = [Path(f) for f in files]
            self.load_first_image()

    def select_directory(self):
        """Let the user pick a folder and collect the images directly inside it."""
        directory = QFileDialog.getExistingDirectory(self, '选择文件夹')
        if directory:
            # Compare suffixes in lower case so that e.g. "scan.JPG" is
            # found on case-sensitive file systems too.
            image_suffixes = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff'}
            self.image_paths = sorted(
                entry for entry in Path(directory).iterdir()
                if entry.is_file() and entry.suffix.lower() in image_suffixes
            )
            if self.image_paths:
                self.load_first_image()

    def load_first_image(self):
        """Show the first selected image and its basic information."""
        if self.image_paths:
            try:
                first = self.image_paths[0]
                pixmap = QPixmap(str(first))
                # QPixmap does not raise for unreadable files; it yields a
                # null pixmap, which has to be checked explicitly.
                if pixmap.isNull():
                    QMessageBox.warning(self, '错误', f'加载图片失败: {first.name}')
                    return
                scaled_pixmap = pixmap.scaled(
                    self.image_label.size(),
                    Qt.KeepAspectRatio,
                    Qt.SmoothTransformation
                )
                self.image_label.setPixmap(scaled_pixmap)

                info = f"文件: {first.name}\n"
                info += f"大小: {pixmap.width()} x {pixmap.height()}\n"
                info += f"格式: {first.suffix}\n"
                info += f"总数: {len(self.image_paths)} 张图片"
                self.info_text.setText(info)
            except Exception as e:
                QMessageBox.warning(self, '错误', f'加载图片失败: {e}')

    def start_ocr(self):
        """Start a background OCR run over the selected images."""
        if not self.image_paths:
            QMessageBox.warning(self, '警告', '请先选择图片!')
            return

        # Lock the controls that must not be used during a run.
        self.select_btn.setEnabled(False)
        self.select_dir_btn.setEnabled(False)
        self.start_btn.setEnabled(False)
        self.stop_btn.setEnabled(True)

        # Clear the previous results.
        self.result_text.clear()
        self.json_text.clear()
        self.results = []

        # Map the combo box texts to engine parameters.
        lang_map = {
            '英文': 'eng',
            '中文': 'chi_sim',
            '英文+中文': 'eng+chi_sim',
            '日语': 'jpn',
            '韩语': 'kor'
        }
        language = lang_map.get(self.lang_combo.currentText(), 'eng+chi_sim')
        engine = 'tesseract' if self.engine_combo.currentText() == 'Tesseract' else 'paddle'

        self.worker = OCRWorker(
            self.image_paths,
            language,
            engine,
            self.preprocess_check.isChecked(),
            self.deskew_check.isChecked()
        )
        self.worker.progress.connect(self.update_progress)
        self.worker.result_ready.connect(self.on_result_ready)
        self.worker.finished.connect(self.on_ocr_finished)
        self.worker.error.connect(self.on_ocr_error)

        self.progress_bar.setVisible(True)
        self.progress_bar.setValue(0)
        self.worker.start()

    def stop_ocr(self):
        """Cancel the running worker and wait until it has stopped."""
        if self.worker:
            self.worker.cancel()
            self.worker.wait()
            self.on_ocr_finished()

    def update_progress(self, value):
        """Show the given percentage on the progress bar."""
        self.progress_bar.setValue(value)

    def on_result_ready(self, result):
        """Display one result (or error) dict delivered by the worker."""
        self.results.append(result)
        if 'error' in result:
            text = f"错误: {result['error']}"
        else:
            text = result.get('text', '')
            confidence = result.get('confidence', 0)
            # The Tesseract path reports confidence on a 0-100 scale while
            # the PaddleOCR path reports 0-1; the '%' format multiplies by
            # 100, so bring larger values down to a fraction first.
            if confidence > 1:
                confidence = confidence / 100
            self.confidence_label.setText(f'置信度: {confidence:.2%}')
            self.language_label.setText(f'语言: {result.get("language", "")}')
            self.engine_label.setText(f'引擎: {result.get("engine", "").capitalize()}')

        # Append to the text view.
        self.result_text.append(f"=== {result.get('filename', 'Unknown')} ===\n")
        self.result_text.append(text)
        self.result_text.append("\n")

        # The JSON view shows the most recent result.
        self.json_text.setPlainText(json.dumps(result, ensure_ascii=False, indent=2))

    def on_ocr_finished(self):
        """Restore the controls after a run and notify the user once."""
        # This handler is reached both through the worker's finished signal
        # and through direct calls from stop_ocr/on_ocr_error; once the
        # worker reference has been cleared the run is already wrapped up.
        if self.worker is None:
            return
        self.select_btn.setEnabled(True)
        self.select_dir_btn.setEnabled(True)
        self.start_btn.setEnabled(True)
        self.stop_btn.setEnabled(False)
        self.progress_bar.setVisible(False)
        self.worker = None
        QMessageBox.information(self, '完成', 'OCR识别完成!')

    def on_ocr_error(self, error_msg):
        """Show a worker-level error and finish the run."""
        QMessageBox.critical(self, '错误', f'OCR识别出错:\n{error_msg}')
        self.on_ocr_finished()

    def save_results(self):
        """Save the results to a text or JSON file chosen by the user.

        For ``.json`` targets, a run with more than one result is written
        as a JSON list of all results; otherwise the content of the JSON
        view is written (re-serialized when it parses as JSON, verbatim
        when it does not). Other targets receive the text view content.
        """
        if not self.result_text.toPlainText().strip():
            QMessageBox.warning(self, '警告', '没有可保存的结果!')
            return

        file_path, _ = QFileDialog.getSaveFileName(
            self, '保存结果',
            f'ocr_result_{datetime.now().strftime("%Y%m%d_%H%M%S")}.txt',
            '文本文件 (*.txt);;JSON文件 (*.json)'
        )
        if not file_path:
            return

        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                if file_path.endswith('.json'):
                    if len(self.results) > 1:
                        # The JSON view only holds the last result; export
                        # the whole batch so no image is lost.
                        json.dump(self.results, f, ensure_ascii=False, indent=2)
                    else:
                        raw_json = self.json_text.toPlainText()
                        try:
                            data = json.loads(raw_json)
                        except ValueError:
                            # Not valid JSON (e.g. edited by hand): keep
                            # the text exactly as shown.
                            f.write(raw_json)
                        else:
                            json.dump(data, f, ensure_ascii=False, indent=2)
                else:
                    f.write(self.result_text.toPlainText())
            QMessageBox.information(self, '成功', f'结果已保存到:\n{file_path}')
        except Exception as e:
            QMessageBox.critical(self, '错误', f'保存失败: {e}')

    def closeEvent(self, event):
        """Stop a running worker before the window is closed."""
        if self.worker is not None:
            self.worker.cancel()
            self.worker.wait()
        event.accept()
def main():
    """Create the Qt application, show the main window and run the event loop.

    The process exits with the event loop's return code.
    """
    application = QApplication(sys.argv)
    # Fusion looks the same on every platform.
    application.setStyle('Fusion')

    # Application metadata.
    application.setApplicationName('OCR图片识别工具')
    application.setOrganizationName('OCR Tool')

    window = OCREditor()
    window.show()

    exit_code = application.exec_()
    sys.exit(exit_code)
if __name__ == '__main__':
main()
# 单张图片识别
python cli.py image.jpg
# 批量识别(传入目录即可处理其中的图片;--batch 参数会被解析,但当前代码并未使用它)
python cli.py folder_path --batch
# 指定语言和引擎
python cli.py image.jpg -l chi_sim -e paddle
# 启用预处理
python cli.py image.jpg --preprocess --deskew
# 保存为JSON格式
python cli.py image.jpg -f json -o ./results
# 启动GUI
python main.py
# 或直接运行
python gui.py
这个OCR工具完全跨平台,只需安装相应的依赖即可在Windows和Linux上运行。您可以根据需要调整配置和功能。