SAM/detect_c.py
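"""Run a YOLOv5 model on a live source and display annotated detections.

Supports three input modes, selected via the `camera` flag below:
local webcam (0), newest image in a local folder (1), or a screen-capture region (2).
"""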

import glob
import os
import sys
import time
from pathlib import Path

import cv2
import numpy as np
import torch
from PIL import ImageGrab

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # assumption: this script sits in the YOLOv5 repo root
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # make models/ and utils/ importable when run from elsewhere

from models.common import DetectMultiBackend
from utils.augmentations import letterbox
from utils.general import non_max_suppression, scale_boxes
from utils.plots import Annotator, colors
from utils.torch_utils import select_device
folder_path = R'C:\Users\lengdan\Desktop\yolov5-master\data\images'  # local image folder (mode 1)
camera = 2  # 0: local webcam, 1: images from folder_path, 2: screen capture
class mydataload:
    """Iterable source yielding (preprocessed CHW image, original image).

    The stream never ends: modes 0 and 2 grab live frames, and mode 1
    keeps returning the newest image once it reaches the end of the folder.
    """

    def __init__(self):
        self.count = 0
        if camera == 0:
            self.cap = cv2.VideoCapture(0)

    def __iter__(self):
        return self

    def __next__(self):
        if camera == 0:  # local webcam
            _, im0 = self.cap.read()
        elif camera == 1:  # images in a local folder
            file_list = glob.glob(os.path.join(folder_path, '*.jpg'))
            # Sort by modification time so the most recently added image is last
            file_list.sort(key=os.path.getmtime)
            im0 = cv2.imread(file_list[self.count])
            # Advance through the folder, then stay on the last (newest) image
            self.count = self.count if self.count == len(file_list) - 1 else self.count + 1
        else:  # camera == 2: capture a region of the screen
            x1, y1 = 1000, 100   # top-left corner of the capture region
            x2, y2 = 1900, 1000  # bottom-right corner
            img = ImageGrab.grab(bbox=(x1, y1, x2, y2))
            im0 = np.array(img)  # ImageGrab frames are RGB
        im = letterbox(im0, 640, auto=True)[0]  # padded resize to 640 (stride-aligned)
        im = im.transpose((2, 0, 1))  # HWC to CHW
        if camera != 2:
            im = im[::-1]  # BGR to RGB (cv2 sources only; screen grabs are already RGB)
        im = np.ascontiguousarray(im)  # contiguous memory for torch.from_numpy
        return im, im0
def get_image(model, im, im0s, conf_thres=0.5, iou_thres=0.5, line_thickness=3):
    """Run inference on one preprocessed image and return a copy annotated with boxes."""
    pred = model(im, visualize=False)
    pred = non_max_suppression(pred, conf_thres, iou_thres, None, False, max_det=1000)
    for i, det in enumerate(pred):
        im0, names = im0s.copy(), model.names
        annotator = Annotator(im0, line_width=line_thickness, example=str(names))
        if len(det):
            # Rescale boxes from the letterboxed input size back to the original image
            det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                c = int(cls)
                label = f'{names[c]} {conf:.2f}'
                annotator.box_label(xyxy, label, color=colors(c, True))
        return annotator.result()  # batch size is 1, so the first image is the only one
if __name__ == "__main__":
    device = select_device('0')  # CUDA device 0; pass 'cpu' if no GPU is available
    model = DetectMultiBackend('7_29_last.pt', device=device, dnn=False, data='', fp16=False)
    dataset = mydataload()
    model.warmup(imgsz=(1, 3, 640, 640))  # warmup
    for im, im0s in dataset:
        t0 = time.time()
        im = torch.from_numpy(im).to(model.device)
        im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
        im /= 255  # 0-255 to 0.0-1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        im0 = get_image(model, im, im0s)
        if camera == 2:
            im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR)  # screen grabs are RGB; cv2.imshow expects BGR
        cv2.namedWindow('1', cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # resizable window
        cv2.resizeWindow('1', im0.shape[1] // 2, im0.shape[0] // 2)
        cv2.imshow('1', im0)
        if cv2.waitKey(1) == ord('Q'):  # press Shift+Q in the display window to quit
            exit(0)
        print(time.time() - t0)  # per-frame latency in seconds
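
# Usage notes (assumptions, not from the original file): run this script from the
# YOLOv5 repo root so `models/` and `utils/` resolve, with the trained weights
# file '7_29_last.pt' in the working directory. Select the input source by
# editing the `camera` flag at the top before launching.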