Computer Vision - Class Notes

Day #8 - Computer Vision

In [1]:
import cv2
In [2]:
cv2.__version__
Out[2]:
'2.4.11'
In [3]:
# Load an image using 'imread', specifying the path to the image
# (note: the name 'input' shadows Python's built-in of the same name)
input = cv2.imread('input.jpg')
In [4]:
# To display our image variable, we use 'imshow'
# The first parameter is the title shown on the image window
# The second parameter is the image variable
cv2.imshow('Hello World',input)

# How long to keep the window open (time is in milliseconds here)
# With no argument, the code sits and waits at this line until a key is pressed
cv2.waitKey()

# This closes all open windows
# Failure to place this will cause your program to hang
cv2.destroyAllWindows()
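
A natural companion to 'imread' and 'imshow' is 'imwrite', which writes an image array back to disk; a minimal sketch (the output filename here is just an example):

# Save the loaded image back out; the file extension determines the format
cv2.imwrite('output.jpg', input)
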
In [5]:
# Import numpy
import numpy as np
In [6]:
# Dimensions of our image array
input.shape
Out[6]:
(830L, 1245L, 3L)
In [7]:
# Our image array 
print input
[[[12 18 31]
  [12 18 31]
  [11 17 30]
  ...,
  [21 24 38]
  [21 24 38]
  [21 24 38]]

 [[12 18 31]
  [12 18 31]
  [12 18 31]
  ...,
  [21 24 38]
  [21 24 38]
  [21 24 38]]

 [[12 18 31]
  [12 18 31]
  [12 18 31]
  ...,
  [21 24 38]
  [21 24 38]
  [21 24 38]]

 ...,
 [[ 4  7 12]
  [ 4  7 12]
  [ 3  7 12]
  ...,
  [18 23 38]
  [19 24 39]
  [20 25 40]]

 [[ 3  6 11]
  [ 3  6 11]
  [ 3  6 11]
  ...,
  [18 23 38]
  [19 24 39]
  [20 25 40]]

 [[ 3  6 11]
  [ 3  6 11]
  [ 2  5 10]
  ...,
  [18 23 38]
  [18 23 38]
  [19 24 39]]]
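
Note: OpenCV stores pixels in BGR order, not RGB, so each inner triplet above reads [Blue, Green, Red]; the array shape is (height, width, channels).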

Grayscaling

In [8]:
import cv2

# Load our input image
image = cv2.imread('input.jpg')
cv2.imshow('Original', image)
cv2.waitKey()

# We use cvtColor to convert to grayscale
gray_image = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)

cv2.imshow('Grayscale',gray_image)
cv2.waitKey()
cv2.destroyAllWindows()
In [9]:
# Another, faster method for grayscaling
# Reads in the image immediately as grayscale

img = cv2.imread('input.jpg',0)

cv2.imshow('Grayscale',img)
cv2.waitKey()
cv2.destroyAllWindows()
In [10]:
# Looking at the individual color levels for specific pixels

B, G, R = image[0,0]
B1, G1, R1 = image[10,50]
print B, G, R
print B1, G1, R1
print image.shape
12 18 31
13 19 32
(830L, 1245L, 3L)
In [11]:
# See value changes when we convert to grayscale

gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
print gray_img[0,0], gray_img[10,50]
print gray_img.shape
21 22
(830L, 1245L)
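
The grayscale values can be sanity-checked by hand: COLOR_BGR2GRAY uses the standard luma weights Y = 0.299*R + 0.587*G + 0.114*B. A quick check against the pixel at [0,0] above:

# Pixel [0,0] is B=12, G=18, R=31 (see the previous cell)
print 0.299*31 + 0.587*18 + 0.114*12  # ~21.2, which rounds to the 21 printed above
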
In [12]:
# Converting an image from BGR color to HSV
# Hue: 0 - 180, Saturation: 0 - 255, Value : 0 - 255

img = cv2.imread('input.jpg')

hsv_image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

cv2.imshow('HSV image', hsv_image)
cv2.imshow('Hue Channel', hsv_image[:,:,0])
cv2.imshow('Saturation Channel',hsv_image[:,:,1])
cv2.imshow('Value Channel',hsv_image[:,:,2])

cv2.waitKey()
cv2.destroyAllWindows()
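
HSV is especially handy for color filtering, since a color occupies a narrow hue band. As a sketch (not from the original notes), cv2.inRange can isolate, say, blue regions; the bounds below are illustrative and would need tuning for a real image:

# Blue sits roughly at hue 100-130 on OpenCV's 0-180 hue scale
lower_blue = np.array([100, 50, 50], dtype=np.uint8)
upper_blue = np.array([130, 255, 255], dtype=np.uint8)

# inRange produces a binary mask; bitwise_and keeps only the masked pixels
mask = cv2.inRange(hsv_image, lower_blue, upper_blue)
blue_only = cv2.bitwise_and(img, img, mask=mask)

cv2.imshow('Blue Regions Only', blue_only)
cv2.waitKey()
cv2.destroyAllWindows()
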
In [13]:
image = cv2.imread('input.jpg')

# OpenCV's 'split' function splits the image into its B, G, and R channels
B, G, R = cv2.split(image)

print B.shape
cv2.imshow('Red',R)
cv2.imshow('Green',G)
cv2.imshow('Blue',B)
cv2.waitKey()
cv2.destroyAllWindows()

# Let's re-make the original image
merged = cv2.merge([B, G, R])
cv2.imshow('Merged',merged)

# Let's amplify the blue color
merged = cv2.merge([B+100,G,R])
cv2.imshow('Merged with Blue Amplified',merged)

cv2.waitKey()
cv2.destroyAllWindows()
(830L, 1245L)
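
One caveat with B+100 above: the channels are uint8 arrays, so plain numpy addition wraps around modulo 256 (a pixel at 200 becomes 44 and turns dark instead of brighter). A small sketch of a saturating alternative, clipping in a wider dtype before casting back:

# Clip to [0, 255] so bright pixels saturate instead of wrapping around
B_boosted = np.clip(B.astype(np.int32) + 100, 0, 255).astype(np.uint8)
merged_saturated = cv2.merge([B_boosted, G, R])
cv2.imshow('Merged with Blue Saturated', merged_saturated)
cv2.waitKey()
cv2.destroyAllWindows()
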
In [14]:
# Translating the image using a translation matrix

import cv2
import numpy as np

image = cv2.imread('input.jpg')

# Store Height and width of the image
height, width = image.shape[:2]

quarter_height, quarter_width = height/4, width/4

# T is our translation matrix
T = np.float32([[1,0,quarter_width],[0,1,quarter_height]])

# We use warpAffine to transform the image using the matrix, T
img_translation = cv2.warpAffine(image,T,(width,height))
cv2.imshow('Translation',img_translation)
cv2.waitKey()
cv2.destroyAllWindows()
In [15]:
# Let's take a look at T

print T
[[   1.    0.  311.]
 [   0.    1.  207.]]
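
warpAffine applies T to every pixel coordinate: a pixel at (x, y) lands at (x + 311, y + 207), so the whole image shifts right by width/4 = 1245/4 = 311 and down by height/4 = 830/4 = 207 (integer division in Python 2).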
In [16]:
# Rotating the image

import cv2
import numpy as np

image = cv2.imread('input.jpg')
height, width = image.shape[:2]

# getRotationMatrix2D takes (center, angle, scale)
# Divide by two to rotate around the image's center; -25 degrees (clockwise) at half scale
rotation_matrix = cv2.getRotationMatrix2D((width/2,height/2),-25,0.5)

rotated_image = cv2.warpAffine(image,rotation_matrix,(width,height))

cv2.imshow('Rotated Image',rotated_image)
cv2.waitKey()
cv2.destroyAllWindows()
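
For exact 90-degree rotations there is a cheaper route than warpAffine: transpose the array, then flip it. A minimal sketch (this rotates 90 degrees clockwise):

# Transpose swaps rows and columns; the horizontal flip then
# turns the transpose into a true 90-degree clockwise rotation
rotated_90 = cv2.flip(cv2.transpose(image), 1)
cv2.imshow('Rotated 90 Degrees', rotated_90)
cv2.waitKey()
cv2.destroyAllWindows()
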

Edge Detection and Image Gradients

In [17]:
import cv2
import numpy as np

image = cv2.imread('input.jpg',0)

height, width = image.shape[:2]

# Extract Sobel edges (dx=1, dy=0 gives the x-gradient; dx=0, dy=1 the y-gradient)
sobel_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5)
sobel_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=5)

cv2.imshow('Original',image)
cv2.waitKey()
cv2.imshow('Sobel X',sobel_x)
cv2.waitKey()
cv2.imshow('Sobel Y', sobel_y)
cv2.waitKey()

sobel_OR = cv2.bitwise_or(sobel_x, sobel_y)
cv2.imshow('Sobel_OR',sobel_OR)
cv2.waitKey()

laplacian = cv2.Laplacian(image, cv2.CV_64F)
cv2.imshow('Laplacian',laplacian)
cv2.waitKey()

# Canny edge detection uses two gradient thresholds:
# gradients below 50 are rejected, gradients above 120 are kept as edges,
# and values in between are kept only if connected to a strong edge
canny = cv2.Canny(image, 50, 120)
cv2.imshow('Canny',canny)
cv2.waitKey()

cv2.destroyAllWindows()
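
A display note: the Sobel and Laplacian outputs are CV_64F arrays containing negative values, which imshow does not render faithfully. A common fix (a sketch, not from the original notes) is to take absolute values and scale back to 8-bit first:

# convertScaleAbs takes |value| and saturates the result into uint8
abs_sobel_x = cv2.convertScaleAbs(sobel_x)
cv2.imshow('Sobel X (absolute, 8-bit)', abs_sobel_x)
cv2.waitKey()
cv2.destroyAllWindows()
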

Project 1 - Live Sketch Using Webcam

In [18]:
import cv2
import numpy as np

# Our sketch generating function
def sketch(image):
    # Convert image to grayscale
    img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Clean up the image using a Gaussian blur
    img_gray_blur = cv2.GaussianBlur(img_gray, (5,5),0)

    # Extract Edges
    canny_edges = cv2.Canny(img_gray_blur,10,80)

    # Invert-binarize the image so the edges show black on a white background
    ret, mask = cv2.threshold(canny_edges,70,255,cv2.THRESH_BINARY_INV)
    return mask

# Initialize webcam; 'cap' is the object returned by VideoCapture
# Each cap.read() call returns a boolean indicating success (ret)
# and the image captured from the webcam (frame)

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    cv2.imshow('Our Live Sketcher',sketch(frame))
    if cv2.waitKey(1) == 13: #13 is the Enter Key
        break

# Release camera and close windows
cap.release()
cv2.destroyAllWindows()

Object Detection

Finding Waldo - Template matching

In [19]:
# Template matching - we have one small image of Waldo and another image that contains him
# We slide the smaller image over every position in the larger photo
# and choose the location where the match score is best

import cv2
import numpy as np

# Load input image and convert to grayscale
image = cv2.imread('WaldoBeach.jpg')

cv2.imshow('Where is Waldo?',image)
cv2.waitKey(0)
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)

# Load Template Image
template = cv2.imread('waldo.jpg',0) # read in as grayscale

result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)

# Create a bounding box sized to the template
top_left = max_loc
h, w = template.shape[:2]
bottom_right = (top_left[0] + w, top_left[1] + h)
cv2.rectangle(image, top_left, bottom_right, (0,0,255),3)

cv2.imshow('Where is Waldo?',image)
cv2.waitKey(0)
cv2.destroyAllWindows()
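
matchTemplate can also report every position that matches well, not just the single best one, by using a normalized method and thresholding the whole result map. A sketch reusing the same gray/template pair (the 0.8 threshold is illustrative):

# TM_CCOEFF_NORMED scores every position in [-1, 1]; 1.0 is a perfect match
result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)
h, w = template.shape[:2]

# Keep every location scoring above the threshold
locations = np.where(result >= 0.8)
for pt in zip(*locations[::-1]):  # np.where gives (rows, cols); reverse to (x, y)
    cv2.rectangle(image, pt, (pt[0] + w, pt[1] + h), (0, 0, 255), 2)

cv2.imshow('All Matches', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
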

Oriented FAST and Rotated BRIEF (ORB)

In [ ]:
import cv2
import numpy as np

image = cv2.imread('input.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Create an ORB object; we can specify the number of keypoints we want
orb = cv2.ORB(500)

# Determine key points
keypoints = orb.detect(gray,None)

# Obtain the descriptors
keypoints, descriptors = orb.compute(gray, keypoints)
print 'Number of keypoints Detected: ',len(keypoints)

# Draw rich keypoints on input image
image = cv2.drawKeypoints(image, keypoints, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

cv2.imshow('Feature Method - ORB', image)
cv2.waitKey()
cv2.destroyAllWindows()
Number of keypoints Detected:  500

Object Detection using ORB

In [ ]:
import cv2
import numpy as np


def ORB_detector(new_image, image_template):
    # Function that compares input image to template
    # It then returns the number of ORB matches between them

    image1 = cv2.cvtColor(new_image, cv2.COLOR_BGR2GRAY)

    # Create an ORB detector limited to 1000 keypoints, with a scale pyramid factor of 1.2
    orb = cv2.ORB(1000, 1.2)

    # Detect keypoints of original image
    (kp1, des1) = orb.detectAndCompute(image1, None)

    # Detect keypoints of the template image
    (kp2, des2) = orb.detectAndCompute(image_template, None)

    # Create matcher 
    # Note we're no longer using FLANN-based matching
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    # Do matching
    matches = bf.match(des1,des2)

    # Sort the matches based on distance.  Least distance
    # is better
    matches = sorted(matches, key=lambda val: val.distance)

    return len(matches)

cap = cv2.VideoCapture(0)

# Load our image template, this is our reference image
image_template = cv2.imread('box_in_scene.png', 0)
# image_template = cv2.imread('images/kitkat.jpg', 0) 

while True:

    # Get webcam images
    ret, frame = cap.read()

    # Get height and width of webcam frame
    height, width = frame.shape[:2]

    # Define ROI box dimensions (these are loop-invariant and really belong outside the loop)
    top_left_x = width / 3
    top_left_y = (height / 2) + (height / 4)
    bottom_right_x = (width / 3) * 2
    bottom_right_y = (height / 2) - (height / 4)

    # Draw rectangular window for our region of interest
    cv2.rectangle(frame, (top_left_x,top_left_y), (bottom_right_x,bottom_right_y), 255, 3)

    # Crop window of observation we defined above
    cropped = frame[bottom_right_y:top_left_y , top_left_x:bottom_right_x]

    # Flip frame orientation horizontally
    frame = cv2.flip(frame,1)

    # Get number of ORB matches 
    matches = ORB_detector(cropped, image_template)

    # Display status string showing the current no. of matches 
    output_string = "Matches = " + str(matches)
    cv2.putText(frame, output_string, (50,450), cv2.FONT_HERSHEY_COMPLEX, 2, (250,0,150), 2)

    # Our threshold to indicate object detection
    # For new images or lighting conditions you may need to experiment a bit
    # Note: since the ORB detector returns up to 1000 keypoints,
    # 350 matches is roughly a minimum 35% match
    threshold = 350

    # If matches exceed our threshold then object has been detected
    if matches > threshold:
        cv2.rectangle(frame, (top_left_x,top_left_y), (bottom_right_x,bottom_right_y), (0,255,0), 3)
        cv2.putText(frame,'Object Found',(50,50), cv2.FONT_HERSHEY_COMPLEX, 2 ,(0,255,0), 2)

    cv2.imshow('Object Detector using ORB', frame)

    if cv2.waitKey(1) == 13: #13 is the Enter Key
        break

cap.release()
cv2.destroyAllWindows()

Face Recognition

Step 1 - Create Training Data

In [ ]:
import cv2
import numpy as np

# Load HAAR face classifier
face_classifier = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# Load functions
def face_extractor(img):
    # Function detects faces and returns the cropped face
    # If no face is detected, it returns None

    gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    faces = face_classifier.detectMultiScale(gray, 1.3, 5)

    # detectMultiScale returns an empty tuple when nothing is found
    if len(faces) == 0:
        return None

    # Crop all faces found
    for (x,y,w,h) in faces:
        cropped_face = img[y:y+h, x:x+w]

    return cropped_face

# Initialize Webcam
cap = cv2.VideoCapture(0)
count = 0

# Collect 100 samples of your face from webcam input
while True:

    ret, frame = cap.read()
    # Call face_extractor once and reuse the result
    face = face_extractor(frame)
    if face is not None:
        count += 1
        face = cv2.resize(face, (200, 200))
        face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)

        # Save file in specified directory with unique name
        file_name_path = 'C:\\Users\\nwerner\\DevMasters\\Class Notes\\Day #8\\Faces\\Face Photo ' + str(count) + '.jpg'
        cv2.imwrite(file_name_path, face)

        # Put count on images and display live count
        cv2.putText(face, str(count), (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,0), 2)
        cv2.imshow('Face Cropper', face)

    else:
        print "Face not found"
        pass

    if cv2.waitKey(1) == 13 or count == 100: #13 is the Enter Key
        break

cap.release()
cv2.destroyAllWindows()
print "Collecting Samples Complete"

Step 2 - Train Model

In [ ]:
import cv2
import numpy as np
from os import listdir
from os.path import isfile, join

# Get the training data we previously made
data_path = 'C:\\Users\\nwerner\\DevMasters\\Class Notes\\Day #8\\Faces\\'
onlyfiles = [f for f in listdir(data_path) if isfile(join(data_path, f))]

# Create arrays for training data and labels
Training_Data, Labels = [], []

# Open training images in our datapath
# Create a numpy array for training data
for i, file_name in enumerate(onlyfiles):
    image_path = data_path + file_name
    images = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    Training_Data.append(np.asarray(images, dtype=np.uint8))
    Labels.append(i)

# Create a numpy array for both training data and labels
Labels = np.asarray(Labels, dtype=np.int32)

# Initialize facial recognizer
model = cv2.createLBPHFaceRecognizer()
# NOTE: For OpenCV 3.0 use cv2.face.createLBPHFaceRecognizer()

# Let's train our model 
model.train(np.asarray(Training_Data), np.asarray(Labels))
print "Model trained sucessefully"

Step 3 - Run Our Facial Recognition

In [ ]:
import cv2
import numpy as np


face_classifier = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

def face_detector(img, size=0.5):

    # Convert image to grayscale
    gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    faces = face_classifier.detectMultiScale(gray, 1.3, 5)
    # detectMultiScale returns an empty tuple when nothing is found
    if len(faces) == 0:
        return img, []

    for (x,y,w,h) in faces:
        cv2.rectangle(img,(x,y),(x+w,y+h),(0,255,255),2)
        roi = img[y:y+h, x:x+w]
        roi = cv2.resize(roi, (200, 200))
    return img, roi


# Open Webcam
cap = cv2.VideoCapture(0)

while True:

    ret, frame = cap.read()

    image, face = face_detector(frame)

    try:
        face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)

        # Pass face to prediction model
        # "results" is a tuple of (label, confidence); the confidence is a
        # distance measure, so lower values mean a closer match
        results = model.predict(face)

        if results[1] < 500:
            confidence = int( 100 * (1 - (results[1])/400) )
            display_string = str(confidence) + '% Confident it is User'

        cv2.putText(image, display_string, (100, 120), cv2.FONT_HERSHEY_COMPLEX, 1, (255,120,150), 2)

        if confidence > 75:
            cv2.putText(image, "Unlocked", (250, 450), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,0), 2)
            cv2.imshow('Face Recognition', image )
        else:
            cv2.putText(image, "Locked", (250, 450), cv2.FONT_HERSHEY_COMPLEX, 1, (0,0,255), 2)
            cv2.imshow('Face Recognition', image )

    except:
        cv2.putText(image, "No Face Found", (220, 120) , cv2.FONT_HERSHEY_COMPLEX, 1, (0,0,255), 2)
        cv2.putText(image, "Locked", (250, 450), cv2.FONT_HERSHEY_COMPLEX, 1, (0,0,255), 2)
        cv2.imshow('Face Recognition', image )
        pass

    if cv2.waitKey(1) == 13: #13 is the Enter Key
        break

cap.release()
cv2.destroyAllWindows()