We observed 10 samples of products. Columns A-C (Length, PctPos, PctNeg) are the attributes of the 10 products, and column D is the category of each product.
Now we have a new product, with the following stats:
Length: 1.5
PctPos: 0.5
PctNeg: 0.5
Please use KNN to classify this new product. Please use K = 3 (3 nearest neighbors).
| Length | PctPos | PctNeg | Category |
| 1.66 | 0.84 | 0.28 | C |
| 1.62 | 0.86 | 0.25 | A |
| 2.04 | 0.7 | 0.23 | B |
| 1.68 | 0.78 | 0.03 | A |
| 1.62 | 0.76 | 0.95 | A |
| 1.68 | 0.88 | 0.02 | A |
| 2.08 | 0.66 | 0.29 | B |
| 1.82 | 0.66 | 0.58 | C |
| 2.06 | 0.68 | 0.53 | B |
| 1.8 | 0.64 | 0.41 | C |
import csv
def loadDataset(filename, trainingSet=None, numFeatures=3):
    """Read labelled samples from a CSV file and append them to ``trainingSet``.

    Every non-empty row is expected to start with ``numFeatures`` numeric
    attributes; those fields are converted to ``float``. Any remaining fields
    (e.g. the category label) are kept as the strings read from the file.

    Args:
        filename: Path of the CSV file to read.
        trainingSet: List the parsed rows are appended to. A fresh list is
            created when omitted.
        numFeatures: Number of leading columns to convert to ``float``.

    Returns:
        The list the rows were appended to (``trainingSet`` itself when one
        was supplied).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If one of the leading fields is not a valid number.
        IndexError: If a non-empty row has fewer than ``numFeatures`` fields.
    """
    # A list default would be created once and shared by every call.
    if trainingSet is None:
        trainingSet = []
    with open(filename, 'rt', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields [] for blank lines; skip those instead of
            # unconditionally dropping the final row of the file.
            if not row:
                continue
            for col in range(numFeatures):
                row[col] = float(row[col])
            trainingSet.append(row)
    return trainingSet
# Module-level placeholders for the two data sets. Nothing is loaded into
# them here, so both counts printed below are 0.
trainingSet = list()
testSet = list()
print('Train: ' + str(len(trainingSet)))
print('Test: ' + str(len(testSet)))
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance between two instances.

    Only the first ``length`` positions of each instance are compared, so a
    trailing class label stored after the attributes is ignored.

    Args:
        instance1: Indexable sequence whose first ``length`` items are numbers.
        instance2: Indexable sequence whose first ``length`` items are numbers.
        length: Number of leading attributes to include in the distance.

    Returns:
        The square root of the summed squared differences.

    Raises:
        IndexError: If either instance has fewer than ``length`` items.
    """
    squaredSum = 0
    for idx in range(length):
        squaredSum += (instance1[idx] - instance2[idx]) ** 2
    return squaredSum ** 0.5
# Sanity check for euclideanDistance().
# data1 holds the Length values of the first three samples and data2 holds
# their PctPos values; the trailing letters are dummy labels that are ignored
# because only the first 3 positions are compared.
# NOTE(review): KNN normally measures distance between product rows, not
# between columns - this call only exercises the function.
data1 = [1.66, 1.62, 2.04, 'a']
data2 = [0.84, 0.86, 0.7, 'b']
distance = euclideanDistance(data1, data2, 3)
print('Distance: ' + repr(distance))
def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    Args:
        trainingSet: List of rows; each row holds the numeric attributes
            followed by its class label as the last item.
        testInstance: The instance to classify. It may or may not carry a
            trailing label; only the attribute positions are compared.
        k: Number of neighbours wanted.

    Returns:
        Up to ``k`` rows of ``trainingSet`` ordered from nearest to farthest.
        Fewer rows are returned when the training set is smaller than ``k``;
        an empty list is returned for an empty training set or ``k <= 0``.
    """
    if not trainingSet:
        return []
    # Training rows always end with their label, so their width fixes the
    # attribute count. Using len(testInstance) - 1 dropped the last attribute
    # whenever the query was passed without a label.
    length = min(len(trainingSet[0]) - 1, len(testInstance))
    distances = [
        (row, euclideanDistance(testInstance, row, length))
        for row in trainingSet
    ]
    # list.sort is stable, so equally distant rows keep their input order.
    distances.sort(key=lambda pair: pair[1])
    return [row for row, _ in distances[:max(k, 0)]]
# Smoke test for getNeighbors(): of the two labelled rows, the 'b' row is the
# one closest to [5, 5, 5], so it is the single neighbour returned for k = 1.
trainSet = [[2.2, 2.4, 2.1, 'a'], [4.9, 4.1, 4.3, 'b']]
testInstance = [5, 5, 5]
k = 1
neighbors = getNeighbors(trainSet, testInstance, k)
# print(neighbors)  # uncomment to inspect the selected neighbour
def getResponse(neighbors):
    """Return the majority class label among ``neighbors``.

    Args:
        neighbors: Non-empty list of rows whose last item is the class label.

    Returns:
        The label with the most votes. When several labels tie, the one that
        appears first in ``neighbors`` wins.

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    from collections import Counter

    classVotes = Counter(row[-1] for row in neighbors)
    # most_common() lists equal counts in first-seen order, which matches the
    # stable descending sort this replaces.
    return classVotes.most_common(1)[0][0]
# Quick check of getResponse(): two of the three hand-made neighbours are
# labelled 'a' and one 'b', so the majority vote stored in `response` is 'a'.
neighbors = [
    [1.2, 1.8, 2.2, 'a'],
    [2.3, 2.9, 2.4, 'a'],
    [3.1, 3.4, 3.5, 'b'],
]
response = getResponse(neighbors)
def main(filename='sample.data', testSet=None, k=3):
    """Classify every instance in ``testSet`` with k-nearest-neighbours.

    Loads the labelled training rows from ``filename``, then for each test
    instance prints its ``k`` nearest training rows and the predicted label.

    Args:
        filename: CSV file holding the training samples (three numeric
            attributes followed by the category). The file must exist.
        testSet: List of unlabelled instances to classify. Defaults to the two
            hard-coded instances the script originally used.
        k: Number of neighbours that vote on each prediction.

    Returns:
        The list of predicted labels, one per test instance.
    """
    trainingSet = []
    if testSet is None:
        # NOTE(review): these values are on a different scale from the product
        # table (Length/PctPos/PctNeg) - pass testSet=[[1.5, 0.5, 0.5]] to
        # classify the new product from the question.
        testSet = [[1.75, 72, 35], [1.82, 82, 42]]
    loadDataset(filename, trainingSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))
    predictions = []
    for instance in testSet:
        neighbors = getNeighbors(trainingSet, instance, k)
        print(neighbors)
        result = getResponse(neighbors)
        predictions.append(result)
        print('> predicted=' + repr(result))
    # No accuracy is computed: the test instances carry no true label to
    # compare against, and no getAccuracy helper is defined in the visible
    # part of this file.
    return predictions
# Calling main() starts the program. The guard keeps an import of this module
# from triggering the file read; it still runs as a script or inside a
# Jupyter notebook cell, where __name__ is '__main__' as well.
if __name__ == '__main__':
    main()
We observed 10 samples of the products. The column A-C are the attributes of the 10...