Skip navigation

Category Archives: Exercise 4

chair-classification

Above is an image of my Exercise 4.

==============================================================

Description:

This Processing applet was made to explore how machine learning can be used to classify chairs. The applet classifies images from a database I made of chairs designed by Verner Panton, Charles and Ray Eames, Le Corbusier, Pierre Jeanneret, Charlotte Perriand, Harry Bertoia, and Eero Saarinen. When the applet is run, it chooses nine chair images and tries to classify them by their designers. The number on the top left of each image represents the actual designer of the chair and the number on the top right of each image represents the applet’s guess for the designer of the chair. The applet learns from a database of chairs and their designers prior to choosing the nine to guess, and the applet’s guess is displayed in red if it is incorrect, and is displayed in green if it is correct. The designers that the numbers represent are as follows:

1:  Bertoia

2:  Eames

3:  Panton

4:  Saarinen

5:  Le Corbusier, Jeanneret, and Perriand

==============================================================

Code:

import wekaizing.*;
import java.io.File;
import java.lang.Integer;
// Holds one chair sample: its true designer label, the full-size
// image used for display, and the 10x10 pixel feature vector
// (100 pixel values + 1 label slot) handed to Weka.
class digitImage {
  int number;       // true designer label (see loadDigits)
  PImage digit;     // full-size image, kept for display
  int[] pixeldata;  // 100 pixels + true label at index 100

  public digitImage() {
    pixeldata = new int[101];
  }
}
WekaData digitsTrain;      // training rows (images 40..end, see loadDigits)
WekaData digitsTest;       // held-out rows (first 40 images)
WekaClassifier classifier; // logistic-regression model
digitImage[] digits;       // all loaded images, in directory order
int[] results;             // predicted label per test row
PFont HNL_font;            // font for the overlay numbers
void setup() {
background(0);
size(680,680);
HNL_font = loadFont(“HelveticaNeue-Light-100.vlw”);
textFont(HNL_font, 15);
digitsTrain = new WekaData();
digitsTest = new WekaData();
for (int i = 0; i < 100; i++) {
digitsTrain.AddAttribute(Integer.toString(i));
digitsTest.AddAttribute(Integer.toString(i));
}
Object[] digitarray = new Object[] {0,1,2,3,4,5,6,7,8,9};
digitsTrain.AddAttribute(“digit”,digitarray);
digitsTest.AddAttribute(“digit”,digitarray);
loadDigits(“digits”);
digitsTrain.setClassIndex(100);
digitsTest.setClassIndex(100);
classifier = new WekaClassifier(WekaClassifier.LOGISTIC);
classifier.Build(digitsTrain);
print(“Training done”);
results = classifier.Classify(digitsTest);
print(“Classification done”);
drawResults();
}
void loadDigits(String digitfolder) {
File digitfiles = new File(sketchPath, “data/” + digitfolder);
String[] files = digitfiles.list(filter);
digits = new digitImage[files.length];
String numbers[] = loadStrings(digitfolder + “/digits.txt”);
for (int i = 0; i < files.length; i++) {
println(“Loading image ” + files[i]);
digits[i] = new digitImage();
digits[i].digit = loadImage(“data/” + digitfolder + “/” + files[i]);
digits[i].number = Integer.valueOf(numbers[i]);
PImage resizedImg = loadImage(“data/” + digitfolder + “/” + files[i]);
resizedImg.resize(10,10);
resizedImg.loadPixels();
for (int j = 0; j < 100; j++) {
digits[i].pixeldata[j] = resizedImg.pixels[j];
}
digits[i].pixeldata[100] = digits[i].number;
if (i < 40) {
digitsTest.InsertData(digits[i].pixeldata);
} else {
digitsTrain.InsertData(digits[i].pixeldata);
}
}
}
void drawResults() {
float num_correct = 0.0, total = 0.0;
int imgx, imgy;
for (int i = 0; i < 12; i++) {
imgx = (i % 3) * 220 + 20;
imgy = (i / 3) * 220 + 20;
image(digits[i].digit,imgx,imgy);
}
for (int i = 0; i < 9; i++) {
imgx = (i % 3) * 220 + 25;
imgy = (i / 3) * 220 + 35;
fill(0);
text(digits[i].number, imgx, imgy);
if(digits[i].number == results[i]){
fill(0,255,0);
}
else{
fill(255,0,0);
}
text(results[i], imgx + 180, imgy);
total += 1.0;
if(digits[i].number == results[i])
num_correct += 1.0;
}
println(“\n” + “Accuracy = ” + num_correct/total*40 + “%”);
}
FilenameFilter filter = new FilenameFilter() {
public boolean accept(File dir, String name) {
if (name.toLowerCase().endsWith(“.png”) || name.toLowerCase().endsWith(“.jpg”) || name.toLowerCase().endsWith(“.gif”)) return true;
return false;
}
};

import wekaizing.*;

import java.io.File;

import java.lang.Integer;

// Holds one chair sample: its true designer label, the full-size
// image used for display, and the 10x10 pixel feature vector
// (100 pixel values + 1 label slot) handed to Weka.
class digitImage {
  int number;       // true designer label (see loadDigits)
  PImage digit;     // full-size image, kept for display
  int[] pixeldata;  // 100 pixels + true label at index 100

  public digitImage() {
    pixeldata = new int[101];
  }
}

WekaData digitsTrain;      // training rows (images 40..end, see loadDigits)
WekaData digitsTest;       // held-out rows (first 40 images)
WekaClassifier classifier; // logistic-regression model
digitImage[] digits;       // all loaded images, in directory order
int[] results;             // predicted label per test row
PFont HNL_font;            // font for the overlay numbers

void setup() {

background(0);

size(680,680);

HNL_font = loadFont(“HelveticaNeue-Light-100.vlw”);

textFont(HNL_font, 15);

digitsTrain = new WekaData();

digitsTest = new WekaData();

for (int i = 0; i < 100; i++) {

digitsTrain.AddAttribute(Integer.toString(i));

digitsTest.AddAttribute(Integer.toString(i));

}

Object[] digitarray = new Object[] {0,1,2,3,4,5,6,7,8,9};

digitsTrain.AddAttribute(“digit”,digitarray);

digitsTest.AddAttribute(“digit”,digitarray);

loadDigits(“digits”);

digitsTrain.setClassIndex(100);

digitsTest.setClassIndex(100);

classifier = new WekaClassifier(WekaClassifier.LOGISTIC);

classifier.Build(digitsTrain);

print(“Training done”);

results = classifier.Classify(digitsTest);

print(“Classification done”);

drawResults();

}

void loadDigits(String digitfolder) {

File digitfiles = new File(sketchPath, “data/” + digitfolder);

String[] files = digitfiles.list(filter);

digits = new digitImage[files.length];

String numbers[] = loadStrings(digitfolder + “/digits.txt”);

for (int i = 0; i < files.length; i++) {

println(“Loading image ” + files[i]);

digits[i] = new digitImage();

digits[i].digit = loadImage(“data/” + digitfolder + “/” + files[i]);

digits[i].number = Integer.valueOf(numbers[i]);

PImage resizedImg = loadImage(“data/” + digitfolder + “/” + files[i]);

resizedImg.resize(10,10);

resizedImg.loadPixels();

for (int j = 0; j < 100; j++) {

digits[i].pixeldata[j] = resizedImg.pixels[j];

}

digits[i].pixeldata[100] = digits[i].number;

if (i < 40) {

digitsTest.InsertData(digits[i].pixeldata);

} else {

digitsTrain.InsertData(digits[i].pixeldata);

}

}

}

void drawResults() {

float num_correct = 0.0, total = 0.0;

int imgx, imgy;

for (int i = 0; i < 12; i++) {

imgx = (i % 3) * 220 + 20;

imgy = (i / 3) * 220 + 20;

image(digits[i].digit,imgx,imgy);

}

for (int i = 0; i < 9; i++) {

imgx = (i % 3) * 220 + 25;

imgy = (i / 3) * 220 + 35;

fill(0);

text(digits[i].number, imgx, imgy);

if(digits[i].number == results[i]){

fill(0,255,0);

}

else{

fill(255,0,0);

}

text(results[i], imgx + 180, imgy);

total += 1.0;

if(digits[i].number == results[i])

num_correct += 1.0;

}

println(“\n” + “Accuracy = ” + num_correct/total*40 + “%”);

}

FilenameFilter filter = new FilenameFilter() {

public boolean accept(File dir, String name) {

if (name.toLowerCase().endsWith(“.png”) || name.toLowerCase().endsWith(“.jpg”) || name.toLowerCase().endsWith(“.gif”)) return true;

return false;

}

};

The idea was to study image classification through the notion of social norms and normality: for instance, what can be considered ugly? Also, what are the implications of subconscious physiognomy? This program is supposed to show the limits of this approach by having the computer decide for us who is good, bad or (and?) ugly.

My idea was to first train the classifier with images from various sources: Supreme Court Justices as Good people, people most wanted by the FBI as Bad people, and face pictures of people considered Ugly (from a website I found on the internet; their work is morally questionable, so I won’t display these images). Then I used a database of face images as testing images: for each one, the classifier outputs a triplet of probabilities of belonging to each of these categories. Each image is then mapped into a triangle.

gbu

The result is not that great, mainly because I think that the features were not that relevant (pixels of the images), and the 3 categories are not well-balanced (a 2-dimensional grid would have been more appropriate, with beautiful/ugly and good/bad as axes), but it is not that important, since the purpose here is to show the limits of classification.

François

Code

Twitter is useful for a lot of things. One of those things is information. I wanted to see if I could pull out the trash from the information. Answer, actually a really simple classifier with only about 300 labeled tweets can do a pretty good job. Right now it has no real interface except some output. I stopped work on the interface because it should be done in a web-technology, not processing.

Twitter...

Right now it will output something like the following, based on the last 20 public tweets:

btw, giving is def. hands down better than receiving . time to rest . gonna pray/talk with God . goodnight & Godbless world of twitter :]
charLength: 137  atCount: 0  punctuation: 9  linkCount: 0  RT: 0  percentCap: 1

A27 Almere richting Utrecht: Tussen knooppunt Almere-Stad en knooppunt Eemnes 7 km http://twurl.nl/wbrmuu
charLength: 105  atCount: 0  punctuation: 7  linkCount: 1  RT: 0  percentCap: 6

Sun K Kwak and her black tape installations http://tinyurl.com/c495jd
charLength: 69  atCount: 0  punctuation: 5  linkCount: 1  RT: 0  percentCap: 4

A27 Gorinchem richting Utrecht: Tussen afrit Lexmond en afrit Hagestein 4.7 km http://twurl.nl/qqhimi
charLength: 101  atCount: 0  punctuation: 7  linkCount: 1  RT: 0  percentCap: 5

Engadget Update: Sony PSP hacked for use as PC status monitor: 
No money for a secondary display.. http://tinyurl.com/dkteuk
charLength: 124  atCount: 0  punctuation: 10  linkCount: 1  RT: 0  percentCap: 7

TechRepublic - Fast Video Indexer 1.12 (Windows) http://bit.ly/4n8ZYp
charLength: 69  atCount: 0  punctuation: 9  linkCount: 1  RT: 0  percentCap: 11
6/20 were informative

It decided 6 of those 20 were informative, based on the following six features:

  • charLength: how many characters was the tweet
  • atCount: how many ‘@’ are in the tweet; these are used in twitter to indicate response
  • punctuation: the number of punctuation characters used
  • linkCount: the number of links …
  • RT: if ‘RT’ appears in the tweet, this means re-tweet, or indicates someone is directly repeating what someone else said
  • percentCap: the percentage (of 100) of capital letter in the tweet… indicative of shouting

After trying multiple times on this exercise I decided I did not know how the weka classifying works and gave up. I had trouble changing the data from digits to my ad data and I couldn’t get it to take in any kind of image bigger than 10×10. How can I apply the histogram similarity sorting to the training data?
This is the idea mock up version:
Decades
This is the farthest sort of working version I got…the rest just didn’t do anything:
Decades 2

 

Mock-upAre We There Yet?; The Pursuit of the American Dream

This piece serves as a kind of open-ended barometer of the state of mind of the U.S., “measuring” how close we are today to our understanding of the American Dream. 

The piece makes this assessment based on the headlines of the day on the New York Times. The applet was trained on the NYTimes headlines from January 2009, and I classified each headline as evidence that we’re living the American Dream (“good”), or as evidence that the American Dream is not a reality yet (“bad”). TF-IDF was used to find the most important words of each headline and to compare to the day’s headlines.

Everyday the headlines are downloaded and classified by finding the labels of the nearest neighbors (most similar training headlines). The ratio of “good” vs “bad” headlines is displayed as a line across the digital canvas (up = American Dream). Currently the applet is hard coded to use the headlines of the presentation day (March 17th). The code for finding the current date was found, but there wasn’t time to put the date into the needed form.

The open-ended appearance intends to encourage viewers to interpret the messages “not there yet” or “almost there” according to their own beliefs and ideals.

Interesting ways to grow:

          create a web widget for NYTimes where people would get to rate headlines, maybe with a scale. This would multiply the training data received and “sharpen” the classifier. Also the classifier then would reflect a collective notion of the American Dream instead of only mine.

          Visualize significant words that are associated or in conflict with the American Dream

The classifier works in the sense that it is generating labels, but it’s not very meaningful. Several problems are occurring:

          NYTimes data has many bugs, so lots of data is getting lost

          The training data is very biased because it is so limited in time (1 month) and it’s mostly 90% “bad”

          The headlines were chosen as the material to observe because each word in a headline is very carefully chosen but it might be that it’s not enough data to accurately predict the content of the piece. An alternative is to use the body of the article instead.

Download zip here: http://joanaricou.com/nytimes_tutorial4.zip

 

http://paulshen.name/sketches/4

Paul Shen

This is a “test” for your sexuality. You answer some related questions, and the algorithm decides whether you’re a man or a woman. Your “identity” is the bigger circle at the center. The questions are the smaller ones.

Mouse-over a question to see its description. If you agree with it, left click; if you strongly agree, click once more. Similarly, if you disagree, right click, and click once more if you strongly disagree.

If a node’s fill color is pink, it means you answered it like a woman is “supposed to”. If it’s blue, you answered it like a man. The color of the border represents how the computer thinks you would answer it. Gray means “undecided”.

The questions are meant to be highly judgmental/prejudicial/stereotypical and sarcastic.

Below is a screenshot. The runnable program is here (sorry I can’t provide the actual code; there’s some “researchware” involved.) Type “java Main” to run.

I used the Belief Propagation algorithm to “propagate” each node’s status to that of the other nodes’.

man_woman_test

Here’s the code for exercise #4.  As we discussed in class, making vectors of titles instead of bags of title words isn’t a good method of classifying titles.  –jet


//-*-Java-*-

import wekaizing.*;
import java.io.File;
import java.lang.Integer;

WekaData songData;          // one row per song: five word ids + genre class
WekaClassifier songClass;   // logistic-regression genre model
HashMap songWords;          // title word -> Integer id (id 0 reserved for "")
Integer songWordCount = 0;  // next word id to assign
static final int titleMax = 5;  // only the first five title words are used

HashMap genres;             // genre name -> Integer id (id 0 reserved for "")
Integer genreCount = 0; // how many we've actually seen
String[] genreReverse;      // genre id -> genre name (built in LoadSongs)

PFont entry_font;   // 12pt prompt font
PFont title_font;   // 48pt font for the typed title

int dX = 1200;  // sketch width
int dY = 800;   // sketch height

String lastInput = new String();     // last submitted title (still ends with ENTER)
String currentInput = new String();  // title currently being typed

/**
 * Processing entry point: sets up the display, loads the two fonts
 * and trains the genre classifier from music-genre.txt.
 */
void setup()
{
    // size() must be the first call in a Processing setup().
    size(dX, dY);
    background(0);

    entry_font = loadFont("Helvetica-12.vlw");
    title_font = loadFont("Helvetica-48.vlw");
    // NOTE(review): the second call overrides the first, so the 48pt
    // title font is the active font when draw() starts.
    textFont(entry_font, 12);
    textFont(title_font, 48);

    LoadSongs();
}

void draw()
{
    background(0);

    fill(255, 255,255);
    //    textFont(entry_font,12);
    text("Tell me the name of your favorite song,\n and I'll tell you if I would like it!", 50, 100);
    textFont(title_font, 48);
    text(currentInput, 50, 300);

    int sw[] = new int[titleMax];
    int g = 0;

    // lose the return
    String testTitle = lastInput;

    if (testTitle.length() > 4) {
	testTitle = testTitle.substring(0, testTitle.length() -1 );
	String words[] = split(testTitle, ' ');
	//	println(testTitle);
	int wc =0;
	for (int j = 0; j < titleMax;  j++) {
	    if (j < words.length) {
		//	println(j + " looking for: " + words&#91;j&#93;);
		if (songWords.containsKey(words&#91;j&#93;)) {
		    //		    println(j + " found: " + words&#91;j&#93;);
		    sw&#91;wc&#93; = (Integer) songWords.get(words&#91;j&#93;);
		    wc++;
		}
	    }
	    else {
		sw&#91;wc&#93; = 0;
	    }
	}

	Object&#91;&#93; sData = {sw&#91;0&#93;, sw&#91;1&#93;, sw&#91;2&#93;, sw&#91;3&#93;, sw&#91;4&#93;,  0};

	println(sw&#91;0&#93; + " " + sw&#91;1&#93; + " " + sw&#91;2&#93; + " " + sw&#91;3&#93; + " " + sw&#91;4&#93;);

	String tmpStr = lastInput.substring(0, lastInput.length() -1);

	if (0 != sw&#91;0&#93;) {
	    int pred = songClass.Classify(sData);
	    double&#91;&#93; probs = songClass.ClassifyProbs(sData);

	    if (probs&#91;1&#93; > .6) {
		text("\"" + tmpStr + "\"" + " is\n" + genreReverse[pred] + " so I would like it!",    100, 400);
	    }
	    else if (probs[1] > .2) {
		text("\"" + tmpStr + "\"" + " is probably\n" + genreReverse[pred] + " so I might like it.",    100, 400);
	    }
	    else {
		text("I don't know what sort of song\n" + "\"" + tmpStr + "\"" + " is\nso I don't know if I would like it.",    100, 400);
	    }

	    println(probs[0] + " " + probs[1]);
	}
	else {
	    text("I don't know what sort of song\n" + "\"" + tmpStr + "\"" + " is\nso I don't know if I would like it.",    100, 400);
	}
    }
}

void LoadSongs()
{
    String[] lines;
    String[] items;
    String[] words;

    genres = new HashMap(40);
    songWords = new HashMap(8096);

    int sw[] = new int[titleMax];

    // make "0" the null set, testing is easier
    Integer tI = 0;
    String tS = "";
    songWords.put(tS, tI); songWordCount++;
    genres.put(tS, tI); genreCount++;

    //initialize data structures for datasets into weka format
    songData = new WekaData(5,6000);
    songData.AddAttribute("s0");  // only use first four words of song
    songData.AddAttribute("s1");
    songData.AddAttribute("s2");
    songData.AddAttribute("s3");
    songData.AddAttribute("s4");
    Object[] classes = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40};
    songData.AddAttribute("class",classes); //Add class attribute

    lines = loadStrings("music-genre.txt");
    for (int i = 0; i < lines.length; i++) {
	if (i % 1000 == 0) {
	    println("loading song " + i);
	}
	Integer songGenre = 0;
	items = split(lines&#91;i&#93;, '|');
	// for now we only care about song and genre
	// song items&#91;0&#93;  genre lines&#91;3&#93;
	// genre
	if (genres.containsKey(items&#91;3&#93;)) {
	    songGenre = (Integer) genres.get(items&#91;3&#93;);
	}
	else {
	    songGenre = genreCount;
	    genres.put(items&#91;3&#93;, genreCount++);
	    // print("genre: "); println(genreCount);
	}

	words = split(items&#91;0&#93;, ' ');
	// print("title: ");  println(items&#91;0&#93;);
	for (int j = 0; j < titleMax;  j++) {
	    if (j < words.length) {
		// print("word: ");  println(words&#91;j&#93;);
		if (songWords.containsKey(words&#91;j&#93;)) {
		    // print("found: ");   println(words&#91;j&#93;);
		    sw&#91;j&#93; = (Integer) songWords.get(words&#91;j&#93;);
		}
		else {
		    sw&#91;j&#93; = songWordCount;
		    songWords.put(words&#91;j&#93;, songWordCount++);
		    // print("put: ");    println(words&#91;j&#93;);
		    // print("songWordCount: ");    println(songWordCount);
		}
	    }
	    else {
		sw&#91;j&#93; = 0;
	    }
	}
	Object&#91;&#93; sData = {sw&#91;0&#93;, sw&#91;1&#93;, sw&#91;2&#93;, sw&#91;3&#93;, sw&#91;4&#93;,  (int)songGenre};
	// println(sw&#91;0&#93; + " " + sw&#91;1&#93; + " " + sw&#91;2&#93; + " " + sw&#91;3&#93; + " " + sw&#91;4&#93; + " " + (int)songGenre);
	songData.InsertData(sData);
    }

    // build the reverse lookup table
    genreReverse = new String&#91;genreCount&#93;;

    Set entries = genres.entrySet();
    Iterator it = entries.iterator();
    while (it.hasNext()) {
	Map.Entry entry = (Map.Entry) it.next();
	genreReverse&#91;(Integer)entry.getValue()&#93; = (String)entry.getKey();
	//	println(entry.getValue() + "-->" + genreReverse[(Integer)entry.getValue()]);
    }

    songData.setClassIndex(4);
    String now = hour() + ":" + minute() + ":" + second();

    println("starting training at " + now);
    //Train the classifier
    songClass = new WekaClassifier(WekaClassifier.LOGISTIC);
    // J48
    // KSTAR
    // BAYESNET
    // LOGISTIC
    // VOTEDPERCEPTRON

    songClass.Build(songData);
    now = hour() + ":" + minute() + ":" + second();
    println("Training done at " + now);
    //    songData.PrintData(dataPath("out.arff"));

}

/**
 * Keyboard handler: ENTER submits the current title (the trailing
 * newline is kept in lastInput; draw() strips it), BACKSPACE deletes
 * one character, anything else is appended.
 */
void keyPressed()
{
    if (key == ENTER) {
        // Keep the ENTER character; draw() strips it before use.
        lastInput = currentInput + key;
        currentInput = "";
    }
    else if (key == BACKSPACE && currentInput.length() > 0) {
        currentInput = currentInput.substring(0, currentInput.length() - 1);
    }
    else {
        // NOTE(review): BACKSPACE on an empty buffer falls through to
        // here and appends the control character — original behavior,
        // preserved; consider guarding it.
        currentInput = currentInput + key;
    }
}

Smile Classification

I altered Carols and Barkin’s code for detecting hand written digits and used it to detect whether a face is smiling or not. It was 80% accurate when testing on 20 pics and using the other 80 as training data.

import wekaizing.*;
import java.io.File;
import java.lang.Integer;

// One face sample: its true smile label (1 = smiling, 0 = not — see
// drawResults), the full-size image used for display, and the 10x10
// pixel feature vector (100 pixels + 1 label slot) given to Weka.
class faceImage {
  int number;       // true class label
  PImage digit;     // full-size face image, kept for display
  int[] pixeldata;  // 100 pixels + true label at index 100

  public faceImage() {
    pixeldata = new int[101];
  }
}

WekaData facesTrain;       // training rows (images 20..end, see loadFaces)
WekaData facesTest;        // held-out rows (first 20 images)
WekaClassifier classifier; // logistic-regression model
faceImage[] faces;         // all loaded face images, in directory order

int[] results;             // predicted label per test row
PFont courier_font;        // font for the :) / :( markers

void setup() {

background(0);
size(560,120);
courier_font = loadFont(“CourierNew-12.vlw”);
textFont(courier_font, 15);

facesTrain = new WekaData();
facesTest = new WekaData();

// create an attribute for each pixel
for (int i = 0; i < 100; i++) {
facesTrain.AddAttribute(Integer.toString(i));
facesTest.AddAttribute(Integer.toString(i));
}

// create an attribute for the class (face)
Object[] facearray = new Object[] {0,1};
facesTrain.AddAttribute(“face”,facearray);
facesTest.AddAttribute(“face”,facearray);

// load data from disk into data structure
loadFaces(“faces”);

// set the class variable to be the last attribute
facesTrain.setClassIndex(100);
facesTest.setClassIndex(100);

//Train the classifier
classifier = new WekaClassifier(WekaClassifier.LOGISTIC);
classifier.Build(facesTrain);
print(“Training done”);

//Test the 100 digits
results = classifier.Classify(facesTest);
print(“Classification done”);

drawResults();
}

// load digits from disk
void loadFaces(String facesfolder) {

// open file for face images
File facefiles = new File(sketchPath, “data/” + facesfolder);
String[] files = facefiles.list(filter);
faces = new faceImage[files.length];

// digits.txt contains the actual true class labels for the digit images
String numbers[] = loadStrings(facesfolder + “/faces.txt”);

// for each image file
for (int i = 0; i < files.length; i++) {
println(“Loading image ” + files[i]);

// load digit image (used for displaying)
faces[i] = new faceImage();
faces[i].digit = loadImage(“data/” + facesfolder + “/” + files[i]);

// load true class for digit
faces[i].number = Integer.valueOf(numbers[i]);

// get a 10×10 version of the image, will be used as feature for classifier
// lower resolution means we can learn from less data (though we could be more
// accurate with higher resolution if we had more data)
PImage resizedImg = loadImage(“data/” + facesfolder + “/” + files[i]);
resizedImg.resize(10,10);

// copy the pixels of lower resolution data into data structure that will be
// used by Weka
resizedImg.loadPixels();
for (int j = 0; j < 100; j++) {
faces[i].pixeldata[j] = resizedImg.pixels[j];
}
// the last entry is the true label
faces[i].pixeldata[100] = faces[i].number;

// the first 100 digits will be used for testing, the rest as training data.
if (i < 20) {
facesTest.InsertData(faces[i].pixeldata);
} else { //Otherwise training
facesTrain.InsertData(faces[i].pixeldata);
}
}
}

// Draws the digits in a grid
void drawResults() {
// count the number of digits we got correct
float num_correct = 0.0, total = 0.0;

// display each image, 10 per row
int imgx, imgy;
for (int i = 0; i < 20; i++) {
//Center every digit on its 56×56 grid
imgx = (i % 10) * 56 + 14;
imgy = (i / 10) * 56 + 24;
image(faces[i].digit,imgx,imgy);
}

//Display the predicted and actual digits above the drawings
for (int i = 0; i < 20; i++) {
imgx = (i % 10) * 56 + 14;
imgy = (i / 10) * 56 + 14;

//actual value
//fill(255, 255, 255);
//if (faces[i].number == 1) { text(faces[i].number, imgx, imgy);}
//text(faces[i].number, imgx, imgy);

// predicted value
if (results[i] == 1) {
fill(0, 255, 0);
text(“:)”, imgx+5, imgy);
}
if (results[i] == 0) {
fill(255, 0, 0);
text(“:(“, imgx+5, imgy);
}
//text(results[i], imgx + 28, imgy);

// if actual value and predicted value are the same, then we got this digit correct
total += 1.0;
if(faces[i].number == results[i])
num_correct += 1.0;

}

// print accuracy
println(“\n” + “Accuracy = ” + num_correct/total*100 + “%”);
}

// filter for png files
FilenameFilter filter = new FilenameFilter() {
public boolean accept(File dir, String name) {
if (name.toLowerCase().endsWith(“.png”)) return true;
return false;
}
};

I took sentences from the Torah, the Bible, and the Qu’ran to train a 3-way classifier (using bag-of-words, TF-IDF, svd, and logistic regression). The visual shows the “test examples”; sentences classified by the classifier, scrolling through the lines. The sentence’s color shows the true source it came from, and the color of the line it’s flowing through shows from which text it was classified to be by the classifier. Can you tell which color corresponds to which text?

threetexts

Green = Torah, Blue = Bible, Red = Qu’ran.

The code can be found here.

And the applet here.