Skip navigation

Here’s the code for exercise #4.  As we discussed in class, making vectors of titles instead of bags of title words isn’t a good method of classifying titles.  –jet


//-*-Java-*-

import wekaizing.*;
import java.io.File;
import java.lang.Integer;

// Training set and classifier; both are created in LoadSongs().
WekaData songData;
WekaClassifier songClass;

// Title word (String) -> integer id (Integer), filled in by LoadSongs().
HashMap songWords;
// Next unused word id. Kept as a primitive so ++ does not re-box on every song.
int songWordCount = 0;
// Number of leading title words encoded per song.
static final int titleMax = 5;

// Genre name (String) -> integer id (Integer), filled in by LoadSongs().
HashMap genres;
int genreCount = 0; // how many we've actually seen
// Inverse of genres: genre id -> genre name.
String[] genreReverse;

PFont entry_font;
PFont title_font;

// Window width and height in pixels, passed to size() in setup().
int dX = 1200;
int dY = 800;

// lastInput: the most recently submitted line, still ending with the key that
// submitted it.  currentInput: the line being typed.
String lastInput = "";
String currentInput = "";

/**
 * Processing start-up callback: opens the window, loads the two fonts,
 * then loads the song list and trains the classifier via LoadSongs().
 */
void setup()
{
    // size() must come before any other drawing call in setup().
    size(dX, dY);
    background(0);

    // initialize fonts; title_font is the one left active for draw()
    entry_font = loadFont("Helvetica-12.vlw");
    title_font = loadFont("Helvetica-48.vlw");
    textFont(title_font, 48);

    LoadSongs();
}

void draw()
{
    background(0);

    fill(255, 255,255);
    //    textFont(entry_font,12);
    text("Tell me the name of your favorite song,\n and I'll tell you if I would like it!", 50, 100);
    textFont(title_font, 48);
    text(currentInput, 50, 300);

    int sw[] = new int[titleMax];
    int g = 0;

    // lose the return
    String testTitle = lastInput;

    if (testTitle.length() > 4) {
	testTitle = testTitle.substring(0, testTitle.length() -1 );
	String words[] = split(testTitle, ' ');
	//	println(testTitle);
	int wc =0;
	for (int j = 0; j < titleMax;  j++) {
	    if (j < words.length) {
		//	println(j + " looking for: " + words&#91;j&#93;);
		if (songWords.containsKey(words&#91;j&#93;)) {
		    //		    println(j + " found: " + words&#91;j&#93;);
		    sw&#91;wc&#93; = (Integer) songWords.get(words&#91;j&#93;);
		    wc++;
		}
	    }
	    else {
		sw&#91;wc&#93; = 0;
	    }
	}

	Object&#91;&#93; sData = {sw&#91;0&#93;, sw&#91;1&#93;, sw&#91;2&#93;, sw&#91;3&#93;, sw&#91;4&#93;,  0};

	println(sw&#91;0&#93; + " " + sw&#91;1&#93; + " " + sw&#91;2&#93; + " " + sw&#91;3&#93; + " " + sw&#91;4&#93;);

	String tmpStr = lastInput.substring(0, lastInput.length() -1);

	if (0 != sw&#91;0&#93;) {
	    int pred = songClass.Classify(sData);
	    double&#91;&#93; probs = songClass.ClassifyProbs(sData);

	    if (probs&#91;1&#93; > .6) {
		text("\"" + tmpStr + "\"" + " is\n" + genreReverse[pred] + " so I would like it!",    100, 400);
	    }
	    else if (probs[1] > .2) {
		text("\"" + tmpStr + "\"" + " is probably\n" + genreReverse[pred] + " so I might like it.",    100, 400);
	    }
	    else {
		text("I don't know what sort of song\n" + "\"" + tmpStr + "\"" + " is\nso I don't know if I would like it.",    100, 400);
	    }

	    println(probs[0] + " " + probs[1]);
	}
	else {
	    text("I don't know what sort of song\n" + "\"" + tmpStr + "\"" + " is\nso I don't know if I would like it.",    100, 400);
	}
    }
}

void LoadSongs()
{
    String[] lines;
    String[] items;
    String[] words;

    genres = new HashMap(40);
    songWords = new HashMap(8096);

    int sw[] = new int[titleMax];

    // make "0" the null set, testing is easier
    Integer tI = 0;
    String tS = "";
    songWords.put(tS, tI); songWordCount++;
    genres.put(tS, tI); genreCount++;

    //initialize data structures for datasets into weka format
    songData = new WekaData(5,6000);
    songData.AddAttribute("s0");  // only use first four words of song
    songData.AddAttribute("s1");
    songData.AddAttribute("s2");
    songData.AddAttribute("s3");
    songData.AddAttribute("s4");
    Object[] classes = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40};
    songData.AddAttribute("class",classes); //Add class attribute

    lines = loadStrings("music-genre.txt");
    for (int i = 0; i < lines.length; i++) {
	if (i % 1000 == 0) {
	    println("loading song " + i);
	}
	Integer songGenre = 0;
	items = split(lines&#91;i&#93;, '|');
	// for now we only care about song and genre
	// song items&#91;0&#93;  genre lines&#91;3&#93;
	// genre
	if (genres.containsKey(items&#91;3&#93;)) {
	    songGenre = (Integer) genres.get(items&#91;3&#93;);
	}
	else {
	    songGenre = genreCount;
	    genres.put(items&#91;3&#93;, genreCount++);
	    // print("genre: "); println(genreCount);
	}

	words = split(items&#91;0&#93;, ' ');
	// print("title: ");  println(items&#91;0&#93;);
	for (int j = 0; j < titleMax;  j++) {
	    if (j < words.length) {
		// print("word: ");  println(words&#91;j&#93;);
		if (songWords.containsKey(words&#91;j&#93;)) {
		    // print("found: ");   println(words&#91;j&#93;);
		    sw&#91;j&#93; = (Integer) songWords.get(words&#91;j&#93;);
		}
		else {
		    sw&#91;j&#93; = songWordCount;
		    songWords.put(words&#91;j&#93;, songWordCount++);
		    // print("put: ");    println(words&#91;j&#93;);
		    // print("songWordCount: ");    println(songWordCount);
		}
	    }
	    else {
		sw&#91;j&#93; = 0;
	    }
	}
	Object&#91;&#93; sData = {sw&#91;0&#93;, sw&#91;1&#93;, sw&#91;2&#93;, sw&#91;3&#93;, sw&#91;4&#93;,  (int)songGenre};
	// println(sw&#91;0&#93; + " " + sw&#91;1&#93; + " " + sw&#91;2&#93; + " " + sw&#91;3&#93; + " " + sw&#91;4&#93; + " " + (int)songGenre);
	songData.InsertData(sData);
    }

    // build the reverse lookup table
    genreReverse = new String&#91;genreCount&#93;;

    Set entries = genres.entrySet();
    Iterator it = entries.iterator();
    while (it.hasNext()) {
	Map.Entry entry = (Map.Entry) it.next();
	genreReverse&#91;(Integer)entry.getValue()&#93; = (String)entry.getKey();
	//	println(entry.getValue() + "-->" + genreReverse[(Integer)entry.getValue()]);
    }

    songData.setClassIndex(4);
    String now = hour() + ":" + minute() + ":" + second();

    println("starting training at " + now);
    //Train the classifier
    songClass = new WekaClassifier(WekaClassifier.LOGISTIC);
    // J48
    // KSTAR
    // BAYESNET
    // LOGISTIC
    // VOTEDPERCEPTRON

    songClass.Build(songData);
    now = hour() + ":" + minute() + ":" + second();
    println("Training done at " + now);
    //    songData.PrintData(dataPath("out.arff"));

}

/**
 * Processing key callback.  Appends typed characters to currentInput,
 * removes the last character on BACKSPACE, and on ENTER or RETURN moves the
 * finished line (with the terminating key still attached) into lastInput
 * and clears currentInput.
 */
void keyPressed()
{
    // Modifier, arrow and other special keys report key == CODED and have
    // no character of their own to add to the line.
    if (key == CODED) {
        return;
    }

    // Either ENTER or RETURN may be reported depending on the platform.
    if (key == ENTER || key == RETURN) {
        // draw() drops the last character of lastInput, so keep the terminator on it.
        lastInput = currentInput + key;
        currentInput = "";
    }
    else if (key == BACKSPACE) {
        // On an empty line there is nothing to erase, and the backspace
        // character itself must not be appended.
        if (currentInput.length() > 0) {
            currentInput = currentInput.substring(0, currentInput.length() - 1);
        }
    }
    else {
        currentInput = currentInput + key;
    }
}

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: