Saturday, June 2, 2012

XML Response in Python

Writing an XML response document in Python is pretty easy.
While working on one of my projects I wrote some
methods that make it even easier:


import xml.dom.minidom


class MyXml:
    """Small convenience wrapper around an xml.dom.minidom Document.

    Elements are created with ``createElementNS`` using the namespace URI
    stored in ``NAMESPACE``; the underlying document is available as ``doc``.
    """

    # Namespace URI passed to createElementNS for every element.
    NAMESPACE = "http://mynamespace.com"

    def __init__(self):
        self.doc = xml.dom.minidom.Document()

    def add_root(self, node_str):
        """Create element ``node_str``, append it to the document, return it."""
        root = self.doc.createElementNS(self.NAMESPACE, node_str)
        self.doc.appendChild(root)
        return root

    def add_node(self, node, node_str):
        """Create element ``node_str`` as a child of ``node`` and return the child."""
        ch_node = self.doc.createElementNS(self.NAMESPACE, node_str)
        node.appendChild(ch_node)
        # Return the new child (the original returned the undefined name
        # ``root``, which raised NameError on every call).
        return ch_node

    def add_txt_value(self, node, value):
        """Append ``str(value)`` to ``node`` as a text node."""
        txt_node = self.doc.createTextNode(str(value))
        node.appendChild(txt_node)


#==================================================
# Example: to create an XML response document, simply add nodes and text
# as shown below:
#<?xml version="1.0" encoding="utf-8"?>
# <response>
#       <success> Hey i got your msg</success>
# </response>
#==================================================


if __name__ == '__main__':
    # Demo: start a document whose root element is "response".
    builder = MyXml()
    response_node = builder.add_root("response")

    # Request a "success" element under the root (first argument is the
    # parent node, second is the new element's name).
    success_node = builder.add_node(response_node, "success")

    # Attach the message text to whatever add_node returned.
    builder.add_txt_value(success_node, "Hey i got your msg")

Wednesday, January 4, 2012

Mahout Recommendation Engine

Apache Mahout implements scalable data mining algorithms on top of Apache Hadoop. Classification, clustering and collaborative filtering algorithms are implemented in Mahout and can be used for analyzing large-scale data and predicting user behavior.

Mahout implements collaborative filtering based on :
1. User Preferences
2. Item similarity (product similarity)

Here I am giving sample code for building item-similarity-based recommendations.
Requirements:
1. Maven is needed to build a Mahout project.
2. Input file: the content of the file should look like this:
userid, itemid, preference
101,202,3
101,203,5
102,202,2
Note: both userid and itemid must be of type long, and preference must be of type float.
Strings are not supported by the Mahout recommendation API, so you need to map your data to numeric IDs before feeding it into the Mahout recommender.

Output: the given code takes input in the format above and writes output to the given file as:
user,recom1,recom2,recom3,recom4,recom5
Note: recommendations are arranged in descending order of recommendation strength. If customer preferences are not known, there will be no ordering and the recommender below becomes a binary recommender, meaning that you either like a product (1) or you don't like it (0).


import java.io.File;
import java.util.List;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.recommender.*;
import org.apache.mahout.cf.taste.model.*;
import org.apache.mahout.cf.taste.eval.*;
import org.apache.mahout.common.*;
import java.io.FileWriter;
import java.io.BufferedWriter;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.common.Weighting;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.MemoryDiffStorage;
import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;


public class HiveLog {
   
    public static void main(String... args) throws Exception
   {
       
        // create data source (model) - from the csv file          

        File inputFile= new File("/home/test/test_input.csv");

        final DataModel model = new FileDataModel( inputFile );
        FileWriter fstream=new FileWriter("/home/test/recommendation.csv",true);
        BufferedWriter out=new BufferedWriter(fstream);      

RecommenderBuilder recommenderBuilder=new RecommenderBuilder(){
@Override
public Recommender buildRecommender(DataModel model) throws TasteException {

DiffStorage diffStorage = new MemoryDiffStorage( model, Weighting.WEIGHTED, Long.MAX_VALUE);
return new SlopeOneRecommender(model,Weighting.WEIGHTED, Weighting.WEIGHTED, diffStorage);
}
};


Recommender recommender=recommenderBuilder.buildRecommender(model);
      // for all users
        for (LongPrimitiveIterator it = model.getUserIDs(); it.hasNext();)
{
          long userId = it.nextLong();
           
            // get the recommendations for the user
            List<RecommendedItem> recommendations = recommender.recommend(userId,8);
            int i=0;
            for (RecommendedItem recommendedItem : recommendations)
   {
if (i==0)
{
  out.write(userId+","+recommendedItem.getItemID());
i++;
}
else
{
out.write(","+recommendedItem.getItemID());
i++;
          }
   }
out.newLine();
        }  

out.close();
  }
}


For more details on Mahout and its algorithm implementations, please write to me.