<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
     xmlns:content="http://purl.org/rss/1.0/modules/content/"
     xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
     xmlns:atom="http://www.w3.org/2005/Atom"
     xmlns:dc="http://purl.org/dc/elements/1.1/"
     xmlns:wfw="http://wellformedweb.org/CommentAPI/"
     >
  <channel>
    <!-- Channel-level metadata: feed title, site link, description, and the tool named as generator. -->
    <title>brain of mat kelcey</title>
    <link>http://matpalm.com/blog</link>
    <description>thoughts from a data scientist wannabe</description>
    <generator>Blogofile</generator>
    <!-- Syndication-module hints: an "hourly" period with a frequency of 1 means one update per hour. -->
    <sy:updatePeriod>hourly</sy:updatePeriod>
    <sy:updateFrequency>1</sy:updateFrequency>
    <item>
      <!-- Post entry: title, link, category tags, guid, plain-text summary, then the HTML body. -->
      <title>fastmap and the jaccard distance</title>
      <link>http://matpalm.com/blog/2008/10/31/fastmap-and-the-jaccard-distance/</link>
      <category>algorithms</category>
      <category>deduplication</category>
      <category>c++</category>
      <guid>http://matpalm.com/blog/?p=14</guid>
      <description>fastmap and the jaccard distance</description>
      <!-- The body is embedded HTML, so it stays in a CDATA section to avoid escaping its markup. -->
      <content:encoded><![CDATA[<p>given a set of pairwise distances how do you determine what points correspond to those distances?</p>
<p><a href="http://www.matpalm.com/resemblance/jaccard_distance/">my latest experiment</a> considers this problem in relation to jaccard distances, a resemblance measure similar to jaccard coefficients used in <a href="http://www.matpalm.com/resemblance/jaccard_coeff/">a previous experiment</a></p>
<p>by using the <a href="http://www.kyriakides.net/CBCL/references/Faloutsos/p163-faloutsos.pdf">fastmap</a> algorithm we get points from distances and once you have points you have visualisation!</p>]]></content:encoded>
    </item>
    <item>
      <!-- Post entry: title, link, category tags, guid, plain-text summary, then the HTML body. -->
      <title>shingling and the jaccard index</title>
      <link>http://matpalm.com/blog/2008/10/06/shingling-and-the-jaccard-index/</link>
      <category>ruby</category>
      <category>algorithms</category>
      <category>deduplication</category>
      <category>c++</category>
      <guid>http://matpalm.com/blog/?p=10</guid>
      <description>shingling and the jaccard index</description>
      <!-- The body is embedded HTML, so it stays in a CDATA section to avoid escaping its markup. -->
      <content:encoded><![CDATA[<p>on the weekend i did another experiment using shingling and the jaccard index to try to determine if two sets of data were “duplicates”</p>
<p>it works quite well and includes a ruby and c++ version with low level bit operations.</p>
<p>project page is <a href="http://www.matpalm.com/resemblance/">www.matpalm.com/resemblance</a></p>
<p>code at <a href="http://github.com/matpalm/resemblance">github.com/matpalm/resemblance</a></p>]]></content:encoded>
    </item>
  </channel>
</rss>

