+ match = stub_pattern.match(x.guid.string)
+ if match:
+ stub = match.groups()[0]
+ else:
+ # Fall back to our own stubs
+ stub = re.sub(r'[^a-zA-Z0-9_]', '-', x.title.string).lower()
+
+ commit_msg = """Importing WordPress post "%s" [%s]""" % (x.title.string, x.guid.string)
+ timestamp = time.mktime(time.strptime(x.find('wp:post_date_gmt').string, "%Y-%m-%d %H:%M:%S"))
+ content = '\[[!meta title="%s"]]\n' % (x.title.string.replace('"', r'\"'))
+ content += "\[[!meta date=\"%s\"]]\n" % datetime.fromtimestamp(timestamp)
+ content += x.find('content:encoded').string.replace('\r\n', '\n')
+
+ """
+ We do it differently here because we have duplicates otherwise.
+ Take a look:
+ <category><![CDATA[Health]]></category>
+ <category domain="category" nicename="health"><![CDATA[Health]]></category>
+
+ If we do the what original did, we end up with all tags and cats doubled.
+ Therefore we only pick out nicename="foo". Our 'True' below is our 'foo'.
+ I'd much rather have the value of 'nicename', and tried, but my
+ python skillz are extremely limited....
+ """
+ categories = x.findAll('category', nicename=True)
+ if categories:
+ content += "\n"
+ for cat in categories:
+ # remove 'tags/' because we have a 'tagbase' set.
+ # your choice: 'tag', or 'taglink'
+ # content += "\n\[[!tag %s]]" % (cat.string.replace(' ', '-'))
+ content += "\n\[[!taglink %s]]" % (cat.string.replace(' ', '-'))
+ # this is just debugging, and for fun
+ # print >>sys.stderr, cat.string.replace(' ', '-')
+
+ # moved this thing down
+ data = content.encode('ascii', 'html_replace')
+ print "commit refs/heads/%s" % branch
+ print "committer %s <%s> %d +0000" % (name, email, timestamp)
+ print "data %d" % len(commit_msg)
+ print commit_msg
+ print "M 644 inline %s" % os.path.join(subdir, "%s.mdwn" % stub)
+ print "data %d" % len(data)
+ print data
+
+if __name__ == "__main__":
+ if len(sys.argv) not in (4, 5):
+ print >>sys.stderr, "%s: usage: %s name email subdir [branch] < wordpress-export.xml | git-fast-import " % (sys.argv[0], sys.argv[0])
+ else:
+ main(*sys.argv[1:])
+
+</pre>
+-----