« b2evolution 2.3.0 quickstart | jPod on Linux » |
To move your blog posts someplace else, you first need to get them off of Yahoo 360. If you only have a few, you can edit the posts and then copy and paste them into your new blog. If you have a bunch, that may prove to be impractical. I'm going to tell you how I converted my blog posts to a Movable Type import file, which can be imported by many different types of blog software.
The first thing you need is Linux. Find out the permanent URL of your last blog posting and record it somewhere, then make a new directory and copy the following script into the directory. Name the script whatever you want, but remember to set the execute bit; in my case I called it get.
Code
#!/bin/sh
# Download a blog post and then follow its "Previous Post" links, saving
# every page into the current directory.
#
# call as get "last blog post url"
#
# Environment:
#   MAX_TRIES  download attempts per page before giving up (default 10)

MAX_TRIES=${MAX_TRIES:-10}

if [ -z "$1" ]; then
    echo "usage: $0 \"last blog post url\"" >&2
    exit 1
fi

# Print the local file name expected for URL $1: everything from the last
# occurrence of "blog" onwards. Prints nothing when the URL does not match.
local_name() {
    printf '%s\n' "$1" | sed -n -e 's|http.*\(blog.*\)|\1|p'
}

# Download URL $1 and retry until file $2 exists. Gives up (status 1) after
# MAX_TRIES attempts so a permanently failing page cannot loop forever.
fetch() {
    attempt=1
    wget "$1"
    while [ ! -e "$2" ]; do
        if [ "$attempt" -ge "$MAX_TRIES" ]; then
            echo "giving up on $1 after $attempt attempts" >&2
            return 1
        fi
        attempt=$((attempt + 1))
        sleep 1
        wget "$1"
    done
}

url=$1
# All expansions are quoted: an unquoted empty value makes `[ -n ]` true,
# which would keep the loop running after the oldest post was reached.
while [ -n "$url" ]; do
    file=$(local_name "$url")
    if [ -z "$file" ]; then
        echo "cannot derive a file name from $url" >&2
        exit 1
    fi
    echo "$file"
    fetch "$url" "$file" || exit 1

    # First "Previous Post" link on the page. The capture stops at the
    # closing quote of the href. An empty result ends the loop.
    url=$(sed -n -e 's|<span>Previous Post: <a href="\([^"]*\)".*$|\1|p' "$file" | head -n 1)
    echo "$url"
done
When the script finishes, you will have all your blog postings saved as HTML files in the directory you ran it from, with names the same as the URL minus the http and the domain; the posting shown as an example will become blog-hwLjH.Myfxxxx8RH6xCteD6OzzzzzrB?p=287.
Then I wrote this little program to take the output files and convert them to Movable Type format.
Code
#include <time.h>

#include <cctype>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>

using namespace std;
| |
/**
 * Convert a saved blog page into a Movable Type style import file.
 *
 * Usage: parse <saved-page>
 *
 * The input is scanned line by line for a title, a body, any number of
 * "?tag=" categories and a date line. The result is written to
 * "blog<id>.txt", where <id> is the text after "p=" in the input name, or
 * the input's base name when it contains no "p=".
 *
 * Returns 0 on success, 1 on a usage error or when a file cannot be opened.
 *
 * NOTE(review): scanning stops as soon as one title, one body and one date
 * have been found, so only the first complete post of the input is
 * converted. Confirm this is intended before feeding it a file that holds
 * several concatenated pages.
 */
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "parse")
                  << " <saved-blog-page>" << std::endl;
        return 1;
    }

    const std::string::size_type npos = std::string::npos;
    const std::string iname = argv[1];
    std::cout << iname << std::endl;

    // Derive the output name. Without the explicit npos check, npos + 2
    // wraps around to 1 and silently drops the first character of the name.
    std::string suffix;
    const std::string::size_type id_pos = iname.find("p=");
    if (id_pos != npos)
    {
        suffix = iname.substr(id_pos + 2);
    }
    else
    {
        const std::string::size_type slash = iname.find_last_of('/');
        suffix = (slash == npos) ? iname : iname.substr(slash + 1);
    }
    const std::string ofilename = "blog" + suffix + ".txt";
    std::cout << ofilename << std::endl;

    // Open the input first so a bad path does not leave an output file behind.
    std::ifstream in(argv[1]);
    if (!in.good())
    {
        std::cerr << "Couldn't open file" << std::endl;
        return 1;
    }
    std::ofstream out(ofilename.c_str());
    if (!out.good())
    {
        std::cerr << "Couldn't create " << ofilename << std::endl;
        return 1;
    }

    // Markers searched for in the page.
    const std::string kTitleOpen = "<dt class=\"post-head\">";
    const std::string kTitleClose = "</dt>";
    const std::string kBodyOpen = "<div class=\"content-wrapper\">";
    const std::string kBodyEnd = "<div class=\"foot\">";
    const std::string kTagOpen = "\"?tag=";
    const std::string kDateOpen = "<p>";

    bool titlef = false;
    bool bodyf = false;
    bool datef = false;
    std::string line;
    std::string body;

    out << "AUTHOR: Wayne" << std::endl;

    while (!(titlef && bodyf && datef) && std::getline(in, line))
    {
        // Title: the text between the opening marker and the first "</dt>"
        // after it, both on the same line.
        if (!titlef)
        {
            const std::string::size_type open = line.find(kTitleOpen);
            if (open != npos)
            {
                const std::string::size_type text = open + kTitleOpen.size();
                const std::string::size_type close = line.find(kTitleClose, text);
                if (close != npos)
                {
                    const std::string title = line.substr(text, close - text);
                    std::cout << "TITLE: " << title << std::endl;
                    out << "TITLE: " << title << std::endl
                        << "STATUS: Publish" << std::endl
                        << "ALLOW COMMENTS: 1" << std::endl;
                    titlef = true;
                }
            }
        }

        // Body: every line after the content wrapper up to, but excluding,
        // the line that carries the footer marker. Lines are joined without
        // a separator.
        // NOTE(review): words split across source lines end up glued together.
        if (!bodyf && line.find(kBodyOpen) != npos)
        {
            std::string raw;
            std::getline(in, line);
            while (in.good() && line.find(kBodyEnd) == npos)
            {
                raw += line;
                std::getline(in, line);
            }
            // Drop the last six characters, presumably the "</div>" that
            // closes the wrapper (TODO confirm). Shorter text is kept whole.
            body = (raw.size() >= 6) ? raw.substr(0, raw.size() - 6) : raw;
            std::cout << "-----" << std::endl
                      << "BODY: " << std::endl
                      << body << "-----" << std::endl;
            bodyf = true;
        }

        // Categories: each "?tag=value" attribute on the current line. The
        // value ends at the attribute's closing quote, so a tag whose name
        // contains "rel" is handled correctly.
        for (std::string::size_type tag = line.find(kTagOpen); tag != npos;)
        {
            const std::string::size_type value = tag + kTagOpen.size();
            const std::string::size_type quote = line.find('"', value);
            const std::string category = "CATEGORY: " +
                line.substr(value, quote == npos ? npos : quote - value);
            out << category << std::endl;
            std::cout << category << std::endl;
            if (quote == npos)
                break;
            tag = line.find(kTagOpen, quote + 1);
        }

        // Date: the text between "<p>" and the time zone marker.
        if (!datef)
        {
            std::string::size_type zone = line.find("(EDT)"); //change this to match the time zone of your articles
            if (zone == npos)
                zone = line.find("(EST)"); //change this to match the time zone of your articles
            if (zone != npos)
            {
                const std::string::size_type open = line.find(kDateOpen);
                // "<p>" must come before the zone marker; otherwise the
                // length computation would wrap around.
                if (open != npos && open + kDateOpen.size() <= zone)
                {
                    const std::string::size_type text = open + kDateOpen.size();
                    const std::string stamp = line.substr(text, zone - text);

                    // Zero-initialised so strftime never reads indeterminate
                    // fields when strptime stops early.
                    struct tm parsed = {};
                    if (strptime(stamp.c_str(), "%a %b %d, %Y - %r", &parsed) == NULL)
                    {
                        // Fields parsed before the mismatch are still used,
                        // so the entry is emitted anyway.
                        std::cerr << "Warning: could not fully parse date \""
                                  << stamp << "\"" << std::endl;
                    }

                    char datec[100];
                    std::string date;
                    if (strftime(datec, sizeof datec, "%m/%d/%Y %R:00", &parsed) != 0)
                        date = datec;
                    datef = true;

                    std::cout << "DATE: " << date << std::endl;
                    out << "DATE: " << date << std::endl;
                    // The body is emitted together with the date. It is
                    // empty if no body has been seen yet at this point.
                    out << "-----" << std::endl
                        << "BODY: " << std::endl
                        << body << std::endl
                        << "--------" << std::endl;
                }
            }
        }
    }

    out.close();
    in.close();
    return 0;
}
Compile and run this code, passing the name of the file you want to convert. You can take the files from the previous step and put them into one file with the command
cat blog* >> postings
If you call the program parse, the command would be
./parse postings
You can then take the resulting .txt file and import it using the Movable Type import tool of your choice. This file has only been tested on b2evolution; if it doesn't work, you have the source, so fix it yourself.