Wayne D's Blog

My Digital Domain

How to get your blog posts off Yahoo 360

01/20/08

How to get your blog posts off Yahoo 360

04:41:20 pm by wdawe, Categories: uncategorized

To move your blog posts someplace else you first need to get them off of Yahoo 360. If you only have a few, you can edit the posts and then copy and paste them into your new blog. If you have a bunch, that may prove to be impractical. I'm going to tell you how I converted my blog posts to a Movable Type import file, which can be imported by many different types of blog software.

First thing is you need Linux. Find out the permanent URL of your last blog posting and record it somewhere, then make a new directory and copy the following script into the directory. Name the script whatever you want, but remember to set the execute bit; in my case I called it get.

Code

#!/bin/sh
# Download a Yahoo 360 blog post and every older post reachable through its
# "Previous Post" links, saving each page into the current directory.
# call as get "last blog post url"

if [ -z "$1" ] ; then
        echo "usage: $0 \"last blog post url\"" >&2
        exit 1
fi

# No blank inside the quotes: it would be sent as part of the URL.
wget "$1" || exit 1
# The saved page is named after whatever follows the last "/" of the URL
# (query string included), so derive the local file name the same way.
i=${1##*/}
echo "$i"
# $i must be quoted: an unquoted empty value turns the test into "[ -n ]",
# which is always true, and the loop would never end.
while [ -n "$i" ] ; do
        # URL of the previous post as found in the page just downloaded;
        # empty once the oldest post has been reached.
        n=$(sed -n -e 's|.*<span>Previous Post: <a href="\([^"]*\)">.*$|\1|p' "$i" | head -n 1)
        [ -n "$n" ] || break
        echo "$n"
        i=${n##*/}
        wget "$n"
        # Retry a few times until the page really is on disk, but do not
        # hammer the server forever when the link is permanently broken.
        tries=0
        while [ ! -e "$i" ] ; do
                tries=$((tries + 1))
                if [ "$tries" -gt 5 ] ; then
                        echo "giving up on $n" >&2
                        exit 1
                fi
                wget "$n"
        done
done

Execute the script using the url of your last blog post as the argument
./get http://ca.blog.360.yahoo.com/blog-hwLjH.Myfxxxx8RH6xCteD6OzzzzzrB?p=287

When the script finishes you will have all your blog postings saved as HTML files in the directory you ran it from, with names the same as the URL minus the http and the domain; the posting shown as an example will become blog-hwLjH.Myfxxxx8RH6xCteD6OzzzzzrB?p=287.

Then I wrote this little program to take the output files and convert them to Movable Type format.

Code

#include <cctype>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
 
using namespace std;
 
// Builds the name of the Movable Type file written for `input_name`:
// "blog" + the text following the first "p=" + ".txt"
// (for example "blog-XXXX?p=287" gives "blog287.txt").
// When the name holds no "p=" the whole name is used, e.g. "postings" gives
// "blogpostings.txt"; without that check npos + 2 wraps around to 1 and the
// first character of the name is silently dropped.
static std::string output_name(const std::string &input_name)
{
    const std::string::size_type marker = input_name.find("p=");
    const std::string stem =
        (marker == std::string::npos) ? input_name : input_name.substr(marker + 2);
    return "blog" + stem + ".txt";
}

// Writes one "CATEGORY: name" line, to `out` and to stdout, for every
// `"?tag=name"` link found on `line`.
// The name ends at the quote closing the attribute. The original code located
// it two characters before the following "rel", which broke when "rel" was
// missing from the line or occurred inside the tag name itself.
static void write_categories(const std::string &line, std::ostream &out)
{
    static const char kTagOpen[] = "\"?tag=";
    const std::string::size_type kTagOpenLen = sizeof(kTagOpen) - 1;

    std::string::size_type begin = line.find(kTagOpen);
    while (begin != std::string::npos)
    {
        const std::string::size_type name_begin = begin + kTagOpenLen;
        const std::string::size_type name_end = line.find('"', name_begin);
        if (name_end == std::string::npos) break;   // unterminated attribute

        const std::string tag =
            "CATEGORY: " + line.substr(name_begin, name_end - name_begin);
        out << tag << '\n';
        std::cout << tag << std::endl;
        begin = line.find(kTagOpen, name_end);
    }
}

// Converts saved Yahoo 360 blog pages into a Movable Type import file.
//
// argv[1] names the input: one saved page, or several pages concatenated.
// The result is written to the file named by output_name(argv[1]); progress
// is echoed on stdout. Every post produces
//   AUTHOR / TITLE / STATUS / ALLOW COMMENTS / CATEGORY... / DATE
//   ----- / BODY: / <body> / --------
// A post is expected to appear as title, then body, then tags, then date;
// the date line closes the entry and the next title opens a new one.
// Returns 0 on success, 1 on a usage error or when a file cannot be opened.
int main(int argc,char *argv[])
{
    const std::string::size_type npos = std::string::npos;
    static const char kTitleOpen[] = "<dt class=\"post-head\">";
    const std::string::size_type kTitleOpenLen = sizeof(kTitleOpen) - 1;
    static const char kBodyEnd[] = "<div class=\"foot\">";

    if (argc < 2)
    {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "parse")
                  << " <saved blog page>" << std::endl;
        return 1;
    }

    std::cout << argv[1] << std::endl;
    const std::string ofilename = output_name(argv[1]);
    std::cout << ofilename << std::endl;

    // Open the input first so that a bad name does not leave a stub output file.
    std::ifstream in(argv[1]);
    if (!in)
    {
        std::cerr << "Couldn't open file" << std::endl;
        return 1;
    }
    std::ofstream out(ofilename.c_str());
    if (!out)
    {
        std::cerr << "Couldn't create " << ofilename << std::endl;
        return 1;
    }

    std::string d;              // current input line
    std::string hold;           // raw body text of the entry being read
    std::string body;           // body text as it will be written
    bool titlef = false;        // title of the current entry has been written
    bool bodyf = false;         // body of the current entry has been captured
    // True between the date line of one entry and the title of the next.
    // While it is set only a title is looked for, so tags, bodies and dates
    // found elsewhere on the page cannot leak into the output.
    bool entry_closed = false;

    out << "AUTHOR: Wayne" << '\n';
    while (std::getline(in, d))
    {
        if (!titlef)
        {
            const std::string::size_type open = d.find(kTitleOpen);
            if (open != npos)
            {
                // Search after the opening tag: a "</dt>" earlier on the line
                // would otherwise yield a wrapped-around length.
                const std::string::size_type text_begin = open + kTitleOpenLen;
                const std::string::size_type text_end = d.find("</dt>", text_begin);
                if (text_end != npos)
                {
                    if (entry_closed)
                    {
                        // A further post in a concatenated input: start a new entry.
                        out << "AUTHOR: Wayne" << '\n';
                        entry_closed = false;
                    }
                    const std::string title = d.substr(text_begin, text_end - text_begin);
                    std::cout << "TITLE: " << title << std::endl;
                    out << "TITLE: " << title << '\n'
                        << "STATUS: Publish" << '\n'
                        << "ALLOW COMMENTS: 1" << '\n';
                    titlef = true;
                }
            }
        }

        if (!bodyf && !entry_closed)
        {
            if (d.find("<div class=\"content-wrapper\">") != npos)
            {
                // The body is every line up to, not including, the footer line.
                // Afterwards `d` holds the footer line, which is still checked
                // for tags and the date below.
                while (std::getline(in, d) && d.find(kBodyEnd) == npos)
                {
                    hold += d;
                }
                // Drop the last six characters (presumably the "</div>" that
                // closes the content wrapper); shorter text is kept whole.
                body = (hold.size() >= 6) ? hold.substr(0, hold.size() - 6) : hold;
                std::cout << "-----" << std::endl << "BODY: " << std::endl
                          << body << "-----" << std::endl;
                bodyf = true;
            }
        }

        if (!entry_closed)
        {
            write_categories(d, out);

            std::string::size_type zone = d.find("(EDT)"); //change this to match the time zone of your articles
            if (zone == npos) zone = d.find("(EST)");//change this to match the time zone of your articles
            const std::string::size_type para = d.find("<p>");
            if (zone != npos && para != npos && para + 3 <= zone)
            {
                const std::string raw_date = d.substr(para + 3, zone - para - 3);

                // Zero the structure so that fields strptime does not set are
                // well defined when strftime reads them.
                std::tm ttt = std::tm();
                if (!strptime(raw_date.c_str(), "%a %b %d, %Y - %r", &ttt))
                {
                    std::cerr << "Warning: could not fully parse date \""
                              << raw_date << "\"" << std::endl;
                }
                char datec[100] = "";
                std::strftime(datec, sizeof(datec), "%m/%d/%Y %R:00", &ttt);

                std::cout << "DATE: " << datec << std::endl;
                out << "DATE: " << datec << '\n';
                out << "-----" << '\n' << "BODY: " << '\n' << body << '\n'
                    << "--------" << '\n';

                // Entry finished: get ready for the next post in the input.
                titlef = false;
                bodyf = false;
                hold.clear();
                body.clear();
                entry_closed = true;
            }
        }
    }
    return 0;
}

Compile and run this code passing the name of the file you want to convert. You can take the files from the previous step and put them into one file with the command
cat blog* >> postings
If you call the program parse the command would be
./parse postings
You can then take the resulting .txt file and import it using the Movable Type import tool of your choice. This has only been tested on b2evolution; if it doesn't work, you have the source, so you can fix it yourself.

Permalink

No feedback yet

Form is loading...

Comment feed for this post

Wayne D's Blog

Cool web tools, EEPC tips and Linux info. Browse around, I'm sure you will find something to interest you.

Search

XML Feeds

RSS 2.0: Posts, Comments
Atom: Posts, Comments

What is RSS?

	`#!/bin/sh`
	`# call as get "last blog post url"`
	`wget "$1 "`
	`i=$(echo "$1" \| sed -n -e 's\|http://*.360.yahoo.com/\(.*\)$\|\1\|p')`
	`echo $i`
	`while [ -n $i ] ; do`
	`n=$(sed -n -e 's\|<span>Previous Post: <a href="\(.*\)">.*$\|\1\|p' $i)`
	`echo $n`
	`wget $n`
	`i=$(echo $n \| sed -n -e 's\|http.*\(blog.*\)\|\1\|p')`
	`while [ ! -e $i ] ; do`
	`wget $n`
	`i=$(echo $n \| sed -n -e 's\|http.*\(blog.*\)\|\1\|p')`
	`done`
	`done`

	`#include <fstream>`
	`#include <iostream>`
	`#include <iomanip>`
	`#include <sstream>`
	`#include <string>`
	`#include <cctype>`

	`using namespace std;`

	`int main(int argc,char *argv[])`
	`{`
	`cout << argv[1] << endl;`
	`ifstream in;`
	`ofstream out;`
	`string iname = argv[1];`
	`string ofilename;`
	`size_t found_begin, found_end;`
	`found_begin = iname.find("p=");`
	`ofilename="blog";`
	`ofilename+=iname.substr(found_begin+2);`
	`ofilename+=".txt";`
	`cout << ofilename << endl;`
	`in.open(argv[1]);`
	`out.open(ofilename.c_str());`
	`stringstream data;`
	`string d;`
	`bool titlef = false;`
	`bool bodyf = false;`
	`bool datef = false;`
	`string title;`
	`string body;`
	`string hold;`
	`string date;`
	`string tag;`
	`struct tm ttt;`
	`if (!in.good() ) cout << "Couldn't open file" << endl;`
	`out <<"AUTHOR: Wayne" << endl ;`
	`while ( in.good() && !(titlef && bodyf && datef))`
	`{`
	`getline(in,d);`
	`if (!titlef)`
	`{`
	`found_begin = d.find("<dt class=\"post-head\">");`
	`if (found_begin!=string::npos)`
	`{`
	`found_end= d.find("</dt>");`
	`if (found_end!=string::npos)`
	`{`
	`title = d.substr(found_begin+22,found_end-found_begin-22);`
	`cout <<"TITLE: " << title << endl ;`
	`out <<"TITLE: " << title << endl << "STATUS: Publish" << endl << "ALLOW COMMENTS: 1"<<endl;`
	`titlef = true;`
	`}`
	`}`
	`}`
	`if (!bodyf)`
	`{`
	`found_begin = d.find("<div class=\"content-wrapper\">");`
	`if (found_begin!=string::npos)`
	`{`
	`getline(in,d);`
	`found_end= d.find("<div class=\"foot\">");`
	`while ( in.good() && (found_end==string::npos))`
	`{`
	`hold=hold+d;`
	`getline(in,d);`
	`found_end= d.find("<div class=\"foot\">");`
	`}`
	`body = hold.substr(0,hold.length()-6);`
	`cout << "-----" << endl << "BODY: " << endl << body << "-----" << endl ;`
	`bodyf = true;`
	`}`
	`}`
	`found_begin=d.find("\"?tag=");`
	`if (found_begin!=string::npos)`
	`{`
	`found_end=d.find("rel",found_begin)-2;`
	`tag="CATEGORY: " + d.substr(found_begin+6,found_end-found_begin-6);`
	`out << tag << endl;`
	`cout << tag << endl;`
	`found_begin=d.find("\"?tag=",found_end);`
	`while (found_begin!=string::npos)`
	`{`
	`found_end=d.find("rel",found_begin)-2;`
	`tag=",";`
	`tag="CATEGORY: " + d.substr(found_begin+6,found_end-found_begin-6);`
	`out << tag << endl;`
	`cout << tag << endl;`
	`found_begin=d.find("\"?tag=",found_end);`
	`}`
	`}`
	`if (!datef)`
	`{`
	`found_end = d.find("(EDT)"); //change this to match the time zone of your articles`
	`if (found_end==string::npos) found_end = d.find("(EST)");//change this to match the time zone of your articles`
	`if (found_end!=string::npos)`
	`{`
	`found_begin= d.find("<p>");`
	`if (found_begin!=string::npos)`
	`{`
	`date = d.substr(found_begin+3,found_end-found_begin-3);`
	`datef = true;`
	`strptime(date.c_str(),"%a %b %d, %Y - %r", &ttt);`
	`char datec[100];`
	`strftime(datec,99,"%m/%d/%Y %R:00",&ttt);`
	`date = datec;`
	`cout <<"DATE: " << date << endl ;`
	`out <<"DATE: " << date << endl ;`
	`out << "-----" << endl << "BODY: " << endl << body << endl <<"--------" << endl ;`
	`}`
	`}`
	`}`

	`}`
	`out.close();`
	`in.close();`
	`}`