やりたいこと
jQuery Deferredを使ってコールバックを順番に処理する。
プログラム
食べログAPIから取得した内容をMongoDBに保存するプログラムを作成した。
画像APIにもアクセスするため、通常のコールバックだと階層がやたら深くなる。
しかし、jQuery Deferredを使うと処理を順番につなげて書けるため、大分見やすくなっている。
#!/usr/bin/env node
//===============================================
// Load Libraries
var cheerio = require("cheerio");
var request = require("request");
var yaml = require("yaml");
var fs = require("fs");
var command = require("commander");
var mongoose = require("mongoose");
var log4js = require('log4js');
var jquery = require('jquery');
//===============================================
// Logging Setting
// Configure log4js with one "dateFile" appender and expose the logger that
// the rest of the script writes to.
var logger = (function () {
  // Appender settings: output file plus the date pattern handed to log4js.
  var dailyFileAppender = {
    "type": "dateFile",
    "filename": "./logs/insert_tabelog_datas.log",
    "pattern": "-yyyy-MM-dd"
  };

  log4js.configure({ appenders: [dailyFileAppender] });

  return log4js.getLogger("dateFile");
}());
//===============================================
// Load config file
// Read the YAML configuration synchronously as UTF-8 text. The path is
// relative, so it is resolved against the process working directory rather
// than this script's location.
var configData = fs.readFileSync("./config/config.yml","utf8");
// Parse the YAML text into `config`; this script reads
// config.tabelog_api (base_url, api_key) and config.event_db (host, event_data).
// NOTE(review): yaml.eval is assumed to be this yaml package's parse entry
// point — confirm against the installed version.
var config = yaml.eval(configData);
//===============================================
// Make request URL
// Builds `requestUrl` for the restaurant search from the configured base URL,
// the API key and the command-line options.
var requestBaseUrl = config.tabelog_api.base_url;
var apiKey = config.tabelog_api.api_key;

// Parse command options; commander also generates the help output from them.
command
  .version('1.0.0')
  .usage('[option]')
  .option('-p, --prefecture <String>', 'prefecture name (default japan)')
  // Wrap parseInt so that commander's second callback argument is never
  // interpreted as the radix.
  .option('-n, --pagenum <n>', 'page number (max 60)', function (value) {
    return parseInt(value, 10);
  })
  .parse(process.argv);

// Defaults apply when an option is missing (or, for the page number, when it
// is not a usable non-zero integer).
var pageNumber = 1;
var prefecture = "japan";
if (command.prefecture) prefecture = command.prefecture;
if (command.pagenum) pageNumber = command.pagenum;

// Percent-encode every dynamic value so characters such as "&", "=" or
// non-ASCII text cannot corrupt the query string.
var queryParams = [
  "Prefecture=" + encodeURIComponent(prefecture),
  "PageNum=" + encodeURIComponent(pageNumber),
  "ResultSet=large",
  "Key=" + encodeURIComponent(apiKey)
];

// join
var requestUrl = requestBaseUrl + "?" + queryParams.join("&");
//===============================================
// DB setting
// Open the mongoose connection to the event database named in the config and
// log the outcome of the connection attempt.
var db = (function () {
  var eventDb = config.event_db;

  // Connection callback: log success, or the target that could not be reached.
  var onConnectAttempt = function (err) {
    if (!err) {
      logger.info("Connection Success!");
      return;
    }
    logger.error("Connection Fail. mongodb://" + eventDb.host + "/" + eventDb.event_data);
  };

  return mongoose.connect('mongodb://' + eventDb.host + '/' + eventDb.event_data, onConnectAttempt);
}());
// Schema holding three numeric fields (cur_prefecture, cur_pagenum,
// max_pagenum) plus creation/update timestamps that default to Date.now.
// NOTE(review): nothing in the visible part of this file references this
// schema; the field names suggest it is meant to record crawl progress —
// confirm before relying on it.
var InsertTabelogDataConfigSchema = new mongoose.Schema({
cur_prefecture : { type: Number, default: 0 },
cur_pagenum : Number,
max_pagenum : Number,
created_time : { type: Date, default: Date.now },
update_time : { type: Date, default: Date.now }
});
// Schema for one stored restaurant record.
// - event_id is overwritten by the pre('save') hook registered on this schema.
// - genre_id through longitude are the keys produced by
//   parse_tabelogAPI_response; image and description are filled in later by
//   the image and description fetch steps.
// - created_time / update_time default to Date.now when the document is built.
var EventsSchema = new mongoose.Schema({
event_id : { type: Number, default: 0 },
genre_id : Number,
title : String,
image : String,
description : String,
url : String,
station : String,
address : String,
business_hour: String,
holiday : String,
latitude : Number,
longitude : Number,
created_time : { type: Date, default: Date.now },
update_time : { type: Date, default: Date.now }
});
// Pre-save hook: give every newly created document a sequential event_id
// taken from a counter document in the 'current_event_id' collection.
EventsSchema.pre('save', function (next) {
  // Documents that were already saved keep the event_id they have.
  if (!this.isNew) return next();

  var doc = this;
  var collectionName = doc.collection.name;

  // findAndModify command that increments (or creates) the counter entry
  // keyed by this model's collection name.
  var counterCommand = {
    findAndModify: 'current_event_id', // 'command name': 'target collection name'
    query: { name: collectionName }, // search filter
    update: { $set: { name: collectionName }, $inc: { sequence: 1 } },
    new: true, // ask for the document as it is after the update
    upsert: true // insert the counter when no match exists
  };

  doc.db.db.executeDbCommand(counterCommand, function (err, data) {
    if (err) {
      next(err);
      return;
    }

    var reply = data.documents[0];
    if (!reply.ok) {
      next(new Error(reply.errmsg));
      return;
    }

    // Store the freshly incremented sequence value on the document.
    doc.event_id = reply.value.sequence;
    next();
  });
});
// Register EventsSchema as a model under the name "events_001s";
// insert_restaurant_data instantiates this model for each restaurant.
var Events = db.model("events_001s", EventsSchema);
//===============================================
// Get xml data about restaurant from tabelogAPI
/**
 * Request the tabelog API and load the XML body with cheerio.
 *
 * @param {string} api_request_url - Fully built API URL.
 * @returns {Object} jQuery promise. Resolved with the cheerio instance built
 *   from the response body; rejected with an Error when the request fails or
 *   the status code is not 200.
 */
function fetch_tabelogAPI_response(api_request_url) {
  var deferred = jquery.Deferred();
  request({url: api_request_url}, function (error, response, body) {
    if (error) {
      // Pass the transport error through so its message and stack survive.
      deferred.reject(error instanceof Error ? error : new Error(String(error)));
      return;
    }
    if (response.statusCode !== 200) {
      deferred.reject(new Error("tabelogAPI request failed with status code " + response.statusCode));
      return;
    }
    logger.info("response statusCode : " + response.statusCode);
    // The parsed document is handed to the next step through the promise
    // instead of being leaked into a global variable.
    deferred.resolve(cheerio.load(body, {ignoreWhitespace: true, xmlMode: true}));
  });
  return deferred.promise();
}
//===============================================
// Parse response data from tabelogAPI
/**
 * Convert every "Item" element of the API response into a plain record.
 *
 * Fields are read by their position among the item's child elements, in the
 * same positions the API response is expected to use (0 = rcd, 1 = title,
 * 2 = url, 12 = station, 13 = address, 15 = business hour, 16 = holiday,
 * 17 = latitude, 18 = longitude).
 *
 * @param {Function} response - cheerio instance resolved by
 *   fetch_tabelogAPI_response.
 * @returns {Array<Object>} One record per item. `image` and `description`
 *   start empty and `rcd` is kept for the later image lookup.
 */
function parse_tabelogAPI_response(response) {
  // Text of the first child node of the element at `position`, or "" when the
  // element is missing or empty (for example an empty holiday element).
  function field_text(fields, position) {
    var field = fields[position];
    var first = field && field.children && field.children[0];
    return (first && first.data != null) ? first.data : "";
  }

  var restaurant_data = [];
  response("Item").each(function (i, xmlItem) {
    // Query the children once per item, through the document that was passed
    // in rather than through a global.
    var fields = response(xmlItem).children();
    var rcd = field_text(fields, 0);
    var data = {
      genre_id: 1,
      title: field_text(fields, 1),
      image: "",
      description: "",
      url: field_text(fields, 2),
      station: field_text(fields, 12),
      address: field_text(fields, 13),
      business_hour: field_text(fields, 15),
      holiday: field_text(fields, 16),
      latitude: parseFloat(field_text(fields, 17)),
      longitude: parseFloat(field_text(fields, 18)),
      rcd: rcd
    };
    restaurant_data.push(data);
    logger.info("Retrieve data: " + i);
    logger.info(data);
    logger.info("Rcd : " + rcd);
  });
  return restaurant_data;
}
//===============================================
// Get image URL from tabelogAPI
// URL prefix for the review-image search endpoint, including the API key;
// parse_tabelog_imageAPI_response appends a restaurant's rcd value to it.
// Unlike the restaurant search URL, host and path are hard-coded here rather
// than read from config.
var image_api_base_url = "http://api.tabelog.com/Ver1/ReviewImageSearch/?Key=" + apiKey + "&Rcd=";
/**
 * Look up the image URL of every restaurant, one request after another.
 *
 * Each step is parse_tabelog_imageAPI_response, which receives and returns
 * the pair [index of the next restaurant, restaurant list].
 *
 * @param {Array<Object>} data - Restaurant records carrying an `rcd` key.
 * @returns {Object} jQuery promise resolved with the same list once every
 *   lookup has finished, or rejected with the error of the first failed step.
 */
function fetch_restaurant_image_url(data) {
  var deferred = jquery.Deferred();

  // Nothing to look up: resolve right away instead of indexing into an
  // empty list.
  if (!data || data.length === 0) {
    return deferred.resolve(data || []).promise();
  }

  var result = parse_tabelog_imageAPI_response([0, data]);
  for (var idx = 1; idx < data.length; idx++) {
    result = result.then(parse_tabelog_imageAPI_response);
  }

  result.then(
    function (arg) {
      deferred.resolve(arg[1]);
    },
    function (err) {
      // Forward failures so the caller's chain does not stay pending forever.
      deferred.reject(err);
    }
  );
  return deferred.promise();
}
/**
 * Fetch the review-image search result for one restaurant and store the image
 * URL on its record.
 *
 * @param {Array} data - Pair [idx, info]: the index of the restaurant to
 *   process and the full restaurant list.
 * @returns {Object} jQuery promise resolved with [idx + 1, info] so the next
 *   chained call handles the following restaurant; rejected with an Error when
 *   the request fails or the status code is not 200.
 */
function parse_tabelog_imageAPI_response(data) {
  var deferred = jquery.Deferred();
  var idx = data[0];
  var info = data[1];
  var image_api_url = image_api_base_url + info[idx]["rcd"];
  request({url: image_api_url}, function (error, response, body) {
    if (error) {
      deferred.reject(error instanceof Error ? error : new Error(String(error)));
      return;
    }
    if (response.statusCode !== 200) {
      deferred.reject(new Error("image API request for rcd " + info[idx]["rcd"] +
        " failed with status code " + response.statusCode));
      return;
    }
    logger.info("response statusCode : " + response.statusCode);

    // Local document handle; nothing outside this callback needs it.
    var $ = cheerio.load(body, {ignoreWhitespace: true, xmlMode: true});

    // The image URL is read from the third child of the matched "Item"
    // element(s). A restaurant without any image yields no such node; keep the
    // record's current image value in that case instead of throwing inside
    // the request callback.
    var image_node = $("Item").children()[2];
    var image_text = image_node && image_node.children && image_node.children[0];
    if (image_text && image_text.data) {
      info[idx]["image"] = image_text.data;
    } else {
      logger.warn("no image found for rcd " + info[idx]["rcd"]);
    }
    logger.info("image url of rcd " + info[idx]["rcd"] + " : " + info[idx]["image"]);

    deferred.resolve([idx + 1, info]);
  });
  return deferred.promise();
}
//===============================================
// Get restaurant description:
/**
 * Fetch the description of every restaurant, one page request after another.
 *
 * Each step is parse_tabelog_description_response, which receives and returns
 * the pair [index of the next restaurant, restaurant list].
 *
 * @param {Array<Object>} data - Restaurant records carrying a `url` key.
 * @returns {Object} jQuery promise resolved with the same list once every
 *   page has been processed, or rejected with the error of the first failed
 *   step.
 */
function fetch_restaurant_description(data) {
  var deferred = jquery.Deferred();

  // Nothing to fetch: resolve right away instead of indexing into an empty
  // list.
  if (!data || data.length === 0) {
    return deferred.resolve(data || []).promise();
  }

  var result = parse_tabelog_description_response([0, data]);
  for (var idx = 1; idx < data.length; idx++) {
    result = result.then(parse_tabelog_description_response);
  }

  result.then(
    function (arg) {
      deferred.resolve(arg[1]);
    },
    function (err) {
      // Forward failures so the caller's chain does not stay pending forever.
      deferred.reject(err);
    }
  );
  return deferred.promise();
}
/**
 * Download one restaurant page and store the text of its first "p.comment"
 * element as the record's description.
 *
 * @param {Array} data - Pair [idx, info]: the index of the restaurant to
 *   process and the full restaurant list.
 * @returns {Object} jQuery promise resolved with [idx + 1, info] so the next
 *   chained call handles the following restaurant; rejected with an Error when
 *   the request fails or the status code is not 200.
 */
function parse_tabelog_description_response(data) {
  var deferred = jquery.Deferred();
  var idx = data[0];
  var info = data[1];
  request({url: info[idx]["url"]}, function (error, response, body) {
    if (error) {
      deferred.reject(error instanceof Error ? error : new Error(String(error)));
      return;
    }
    if (response.statusCode !== 200) {
      deferred.reject(new Error("description request for " + info[idx]["url"] +
        " failed with status code " + response.statusCode));
      return;
    }
    logger.info("response statusCode : " + response.statusCode);

    // Local document handle; nothing outside this callback needs it.
    var $ = cheerio.load(body, {ignoreWhitespace: true, xmlMode: true});
    info[idx]["description"] = $("p.comment").first().text();

    deferred.resolve([idx + 1, info]);
  });
  return deferred.promise();
}
//===============================================
// Delete unuseful field
/**
 * Strip the helper key "rcd" from every record before it is stored.
 *
 * @param {Array<Object>} data - Restaurant records; modified in place.
 * @returns {Object} An already-resolved jQuery Deferred carrying the same
 *   list, so the function can sit in the `.then` chain.
 */
function delete_unuseful_field(data) {
  var finished = jquery.Deferred();
  var position = 0;
  while (position < data.length) {
    delete data[position]["rcd"];
    position += 1;
  }
  return finished.resolve(data);
}
//===============================================
// Insert data for mongo
/**
 * Save every restaurant record through the Events model.
 *
 * Saves are started together; the returned promise settles only after every
 * save callback has run. A failed save is logged and does not abort the
 * others (best effort), so the promise always resolves.
 *
 * @param {Array<Object>} data - Restaurant records to store.
 * @returns {Object} jQuery promise resolved with `data` once all saves have
 *   completed.
 */
function insert_restaurant_data(data) {
  var deferred = jquery.Deferred();
  var pending = data.length;

  if (pending === 0) {
    return deferred.resolve(data).promise();
  }

  // forEach gives each callback its own `item`; a shared loop index would
  // already point past the end of the list when the callbacks fire.
  data.forEach(function (item) {
    var newPost = new Events(item);
    newPost.save(function (err) {
      if (err) {
        logger.error("insert error :" + JSON.stringify(item) + " (" + err + ")");
      } else {
        logger.info("insert is successful with " + JSON.stringify(item));
      }
      pending -= 1;
      if (pending === 0) {
        deferred.resolve(data);
      }
    });
  });

  return deferred.promise();
}
//===============================================
// [Main] Analyze response
// Runs the whole import as one promise chain; each step receives the value
// the previous step resolved with.
logger.info("request url: " + requestUrl);
// Fetch restaurant data
fetch_tabelogAPI_response(requestUrl)
  // Parse the data
  .then(parse_tabelogAPI_response)
  // Get image URL from tabelogAPI
  .then(fetch_restaurant_image_url)
  // Get description from restaurant page
  .then(fetch_restaurant_description)
  // Delete unuseful field of data
  .then(delete_unuseful_field)
  // Insert retrieval data to mongo
  .then(insert_restaurant_data)
  // output
  .then(function (data) {
    console.log(data);
  })
  // Report a rejection from any step instead of ending silently.
  .fail(function (err) {
    logger.error("tabelog import failed: " + (err && err.stack ? err.stack : err));
  });