Contents
HaskellでGETするにはNetwork.HTTPモジュールが便利。http://suggestqueries.google.com/complete/search?hl=ja&ie=utf_8&oe=utf_8&client=toolbar&q=[search words]
ここで利用している関数の型は以下。import Control.Monad.Trans (liftIO) import Network.HTTP main = do resultxml <- liftIO $ (Network.HTTP.simpleHTTP (Network.HTTP.getRequest ("http://suggestqueries.google.com/complete/search?hl=ja&ie=utf_8&oe=utf_8&client=toolbar&q=" ++ urlEncode("haskell cabal"))) >>= getResponseBody) putStrLn resultxml
取得した結果のXMLは以下の様な感じ。Prelude> import Network.HTTP Prelude Network.HTTP> :t Network.HTTP.simpleHTTP Network.HTTP.simpleHTTP :: HStream ty => Request ty -> IO (Network.Stream.Result (Response ty)) Prelude Network.HTTP> :t Network.HTTP.getRequest Network.HTTP.getRequest :: String -> Request_String Prelude Network.HTTP> :t getResponseBody getResponseBody :: Network.Stream.Result (Response ty) -> IO ty Prelude Network.HTTP> :t urlEncode urlEncode :: String -> String
文字化けしてしまっているので、<?xml version="1.0"?> <toplevel> <CompleteSuggestion> <suggestion data="haskell cabal" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal sandbox" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal install" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal ã¢ãããã¼ã" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal windows" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal ä¾åé¢ä¿" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal 使ã æ¹" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal ã¢ã³ã¤ã³ã¹ãã¼ã«" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal proxy" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal github" /> </CompleteSuggestion> </toplevel>
ちなみに、そのまんまだけど、”decodeString” 関数の型は以下。import Control.Monad.Trans (liftIO) import Network.HTTP import Codec.Binary.UTF8.String as S main = do resultxml <- liftIO $ (Network.HTTP.simpleHTTP (Network.HTTP.getRequest ("http://suggestqueries.google.com/complete/search?hl=ja&ie=utf_8&oe=utf_8&client=toolbar&q=" ++ urlEncode("haskell cabal"))) >>= getResponseBody) putStrLn $ S.decodeString resultxml
文字化け解消後の結果は以下。Prelude> import Codec.Binary.UTF8.String as S Prelude S> :t S.decodeString S.decodeString :: String -> String
<?xml version="1.0"?> <toplevel> <CompleteSuggestion> <suggestion data="haskell cabal" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal sandbox" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal install" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal アップデート" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal windows" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal 依存関係" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal 使い方" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal アンインストール" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal proxy" /> </CompleteSuggestion> <CompleteSuggestion> <suggestion data="haskell cabal github" /> </CompleteSuggestion> </toplevel>
ここでは、Text.XML.Lightモジュールを利用してパージングする。 手始めに、”CompleteSuggestion”要素を取り出してみる。<?xml version="1.0"?> <toplevel> <CompleteSuggestion> <suggestion data="haskell cabal" /> </CompleteSuggestion> </toplevel>
ここで利用している関数の型は以下。import Text.XML.Light main = do case parseXMLDoc "<?xml version=\"1.0\"?><toplevel><CompleteSuggestion><suggestion data=\"haskell cabal\" /></CompleteSuggestion></toplevel>" of Nothing -> putStrLn "Error" Just root -> putStrLn $ show $ findChildren (unqual "CompleteSuggestion") root
そして結果は以下。Prelude> import Text.XML.Light Prelude Text.XML.Light> :t parseXMLDoc parseXMLDoc :: Text.XML.Light.Lexer.XmlSource s => s -> Maybe Element Prelude Text.XML.Light> parseXMLDoc "<?xml version=\"1.0\"?><toplevel><CompleteSuggestion><suggestion data=\"haskell cabal\" /></CompleteSuggestion></toplevel>" Just ( Element { elName = QName { qName = "toplevel", qURI = Nothing, qPrefix = Nothing }, elAttribs = [], elContent = [ Elem ( Element { elName = QName { qName = "CompleteSuggestion", qURI = Nothing, qPrefix = Nothing }, elAttribs = [], elContent = [ Elem ( Element { elName = QName { qName = "suggestion", qURI = Nothing, qPrefix = Nothing }, elAttribs = [ Attr { attrKey = QName { qName = "data", qURI = Nothing, qPrefix = Nothing }, attrVal = "haskell cabal" } ], elContent = [], elLine = Just 1 } ) ], elLine = Just 1 } ) ], elLine = Just 1 } ) Prelude Text.XML.Light> :t unqual unqual :: String -> QName Prelude Text.XML.Light> unqual "CompleteSuggestion" QName {qName = "CompleteSuggestion", qURI = Nothing, qPrefix = Nothing} Prelude Text.XML.Light> :t findChildren findChildren :: QName -> Element -> [Element]
“CompleteSuggestion”要素だけ取得できているから、[ Element { elName = QName { qName = "CompleteSuggestion", qURI = Nothing, qPrefix = Nothing }, elAttribs = [], elContent = [ Elem ( Element { elName = QName { qName = "suggestion", qURI = Nothing, qPrefix = Nothing }, elAttribs = [ Attr { attrKey = QName { qName = "data", qURI = Nothing, qPrefix = Nothing }, attrVal = "haskell cabal" } ], elContent = [], elLine = Just 1 } ) ], elLine = Just 1 } ]
ここで使った”findChild”関数の型は以下。import Text.XML.Light main = do case parseXMLDoc "<?xml version=\"1.0\"?><toplevel><CompleteSuggestion><suggestion data=\"haskell cabal\" /></CompleteSuggestion></toplevel>" of Nothing -> putStrLn "Error" Just root -> putStrLn $ show $ findChild (unqual "suggestion") (findChildren (unqual "CompleteSuggestion") root !! 0)
取得結果は以下。Prelude Text.XML.Light> :t findChild findChild :: QName -> Element -> Maybe Element
Just ( Element { elName = QName { qName = "suggestion", qURI = Nothing, qPrefix = Nothing }, elAttribs = [ Attr { attrKey = QName { qName = "data", qURI = Nothing, qPrefix = Nothing }, attrVal = "haskell cabal" } ], elContent = [], elLine = Just 1 } )
“elAttribs”の型はこんな感じ。import Text.XML.Light main = do case parseXMLDoc "<?xml version=\"1.0\"?><toplevel><CompleteSuggestion><suggestion data=\"haskell cabal\" /></CompleteSuggestion></toplevel>" of Nothing -> putStrLn "Error" Just root -> putStrLn $ show $ case findChild (unqual "suggestion") (findChildren (unqual "CompleteSuggestion") root !! 0) of Nothing -> [] Just elem -> elAttribs elem
これで属性の配列が取得できる。Prelude Text.XML.Light> :t elAttribs elAttribs :: Element -> [Attr]
目的の値は”attrVal”のところにあるから、[ Attr { attrKey = QName { qName = "data", qURI = Nothing, qPrefix = Nothing }, attrVal = "haskell cabal" } ]
これで取得できるようになった。main = do case parseXMLDoc "<?xml version=\"1.0\"?><toplevel><CompleteSuggestion><suggestion data=\"haskell cabal\" /></CompleteSuggestion></toplevel>" of Nothing -> putStrLn "Error" Just root -> putStrLn $ show $ case findChild (unqual "suggestion") (findChildren (unqual "CompleteSuggestion") root !! 0) of Nothing -> "" Just elem -> let [Attr key val] = elAttribs elem in val
"haskell cabal"
これで完成。import Control.Monad.Trans (liftIO) import Network.HTTP import Codec.Binary.UTF8.String as S import Data.List import Text.XML.Light getText :: Element -> String -> String getText parent childName = case findChild (unqual childName) parent of Nothing -> "" Just child -> let [Attr key val] = elAttribs child in val parseCompleteSuggestionTag :: Element -> String parseCompleteSuggestionTag element = getText element "suggestion" main = do resultxml <- liftIO $ (Network.HTTP.simpleHTTP (Network.HTTP.getRequest ("http://suggestqueries.google.com/complete/search?hl=ja&ie=utf_8&oe=utf_8&client=toolbar&q=" ++ urlEncode("haskell cabal"))) >>= getResponseBody) case parseXMLDoc resultxml of Nothing -> putStrLn "parse error" Just root -> putStrLn ("[\"" ++ (Data.List.intercalate "\",\"" $ map S.decodeString $ map (\a -> parseCompleteSuggestionTag a) (findChildren (unqual "CompleteSuggestion") root)) ++ "\"]")
HaskellのウェブフレームワークYesodとこんな感じのXMLパージングスクリプト使って[ "haskell cabal", "haskell cabal sandbox", "haskell cabal install", "haskell cabal アップデート", "haskell cabal windows", "haskell cabal 依存関係", "haskell cabal 使い方", "haskell cabal アンインストール", "haskell cabal proxy", "haskell cabal github" ]