2013年3月9日土曜日

開発環境

Real World Haskell』(Bryan O'SullivanJohn GoerzenDon Stewart(著)、山下 伸夫伊東 勝利株式会社タイムインターメディア(翻訳)、オライリー・ジャパン、2009年、ISBN978-4-87311-423-3)の8章(効率的なファイル処理、正規表現、ファイル名マッチング)の8.7(作成したパターン照合器を使う)の練習問題3.を解いてみる。

3.

コード(BBEdit)

GlobRegex.hs

-- file: GlobRegex.hs
module GlobRegex
    (
      globToRegex
    , matchesGlob
    ) where

import Data.Char (toUpper)
import Text.Regex.Posix ((=~))

-- Trueの場合は大文字小文字を区別しない
-- Flaseの場合は大文字小文字を区別する
type Ignore = Bool
globToRegex :: String -&lgt; Ignore -&lgt; String
globToRegex cs i | i = '^': (map toUpper (globToRegex' cs)) ++ "$"
                 | otherwise = '^': globToRegex' cs ++ "$"

globToRegex' :: String  -&lgt; String
globToRegex' "" = ""
-- 拡張ワイルドカード構文用に追加
globToRegex' ('*':'*':cs) = ".*" ++ globToRegex' cs
globToRegex' ('*':cs) = ".*" ++ globToRegex' cs
globToRegex' ('?':cs) = '.' : globToRegex' cs
globToRegex' ('[':'!':c:cs) = "[^" ++ c:charClass cs
globToRegex' ('[':c:cs) = '[':c:charClass cs
globToRegex' ('[':_) = error "unterminated character class"

globToRegex' (c:cs) = escape c ++ globToRegex' cs

escape :: Char -&lgt; String
escape c | c `elem` regexChars = '\\':[c]
         | otherwise = [c]
    where regexChars = "\\+()^$.{}]|"

charClass :: String -&lgt; String
charClass (']':cs) = ']' : globToRegex' cs
charClass (c:cs) = c: charClass cs
charClass [] = error "unterminated character class"

matchesGlob :: Ignore -&lgt; String -&lgt; FilePath -&lgt; Bool
matchesGlob i pat name | i = (map toUpper name) =~ globToRegex pat i
                       | otherwise = name =~ globToRegex pat i

Glob.hs

-- file: Glob.hs
module Glob (namesMatching) where

import Data.List (isInfixOf)
import System.Directory (doesDirectoryExist, doesFileExist,
                         getCurrentDirectory, getDirectoryContents)
import System.FilePath (dropTrailingPathSeparator, splitFileName, (</&lgt;), pathSeparator)
import System.Posix.Files (fileExist)
-- Control.Exceptionだと上手くいかなかったのでOldExceptionに修正
-- ただし、将来のバージョンでは使えなくなるみたい(その旨が警告された)
-- かといって、今のところ修正方法が分からない。。
import Control.OldException (handle)
import Control.Monad (forM)
import GlobRegex (matchesGlob)

isPattern :: String -&lgt; Bool
isPattern = any (`elem`"[*?")

-- 拡張ワイルドカード構文か調べる
isDeep :: String -&lgt; Bool
isDeep = isInfixOf "**"

namesMatching pat i
    | not (isPattern pat) = do
-- 書き換えた箇所
      exists <- fileExist pat
      return (if exists then [pat] else [])
-- 拡張ワイルドカード構文の場合
    | isDeep pat = do
      names <- getAllFileNames $ head pat
      return (filter (matchesGlob i pat) names)
    | otherwise = do
      case splitFileName pat of
          ("", baseName) -&lgt; do
              curDir <- getCurrentDirectory
              listMatches curDir baseName
          (dirName, baseName) -&lgt; do
              dirs <- if isPattern dirName
                          then namesMatching (dropTrailingPathSeparator dirName) i
                          else return [dirName]
              let listDir = if isPattern baseName
                                then listMatches
                                else listPlain
              pathNames <- forM dirs $ \dir -&lgt; do
                                baseNames <- listDir dir baseName
                                return (map (dir </&lgt;) baseNames)
              return (concat pathNames)
getAllFileNames :: Char -&lgt; IO [FilePath]
getAllFileNames c = do
    contents <- getDirectoryContents "."
    getAllFileNames' (return []) "" contents
    where start | c == pathSeparator = [pathSeparator]
                | otherwise = "."
          pre | c == pathSeparator = [pathSeparator]
              | otherwise = ""
getAllFileNames' :: IO[FilePath] -&lgt; FilePath -&lgt; [FilePath] -&lgt; IO [FilePath]
getAllFileNames' x _ [] = x
getAllFileNames' x pre (f:fs) | f == "." = getAllFileNames' x pre fs
                              | f == ".." = getAllFileNames' x pre fs
                              | otherwise = do
                                  exists <- doesDirectoryExist (pre ++ f)
                                  contents <- if exists
                                              then getDirectoryContents (pre ++ f)
                                              else return []
                                  if exists
                                  then do
                                      rest <- (getAllFileNames' x pre fs)
                                      recur <- (getAllFileNames' (return []) (pre ++ f ++ [pathSeparator]) contents)
                                      return (rest ++ recur)
                                  else do
                                      x' <- x
                                      getAllFileNames' (return ((pre ++ f):x')) pre fs
{-
doesNameExist :: FilePath -&lgt; IO Bool
doesNameExist name = do
    fileExists <- doesFileExist name
    if fileExists
      then return True
      else doesDirectoryExist name
-}

listMatches :: FilePath -&lgt; String -&lgt; IO [String]
listMatches dirName pat = do
    dirName' <- if null dirName
                then getCurrentDirectory
                else return dirName
    handle (const (return [])) $ do
        names <- getDirectoryContents dirName'
        let names' = if isHidden pat
                     then filter isHidden names
                     else filter (not . isHidden) names
-- ここのpathSeparatorでUNIXかWindowsか判定
        let i = if pathSeparator == '/'
                then True
                else False
        return (filter (matchesGlob i pat) names')

isHidden :: String -&lgt; Bool
isHidden ('.':_) = True
isHidden _ = False

listPlain :: FilePath -&lgt; String -&lgt; IO [String]
listPlain dirName baseName = do
    exists <- if null baseName
                then doesDirectoryExist dirName
-- 書き換えた箇所
                else fileExist (dirName </&lgt; baseName)
    return (if exists then [baseName] else [])

入出力結果(Terminal, ghci)

$ ghci
GHCi, version 7.4.2: http://www.haskell.org/ghc/  :? for help
Loading package ghc-prim ... linking ... done.
Loading package integer-gmp ... linking ... done.
Loading package base ... linking ... done.
Prelude> :load Glob
[1 of 2] Compiling GlobRegex        ( GlobRegex.hs, interpreted )
[2 of 2] Compiling Glob             ( Glob.hs, interpreted )

Glob.hs:12:1:
    Warning: Module `Control.OldException' is deprecated:
               Future versions of base will not support the old exceptions style. Please switch to extensible exceptions.

Glob.hs:12:30:
    Warning: In the use of `handle'
             (imported from Control.OldException):
             Deprecated: "Future versions of base will not support the old exceptions style. Please switch to extensible exceptions."
Ok, modules loaded: Glob, GlobRegex.
*Glob> namesMatching "*.txt" False
Loading package array-0.4.0.0 ... linking ... done.
Loading package bytestring-0.9.2.1 ... linking ... done.
Loading package deepseq-1.3.0.0 ... linking ... done.
Loading package containers-0.4.2.1 ... linking ... done.
Loading package transformers-0.3.0.0 ... linking ... done.
Loading package mtl-2.1.2 ... linking ... done.
Loading package regex-base-0.93.2 ... linking ... done.
Loading package regex-posix-0.95.2 ... linking ... done.
Loading package filepath-1.3.0.0 ... linking ... done.
Loading package old-locale-1.0.0.4 ... linking ... done.
Loading package old-time-1.1.0.0 ... linking ... done.
Loading package unix-2.5.1.1 ... linking ... done.
Loading package directory-1.1.0.2 ... linking ... done.
["./gpl-3.0.txt","./hello-in.txt","./in.txt","./input.txt","./out.txt","./out_tmp.txt","./output.txt","./quux.txt","./Scratchpad.txt","./tmp.txt"]
*Glob> namesMatching "**.txt" False
["tmp.txt","Scratchpad.txt","quux.txt","output.txt","out_tmp.txt","out.txt","input.txt","in.txt","hello-in.txt","gpl-3.0.txt","sample_folder/tmp.txt"]
*Glob> :quit
Leaving GHCi.
$

エラー、修正の繰り返しで途中から自分自身でも何を書いてるのか分からなくなったけど、結果だけ見ると、とりあえずはこれで合ってるのかな。。

0 コメント:

コメントを投稿