package wordcount
import ( "bufio" "fmt" "io" "log" "os" "sort" "strings" "unicode" "unicode/utf8" )
// Pair is a string key together with an integer value. SortReport uses it to
// hold one word and its frequency so the entries can be sorted.
type Pair struct {
	Key   string
	Value int
}

// PairList is a slice of Pair that implements sort.Interface. Sorting a
// PairList orders it from the highest Value to the lowest.
type PairList []Pair

// Len returns the number of pairs in the list.
func (p PairList) Len() int { return len(p) }

// Swap exchanges the pairs at indexes i and j.
func (p PairList) Swap(i, j int) { p[i], p[j] = p[j], p[i] }

// Less reports whether the pair at index i must be placed before the pair at
// index j: larger values come first.
//
// Pairs with equal values are ordered by ascending Key. Without that
// tie-break the relative order of equal counts would depend on the input
// order (map iteration order, in SortReport) because sort.Sort is not stable.
func (p PairList) Less(i, j int) bool {
	if p[i].Value != p[j].Value {
		return p[i].Value > p[j].Value
	}
	return p[i].Key < p[j].Key
}
// SplitOnNonLetters cuts s at every rune that is not a Unicode letter and
// returns the runs of letters found in between. Digits, punctuation and
// whitespace all act as separators, and no empty strings are returned.
func SplitOnNonLetters(s string) []string {
	return strings.FieldsFunc(s, func(r rune) bool {
		return !unicode.IsLetter(r)
	})
}
// WordCount maps a word to the number of times it has been counted.
type WordCount map[string]int

// Merge adds every count found in wordcount to the matching entry of source
// and returns the merged map.
//
// A non-nil source is modified in place and is the map that is returned. A
// nil source cannot be written to, so in that case a new map is allocated and
// returned instead; callers should therefore always use the return value.
// wordcount is only read and may be nil or empty.
func (source WordCount) Merge(wordcount WordCount) WordCount {
	if source == nil {
		// Assigning to an entry of a nil map panics.
		source = make(WordCount, len(wordcount))
	}
	for word, count := range wordcount {
		source[word] += count
	}
	return source
}
func (wordcount WordCount) Report() { words := make([]string, 0, len(wordcount)) wordWidth, frequencyWidth := 0, 0 for word, frequency := range wordcount { words = append(words, word) if width := utf8.RuneCountInString(word); width > wordWidth { wordWidth = width } if width := len(fmt.Sprint(frequency)); width > frequencyWidth { frequencyWidth = width } } sort.Strings(words) gap := wordWidth + frequencyWidth - len("Word") - len("Frequency") fmt.Printf("Word %*s%s\n", gap, " ", "Frequency") for _, word := range words { fmt.Printf("%-*s %*d\n", wordWidth, word, frequencyWidth, wordcount[word]) } }
func (wordcount WordCount) SortReport() { p := make(PairList, len(wordcount)) i := 0 for k, v := range wordcount { p[i] = Pair{k, v} i++ }
sort.Sort(p)
wordWidth, frequencyWidth := 0, 0 for _, pair := range p { word, frequency := pair.Key, pair.Value if width := utf8.RuneCountInString(word); width > wordWidth { wordWidth = width } if width := len(fmt.Sprint(frequency)); width > frequencyWidth { frequencyWidth = width } } gap := wordWidth + frequencyWidth - len("Word") - len("Frequency") fmt.Printf("Word %*s%s\n", gap, " ", "Frequency")
for _, pair := range p { fmt.Printf("%-*s %*d\n", wordWidth, pair.Key, frequencyWidth, pair.Value) }
}
func (wordcount WordCount) UpdateFreq(filename string) { var file *os.File var err error
if file, err = os.Open(filename); err != nil { log.Println("failed to open the file: ", err) return } defer file.Close()
reader := bufio.NewReader(file) for { line, err := reader.ReadString('\n') for _, word := range SplitOnNonLetters(strings.TrimSpace(line)) { if len(word) > utf8.UTFMax || utf8.RuneCountInString(word) > 1 { wordcount[strings.ToLower(word)] += 1 } } if err != nil { if err != io.EOF { log.Println("failed to finish reading the file: ", err) } break } } }
func (wordcount WordCount) WordFreqCounter(files []string) {
results := make(chan Pair, len(files)) done := make(chan struct{}, len(files))
for i := 0; i < len(files); { go func(done chan<- struct{}, results chan<- Pair, filename string) { wordcount := make(WordCount) wordcount.UpdateFreq(filename) for k, v := range wordcount { pair := Pair{k, v} results <- pair } done <- struct{}{} }(done, results, files[i])
i++ }
for working := len(files); working > 0; { select { case pair := <-results: wordcount[pair.Key] += pair.Value
case <-done: working--
} }
DONE: for { select { case pair := <-results: wordcount[pair.Key] += pair.Value default: break DONE } }
close(results) close(done)
}
|