用线程计数不同的单词 [英] Counting distinct words with Threads

查看:89
本文介绍了用线程计数不同的单词的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

目标是计算文件中的不同单词.

更新:以前的代码已成功完成.现在我必须做同样的事情,但是要使用 threads (哦,伙计,我讨厌它们...),此外,我还想使用 semaphores 来实现更好的流程. /p>

代码包含以前尝试中遗漏的一些额外内容,我正在尝试找出可以使用的内容.

我一次只能读一个单词,但是在大多数情况下,我在容器中看到一个空"字.因此,直到我一直从容器中得到任何东西之前,我都无法测试 Sorter 类,依此类推...

该程序的新增功能是 WordContainer 类,用于存储一个单词,以将其从阅读器传递到排序器:

 package main2;

import java.util.ArrayList;

public class WordContainer
{
   private ArrayList<String> words;

   public synchronized String take()
   {
      String nextWord = null;
      while (words.isEmpty())
      {
         try
         {
            wait();
         }
         catch (InterruptedException e)
         {
         }
      }

      nextWord = words.remove(0);

      notify();
      return nextWord;
   }

   public synchronized void put(String word)
   {
      while (words.size() > 999)
      {
         try
         {
            wait();
         }
         catch (InterruptedException e)
         {
         }
      }
      words.add(word);
      notify();
   }
}

DataSet类与Sorter方法结合使用,生成 Sorter 类:

    package main2;

import java.util.concurrent.Semaphore;

public class Sorter extends Thread
{
   private WordContainer wordContainer;
   private int top;
   private String[] elements;
   private boolean stopped;
   private Semaphore s;
   private Semaphore s2;

   public Sorter(WordContainer wordContainer, Semaphore s, Semaphore s2)
   {
      this.wordContainer = wordContainer;
      elements = new String[1];
      top = 0;
      stopped = false;
      this.s = s;
      this.s2 = s2;
   }

   public void run()
   {
      String nextWord = wordContainer.take();
      while (nextWord != null)
      {
         try
         {
            s.acquire();
         }
         catch (InterruptedException e)
         {
            e.printStackTrace();
         }

         nextWord = wordContainer.take();
         s2.release();
         add(nextWord);
      }
   }

   public void startSorting()
   {
      start();
   }

   public void stopSorting()
   {
      stopped = true;
   }

   public boolean member(String target)
   {
      if (top > 0)
      {
         return binarySearch(target, 0, top);
      }
      else
      {
         return false;
      }
   }

   private boolean binarySearch(String target, int from, int to)
   {

      if (from == to - 1)
      {
         return elements[from].equals(target);
      }

      int middle = (to - from) / 2 + from;

      if (elements[from].equals(target))
      {
         return true;
      }

      if (elements[middle].compareTo(target) > 0)
      {
         // search left
         return binarySearch(target, from, middle);
      }
      else
      {
         // search right
         return binarySearch(target, middle, to);
      }
   }

   public void add(String nextElement)
   {
      if (top < elements.length)
      {
         elements[top++] = nextElement;
         System.out.println("[" + top + "] " + nextElement);
         sort();
      }
      else
      {
         String[] newArray = new String[elements.length * 2];
         for (int i = 0; i < elements.length; i++)
         {
            newArray[i] = elements[i];
         }
         elements = newArray;
         add(nextElement);
      }
   }

   private void sort()
   {
      int index = 0;

      while (index < top - 1)
      {
         if (elements[index].compareTo(elements[index + 1]) < 0)
         {
            index++;
         }
         else
         {
            String temp = elements[index];
            elements[index] = elements[index + 1];
            elements[index + 1] = temp;
            if (index > 0)
            {
               index--;
            }
         }
      }
   }

   public int size()
   {
      return top;
   }

   public String getSortedWords()
   {
      String w = "";
      for (int i = 0; i < elements.length; i++)
      {
         w += elements[i] + ", ";
      }

      return w;
   }

   public int getNumberOfDistinctWords()
   {
      return top;
   }
}

阅读器类现在看起来像这样:

    package main2;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.concurrent.Semaphore;

public class Reader extends Thread
{
   private static final int whitespace = 45;
   private static final int word = 48;
   private static final int finished = -1;
   private WordContainer wordContainer;
   private Semaphore s;
   private Semaphore s2;
   private String[] wordsR;
   private int state;
   private BufferedReader reader;
   private int nextFreeIndex;

   public Reader(File words, WordContainer wordContainer, Semaphore s,
         Semaphore s2)
   {
      state = whitespace;
      try
      {
         reader = new BufferedReader(new FileReader(words));
      }
      catch (FileNotFoundException e)
      {
         e.printStackTrace();
      }
      nextFreeIndex = 0;
      wordsR = new String[1];
      this.wordContainer = wordContainer;
      this.s = s;
      this.s2 = s;
   }

   public void startReading()
   {
      start();
   }

   public void run()
   {
      String nextWord = readNext();
      while (nextWord != null)
      {
         nextWord = readNext();
         wordContainer.put(nextWord);

         s.release();
         try
         {
            s2.acquire();
         }
         catch (InterruptedException e)
         {
            e.printStackTrace();
         }
      }
   }

   public String readNext()
   {
      int next;
      StringBuffer nextWord = new StringBuffer();

      while (true)
      {
         try
         {
            next = reader.read();
         }
         catch (IOException e)
         {
            next = -1;
         }

         char nextChar = (char) next;

         switch (state)
         {
            case whitespace:
               if (isWhiteSpace(nextChar))
               {
                  state = whitespace;
               }
               else if (next == -1)
               {
                  state = finished;
               }
               else
               {
                  nextWord.append(nextChar);
                  state = word;
               }
               break;
            case word:
               if (isWhiteSpace(nextChar))
               {
                  state = whitespace;
                  return nextWord.toString();
               }
               else if (next == -1)
               {
                  state = finished;
                  return nextWord.toString();
               }
               else
               {
                  nextWord.append(nextChar);
                  state = word;
               }
               break;
            case finished:
               return null;
         }
      }
   }

   private boolean isWhiteSpace(char nextChar)
   {

      switch (nextChar)
      {
         case '-':
         case '"':
         case ':':
         case '\'':
         case ')':
         case '(':
         case '!':
         case ']':
         case '?':
         case '.':
         case ',':
         case ';':
         case '[':
         case ' ':
         case '\t':
         case '\n':
         case '\r':
            return true;
      }
      return false;
   }

   public void close()
   {
      try
      {
         reader.close();
      }
      catch (IOException e)
      {
      }
   }

   public String getWords()
   {
      return wordContainer.take();
   }
}

测试类

package test;

import java.io.File;
import java.io.IOException;
import java.util.concurrent.Semaphore;

import main2.Reader;
import main2.Sorter;
import main2.WordContainer;

import junit.framework.Assert;
import junit.framework.TestCase;

public class TestDistinctWordsWithThreads extends TestCase
{
   public void test() throws IOException, InterruptedException
   {
      File words = new File("resources" + File.separator + "AV1611Bible.txt");

      if (!words.exists())
      {
         System.out.println("File [" + words.getAbsolutePath()
               + "] does not exist");
         Assert.fail();
      }

      WordContainer container = new WordContainer();

      Semaphore s = new Semaphore(0);
      Semaphore s2 = new Semaphore(0);

      Reader reader = new Reader(words, container, s, s2);
      Sorter sorter = new Sorter(container, s, s2);

      reader.startReading();
      sorter.startSorting();

      reader.join();
      sorter.join();

      System.out.println(reader.getWords());
      Assert.assertTrue(sorter.getNumberOfDistinctWords() == 14720);

      /*
       * String bible = reader.getWords(); System.out.println(bible); String[]
       * bible2 = sorter.getSortedWords(); System.out.println(bible2);
       * assertTrue(bible2.length < bible.length());
       */
   }
}

解决方案

您为什么不尝试以下操作:

public int countWords(File file) {
    Scanner sc = new Scanner(file);
    Set<String> allWords = new HashSet<String>();
    while(sc.hasNext()) {
        allWords.add(sc.next());
    }
    return allWords.size();
}

The objective is to count distinct words from a file.

UPDATE: Previous Code was successfully finished. Now I have to do the same but using threads (Oh man, I hate them...) and in addition I want to make it with semaphores for better flow.

Code contains some extra stuff left out from previous attempts, I'm trying to figure out what can be used..

I can read one word at a time but mostly I get a "null" in the container. So until I get anything from the container all the time I can't test the Sorter class and so on...

The new addition to the program is WordContainer class to store one word to pass it from reader to sorter:

 package main2;

import java.util.ArrayList;

public class WordContainer
{
   private ArrayList<String> words;

   public synchronized String take()
   {
      String nextWord = null;
      while (words.isEmpty())
      {
         try
         {
            wait();
         }
         catch (InterruptedException e)
         {
         }
      }

      nextWord = words.remove(0);

      notify();
      return nextWord;
   }

   public synchronized void put(String word)
   {
      while (words.size() > 999)
      {
         try
         {
            wait();
         }
         catch (InterruptedException e)
         {
         }
      }
      words.add(word);
      notify();
   }
}

DataSet Class combined with Sorter method resulting in Sorter Class:

    package main2;

import java.util.concurrent.Semaphore;

public class Sorter extends Thread
{
   private WordContainer wordContainer;
   private int top;
   private String[] elements;
   private boolean stopped;
   private Semaphore s;
   private Semaphore s2;

   public Sorter(WordContainer wordContainer, Semaphore s, Semaphore s2)
   {
      this.wordContainer = wordContainer;
      elements = new String[1];
      top = 0;
      stopped = false;
      this.s = s;
      this.s2 = s2;
   }

   public void run()
   {
      String nextWord = wordContainer.take();
      while (nextWord != null)
      {
         try
         {
            s.acquire();
         }
         catch (InterruptedException e)
         {
            e.printStackTrace();
         }

         nextWord = wordContainer.take();
         s2.release();
         add(nextWord);
      }
   }

   public void startSorting()
   {
      start();
   }

   public void stopSorting()
   {
      stopped = true;
   }

   public boolean member(String target)
   {
      if (top > 0)
      {
         return binarySearch(target, 0, top);
      }
      else
      {
         return false;
      }
   }

   private boolean binarySearch(String target, int from, int to)
   {

      if (from == to - 1)
      {
         return elements[from].equals(target);
      }

      int middle = (to - from) / 2 + from;

      if (elements[from].equals(target))
      {
         return true;
      }

      if (elements[middle].compareTo(target) > 0)
      {
         // search left
         return binarySearch(target, from, middle);
      }
      else
      {
         // search right
         return binarySearch(target, middle, to);
      }
   }

   public void add(String nextElement)
   {
      if (top < elements.length)
      {
         elements[top++] = nextElement;
         System.out.println("[" + top + "] " + nextElement);
         sort();
      }
      else
      {
         String[] newArray = new String[elements.length * 2];
         for (int i = 0; i < elements.length; i++)
         {
            newArray[i] = elements[i];
         }
         elements = newArray;
         add(nextElement);
      }
   }

   private void sort()
   {
      int index = 0;

      while (index < top - 1)
      {
         if (elements[index].compareTo(elements[index + 1]) < 0)
         {
            index++;
         }
         else
         {
            String temp = elements[index];
            elements[index] = elements[index + 1];
            elements[index + 1] = temp;
            if (index > 0)
            {
               index--;
            }
         }
      }
   }

   public int size()
   {
      return top;
   }

   public String getSortedWords()
   {
      String w = "";
      for (int i = 0; i < elements.length; i++)
      {
         w += elements[i] + ", ";
      }

      return w;
   }

   public int getNumberOfDistinctWords()
   {
      return top;
   }
}

Reader Class now looks like this:

    package main2;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.concurrent.Semaphore;

public class Reader extends Thread
{
   private static final int whitespace = 45;
   private static final int word = 48;
   private static final int finished = -1;
   private WordContainer wordContainer;
   private Semaphore s;
   private Semaphore s2;
   private String[] wordsR;
   private int state;
   private BufferedReader reader;
   private int nextFreeIndex;

   public Reader(File words, WordContainer wordContainer, Semaphore s,
         Semaphore s2)
   {
      state = whitespace;
      try
      {
         reader = new BufferedReader(new FileReader(words));
      }
      catch (FileNotFoundException e)
      {
         e.printStackTrace();
      }
      nextFreeIndex = 0;
      wordsR = new String[1];
      this.wordContainer = wordContainer;
      this.s = s;
      this.s2 = s;
   }

   public void startReading()
   {
      start();
   }

   public void run()
   {
      String nextWord = readNext();
      while (nextWord != null)
      {
         nextWord = readNext();
         wordContainer.put(nextWord);

         s.release();
         try
         {
            s2.acquire();
         }
         catch (InterruptedException e)
         {
            e.printStackTrace();
         }
      }
   }

   public String readNext()
   {
      int next;
      StringBuffer nextWord = new StringBuffer();

      while (true)
      {
         try
         {
            next = reader.read();
         }
         catch (IOException e)
         {
            next = -1;
         }

         char nextChar = (char) next;

         switch (state)
         {
            case whitespace:
               if (isWhiteSpace(nextChar))
               {
                  state = whitespace;
               }
               else if (next == -1)
               {
                  state = finished;
               }
               else
               {
                  nextWord.append(nextChar);
                  state = word;
               }
               break;
            case word:
               if (isWhiteSpace(nextChar))
               {
                  state = whitespace;
                  return nextWord.toString();
               }
               else if (next == -1)
               {
                  state = finished;
                  return nextWord.toString();
               }
               else
               {
                  nextWord.append(nextChar);
                  state = word;
               }
               break;
            case finished:
               return null;
         }
      }
   }

   private boolean isWhiteSpace(char nextChar)
   {

      switch (nextChar)
      {
         case '-':
         case '"':
         case ':':
         case '\'':
         case ')':
         case '(':
         case '!':
         case ']':
         case '?':
         case '.':
         case ',':
         case ';':
         case '[':
         case ' ':
         case '\t':
         case '\n':
         case '\r':
            return true;
      }
      return false;
   }

   public void close()
   {
      try
      {
         reader.close();
      }
      catch (IOException e)
      {
      }
   }

   public String getWords()
   {
      return wordContainer.take();
   }
}

Test Class

package test;

import java.io.File;
import java.io.IOException;
import java.util.concurrent.Semaphore;

import main2.Reader;
import main2.Sorter;
import main2.WordContainer;

import junit.framework.Assert;
import junit.framework.TestCase;

public class TestDistinctWordsWithThreads extends TestCase
{
   public void test() throws IOException, InterruptedException
   {
      File words = new File("resources" + File.separator + "AV1611Bible.txt");

      if (!words.exists())
      {
         System.out.println("File [" + words.getAbsolutePath()
               + "] does not exist");
         Assert.fail();
      }

      WordContainer container = new WordContainer();

      Semaphore s = new Semaphore(0);
      Semaphore s2 = new Semaphore(0);

      Reader reader = new Reader(words, container, s, s2);
      Sorter sorter = new Sorter(container, s, s2);

      reader.startReading();
      sorter.startSorting();

      reader.join();
      sorter.join();

      System.out.println(reader.getWords());
      Assert.assertTrue(sorter.getNumberOfDistinctWords() == 14720);

      /*
       * String bible = reader.getWords(); System.out.println(bible); String[]
       * bible2 = sorter.getSortedWords(); System.out.println(bible2);
       * assertTrue(bible2.length < bible.length());
       */
   }
}

解决方案

Why don't you sinply try something like:

public int countWords(File file) {
    Scanner sc = new Scanner(file);
    Set<String> allWords = new HashSet<String>();
    while(sc.hasNext()) {
        allWords.add(sc.next());
    }
    return allWords.size();
}

这篇关于用线程计数不同的单词的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆