(Search Web) Modify Listing 1, WebCrawler.java, to search for the phrase "Computer Programming" starting from the URL http://cs.armstrong.edu/liang. Your program should terminate once the phrase is found and display the URL of the page that contains it.
Listing 1 WebCrawler.java
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Queue;
import java.util.Scanner;
import java.util.Set;

/**
 * A minimal breadth-first web crawler.
 *
 * <p>Starting from a URL read from standard input, it prints every page it
 * visits and follows the {@code http:} links found in each page's text.
 */
public class WebCrawler {
  /**
   * Crawling continues while the number of traversed URLs is less than or
   * equal to this value, so at most one more than this many pages are visited.
   */
  private static final int MAX_TRAVERSED_URLS = 100;

  /** Text that marks the start of a link; only plain {@code http:} links are recognized. */
  private static final String URL_PREFIX = "http:";

  /**
   * Prompts for a starting URL on standard input and crawls from it.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    Scanner input = new Scanner(System.in);
    System.out.print("Enter a URL: ");
    String url = input.nextLine();
    crawler(url); // Traverse the Web from the starting URL
  }

  /**
   * Traverses pages breadth-first from {@code startingURL}, printing
   * {@code "Crawl " + url} for each page the first time it is visited.
   *
   * <p>The traversal stops when no URLs are pending or once more than
   * {@link #MAX_TRAVERSED_URLS} distinct URLs have been traversed.
   *
   * @param startingURL the URL to start from
   * @throws IllegalArgumentException if {@code startingURL} is {@code null}
   */
  public static void crawler(String startingURL) {
    if (startingURL == null) {
      throw new IllegalArgumentException("startingURL must not be null");
    }

    // FIFO queue gives breadth-first order; a hash set makes the "already
    // visited?" check constant-time instead of a linear list scan.
    Queue<String> pendingUrls = new ArrayDeque<>();
    Set<String> traversedUrls = new HashSet<>();

    pendingUrls.add(startingURL);
    while (!pendingUrls.isEmpty() && traversedUrls.size() <= MAX_TRAVERSED_URLS) {
      String urlString = pendingUrls.remove();
      // add() returns false when the URL was already traversed, which
      // happens when the same link was queued more than once.
      if (!traversedUrls.add(urlString)) {
        continue;
      }
      System.out.println("Crawl " + urlString);

      for (String subUrl : getSubURLs(urlString)) {
        if (!traversedUrls.contains(subUrl)) {
          pendingUrls.add(subUrl);
        }
      }
    }
  }

  /**
   * Reads the page at {@code urlString} line by line and returns every
   * substring that starts with {@code http:} and runs up to (not including)
   * the next double quote on the same line.
   *
   * <p>Any failure to parse or open the URL is reported on standard output as
   * {@code "Error: " + message}; the links collected so far (possibly none)
   * are still returned.
   *
   * @param urlString the address of the page to scan
   * @return the links found, in order of appearance; never {@code null}
   */
  public static ArrayList<String> getSubURLs(String urlString) {
    ArrayList<String> list = new ArrayList<>();

    try {
      java.net.URL url = new java.net.URL(urlString);
      // try-with-resources closes the scanner and the underlying stream.
      try (Scanner input = new Scanner(url.openStream())) {
        while (input.hasNextLine()) {
          collectUrls(input.nextLine(), list);
        }
      }
    } catch (Exception ex) {
      // Best effort: a page that cannot be read must not abort the whole crawl.
      System.out.println("Error: " + ex.getMessage());
    }

    return list;
  }

  /** Appends to {@code urls} each quote-terminated {@code http:} link found in {@code line}. */
  private static void collectUrls(String line, ArrayList<String> urls) {
    int start = line.indexOf(URL_PREFIX);
    while (start >= 0) { // >= 0 so a link at the very start of the line is not missed
      int end = line.indexOf('"', start);
      if (end < 0) {
        break; // no closing quote: not a well-formed link, stop scanning this line
      }
      urls.add(line.substring(start, end));
      start = line.indexOf(URL_PREFIX, end);
    }
  }
}
We need at least 10 more requests to produce the solution.
0 / 10 have requested this problem solution
The more requests, the faster the answer.