Added comment to clarify

Added a check and a function to handle UTF-8 article strings
Rewrote shell_exec(curl...) to PHP5 cURL function
2014-08-04 14:10:02 +02:00 · 2014-08-04 14:05:03 +02:00 · 2014-07-31 13:37:07 +02:00
3 changed files with 51 additions and 10 deletions
--- a/11
+++ b/11
@@ -0,0 +1,11 @@
+0.3 - 2014-08-04
+    Added a check to see whether the article string is in
+    UTF-8 or not. If it is in UTF-8, use a specific function
+    to handle it. Regular 'ucwords' can't handle UTF-8.
+
+0.2 - 2014-07-31
+    Changed 'shell_exec(curl -s $URL)' to PHP5 cURL function
+    Added HISTORY file
+
+0.1 - 2014-07-28
+    First release of wie
--- a/README.md
+++ b/README.md
@@ -13,6 +13,9 @@ Or run it with 'php wie.php'. To display the help, type:

    ./wie.php --help

+## Requirements ##
+The script requires PHP5 and the PHP5 cURL module (php5-curl on Debian systems).
+
 ## Thanks ##
 Many thanks go to flinga, who came up with the idea for this script; please see
 the THANKS file for more information.
--- a/wie.php
+++ b/wie.php
@@ -20,14 +20,9 @@

 $defaultLang = "en"; // default language
 $progName = $argv[0];
-function usage()
-{
-    print "Wikipedia ingress extractor (wie), version 0.1\n";
-    print "Usage: $GLOBALS[progName] [--lang=sv] article\n";
-    print "Default language if none specified is $GLOBALS[defaultLang].\n";
-    print "Remember to quote the article if there's more than one word,\n";
-    print "for example Roger Bacon as 'Roger Bacon'.\n";
-}
+
+
+
 // check if no argument was specified
 if (!isset($argv[1]))
 {
@@ -55,10 +50,26 @@ else
    $article = $argv[1];
 }

-$article = ucwords($article); // uppercase article
+// check if article is UTF-8 encoded, in which case regular ucwords won't work
+if (mb_check_encoding($article, 'UTF-8'))
+{
+    $article = utf8_ucwords($article); // uppercase article in UTF-8
+}
+else
+{
+    $article = ucwords($article); // uppercase article
+}
+
 $article = preg_replace("/\s/", "_" ,$article); // make spaces to underscore
 $url = "http://$lang.wikipedia.org/wiki/$article"; 
-$data = shell_exec("curl -s $url"); // retrive the page
+
+// get the wiki page
+$ch = curl_init("$url");
+curl_setopt($ch, CURLOPT_HEADER, 0);
+curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+$data = curl_exec($ch);
+curl_close($ch);
+
 preg_match("/\<p\>(.*)\<\/p\>/", $data, $match); // fetch text inside first <p>

 // check if we had a match
@@ -71,4 +82,20 @@ if (!isset($match[1]))
 $string = strip_tags($match[1]); 
 print (wordwrap($string, 65, "\n") . "\n"); 

+// misc functions
+function usage()
+{
+    print "Wikipedia ingress extractor (wie), version 0.2\n";
+    print "Usage: $GLOBALS[progName] [--lang=sv] article\n";
+    print "Default language if none specified is $GLOBALS[defaultLang].\n";
+    print "Remember to quote the article if there's more than one word,\n";
+    print "for example Roger Bacon as 'Roger Bacon'.\n";
+}
+
+function utf8_ucwords($str)
+{
+    $str = mb_convert_case($str, MB_CASE_TITLE, "UTF-8");
+    return $str;    
+}
+
 ?>
Author	SHA1	Message	Date
Jack-Benny Persson	1a443709b3	Added comment to clarify	2014-08-04 14:10:02 +02:00
Jack-Benny Persson	7a124e16ca	Added a check and a function to handle UTF-8 article strings	2014-08-04 14:05:03 +02:00
Jack-Benny Persson	72dae5c75b	Rewrote shell_exec(curl...) to PHP5 cURL function * Added HISTORY file * Bumped to version 0.2 * Updated README with PHP5 cURL module	2014-07-31 13:37:07 +02:00