Added a check and a function to handle UTF-8 article strings
This commit is contained in:
parent
72dae5c75b
commit
7a124e16ca
9
HISTORY
9
HISTORY
@ -1,6 +1,11 @@
|
||||
0.1 - 2014-07-28
|
||||
First release of wie
|
||||
0.3 - 2014-08-04
|
||||
Added a check to see whether the article string is in
|
||||
UTF-8 or not. If it is in UTF-8, use a specific function
|
||||
to handle it. Regular 'ucwords' can't handle UTF-8.
|
||||
|
||||
0.2 - 2014-07-31
|
||||
Changed 'shell_exec(curl -s $URL)' to PHP5 cURL function
|
||||
Added HISTORY file
|
||||
|
||||
0.1 - 2014-07-28
|
||||
First release of wie
|
||||
|
38
wie.php
38
wie.php
@ -20,14 +20,9 @@
|
||||
|
||||
$defaultLang = "en"; // default language
|
||||
$progName = $argv[0];
|
||||
/**
 * Show the command-line help for wie.
 *
 * The script name and the default language are taken from the global
 * variables $progName and $defaultLang.
 */
function usage()
{
    echo "Wikipedia ingress extractor (wie), version 0.2\n",
         "Usage: $GLOBALS[progName] [--lang=sv] article\n",
         "Default language if none specified is $GLOBALS[defaultLang].\n",
         "Remember to quote the article if there's more than one word,\n",
         "for example Roger Bacon as 'Roger Bacon'.\n";
}
|
||||
|
||||
|
||||
|
||||
// check if no argument was specified
|
||||
if (!isset($argv[1]))
|
||||
{
|
||||
@ -55,7 +50,16 @@ else
|
||||
$article = $argv[1];
|
||||
}
|
||||
|
||||
$article = ucwords($article); // uppercase article
|
||||
// regular ucwords() won't work on UTF-8 text, so a string that validates as
// UTF-8 goes through utf8_ucwords(); anything else keeps using ucwords()
$article = mb_check_encoding($article, 'UTF-8')
    ? utf8_ucwords($article)
    : ucwords($article); // uppercase article
|
||||
|
||||
$article = preg_replace("/\s/", "_" ,$article); // make spaces to underscore
|
||||
$url = "http://$lang.wikipedia.org/wiki/$article";
|
||||
|
||||
@ -78,4 +82,20 @@ if (!isset($match[1]))
|
||||
$string = strip_tags($match[1]);
|
||||
print (wordwrap($string, 65, "\n") . "\n");
|
||||
|
||||
// misc functions
|
||||
/**
 * Print the command-line help text for wie.
 *
 * Reads the script name and the default language from the global
 * variables $progName and $defaultLang. Returns nothing.
 */
function usage()
{
    // keep this number in step with the newest entry in the HISTORY file
    print "Wikipedia ingress extractor (wie), version 0.3\n";
    print "Usage: $GLOBALS[progName] [--lang=sv] article\n";
    print "Default language if none specified is $GLOBALS[defaultLang].\n";
    print "Remember to quote the article if there's more than one word,\n";
    print "for example Roger Bacon as 'Roger Bacon'.\n";
}
|
||||
|
||||
/**
 * UTF-8 aware counterpart of ucwords().
 *
 * Capitalises the first character of every word and leaves all other
 * characters exactly as given, so names such as "NASA" or "McDonald" keep
 * their inner capitals. A word starts at the beginning of the string or
 * after a space, tab, carriage return, newline, form feed or vertical tab
 * (the default delimiters of ucwords()).
 *
 * @param string $str UTF-8 encoded text
 * @return string $str with the first character of each word capitalised;
 *                $str unchanged if it could not be processed
 */
function utf8_ucwords($str)
{
    $delimiters = ' \t\r\n\f\x0B';

    // one non-delimiter character that is not preceded by another
    // non-delimiter, i.e. the first character of each word
    $firstOfWord = '/(?<![^' . $delimiters . '])[^' . $delimiters . ']/u';

    $result = preg_replace_callback(
        $firstOfWord,
        function ($match) {
            // convert only this single character, never the rest of the word
            return mb_convert_case($match[0], MB_CASE_TITLE, "UTF-8");
        },
        $str
    );

    // preg_replace_callback() returns null on error (e.g. malformed UTF-8)
    return $result === null ? $str : $result;
}
|
||||
|
||||
?>
|
||||
|
Loading…
x
Reference in New Issue
Block a user