3 Commits
v0.1 ... v0.3

Author SHA1 Message Date
1a443709b3 Added comment to clarify 2014-08-04 14:10:02 +02:00
7a124e16ca Added a check and a function to handle UTF-8 article strings 2014-08-04 14:05:03 +02:00
72dae5c75b Rewrote shell_exec(curl...) to PHP5 cURL function
* Added HISTORY file
* Bumped to version 0.2
* Updated README with PHP5 cURL module
2014-07-31 13:37:07 +02:00
3 changed files with 51 additions and 10 deletions

11
HISTORY Normal file
View File

@@ -0,0 +1,11 @@
0.3 - 2014-08-04
Added a check to see whether the article string is in
UTF-8 or not. If it is in UTF-8, use a specific function
to handle it. Regular 'ucwords' can't handle UTF-8.
0.2 - 2014-07-31
Changed 'shell_exec(curl -s $URL)' to PHP5 cURL function
Added HISTORY file
0.1 - 2014-07-28
First release of wie

View File

@@ -13,6 +13,9 @@ Or run it with 'php wie.php'. To display the help, type:
./wie.php --help
## Requirements ##
The script requires PHP5 and the PHP5 cURL module (php5-curl on Debian systems).
## Thanks ##
Many thanks go to flinga, who came up with the idea for this script. Please see
the THANKS file for more information.

47
wie.php
View File

@@ -20,14 +20,9 @@
$defaultLang = "en"; // default language
$progName = $argv[0];
/**
 * Print the help text for the script.
 *
 * The program name and the default language are read from the global
 * variables $progName and $defaultLang.
 */
function usage()
{
    echo "Wikipedia ingress extractor (wie), version 0.1\n",
        "Usage: $GLOBALS[progName] [--lang=sv] article\n",
        "Default language if none specified is $GLOBALS[defaultLang].\n",
        "Remember to quote the article if there's more than one word,\n",
        "for example Roger Bacon as 'Roger Bacon'.\n";
}
// check if no argument was specified
if (!isset($argv[1]))
{
@@ -55,10 +50,26 @@ else
$article = $argv[1];
}
$article = ucwords($article); // uppercase article
// check if article is UTF-8 encoded, in which case regular ucwords won't work
// (ucwords operates on single bytes, so multibyte letters are not uppercased)
// note: plain ASCII input is also valid UTF-8 and therefore takes this branch too
if (mb_check_encoding($article, 'UTF-8'))
{
$article = utf8_ucwords($article); // uppercase article in UTF-8
}
else
{
// input is not valid UTF-8: fall back to the byte-based ucwords
$article = ucwords($article); // uppercase article
}
$article = preg_replace("/\s/", "_" ,$article); // make spaces to underscore
$url = "http://$lang.wikipedia.org/wiki/$article";
$data = shell_exec("curl -s $url"); // retrive the page
// get the wiki page
$ch = curl_init("$url"); // start a cURL session for the article URL
curl_setopt($ch, CURLOPT_HEADER, 0); // keep the HTTP response headers out of the output
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // have curl_exec() return the response instead of printing it
$data = curl_exec($ch); // response body as a string, or false if the transfer failed
curl_close($ch);
preg_match("/\<p\>(.*)\<\/p\>/", $data, $match); // fetch text inside first <p>
// check if we had a match
@@ -71,4 +82,20 @@ if (!isset($match[1]))
$string = strip_tags($match[1]);
print (wordwrap($string, 65, "\n") . "\n");
// misc functions
/**
 * Print the help text for the script.
 *
 * The program name and the default language are read from the global
 * variables $progName and $defaultLang.
 */
function usage()
{
    echo "Wikipedia ingress extractor (wie), version 0.2\n",
        "Usage: $GLOBALS[progName] [--lang=sv] article\n",
        "Default language if none specified is $GLOBALS[defaultLang].\n",
        "Remember to quote the article if there's more than one word,\n",
        "for example Roger Bacon as 'Roger Bacon'.\n";
}
/**
 * Multibyte-aware counterpart of ucwords() for UTF-8 text.
 *
 * Delegates to mb_convert_case() in MB_CASE_TITLE mode, so the first
 * letter of every word is uppercased even when it is a multibyte
 * character. Title-case conversion also lowercases the remaining
 * letters of each word, which plain ucwords() does not do.
 *
 * @param string $str UTF-8 encoded text
 * @return string the title-cased text
 */
function utf8_ucwords($str)
{
    return mb_convert_case($str, MB_CASE_TITLE, "UTF-8");
}
?>