Я конвертирую CSV-файл, содержащий только один столбец, в массив PHP. В этом столбце есть неанглийские символы, например арабские: английские символы PHP читает правильно, а арабские заменяются какими-то цифрами и знаками. Вот пример моего кода:
<?php
/**
 * Splits one CSV record into its fields.
 *
 * Fields are separated by commas. A field may be wrapped in double quotes,
 * in which case it may contain commas and line breaks, and a doubled quote
 * ("") stands for one literal quote. The record terminator (LF, CR or CRLF)
 * at the end of $row is not part of the last field.
 *
 * The parsing is byte-oriented on purpose: the only characters it looks for
 * (comma, double quote, CR, LF) are ASCII, so UTF-8 text such as Arabic
 * passes through unchanged.
 *
 * @param string|false|null $row Raw record text, e.g. the result of fgets().
 * @return array|false List of field strings; an empty array for an empty
 *                     record; false when a quoted field is not closed yet
 *                     (the caller should append the next line and retry).
 */
function str_to_csv( $row )
{
    // fgets() yields false at end of file; treat that like an empty record.
    if ($row === null || $row === false) {
        return array();
    }

    // Drop the record terminator so it neither sticks to the last field nor
    // prevents a quoted last field from being recognised (CRLF files).
    $row = rtrim((string) $row, "\r\n");
    if ($row === '') {
        return array();
    }

    $fields = array();
    $pieces = explode(',', $row);

    while (count($pieces) > 0) {
        $field = array_shift($pieces);

        // An odd number of quotes means the comma we split on was inside a
        // quoted field: glue pieces back together until the quotes balance.
        while (substr_count($field, '"') % 2 !== 0) {
            if (count($pieces) === 0) {
                return false;
            }
            $field .= ',' . array_shift($pieces);
        }

        // Strip the surrounding quotes. "s" lets the content span several
        // lines, ".*" accepts an empty quoted field, "D" anchors $ at the very
        // end. No "u" modifier: it would reject input that is not valid UTF-8.
        $match = null;
        if (preg_match('/^"(.*)"$/Ds', $field, $match)) {
            $field = $match[1];
        }

        $fields[] = str_replace('""', '"', $field);
    }

    return $fields;
}

/**
 * Reads the next complete CSV record from an open file handle.
 *
 * Keeps appending physical lines while str_to_csv() reports an unclosed
 * quoted field, so a record may span several lines.
 *
 * @param resource $f Handle opened for reading.
 * @return array|false Fields of the record (empty array for a blank line or
 *                     at end of file); false when the input ends, or cannot
 *                     be read, in the middle of a quoted field.
 */
function file_getcsv( $f )
{
    $line = fgets($f);

    while (($fields = str_to_csv($line)) === false) {
        $more = fgets($f);
        if ($more === false) {
            // End of file or read error inside a quoted field.
            return false;
        }
        // fgets() keeps the line terminator, so the pieces are joined as-is.
        $line .= $more;
    }

    return $fields;
}

/**
 * Loads a whole CSV file into an array of records.
 *
 * Blank lines are skipped.
 *
 * @param string $filename Path of the CSV file.
 * @return array|false List of records, each a list of field strings; false
 *                     when the file cannot be opened or contains an
 *                     unterminated quoted field.
 */
function file_to_csv( $filename )
{
    // Lets fgets() recognise CR-only line endings as well.
    ini_set("auto_detect_line_endings", true);

    $f = fopen($filename, 'r');
    if ($f === false) {
        return false;
    }

    $records = array();
    while (!feof($f)) {
        $rec = file_getcsv($f);
        if ($rec === false) {
            // Do not leak the handle on the failure path.
            fclose($f);
            return false;
        }
        if (!empty($rec)) {
            $records[] = $rec;
        }
    }

    fclose($f);
    return $records;
}
// Load the club list; file_to_csv() returns false when a record cannot be parsed.
$data = file_to_csv('club3.csv');
if ($data === false) {
    echo 'Could not read or parse club3.csv';
} else {
    // Escape the dump so CSV text containing <, > or & is shown literally
    // instead of being interpreted as markup, and close the <pre> element.
    echo '<pre>' . htmlspecialchars(print_r($data, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '</pre>';
}
?>
Вот образец моего файла Excel; в нём только один столбец:
....
Royal Kings
Mere Cats
Spin Doctors
رأس العين
....
Когда я делаю var_dump, массив выглядит так:
...
Royal Kings
)
[32935] => Array
(
[0] =>
Mere Cats
)
[32936] => Array
(
[0] =>
Spin Doctors
)
[32937] => Array
(
[0] =>
1#3 'D9JF
)
...
С арабским языком есть серьёзная проблема.
Она заключается не только в чтении букв: в арабском языке у каждой буквы есть несколько форм — в начале, в середине и в конце слова.
Пример:
ب ، ـبـ ، ـب ، ـب
Чтобы это исправить, можно использовать Unicode.
Используйте эту функцию, чтобы решить все эти проблемы.
Примечание: автор этой функции — Abd AL-Latif.
смотрите на github -> https://goo.gl/m8pkGx
функция :
/**
 * Replaces the Arabic letters of $word by the contextual glyph variants
 * listed in $all_chars ('beginning', 'middle', 'end', 'isolated', plus the
 * lam-alef forms 'la_beg' / 'la_end') and returns the glyphs joined in
 * reverse order.
 *
 * The string is read through byte offsets, two bytes per letter
 * ($word[n].$word[n+1]), so it presumably expects UTF-8 input made only of
 * letters that occupy two bytes each — TODO confirm how spaces, ASCII or an
 * empty string should be handled; the code has no branch for them.
 *
 * @param string $word Text to convert.
 * @return string The selected glyphs, last one first.
 */
function fixArabicCharactersAndCreateProbablyString($word)
{
// Glyphs chosen so far, in reading order (reversed only on return).
$new_word = array();
// One entry per processed letter: 'normal' or 'not_normal'. The next letter
// takes a 'middle'/'end' variant after 'normal' and an 'isolated'/'beginning'
// variant after 'not_normal'.
$char_type = array();
// Letters that make the code mark the position as 'not_normal'
// (presumably the letters that do not join to the following letter).
$isolated_chars = array('ا', 'د', 'ذ', 'أ', 'آ', 'ر', 'ؤ', 'ء', 'ز', 'و', 'ى', 'ة', 'إ');
$alef = array('أ','ا','إ','آ');
$lam = array('ل');
// One entry per processed letter: true when a lam followed by an alef was
// emitted as a single lam-alef glyph. The alef itself is then skipped on the
// next step (see the `== false` checks below).
$al_char = array();
// Lookup table: source letter => glyph variant per position.
$all_chars = array
(
'ا' => array(
'la_beg' => 'ﻻ',
'la_end' => 'ﻼ',
'middle' => 'ﺎ',
'isolated' => 'ﺍ'
),
'إ' => array(
'la_beg' => 'ﻹ',
'la_end' => 'ﻺ',
'middle' => 'ﺈ',
'isolated' => 'ﺇ'
),
'ؤ' => array(
'middle' => 'ﺅ',
'isolated' => 'ﺆ'
),
'ء' => array(
'middle' => 'ﺀ',
'isolated' => 'ﺀ'
),
'أ' => array(
'la_beg' => 'ﻷ',
'la_end' => 'ﻸ',
'middle' => 'ﺄ',
'isolated' => 'ﺃ'
),
'آ' => array(
'la_beg' => 'ﻵ',
'la_end' => 'ﻶ',
'middle' => 'ﺂ',
'isolated' => 'ﺁ'
),
'ى' => array(
'middle' => 'ﻰ',
'isolated' => 'ﻯ'
),
'ب' => array(
'beginning' => 'ﺑ',
'middle' => 'ﺒ',
'end' => 'ﺐ',
'isolated' => 'ﺏ'
),
'ت' => array(
'beginning' => 'ﺗ',
'middle' => 'ﺘ',
'end' => 'ﺖ',
'isolated' => 'ﺕ'
),
'ث' => array(
'beginning' => 'ﺛ',
'middle' => 'ﺜ',
'end' => 'ﺚ',
'isolated' => 'ﺙ'
),
'ج' => array(
'beginning' => 'ﺟ',
'middle' => 'ﺠ',
'end' => 'ﺞ',
'isolated' => 'ﺝ'
),
'ح' => array(
'beginning' => 'ﺣ',
'middle' => 'ﺤ',
'end' => 'ﺢ',
'isolated' => 'ﺡ'
),
'خ' => array(
'beginning' => 'ﺧ',
'middle' => 'ﺨ',
'end' => 'ﺦ',
'isolated' => 'ﺥ'
),
'د' => array(
'middle' => 'ﺪ',
'isolated' => 'ﺩ'
),
'ذ' => array(
'middle' => 'ﺬ',
'isolated' => 'ﺫ'
),
'ر' => array(
'middle' => 'ﺮ',
'isolated' => 'ﺭ'
),
'ز' => array(
'middle' => 'ﺰ',
'isolated' => 'ﺯ'
),
'س' => array(
'beginning' => 'ﺳ',
'middle' => 'ﺴ',
'end' => 'ﺲ',
'isolated' => 'ﺱ'
),
'ش' => array(
'beginning' => 'ﺷ',
'middle' => 'ﺸ',
'end' => 'ﺶ',
'isolated' => 'ﺵ'
),
'ص' => array(
'beginning' => 'ﺻ',
'middle' => 'ﺼ',
'end' => 'ﺺ',
'isolated' => 'ﺹ'
),
'ض' => array(
'beginning' => 'ﺿ',
'middle' => 'ﻀ',
'end' => 'ﺾ',
'isolated' => 'ﺽ'
),
'ط' => array(
'beginning' => 'ﻃ',
'middle' => 'ﻄ',
'end' => 'ﻂ',
'isolated' => 'ﻁ'
),
'ظ' => array(
'beginning' => 'ﻇ',
'middle' => 'ﻈ',
'end' => 'ﻆ',
'isolated' => 'ﻅ'
),
'ع' => array(
'beginning' => 'ﻋ',
'middle' => 'ﻌ',
'end' => 'ﻊ',
'isolated' => 'ﻉ'
),
'غ' => array(
'beginning' => 'ﻏ',
'middle' => 'ﻐ',
'end' => 'ﻎ',
'isolated' => 'ﻍ'
),
'ف' => array(
'beginning' => 'ﻓ',
'middle' => 'ﻔ',
'end' => 'ﻒ',
'isolated' => 'ﻑ'
),
'ق' => array(
'beginning' => 'ﻗ',
'middle' => 'ﻘ',
'end' => 'ﻖ',
'isolated' => 'ﻕ'
),
'ك' => array(
'beginning' => 'ﻛ',
'middle' => 'ﻜ',
'end' => 'ﻚ',
'isolated' => 'ﻙ'
),
'ل' => array(
'beginning' => 'ﻟ',
'middle' => 'ﻠ',
'end' => 'ﻞ',
'isolated' => 'ﻝ'
),
'م' => array(
'beginning' => 'ﻣ',
'middle' => 'ﻤ',
'end' => 'ﻢ',
'isolated' => 'ﻡ'
),
'ن' => array(
'beginning' => 'ﻧ',
'middle' => 'ﻨ',
'end' => 'ﻦ',
'isolated' => 'ﻥ'
),
'ه' => array(
'beginning' => 'ﻫ',
'middle' => 'ﻬ',
'end' => 'ﻪ',
'isolated' => 'ﻩ'
),
'و' => array(
'middle' => 'ﻮ',
'isolated' => 'ﻭ'
),
'ي' => array(
'beginning' => 'ﻳ',
'middle' => 'ﻴ',
'end' => 'ﻲ',
'isolated' => 'ﻱ'
),
'ئ' => array(
'beginning' => 'ﺋ',
'middle' => 'ﺌ',
'end' => 'ﺊ',
'isolated' => 'ﺉ'
),
'ة' => array(
'middle' => 'ﺔ',
'isolated' => 'ﺓ'
)
);
// --- First letter (bytes 0-1): isolated form, lam-alef start form, or
// --- 'beginning' form.
if(in_array($word[0].$word[1], $isolated_chars))
{
$new_word[] = $all_chars[$word[0].$word[1]]['isolated'];
$char_type[] = 'not_normal';
$al_char[] = false;
}
else
{
if(in_array($word[0].$word[1], $lam) AND in_array($word[2].$word[3], $alef))
{
$new_word[] = $all_chars [$word[2].$word[3]]['la_beg'];
$char_type[] = 'not_normal';
$al_char[] = true;
}
else
{
$new_word[] = $all_chars[$word[0].$word[1]]['beginning'];
$char_type[] = 'normal';
$al_char[] = false;
}
}
// --- Second letter (bytes 2-3), only when the word is longer than 4 bytes.
if(strlen($word) > 4)
{
if($char_type[0] == 'not_normal')
{
if(in_array($word[2].$word[3], $isolated_chars))
{
// Nothing is emitted when the previous step already produced a lam-alef
// glyph that covers this letter.
if($al_char[count($al_char)-1] == false)
{
$new_word[] = $all_chars[$word[2].$word[3]]['isolated'];
$char_type[] = 'not_normal';
}
$al_char[] = false;
}
else
{
if(in_array($word[2].$word[3], $lam) AND in_array($word[4].$word[5], $alef))
{
$new_word[] = $all_chars[$word[4].$word[5]]['la_beg'];
$char_type[] = 'not_normal';
$al_char[] = true;
}
else
{
$new_word[] = $all_chars[$word[2].$word[3]]['beginning'];
$char_type[] = 'normal';
$al_char[] = false;
}
}
}
else
{
if(in_array($word[2].$word[3], $lam) AND in_array($word[4].$word[5], $alef))
{
$new_word[] = $all_chars[$word[4].$word[5]]['la_end'];
$char_type[] = 'not_normal';
$al_char[] = true;
}
else
{
$new_word[] = $all_chars[$word[2].$word[3]]['middle'];
if(in_array($word[2].$word[3], $isolated_chars))
{
$char_type[] = 'not_normal';
$al_char[] = false;
}
else
{
$char_type[] = 'normal';
$al_char[] = false;
}
}
}
// NOTE(review): this value is overwritten by the `$x=4` initialiser of the
// for loop below.
$x = 4;
}
else
{
// NOTE(review): also overwritten by the `$x=4` initialiser of the for loop
// below.
$x = 2;
}
// --- Letters from byte 4 up to (but excluding) byte strlen-4. $x advances
// --- one byte at a time; only even offsets do any work.
for($x=4;$x< (strlen($word)-4) ;$x++)
{
if($char_type[count($char_type)-1] == 'not_normal' AND $x %2 == 0)
{
if(in_array($word[$x].$word[$x+1], $isolated_chars))
{
if($al_char[count($al_char)-1] == false)
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['isolated'];
$char_type[] = 'not_normal';
}
$al_char[] = false;
}
elseif(in_array($word[$x].$word[$x+1], $lam) AND in_array($word[$x+2].$word[$x+3], $alef))
{
$new_word[] = $all_chars[$word[$x+2].$word[$x+3]]['la_beg'];
$char_type[] = 'not_normal';
$al_char[] = true;
}
else
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['beginning'];
$char_type[] = 'normal';
$al_char[] = false;
}
}
elseif($char_type[count($char_type)-1] == 'normal' AND $x %2 == 0)
{
if(in_array($word[$x].$word[$x+1], $isolated_chars))
{
if($al_char[count($al_char)-1] == false)
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['middle'];
$char_type[] = 'not_normal';
}
$al_char[] = false;
}
elseif(in_array($word[$x].$word[$x+1], $lam) AND in_array($word[$x+2].$word[$x+3], $alef))
{
$new_word[] = $all_chars[$word[$x+2].$word[$x+3]]['la_end'];
$char_type[] = 'not_normal';
$al_char[] = true;
}
else
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['middle'];
$char_type[] = 'normal';
$al_char[] = false;
}
}
}
// --- Letter at the offset where the loop stopped, only when the word is
// --- longer than 6 bytes. $x is advanced by 2 when one of the two branches
// --- runs.
if(strlen($word)>6)
{
if($char_type[count($char_type)-1] == 'not_normal')
{
if(in_array($word[$x].$word[$x+1], $isolated_chars))
{
if($al_char[count($al_char)-1] == false)
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['isolated'];
$char_type[] = 'not_normal';
}
$al_char[] = false;
}
else
{
// Special case: the word's last two bytes are a hamza ('ء').
if($word[strlen($word)-2].$word[strlen($word)-1] == 'ء')
{
if($al_char[count($al_char)-1] == true)
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['isolated'];
$char_type[] = 'normal';
}
$al_char[] = false;
}
elseif(in_array($word[$x].$word[$x+1], $lam) AND in_array($word[$x+2].$word[$x+3], $alef))
{
$new_word[] = $all_chars[$word[$x+2].$word[$x+3]]['la_end'];
$char_type[] = 'not_normal';
$al_char[] = true;
}
else
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['beginning'];
$char_type[] = 'normal';
$al_char[] = false;
}
}
$x += 2;
}
elseif($char_type[count($char_type)-1] == 'normal' AND $al_char[count($al_char)-1] == false)
{
if(in_array($word[$x].$word[$x+1], $isolated_chars))
{
if($al_char[count($al_char)-1] == false)
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['middle'];
$char_type[] = 'not_normal';
}
$al_char[] = false;
}
elseif(in_array($word[$x].$word[$x+1], $lam) AND in_array($word[$x+2].$word[$x+3], $alef))
{
$new_word[] = $all_chars[$word[$x+2].$word[$x+3]]['la_end'];
$char_type[] = 'not_normal';
$al_char[] = true;
}
else
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['middle'];
$char_type[] = 'normal';
$al_char[] = false;
}
$x += 2;
}}
// --- Letter at the current offset $x, treated as the last one: 'isolated'
// --- after a 'not_normal' letter, otherwise 'middle' (for $isolated_chars)
// --- or 'end'.
if($char_type[count($char_type)-1] == 'not_normal')
{
if(in_array($word[$x].$word[$x+1], $isolated_chars))
{
if($al_char[count($al_char)-1] == false)
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['isolated'];
}
}
else
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['isolated'];
}
}
else
{
if(in_array($word[$x].$word[$x+1], $isolated_chars))
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['middle'];
}
else
{
$new_word[] = $all_chars[$word[$x].$word[$x+1]]['end'];
}
}
// The glyphs are joined last-to-first (presumably for output that draws
// text left to right — confirm with the caller).
return implode('',array_reverse($new_word));
}
Использование функции:
// Sample input for the converter.
// NOTE(review): the function reads its argument two bytes per letter, so the
// spaces and the comma in this sentence may need to be split off first —
// confirm whether it should be called once per word.
$word = 'لا اله الا الله محمد رسول الله , اللهم لا علم لي الا ما علمتني انك انت العليم الحكيم';
// The original call had no statement terminator and discarded the result.
echo fixArabicCharactersAndCreateProbablyString($word);
Других решений пока нет …