C语言 strtok() 函数的实现
声明:本页面是StackOverFlow热门问题的中英对照翻译,遵循CC BY-SA 4.0协议,如果您需要使用它,必须同样遵循CC BY-SA许可,注明原文地址和作者信息,同时你必须将它归于原作者(不是我):StackOverFlow
原文地址: http://stackoverflow.com/questions/28931379/
Warning: these are provided under cc-by-sa 4.0 license. You are free to use/share it, But you must attribute it to the original authors (not me):
StackOverFlow
Implementation of strtok() function
提问by luminousmen
I need to write my function strtok. Below is my code. The problem is - I can't display result string. In code I use strcpy()and then display new array.
Is it possible to display string using just pointer *str?
我需要编写我的函数strtok。下面是我的代码。问题是 - 我无法显示结果字符串。在我使用的代码中strcpy(),然后显示新数组。是否可以仅使用指针显示字符串*str?
#include <stdio.h>
#include <string.h>
char* my_strtok(char* s, char* delm){
char W[100];
int i = 0, k = 0, j = 0;
char *ptr;
static char *Iterator;
ptr = s;
if (s == NULL){
s = Iterator;
}
while (s[i] != '#include <string.h>
static char *olds;
#undef strtok
#ifndef STRTOK
# define STRTOK strtok
#endif
/* Parse S into tokens separated by characters in DELIM.
If S is NULL, the last string strtok() was called with is
used. For example:
char s[] = "-abc-=-def";
x = strtok(s, "-"); // x = "abc"
x = strtok(NULL, "-="); // x = "def"
x = strtok(NULL, "="); // x = NULL
// s = "abc#include <string.h>
#undef strspn
#ifndef STRSPN
#define STRSPN strspn
#endif
/* Return the length of the maximum initial segment
of S which contains only characters in ACCEPT. */
size_t
STRSPN (const char *s, const char *accept)
{
const char *p;
const char *a;
size_t count = 0;
for (p = s; *p != '#include <string.h>
#undef strpbrk
#ifndef STRPBRK
#define STRPBRK strpbrk
#endif
/* Find the first occurrence in S of any character in ACCEPT. */
char *
STRPBRK (const char *s, const char *accept)
{
while (*s != '#include <sysdep.h>
.text
ENTRY (__rawmemchr)
movd %rsi, %xmm1
mov %rdi, %rcx
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
and , %rcx
pshufd #include <stdio.h>
#include <string.h>
#include <malloc.h>
char* my_strtok(char* s, char* delm)
{
static int currIndex = 0;
if(!s || !delm || s[currIndex] == '#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define DICT_LEN 256
int *create_delim_dict(char *delim)
{
int *d = (int*)malloc(sizeof(int)*DICT_LEN);
memset((void*)d, 0, sizeof(int)*DICT_LEN);
int i;
for(i=0; i< strlen(delim); i++) {
d[delim[i]] = 1;
}
return d;
}
char *my_strtok(char *str, char *delim)
{
static char *last, *to_free;
int *deli_dict = create_delim_dict(delim);
if(!deli_dict) {
return NULL;
}
if(str) {
last = (char*)malloc(strlen(str)+1);
if(!last) {
free(deli_dict);
}
to_free = last;
strcpy(last, str);
}
while(deli_dict[*last] && *last != 'char * strtok2(char *str, const char *delim)
{
static char *nxt; /* static variable used to advance the string to replace delimiters */
static int size; /* static variable used to count until the end of the string */
/* IMPORTANT: any advance to 'nxt' must be followed by a diminution of 'size', and vice verce */
int i; /* counter of delimiter(s) in string */
/* initialize the string when strtok2 is first calles and supplied with a valid string */
if(str != NULL)
{
nxt = str;
size = strlen(str);
}
/* if we havn't reached the end of the string, any other call to strtok2 with NULL will come here */
else if(size > 0)
{
nxt++; /* last run left nxt on a null terminator, so we have to advance it */
size--; /* any advancement must follow diminution of size */
str = nxt; /* string now points to the first char after the last delimiter */
}
/* if we reached the end of string, return NULL pointer */
else
{
str = NULL;
}
/* nxt is used to advance the string until a delimiter or a series of delimiters are encountered;
* it then stops on the last delimiter which has turned to NULL terminator
*/
while(*nxt)
{
i = strspn(nxt, delim);
while(i > 1) /* turns delimiters to NULL terminator (except the last one) */
{
*nxt = '#include<stdio.h>
#include<string.h>
#include<stdlib.h>
char *ret_nondelim(char *str1,char *str2);
char *my_strtok(char *str1,char *str2);
char *ret_noofbytes(char *str1,char *str2);
int main()
{
char str1[20];
char str2[20];
char *res;
printf("enter string one\n");
scanf("%s",str1);
printf("enter string two\n");
scanf("%s",str2);
res=my_strtok(str1,str2);
if(res)
printf("%s\n",res);
else
printf("returned NULL\n");
free(res);
res=my_strtok(NULL,str2);
if(res)
printf("%s\n",res);
else
printf("returned NULL\n");
free(res);
res=my_strtok(NULL,str2);
if(res)
printf("%s\n",res);
else
printf("returned NULL\n");
free(res);
}
char *my_strtok(char *str1,char *str2)
{
static char *str1_cpy;
static int i;
char *ptr;
int flag=0;
if(str1!=NULL)
{
str1_cpy=ret_nondelim(str1,str2); //get location of non delimiting byte
str1=ret_noofbytes(str1_cpy,str2); //scan and get location of delimitng byte
if((str1-str1_cpy)) //no of bytes = non delimiting location - delimting location
{
ptr=malloc(str1-str1_cpy+1); //malloc location and and store the string`enter code here`
int k;
for(k=0;k<(str1-str1_cpy);k++)
ptr[k]=str1_cpy[k];
ptr[k]='Here is the code which explains how strtok works in C.
#include <stdio.h>
#include <string.h>
#define SIZE_OF_STRING 50
#define SIZE_OF_DELIMITER 5
/* Make the temp_ptr as static, so it will hold the previous pointing address */
static char *temp_ptr = NULL;
char *string_token(char *str, char *delimiter)
{
char *final_ptr = NULL; `enter code here`
/* Flag has been defined as static to avoid the parent function loop
* runs infinitely after reaching end of string.
*/
static int flag = 0;
int i, j;
if (delimiter == NULL) {
return NULL;
}
/* If flag is 1, it will denote the end of the string */
if (flag == 1) {
return NULL;
}
/* The below condition is to avoid temp_ptr is getting assigned
* with NULL string from the parent function main. Without
* the below condition, NULL will be assigned to temp_ptr
* and final_ptr, so accessing these pointers will lead to
* segmentation fault.
*/
if (str != NULL) {
temp_ptr = str;
}
/* Before function call ends, temp_ptr should move to the next char,
* so we can't return the temp_ptr. Hence, we introduced a new pointer
* final_ptr which points to temp_ptr.
*/
final_ptr = temp_ptr;
printf("%s %d str: %s delimiter: %s length: %ld temp_ptr: %s strlen: %ld"
" final_ptr: %s \n",__func__, __LINE__, str, delimiter,
strlen(delimiter), temp_ptr, strlen(temp_ptr), final_ptr);
for (i = 0; i <= strlen(temp_ptr); i++)
{
for (j = 0; j < strlen(delimiter); j++) {
if (temp_ptr[i] == '##代码##') {
/* If the flag is not set, both the temp_ptr and flag_ptr
* will be holding string "Jasmine" which will make parent
* to call this function string_token infinitely.
*/
flag = 1;
return final_ptr;
}
if ((temp_ptr[i] == delimiter[j])) {
/* NULL character is assigned, so that final_ptr will return
* til NULL character. Here, final_ptr points to temp_ptr.
*/
temp_ptr[i] = '##代码##';
temp_ptr += i+1;
return final_ptr;
}
}
}
/* You will somehow end up here if for loop condition fails.
* If this function doesn't return any char pointer, it will
* lead to segmentation fault in the parent function.
*/
return NULL;
}
int main()
{
char str[SIZE_OF_STRING] = "shirley|Rose|Jasmine";
char *token = NULL;
char del[SIZE_OF_DELIMITER] = "|";
token = string_token(str, del);
while (token != NULL) {
printf("token %s\n", token);
token = string_token(NULL, del);
}
return 0;
}
https://shirleyanengineer.blogspot.com/2019/11/write-your-own-strtok-in-c.html
';
str1_cpy=str1; //save pointer for next iteration
return ptr; //return pointer
}
else
return NULL;
}
else
{
str1_cpy=ret_nondelim(str1_cpy,str2); //same as above but pass saved pointer
str1=ret_noofbytes(str1_cpy,str2);
if((str1-str1_cpy))
{
ptr=malloc(str1-str1_cpy+1);
int k;
for(k=0;k<(str1-str1_cpy);k++)
ptr[k]=str1_cpy[k];
ptr[k]='##代码##';
str1_cpy=str1; //save pointer for next iteration
return ptr;
}
else
return NULL;
}
}
char *ret_nondelim(char *str1,char *str2)
{
int flag=0;
for(;*(str1);)
{
for(int j=0;j<strlen(str2);j++)
{
if(*(str1)==*(str2+j)) //check if slected byte is non delimiting byte
{
str1++; //break if true go check next byte
break;
}
else
flag++; //shows that selected byte is none of the delimitng byte
}
if(flag==strlen(str2)) //(above comment)-so non delimiting byte found return that pointer to caller
break;
else
flag=0;
}
return str1;
}
char *ret_noofbytes(char *str1,char *str2)
{
int flag=0;
for(;*(str1);)
{
for(int k=0;k<strlen(str2);k++)
{
if(*(str2+k)==*(str1)) //check if selected bytes is delimiting byte
{
flag=1;
break; //break twice if true ie .flag==1
}
}
if(flag==1)
break;
else
str1++; //if not found go check next byte
}
return str1; //return the point where delimiting byte was found
}
';
nxt++;
size--;
i--;
} /* in the last delimiter we have to do something a */
if(1 == i) /* bit different we have to actually take nxt backwards */
{ /* one step, to break the while(*nxt) loop */
*nxt = '##代码##';
if(size > 1) /* if the delimiter is last char, don't do next lines */
{
nxt--;
size++; /* nxt is diminished so size increases */
}
}
nxt++; /* if no delimiter is found, advance nxt */
size--; /* in case of the last delimiter in a series, we took nxt */
} /* a step back, so now moving it a step forward means */
/* it rests on a NULL terminator */
return str;
}
') {
last++;
}
str = last;
if(*last == '##代码##') {
free(deli_dict);
free(to_free);
return NULL;
}
while (*last != '##代码##' && !deli_dict[*last]) {
last++;
}
*last = '##代码##';
last++;
free(deli_dict);
return str;
}
int main()
{
char * str = "- This, a sample string.";
char *del = " ,.-";
char *s = my_strtok(str, del);
while(s) {
printf("%s\n", s);
s = my_strtok(NULL, del);
}
return 0;
}
')
return NULL;
char *W = (char *)malloc(sizeof(char)*100);
int i = currIndex, k = 0, j = 0;
while (s[i] != '##代码##'){
j = 0;
while (delm[j] != '##代码##'){
if (s[i] != delm[j])
W[k] = s[i];
else goto It;
j++;
}
i++;
k++;
}
It:
W[i] = 0;
currIndex = i+1;
//Iterator = ++ptr;
return W;
}
int main(void)
{
char s[100] = "my name is khan";
char delm[] = " ";
//char newstr[100];
char *str = my_strtok(s, delm);
while(str){
printf("%s", str);
free(str);
str = my_strtok(s, delm);
}
return 0;
}
, %xmm1, %xmm1
cmp , %rcx
ja L(crosscache)
movdqu (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
/* Check if there is a match. */
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
add , %rdi
and $-16, %rdi
jmp L(loop_prolog)
.p2align 4
L(crosscache):
and , %rcx
and $-16, %rdi
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
sar %cl, %eax
test %eax, %eax
je L(unaligned_no_match)
/* Check which byte is a match. */
bsf %eax, %eax
add %rdi, %rax
add %rcx, %rax
ret
.p2align 4
L(unaligned_no_match):
add , %rdi
.p2align 4
L(loop_prolog):
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
movdqa 48(%rdi), %xmm4
pcmpeqb %xmm1, %xmm4
add , %rdi
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(matches0)
test ##代码##x3f, %rdi
jz L(align64_loop)
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
movdqa 48(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
add , %rdi
test %eax, %eax
jnz L(matches0)
and $-64, %rdi
.p2align 4
L(align64_loop):
movdqa (%rdi), %xmm0
movdqa 16(%rdi), %xmm2
movdqa 32(%rdi), %xmm3
movdqa 48(%rdi), %xmm4
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm1, %xmm2
pcmpeqb %xmm1, %xmm3
pcmpeqb %xmm1, %xmm4
pmaxub %xmm0, %xmm3
pmaxub %xmm2, %xmm4
pmaxub %xmm3, %xmm4
pmovmskb %xmm4, %eax
add , %rdi
test %eax, %eax
jz L(align64_loop)
sub , %rdi
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pcmpeqb 48(%rdi), %xmm1
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
pmovmskb %xmm1, %eax
bsf %eax, %eax
lea 48(%rdi, %rax), %rax
ret
.p2align 4
L(matches0):
bsf %eax, %eax
lea -16(%rax, %rdi), %rax
ret
.p2align 4
L(matches):
bsf %eax, %eax
add %rdi, %rax
ret
.p2align 4
L(matches16):
bsf %eax, %eax
lea 16(%rax, %rdi), %rax
ret
.p2align 4
L(matches32):
bsf %eax, %eax
lea 32(%rax, %rdi), %rax
ret
.p2align 4
L(return_null):
xor %rax, %rax
ret
END (__rawmemchr)
weak_alias (__rawmemchr, rawmemchr)
libc_hidden_builtin_def (__rawmemchr)
')
{
const char *a = accept;
while (*a != '##代码##')
if (*a++ == *s)
return (char *) s;
++s;
}
return NULL;
}
libc_hidden_builtin_def (strpbrk)
'; ++p)
{
for (a = accept; *a != '##代码##'; ++a)
if (*p == *a)
break;
if (*a == '##代码##')
return count;
else
++count;
}
return count;
}
libc_hidden_builtin_def (strspn)
=-def##代码##"
*/
char *
STRTOK (char *s, const char *delim)
{
char *token;
if (s == NULL)
s = olds;
/* Scan leading delimiters. */
s += strspn (s, delim);
if (*s == '##代码##')
{
olds = s;
return NULL;
}
/* Find the end of the token. */
token = s;
s = strpbrk (token, delim);
if (s == NULL)
/* This token finishes the string. */
olds = __rawmemchr (token, '##代码##');
else
{
/* Terminate the token and make OLDS point past it. */
*s = '##代码##';
olds = s + 1;
}
return token;
}
'){
j = 0;
while (delm[j] != '##代码##'){
if (s[i] != delm[j])
W[k] = s[i];
else goto It;
j++;
}
ptr++;
i++;
k++;
}
It:
W[i] = 0;
Iterator = ++ptr;
return W;
}
int main(void){
char s[100];
char delm[] = " ";
gets(s);
char newstr[100];
char *str = my_strtok(s, delm);
strcpy(newstr, str);
printf("%s", newstr);
return 0;
}
回答by Enzo Ferber
Bounty Disclaimer
赏金免责声明
Bounty: "Looking for an answer drawing from credible and/or official sources."
赏金:“从可靠和/或官方来源寻找答案。”
I think GNU GLibCconstitutes a credible and officialsource, right? You can download GLibC 2.22 hereas .tar.gz(25mb).
我认为GNU GLibC是一个可信的官方来源,对吧?您可以下载的glibc 2.22这里为.tar.gz(25MB)。
After you extract the sources:
提取来源后:
string/strtok.c
string/strtok.c
string/strspn.cstring/strspn.c##代码##
string/strpbrk.cstring/strpbrk.c##代码##
sysdeps/x86_64/rawmemchr.Ssysdeps/x86_64/rawmemchr.S##代码##
回答by coolVick
Internal implementation of strtok has already been discussed here:
strtok 的内部实现已经在这里讨论过:
How does strtok() split the string into tokens in C?
In your type of implementation ( calling it your type because it is quite different from the actual one), you have not allocated any memory dynamically to the local variable 'W'. So when you return it, it is not guaranteed that you will receive the string correctly because that memory which was locally allocated to 'W' is not reserved for it anymore in the calling function. Apart from this, there are many unnecessary variables used in the code. And also, it needs proper re-structuring. For your better understanding I have modified your code(keeping it in your style as far as possible) to do the required job:
在您的实现类型中(称其为您的类型,因为它与实际的类型完全不同),您没有为局部变量“W”动态分配任何内存。因此,当您返回它时,不能保证您会正确接收字符串,因为在调用函数中不再为其保留本地分配给 'W' 的内存。除此之外,代码中使用了许多不必要的变量。而且,它需要适当的重组。为了您更好地理解,我修改了您的代码(尽可能保持您的风格)以完成所需的工作:
##代码##回答by Kohn1001
Actually this one is my solution to the general case... when str is char* you can't write to it.
实际上,这是我对一般情况的解决方案……当 str 是 char* 时,您无法写入它。
Here it is:
这里是:
And here is the link to githubresource:
##代码##回答by David Refaeli
回答by Amar Nagendra
Here is an implementation increase the input buffers as per your needs and increase the no of calls to strtok with str1 as NULL to get back more tokens
这是一个实现,根据您的需要增加输入缓冲区,并增加对 strtok 的调用次数,将 str1 设置为 NULL 以获取更多令牌
##代码##
